(* Lifted map-reduce, loops over individuals and (all) subsets of individuals *)
(* This is a different approach, more in the spirit of HANSEI.
   It partitions the work between reify and map_reduce_sw differently.
   The end results are identical to those achieved in lifted_nested_mp.ml
   and lifted_tuples.ml.

   The present code extends lifted.ml to handle loops over sets of
   individuals. We add more constraints to our language: membership
   and cardinality constraints. The first part of this file is virtually
   identical to that of lifted.ml (some data types are more general though).
   Iterating over all subsets of a the domain is implemented near the end
   of this file.

   Although this code is not described in the paper, it should be easy
   to understand from the description in the paper. After all, it solves
   the same problem.

   The code to lift is a regular OCaml code, which may use loop variables
   using a restricted set of operations. At present, there are only two
   operation on loop variables: the equality comparison equ and
   the inequality lte (which is used for subset inclusion rather for
   numeric inequalities).


Future work: express the queries like the following:
  what is the product, over every individual x, of the number of subsets
  of the domain that contain x?
and of queries that express intersection (for example, count the number of
subsets of individuals with a non-empty intersection).
*)

open Delimcc;;
open Big_int;;

(*
#load "nums.cma";;
*)

(* The `Logic' (or loop) variable of type 'i *)
(* We use the value of the type `generation' to impose the total order on
   logic variables. The variable of the greatest generation has been
   created by the most recent reify. Of course that holds only
   if no loop variable escapes from the scope of the reify that created
   it. Well, if it does, the corresponding prompt will no longer
   be set so that any attempt to use such a variable will lead to
   failure.
*)
type generation = int
let new_generation = 
  let cnt = ref 0 in
  fun () ->
    incr cnt; !cnt
;;

(* The variable and the interface to the reifier, 
   which `runs' the lifted computation.
   The request is delivered to the closest reify and always concerns
   its own variable. Therefore, we don't need to mention that
   variable in the request.

   We also define available constrain operations
*)

type indiv = string			(* I'll generalize later *)
;;

type 'i var = Val of 'i | Var of generation * 'i req prompt
and  ('i,'r) ctx = 
   (* Equality of the loop variable with some other variable of value *)
 | C_eq of 'i var * (bool -> 'r)
 (* The loop variable contains the individual *)
 | C_in of indiv * (bool -> 'r) (* This should be replaced with 'i ivar
                                   to answer intersection-like queries *)
 | C_card of (int -> 'r)
and  'i req = Req of ('i,'i req) ctx
(* The result of reification: the lazy tree of `cases' terminated with
   the default case. Compare with the result of HANSEI reification, the
   lazy tree of weighted choices.
   It is a binary tree: the first branch corresponds to the case when
   the constraint 'i ctx holds; the second branch is the result when
   the constraint does not hold.
*)
and ('i,'r) switch = Default of 'r 
  | Case of ('i, ('i,'r) switch) ctx
;;


(* Polymorphic equality over loop variables *)

(*--start-equ--*)
let equ : 'i var -> 'i var -> bool = fun x y ->
  match (x,y) with
  | (Var (gx,px), Var (gy,py)) ->
      if gx > gy then
	shift px (fun k -> Req (C_eq (y,k)))
      else if gx < gy then
	shift py (fun k -> Req (C_eq (x,k)))
      else true
  | (Val x, Val y) -> x = y
  | (Var (_,px), y) | (y, Var (_,px)) ->
      shift px (fun k -> Req (C_eq (y,k)))
(*--end-equ--*)
;;

let rec member x = function
  | [] -> false
  | (h::t) -> equ x h || member x t
;;


(* The reifier *)
(* Unlike the reifier in the paper, it does not try to solve the
   constraints. It merely reifies the loop body to a tree.
   It is quite like reify0 in HANSEI: it is very dumb.
   All smarts are in map_reduce, in the function that uses the
   reified data structure.
*)

let reify : ('i var -> 'r) -> ('i,'r) switch = fun f ->
  let var_prompt = new_prompt () in
  let var = Var (new_generation (), var_prompt) in
  let presult = new_prompt () in
  let k_switch loop k r = push_prompt presult (fun () -> loop (k r)) in
  let rec loop = function 
    | Req C_eq (v,k) ->
	abort presult (Case (C_eq (v, k_switch loop k)))
    | Req C_in (v,k) ->
	abort presult (Case (C_in (v, k_switch loop k)))
    | Req C_card (k) ->
	abort presult (Case (C_card (k_switch loop k)))
  in 
  push_prompt presult (fun () ->
    loop (push_prompt var_prompt (fun () -> 
      abort presult (Default (f var)))))
;;
 

(* The map-reduce part *)

type 'i coll = {coll_member : 'i var -> bool;
                coll_cardinality : unit -> int};;


(* A sample collection used in all of the examples *)
(* The sample domain of people of cardinality 100.
   We distinguish three special people
*)
let people =
  {coll_member = (fun v -> true);  (* Assume this is the whole domain *)
   coll_cardinality = (fun () -> 100);
 };;


(* A commutative monoid *)
type 'r monoid = {union : 'r -> 'r -> 'r;
                  empty : 'r}

let sum_monoid = {union = (+); empty = 0}
;;

let rec repeat monoid r = function	
  | 0 -> monoid.empty
  | 1 -> r
  | n when n mod 2 = 0 -> 
      repeat monoid (monoid.union r r) (n/2)
  | n ->
      monoid.union r (repeat monoid r (n-1))
;;

(* Fold over the collection coll. We apply the map function represented
   by the switch, and reduce the result with the commutative monoid.
   This is quite like explore of HANSEI
*)
let map_reduce_sw monoid coll fsw =
  let rec loop coll = function
    | Default x -> repeat monoid x (coll.coll_cardinality ())
    | Case (C_eq (v,k)) -> 
	(match v with Var _ -> print_endline "case-var" 
	|  _ -> print_endline "case-val");
	if coll.coll_member v 
	then 
	  monoid.union 
	    (loop {coll_member = (fun x -> equ x v); 
		   coll_cardinality = (fun () -> 1)}
	       (k true))
	    (loop {coll_member = 
		   (fun x -> if equ x v then false else coll.coll_member x);
		   coll_cardinality = 
		   (fun () -> let c = coll.coll_cardinality () in
		              if c = 0 then 0 else pred c)}
	       (k false))
	 else
	  loop coll (k false)
  in loop coll fsw;;

let sum_over coll = map_reduce_sw sum_monoid coll;;

(* Simple tests, identical to the ones in lifted.ml *)

let 100 = sum_over people (reify (fun x -> 1));;

let 101 = sum_over people (reify (fun x -> 
  if equ x (Val "alice") then 2 else 1));;


let 103 = sum_over people (reify (fun x -> 
  if equ x (Val "alice") then 2 else 
  if equ x (Val "bob") then 3 else 1));;

let 203 = sum_over people (reify (fun x -> 
  if equ x (Val "alice") then 
    (if equ (Val "alice") x then 102 else -100)
  else if equ x (Val "bob") then 3 else 1));;

let 1 = sum_over people (reify (fun x -> 
  if equ x (Val "alice") then 
    (if equ (Val "bob") x then 102 else -100)
  else if equ x (Val "bob") then 3 else 1));;


let 10000 = 
  sum_over people (reify (fun x -> 
    sum_over people (reify (fun y -> 1))));;

let 9900 =
  sum_over people (reify (fun x -> 
    sum_over people (reify (fun y -> 
       if equ y (Val "alice") then 0 else 1))));;

let 9900 =
  sum_over people (reify (fun x -> 
    sum_over people (reify (fun y -> 
       if equ x (Val "alice") then 0 else 1))));;


let 9900 =
  sum_over people (reify (fun x -> 
    sum_over people (reify (fun y -> 
       if equ y x then 0 else 1))));;

let 9900 =
  sum_over people (reify (fun x -> 
    sum_over people (reify (fun y -> 
       if equ x y then 0 else 1))));;


let 29800 =  (* 99 * (99 + 200) + (99+100) *)
  sum_over people (reify (fun x -> 
    sum_over people (reify (fun y -> 
       if equ x y then 
	 (if equ x (Val "alice") then
	     100 else 200)
	 else 1))));;

let 29800 =  (* 99 * (99 + 200) + (99+100) *)
  sum_over people (reify (fun x -> 
    sum_over people (reify (fun y -> 
       if equ x y then 
	 (if equ x (Val "alice") then
	   (if equ y (Val "alice") then 100 else -100000)
	     else 200)
	 else 1))));;

(* Our running example in the paper *)
let 9703 =
  sum_over people (reify (fun x -> 
    sum_over people (reify (fun y -> 
      if equ x (Val "alice") ||
         equ y (Val "bob") ||
	 equ x y
      then 0 else 1))))
;;


let 990000 =      
  sum_over people (reify (fun x -> 
    sum_over people (reify (fun y -> 
      sum_over people (reify (fun z -> 
	if equ x z then 0 else 1))))));;

let 990000 =      
  sum_over people (reify (fun x -> 
    sum_over people (reify (fun y -> 
      sum_over people (reify (fun z -> 
	if equ z x then 0 else 1))))));;

let 990000 =      
  sum_over people (reify (fun x -> 
    sum_over people (reify (fun y -> 
      sum_over people (reify (fun z -> 
	if equ x y then 0 else 1))))));;

let 990000 =      
  sum_over people (reify (fun x -> 
    sum_over people (reify (fun y -> 
      sum_over people (reify (fun z -> 
	if equ y x then 0 else 1))))));;

let 999900 = (* 100 * (9900 + 99) *)
  sum_over people (reify (fun x -> 
    sum_over people (reify (fun y -> 
      sum_over people (reify (fun z -> 
	if equ y x && equ x z then 0 else 1))))));;


let 999900 = (* 100 * (9900 + 99) *)
  sum_over people (reify (fun x -> 
    sum_over people (reify (fun y -> 
      sum_over people (reify (fun z -> 
	if equ y x && equ x z && equ y z then 0 else 1))))));;

let 980100 = 
  sum_over people (reify (fun x -> 
    sum_over people (reify (fun y -> 
      sum_over people (reify (fun z -> 
	if equ y x || equ x z then 0 else 1))))));;

let 970200 = (* 100 * 99 * 98 *)
  sum_over people (reify (fun x -> 
    sum_over people (reify (fun y -> 
      sum_over people (reify (fun z -> 
	if equ x z || equ z y || equ x y then 0 else 1))))));;


(* Iterating over all subsets of a the domain *)

(* We represent a set of subsets of a domain in the following compact
   way, using two disjoint subsets S1 and S2 of the domain and the optional
   cardinality constraint ccard:

   The two sets S1 and S2 denote the following set of subsets:
   { S1 U s | s <- Powerset(S2), card(S1 U s) = ccard}

 If ccard is given, the cardinality is 
   choose (ccard - card(S1)) (card S2).
 Otherwise, is the cardinality of Powerset(S2), that is,
 2^Card(S2)
*)

let zero = zero_big_int
let one = big_int_of_int 1
and two = big_int_of_int 2
and three = big_int_of_int 3
;;

let union_big = add_big_int
and empty_big = zero_big_int;;


let rec repeat_big r n = mult_big_int r n;;

(*
let rec repeat_big r n = 
  if sign_big_int n = 0 then empty_big
  else if eq_big_int n unit_big_int then r
  else 
    let (q,rem) = quomod_big_int n (big_int_of_int 2) in
    let r2    = repeat_big (union_big r r) q in
    if sign_big_int rem = 0 then r2 else union_big r r2
;;
*)

type indiv_sets = 			(* A set of groups of individuals *)
   {indivs_s1 : indiv list;		(* Set S1, see description above *)
    indivs_s2 : indiv list;		(* Set S2 *)
    ccard     : int option;
   }
;;

(* A sample collection used in all of the examples *)
(* The sample domain of people of cardinality 100.
   We distinguish three special people
*)
let all_people = 
  let rec loop acc = function
    | 0 -> acc
    | n -> loop (("p" ^ string_of_int n) :: acc) (n-1)
  in loop ["alice"; "bob"; "carol"] 97
;;

(* The powerset of all_people, the domain of the power-set iteration *)
let all_people_groups =
   {indivs_s1 = []; indivs_s2 = all_people; ccard = None};;


(* Check if a collection s contains the element x
 
  Suppose we have a loop

  map_reduce_powerset coll (reify (fun s -> 
   ...
   map_reduce_sw S1 (reify (fun x ->
	if contains x s then ....
  )))

In the inner loop, x ranges over all elements of the collection
S1. Therefore, (contains x s) is true when x happens to be in the
intersection of S1 and s. At present, we will only consider
the examples where S1 is s, so x is always in the intersection, 
providing s is not empty (in the latter
case, there should be no loop iterations to start with).

   In the future, perhaps we need a special request: C_not_empty
*)

let contains x s = match (x,s) with
  | (Var _, Var _) -> true
  | (Val v, Var (g,p)) -> shift p (fun k -> Req (C_in (v,k)))
  | (_, Val _) -> 
      failwith "Memb in literal collections not needed so far"
;;

let get_card = function
  | Var (_,p) -> shift p (fun k -> Req (C_card k))
  | Val _ -> failwith "Don't handle literal collections; not needed so far"
;;


(* Turn a loop variable of the set type into a collection *)
let make_coll s =
  {coll_member = (fun x -> contains x s);
   coll_cardinality = (fun () -> get_card s)}
;;

(* Delete an element from a list. Not tail recursive: will optimize later *)
let rec del x = function [] -> []
  | (h::t) -> if x = h then t else h::del x t
;;

(* Pre-computed Pascal triangle (again, automatically sized) *)
type 'a llist = Cons of 'a * 'a llist Lazy.t;;

let rec lnth n (Cons (h,t)) = 
  if n = 0 then h else lnth (pred n) (Lazy.force t)
;;

let rec lmap f (Cons (h,t)) = 
  Cons (f h,lazy (lmap f (Lazy.force t)));;

let rec pascal = 
  Cons ([big_int_of_int 1;big_int_of_int 2;big_int_of_int 1],
	lazy 
	  (lmap (fun line -> 
	    List.map2 add_big_int 
	     (line @ [zero_big_int]) (zero_big_int::line)) pascal))
;;

let comb_choose n m = 
  if n = 0 then unit_big_int
  else if n = m then unit_big_int
  else List.nth (lnth (m-2) pascal) n;;

(*
 string_of_big_int (comb_choose 2 4);;
 6
 string_of_big_int (comb_choose 1 10);;
 10
 string_of_big_int (comb_choose 2 10);;
 45
*)


(* Obtain the cardinality of a set of sets of individuals *)
let get_group_card = function
  | {ccard = None} as coll -> 
      power_int_positive_int 2 (List.length coll.indivs_s2)
  | {ccard = Some n} as coll ->
      comb_choose (n - List.length coll.indivs_s1) (List.length coll.indivs_s2)
;;


(* Implicitly we use the sum monoid for big_int *)

let map_reduce_powerset group_coll fsw =
  let rec loop coll = function
    | Default x -> 
	Printf.printf "default %d %s\n" x 
	  (string_of_big_int (get_group_card coll));
	repeat_big (big_int_of_int x) (get_group_card coll)
    | Case (C_in (v,k)) -> 	(* Inclusion constraint for indiv v *)
	Printf.printf "inclusion of %s\n" v;
	if List.mem v coll.indivs_s1 then
	  loop coll (k true)		(* any subset definitely has v *)
	else if not (List.mem v coll.indivs_s2) then
	  loop coll (k false)		(* no subset can have v *)
	else if begin 			(* otherwise, card(S1) becomes > *)
	           match coll.ccard with (* ccard *)
	           | None -> false
		   | Some n -> n = List.length coll.indivs_s1
		end
	     then loop coll (k false)
        else
	  let coll = {coll with indivs_s2 = del v coll.indivs_s2} in
	  union_big 
	    (loop {coll with indivs_s1 = v :: coll.indivs_s1}
	       (k true))
	    (loop coll (k false))
    | Case (C_card k) ->		(* Find cardinality of the subset s *)
	begin match coll.ccard with
	| Some n -> loop coll (k n)
	| None   -> 
	    let nmax = List.length coll.indivs_s1 + 
		       List.length coll.indivs_s2 in
	    let rec iter acc i = 
	      if i > nmax then acc else
	      let coll = {coll with ccard = Some i} in
	      iter (union_big (loop coll (k i)) acc) (succ i)
	    in iter empty_big (List.length coll.indivs_s1)
	end
  in loop group_coll fsw;;

(* The number of all subsets of people that include Alice. It is 2^99 *)
let "633825300114114700748351602688" =
string_of_big_int 
    (map_reduce_powerset all_people_groups (reify (fun s ->
      if contains (Val "alice") s then 1 else 0)))
;;


let indic_monoid = {union = (fun x y -> if x > 0 || y > 0 then 1 else 0); 
                    empty = 0}
;;

(* The same as above *)
let "633825300114114700748351602688" =
string_of_big_int 
 (map_reduce_powerset all_people_groups (reify (fun s ->
   map_reduce_sw indic_monoid (make_coll s) (reify (fun x ->
     if equ x (Val "alice") then 1 else 0)))))
;;

(* The number of subsets that contain either Alice or Bob.
   This is 2^99 + 2^98
*)

let "950737950171172051122527404032" =
string_of_big_int 
 (map_reduce_powerset all_people_groups (reify (fun s ->
   map_reduce_sw indic_monoid (make_coll s) (reify (fun x ->
   map_reduce_sw indic_monoid (make_coll s) (reify (fun y ->
     if equ x (Val "alice") || equ y (Val "bob") then 1 else 0)))))))
;;

(* Find all subsets that contain two elements that are equal to each other.
   This is all subsets but the empty set.
   The result is 2^100 - 1
*)

let "1267650600228229401496703205375" =
string_of_big_int 
 (map_reduce_powerset all_people_groups (reify (fun s ->
   map_reduce_sw indic_monoid (make_coll s) (reify (fun x ->
   map_reduce_sw indic_monoid (make_coll s) (reify (fun y ->
     if equ x y then 1 else 0)))))))
;;

(* The simplified example in Sec 5.2 of the paper; see check.hs *)

let "3075003443503627470680627800440832" =
string_of_big_int 
 (map_reduce_powerset all_people_groups (reify (fun s ->
   map_reduce_sw sum_monoid (make_coll s) (reify (fun x ->
   map_reduce_sw sum_monoid (make_coll s) (reify (fun y ->
     if equ x (Val "alice") || equ y (Val "bob") || equ x y 
     then 0 else 1)))))))
;;

let big_prod_monoid = {union = mult_big_int; empty = unit_big_int}
;;

(* big-int version *)

let map_reduce_powerset group_coll fsw =
  let rec loop coll = function
    | Default x -> 
	repeat_big x (get_group_card coll)
    | Case (C_in (v,k)) -> 	(* Inclusion constraint for indiv v *)
	if List.mem v coll.indivs_s1 then
	  loop coll (k true)		(* any subset definitely has v *)
	else if not (List.mem v coll.indivs_s2) then
	  loop coll (k false)		(* no subset can have v *)
	else if begin 			(* otherwise, card(S1) becomes > *)
	           match coll.ccard with (* ccard *)
	           | None -> false
		   | Some n -> n = List.length coll.indivs_s1
		end
	     then loop coll (k false)
        else
	  let coll = {coll with indivs_s2 = del v coll.indivs_s2} in
	  union_big 
	    (loop {coll with indivs_s1 = v :: coll.indivs_s1}
	       (k true))
	    (loop coll (k false))
    | Case (C_card k) ->		(* Find cardinality of the subset s *)
	begin match coll.ccard with
	| Some n -> loop coll (k n)
	| None   -> 
	    let nmax = List.length coll.indivs_s1 + 
		       List.length coll.indivs_s2 in
	    let rec iter acc i = 
	      if i > nmax then acc else
	      let coll = {coll with ccard = Some i} in
	      iter (union_big (loop coll (k i)) acc) (succ i)
	    in iter empty_big (List.length coll.indivs_s1)
	end
  in loop group_coll fsw;;

(* The complete example in Sec 5.2 of the paper; see check.hs *)

let answer_prod =
 (map_reduce_powerset all_people_groups (reify (fun s ->
   map_reduce_sw big_prod_monoid (make_coll s) (reify (fun x ->
   map_reduce_sw big_prod_monoid (make_coll s) (reify (fun y ->
     if equ x (Val "alice") || equ y (Val "bob") || equ x y 
     then one else three)))))))
;;
(* "321761846005034 ..." *)

let 4630 =
String.length (string_of_big_int answer_prod);;


(* Other possible examples: find the number of all subsets
   of individuals that contain Alice and Bob and whose cardinality
   is an even number.
*)

print_endline "\nAll done";;