(* Lifted map-reduce, loops over tuples: flattening nested loops
   into a single one.
   The application: iterating over all subsets of the domain.
*)

(*
#load "lifted_nested_mp.cmo";;
*)

open Delimcc;;
open Lifted_nested_mp;;

(* 
In the first part (which corresponds to Sec 5.1 of the paper) our goal
is to convert a nested loop (loop over tuples) into a loop over
individuals. We limit ourselves to loops over 2-tuples (that is,
double-nested loops over individuals).  Per se, this `flattening' of
the nested loop doesn't bring us anything new. However, the procedure
turns out useful when we iterate over subsets of the domain, in the
second part of the present file.

To be precise, our goal here is to compute the nested sum
        SUM_{i in S} SUM_{j in S} B
where B is some expression containing i and j (to be precise,
containing comparisons of i and j with the elements of the domain S   
and with each other). The presence of the comparisons i=j in B is
a particular complication.

Let us assume that the domain S is a disjoint union of two sets S1 and S2.
We obtain
        SUM_{i in S} SUM_{j in S} B =
  
        SUM_{i in S1} SUM_{j in S1} B +
        SUM_{i in S2} SUM_{j in S2} B +
        SUM_{i in S1} SUM_{j in S2} B +
        SUM_{i in S2} SUM_{j in S1} B

We would like to find a data structure PS, an operation
read_off : PS -> int, and an associative and commutative operation
op : PS -> PS -> PS such that the above sum can be represented as
   read_off (PS[S1] `op` PS[S2])
where PS[S1] is the data structure value whose components depend only
on S1 but not on S2.

If we manage that, we can write
        SUM_{i in S} SUM_{j in S} B
      = read_off ( MonoidSUM_{k in S} PS[{k}] )
where {k} is the singleton set of the individual k and MonoidSum
is the sum that uses `op` instead of addition. MonoidSum is the
desired non-nested sum; we can use MapReduce to compute it.

Let us look at
        SUM_{i in S1} SUM_{j in S2} B
We observe that since i ranges over S1 and j ranges over S2 and S1 and S2
are disjoint, i/=j. Thus we can replace all occurrences of i=j in B
with false, thus removing the main complication.

Using reification, we convert B to a switch. Since we know that i/=j,
we use reify_without (see below) to convert B to a switch assuming
that no two variables are equal to each other.

For exposition, let us consider a sample B of the form
        if i = alice || j = bob || i = j then 0 else 1
Conversion to a switch (upon assumption that no two variables are
equal to each other) gives us int switch switch:

        Case (Val "alice", Default 0,
        Case (Val "bob",   Default 1,
        Default( Case (Val "bob", 0, Default 1))))

We can then write
        SUM_{i in S1} SUM_{j in S2} B =

        [SUM_{j in S2} B[i=alice] ] * Ia1 +
        [SUM_{j in S2} B[i=bob] ] * Ib1 +
        [SUM_{j in S2} B[default for i] ] * (|S1| - Ia1 - Ib1)

where Iai is the indicator function of alice \in Si, and
Ibi is the indicator function of bob \in Sb1 and |S| is the
cardinality of the set S. We can simplify the sum further
        [|S2| * 0 ] * Ia1 +
        [|S2| * 1 ] * Ib1 +
        [0 * Ib2 + 1 * (|S2| - Ib2) ] * (|S1| - Ia1 - Ib1)

Thus
        SUM_{i in S1} SUM_{j in S2} B +
        SUM_{i in S2} SUM_{j in S1} B
=
        [|S2| * 0 ] * Ia1 +
        [|S2| * 1 ] * Ib1 +
        [0 * Ib2 + 1 * (|S2| - Ib2) ] * (|S1| - Ia1 - Ib1) +
        [|S1| * 0 ] * Ia2 +
        [|S1| * 1 ] * Ib2 +
        [0 * Ib1 + 1 * (|S1| - Ib1) ] * (|S2| - Ia2 - Ib2)

This shows that PS[S] is a tuple, whose first component is
SUM_{i in S} SUM_{j in S} B and the other component is a switch

        Case (Val "alice", (Ia, [SUM_{j in S} B[i=alice]]),
        Case (Val "bob",   (Ib, [SUM_{j in S} B[i=bob]]),
        Default( (|S|-Ia-Ib,
                 [SUM_{j in S} B[default for i] ] ))

The operation op is defined by eq. (6) in the paper and read_off
returns the first component of PS. It is relatively easy to compute
PS[{k}] for a singleton set of one individual k. In fact, the function
k -> PS[{k}] can be reified into a switch itself.

In the paper (end of Sec 5.1), PS is called `pairs', which is perhaps
too generic. We call it PS[S] here to indicate that it represents,
for a _subset_ S of the whole domain, (SUM_{i in S} SUM_{j in S} B)
along with intermediate results. Better naming suggestions are welcome.

In the paper, the second component of PS is a tuple. Here, we represent
it as a switch, which isn't too good either (for example, we really don't care
of the first argument of the Case constructor). Suggestions for a better
name and a lucid representation are welcome.

*)


(* A generalization of reify that takes a list of variables that
   the current loop variable cannot be equal to.
   It a sense, we iterate over a smaller domain, which omits
   the individuals associated with the variables in the exclusion list.
*)
let reify_without : 'i var list -> ('i var -> 'r) -> ('i,'r) switch = 
  fun ws f ->
  let var_prompt = new_prompt () in
  let var = Var (new_generation (), var_prompt) in
  let presult = new_prompt () in
  let make_case y (Default r) rest = 
    abort presult (Case (y,r,rest)) in
  let rec loop_known w = function
    | Compare (y,k) ->
	loop_known w (k (equ y w)) in
  let rec loop ws = function
    | Compare (y,k) ->
	if member y ws then loop ws (k false)
	else 
	  make_case y 
	    (push_prompt presult (fun () -> 
	      loop_known y (k true)))
	    (push_prompt presult (fun () -> 
	      loop (y::ws) (k false)))
  in 
  push_prompt presult (fun () ->
    loop ws (push_prompt var_prompt (fun () -> 
      abort presult (Default (f var)))))
;;

(* The regular reify is an instance of reify_without: *)
let reify : ('i var -> 'r) -> ('i,'r) switch = 
  fun f -> reify_without [] f
;;

(* Reify the function of two arguments: the body of the 2-nested loop *)
let reify2 : ('i var -> 'i var -> 'r) -> ('i,('i,'r) switch) switch =
   fun f -> 
   reify (fun x -> reify (fun y -> f x y))
;;

(* Reify the function of two arguments: the body of the 2-nested loop,
   assuming that no two variables are equal to each other.
*)
let reify2_distinct : ('i var -> 'i var -> 'r) -> ('i,('i,'r) switch) switch =
   fun f -> 
   reify (fun x -> reify_without [x] (fun y -> f x y))
;;


(* The following are utility functions *)

(* Given a function 'i var -> 'r represented in the reified form,
   i.e., as a ('i,'r) switch, evaluate the function at the argument w.

   If the function represents the body of a summation loop
   SUM_{i in S}, eval_switch_spec below evaluates the loop in the case
   S is a singleton set {w}, containing one particular individual w,
   and nobody else.
*)

let rec eval_switch_spec (w : 'i var) : ('i,'r) switch -> 'r = function
    | Default x -> x
    | Case (i,v,_) when i = w -> v
    | Case (_,_,t) -> eval_switch_spec w t
;;

(* Given a function 'i var -> 'r represented in the reified form,
   i.e., as a ('i,'r) switch, evaluate the function at the argument
   that is different from any values in the given list ws.

   We essentially lookup the default case of the switch --
   but we also check that the list of exceptions ws excludes
   all the special Cases of the switch. Otherwise, eval_switch_gen
   is ill-defined.
*)

let rec eval_switch_gen (ws : 'i var list) : ('i,'r) switch -> 'r = function
   | Default x -> x
   | Case (i,v,t) when member i ws -> eval_switch_gen ws t
   | Case (i,_,t) -> failwith "eval_switch_gen: ill-defined"
;;


(* A body of a sample loop, the running example *)
let sample_tuple_cnt x y =
   if equ x (Val "alice") ||
      equ y (Val "bob") ||
      equ x y
   then 0 else 1
;;

(* Note the Case (Var _, 0, ...)
   This case corresponds to x = y. The result for this case
   is 0, since we handle only pairs of distinct elements.
   In fact, if the nested switch contains such a case whose result
   is empty, it validates our assumption of pairs of distinct elements.
*)
(* The inferred type is (string, (string, int) switch) switch *)

let Case (Val "alice", Default 0,
      Case (Val "bob", Case (Val "bob", 0, Default 1),
	Default (Case (Val "bob", 0, Case (Var _, 0, Default 1)))))
= reify2 sample_tuple_cnt
;;

let 
  (Case (Val "alice", Default 0,
     Case (Val "bob", Default 1, Default (Case (Val "bob", 0, Default 1))))
   as sample_swsw)
=
reify2_distinct sample_tuple_cnt
;;

(* The data structure PS[S]; see the title comments *)
type ('i,'r) ps = 'r * ('i, int * 'r) switch;;

(* Associative and commutative operation of ('i,'r) ps monoid.
   See eq. (6) in the paper.
*)
let ps_union : 'r monoid -> (('i,'r) ps as 'x) -> 'x -> 'x
  = function mnd -> 
  let rec sw_add_pointwise sw1 sw2 =
    match (sw1,sw2) with
    | (Default (m1,r1), Default (m2,r2)) ->
	Default (m1+m2, mnd.union r1 r2)
    | (Case (i1,(m1,r1),t1), Case (i2,(m2,r2),t2)) ->
	assert (i1 = i2);
	Case (i1, (m1+m2, mnd.union r1 r2), sw_add_pointwise t1 t2)
  in
  let rec sw_cross_union sw1 sw2 =
    match (sw1,sw2) with
    | (Default (m1,r1), Default (m2,r2)) ->
	mnd.union (repeat mnd r1 m2) (repeat mnd r2 m1)
    | (Case (i1,(m1,r1),t1), Case (i2,(m2,r2),t2)) ->
	assert (i1 = i2);
	mnd.union (mnd.union (repeat mnd r1 m2) (repeat mnd r2 m1))
	   (sw_cross_union t1 t2)
  in
  fun (r1,sw1) (r2,sw2) ->
    (mnd.union (mnd.union r1 r2) (sw_cross_union sw1 sw2),
     sw_add_pointwise sw1 sw2)
;;

let read_off : ('i,'r) ps -> 'r = fst;;

(* Make PS[S] for the empty domain S: the unit of the ('i,'r) ps monoid. *)
let make_empty_ps : 'r monoid -> ('i,('i,'r) switch) switch -> ('i,'r) ps
   = function {empty=z} -> 
   let rec loop = function 
     | Default _    -> Default (0,z)
     | Case (i,_,t) -> Case (i,(0,z),loop t)
   in fun sw -> (z,loop sw)
;;

let rec map_switch : ('a -> 'b) -> ('i,'a) switch -> ('i,'b) switch =
  fun f -> function
    | Default x    -> Default (f x)
    | Case (i,v,t) -> Case (i,f v, map_switch f t)
;;


(* Make a switch representing a function k -> PS[{k}]
   where {k} is a singleton set containing the individual k.
*)
let singleton_ps : 
    'r monoid -> ('i var -> 'i var -> 'r) -> ('i,('i,'r) ps) switch
  = fun mnd ->
  (* Build the second component of PS, for the singleton S = {w} *)
  let rec make_ps2_spec w card = function
    | Case (i,v,t) -> 
	let ind = if i = w then 1 else 0 in 
	Case (i,(ind, eval_switch_spec w v), make_ps2_spec w (card - ind) t)
    | Default v -> assert (card = 0 || card = 1);
	Default (card, eval_switch_spec w v) in
  (* Build the second component of PS, for the singleton S that does not
     intersect the set ws *)
  let rec make_ps2_gen ws = function
    | Case (i,v,t) -> 
	assert (member i ws);
	Case (i,(0, eval_switch_gen ws v), make_ps2_gen ws t)
    | Default v -> Default (1, eval_switch_gen ws v) in
  fun f ->
  let fswitch = reify2_distinct f in
  let fii_switch = reify (fun x -> f x x) in
  let rec build_pssw ws = function
    | Case (i,v,t) -> 
	Case (i, (eval_switch_spec i fii_switch, (* f i i *)
                  make_ps2_spec i 1 fswitch),
	      build_pssw (i::ws) t)
    | Default v -> 
	Default (
            eval_switch_gen ws fii_switch,   (* f i i, generic i *)
            make_ps2_gen ws fswitch)
  in build_pssw [] fswitch
;;


(* This function is called `pairs' at the end of Sec 5.1.
   To be precise, `pairs' produces the function 'i var -> ('i,'r) ps
   whereas we produce the function in the reified form, as a switch.
*)
let ps_body : 'r monoid -> ('i var -> 'i var -> 'r) ->
   (('i,'r) ps monoid * ('i,('i,'r) ps) switch)
 = fun mnd f ->
  let fswitch = reify2_distinct f in
  ({empty = make_empty_ps mnd fswitch; union = ps_union mnd},
   singleton_ps mnd f)
;;


(* Running example *)

let (9703,
 Case (Val "alice", (1, 0), Case (Val "bob", (1, 100), Default (98, 99))))
=
 let (mnd',body') = ps_body sum_monoid sample_tuple_cnt in
 map_reduce_sw mnd' body'
;;


(* Reproducing a few old results *)

(* The result is now correct for the nested loop: it is
   99 * 99 = 9801
   The body of the loop below does not contain i=j comparison.
   So, we can count pairs with distinct components and pairs that
   may have the same components.
*)

let (9801,
 Case (Val "alice", (1, 0), Case (Val "bob", (1, 100), Default (98, 99))))
=
 let body x y = if equ x (Val "alice") || equ y (Val "bob") then 0 else 1 in
 let (mnd',body') = ps_body sum_monoid body in
 map_reduce_sw mnd' body'
;;


(* 
In the second part (which corresponds to Sec 5.2 of the paper) our goal
is to loop over subsets of the domain. The data structure PS[S] defined
above turns out exactly what we need. 

We have seen that given PS[{}] and the operation op, PS[S] form a 
commutative monoid. We need to extend that monoid to a commutative
semiring. The operation op of the PS monoid becomes the multiplication
of the semiring. We need an addition operation, which we define
freely (that is, as a multi-set union). We can't afford
to store duplicates in that multi-set; rather, we should store elements
along with their counts. The appropriate data structure for that purpose
is a bag defined below.
*)

(*
#load "pMap.cmo";;
*)

(* We need some big numbers to compute eq (7) of the paper *)
(*
#load "nums.cma";;
*)
open Num;;

let zero = num_of_int 0
let one = num_of_int 1
and two = num_of_int 2
and three = num_of_int 3
;;

type 'r bag = ('r, num) PMap.t;;

(* Bags form a monad *)
let unit : 'r -> 'r bag 
 = fun x -> PMap.add x one PMap.empty;;

let bind : 'r bag -> ('r -> 's bag) -> 's bag
 = fun m f ->
   PMap.foldi (fun v cnt -> 
   PMap.foldi (fun r rcnt -> PMap.insert_with (+/) r (cnt */ rcnt)) (f v)) 
   m PMap.empty
 ;;

(* The additive monoid of the semiring *)
let bag_add : 'r bag monoid =
   {empty = PMap.empty;
    union = 
     fun m1 m2 -> PMap.foldi (PMap.insert_with (+/)) m1 m2}
;;

(* Furthermore, we can loop over the contents of a bag; in other words, we
   can homomorphically map a bag monoid to another monoid.  This operation
   is efficient, using repeated reduction in the target monoid.
*)
(*
See the specialized version below...

let reduce_bag : 'r monoid -> 'r bag -> 'r
 = fun mnd bag ->
   PMap.foldi (fun k v -> mnd.union (repeat mnd k v)) bag mnd.empty
;;
*)

(* 
Finally, any monoid structure on the bags' elements (such as produced by PS)
gives rise to another monoid structure on the bags -- 
the multiplicative monoid of the semiring.
*)

let bag_mul : 'r monoid -> 'r bag monoid
 = fun mnd ->
   {empty = unit (mnd.empty);
    union = 
      fun m1 m2 -> 
         bind m1 (fun v1 -> bind m2 (fun v2 -> unit (mnd.union v1 v2)))}
;;


let big_sum_monoid =
   {empty = zero;
    union = (+/)}
;;

(* First, we compute

  SUM_{s \subset S} 
    SUM_{x \in s, x /= Alice} 
     SUM_{y \in s, y /= Bob, x /= y} 1
  Where S is the domain of 100 people that includes Alice and Bob.

We have seen in the first part that
    SUM_{x \in s, x /= Alice} 
     SUM_{y \in s, y /= Bob, x /= y} 1
   = read_off PS[s]

We now turn to the sum over all subsets: SUM_{s \subset S}.
The trick is to convert the sum over the subsets into the sum over individuals.
For each individual, a particular subset s may either include
the individual, or not. Thus we evaluate the whole sum as follows.
Let us suppose the domain has two individuals, i1 and i2.

For each individual k, we make a bag of PS[{}] and PS[{k}]. We obtain
two bags, (PS[{}] + PS[{i1}]) and (PS[{}] + PS[{i2}]). We then
multiply them together, obtaining
   (PS[{}] + PS[{i1}]) * (PS[{}] + PS[{i2}])
 = PS[{}]*PS[{}] + PS[{}]*PS[{i1}] + PS[{}]*PS[{i2}] + PS[{i1}]*PS[{i2}]

The multiplication operation is the operation op introduced at the beginning
of the present file. By definition of that operation,
PS[s1] * PS[s2] = PS[s1 U s2] provided s1 and s2 are disjoint. Thus we obtain
 = PS[{}] + PS[{i1}] + PS[{i2}] + PS[{i1,i2}]

That is, the collection of PS[s] over each subset s of our sample domain.
To obtain the final sum, we apply the reduce_bag operation.

*)

let big_sample_tuple_cnt x y =
   if equ x (Val "alice") ||
      equ y (Val "bob") ||
      equ x y
   then zero else one
;;

let (sample_mnd,sample_ps) = ps_body big_sum_monoid big_sample_tuple_cnt;;
(*
val sample_mnd : (string, Num.num) ps Lifted_nested_mp.monoid = ...
val sample_ps :
  (string, (string, Num.num) ps) Lifted_nested_mp.switch = ...
*)

let sample_bag_mnd = bag_mul sample_mnd;;
(*
val sample_bag_mnd : (string, Num.num) ps bag Lifted_nested_mp.monoid =
  {union = <fun>; empty = <abstr>}
*)

let reduce_bag_num : num bag -> num
 = fun bag ->
   PMap.foldi (fun k v -> 
   (* Printf.printf "reduce %s %s\n" (string_of_num k) (string_of_num v); *)
   (+/) (v */ k)) bag zero
;;

(* The following expression uses the generic reduce_bag. It helps
   specialize it so that multiplications are performed natively rather
   than via repeated additions.

let answer_sum =
 let f' = reflect sample_ps in
 reduce_bag big_sum_monoid
 (bind
  (map_reduce_sw sample_bag_mnd (reify (fun x ->
    bag_add.union sample_bag_mnd.empty (unit (f' x)))))
  (fun r -> unit (read_off r)))
;;
*)

let answer_sum =
 let f' = reflect sample_ps in
 reduce_bag_num
 (bind
  (map_reduce_sw sample_bag_mnd (reify (fun x ->
    bag_add.union sample_bag_mnd.empty (unit (f' x)))))
  (fun r -> unit (read_off r)))
;;

let "3075003443503627470680627800440832"
   = string_of_num answer_sum;;

(* We now solve the problem posed in Sec 5.2
   SUM_{s \subset S}
    Prod_{x \in s, y \in s, x /= Alice, y /= Bob, x /= 3} 3

It turns out a simple variation of the above.
*)

(*
let answer_prod =
 let f' = reflect sample_ps in
 reduce_bag_num
 (bind
  (map_reduce_sw sample_bag_mnd (reify (fun x ->
    bag_add.union sample_bag_mnd.empty (unit (f' x)))))
  (fun r -> unit (three **/ read_off r)))
;;
*)

(* The reason above is commented out is the bug in the standard
   library.

compare (three **/ (num_of_int 9703)) three;;
- : int = 1

compare three (three **/ (num_of_int 9703));;
- : int = -1

compare (three **/ (num_of_int 9703)) (three **/ (num_of_int 9703));;
Exception: Invalid_argument "equal: abstract value".
*)


print_endline "\nAll done";;