(*
   Coordination games and nested inference. Planning as inference
                         specifically

   Bounded-Rational Theory of Mind for Conversational Implicature

 Agent S tries to induce agent A to take a specific action. Agent S
 speaks to agent A and so communicates information that agent A uses to
 make decision. Agent S needs a model of agent's A mind so to predict
 how agent A can understand a particular utterance and how this
 understanding leads to a specific action.

 This example is intentionally very simple. We assume the language of
 agents is a single boolean formula (to be called the language
 formula) over two sorts of propositional variables: form variables
 and state variables. State variables describe the state of the
 world. They are `hidden' in a sense they cannot be communicated
 directly. Rather, in his utterance the speaker directly specifies a
 complete assignment to form variables (informally, in the natural
 language utterance, we can take the set of form variables is the set
 of all words in the natural language; a spoken phrase gives the
 assignment to the form variables, by setting the variables
 corresponding to the spoken words to true and the rest to false.)
 
 The state variables, the result of the communication, can be deduced
 by finding out which variables satisfy the language formula given a
 particular assignment to form variables. Generally, several assignments
 to state variables may satisfy the language formula.
 We assume a mapping from state variables and actions to
 utilities (which are real numbers). We assume that agent A takes the
 action that has the maximum expected utility in the set of the worlds
 that satisfies the language formula given the utterance (the
 assignments to form varaibles).  We assume agent S knows agent A's
 utility function. The agent S goal is to produce the utterance that
 maximizes the expected utility of a particular desired action.

 The nuance comes from the fact that computing the set of state
 variables that satisfy the language formula given the utterance is
 hard: it is the SAT problem. Therefore, agent A uses a particular
 _approximate_ SAT-solving algorithm; whose details thus influence the
 decision process of S. We assume that S knows the SAT-solving
 algorithm of A precisely. Thus S must do nested inference:
 determining the assignment to form variables relies on the the
 result of nested inference: determining mathematical expectation for
 the utility of a certain action.  

NOTE: explain this as a coordination game. Agents share the same goal,
the same utility function and aim to maximize the common utility.

This particular file implements the following example of scalar
implicature. Suppose a speaker and a hearer are arranging a party.
The speaker knows exactly how many of the invited people are coming to
the party and needs to tell the hearer a headcount so that the hearer
prepares an appropriate amount of food -- having too much food is bad,
but having too little food is much worse. The speaker may say the
precise number, a rough estimate such as (`all' (meaning all the
invited people), `some', `none'), or a more precise estimate (`some but not
all'). A more precise headcount report is more informative -- and so
may be harder to interpret. The speaker therefore has to balance the
benefits of precision with the risk of mis-interpretation. To estimate
these factors, the speaker needs to model the decision procedure of
the (bounded-rational) hearer: for each possible utterance, the
speaker should evaluate its likely interpretations by the hearer and
estimate how much food the hearer would buy. The speaker would chose
to say the phrase that induces the hearer to buy as much food as
needed, but not too much more.

Because we express probabilistic models and inference algorithms in
the same language, using the same primitive for random choice, it is
straightforward to represent how the speaker's probabilistic model
includes the hearer's decision making, in particular inference
algorithms the hearer might use.
*)

open Ptypes
open ProbM;;

type statev  = S_b0 | S_b1
type formv   = F_b0 | F_b1
type action  = A of int     (* How many pieces of food to buy: 0..3 *)
;;
type utility = float
;;
(* The value of the state variable in the current world *)
(* let vs state statev = List.member statev state;; *)

let utility state (A bought_food) = 
    let people_came = 
      (if state S_b0 then 1 else 0) +
      (if state S_b1 then 2 else 0) in
    - 10.0 *. float_of_int (abs(bought_food - people_came))
;;

let language form state =
  form F_b0 == state S_b0 &&
  form F_b1 == state S_b1;;

(* A random action *)
(* Since we do planning by inference, all `probabilities' are 1!
*)
let an_action () = 
  dist [(1.,A 0); (1.,A 1); (1.,A 2); (1.,A 3)];;


(* Agent A: accepts the assignment to form variables and
   determines how much food to buy.
   It chooses the action with the highest utility, or fails
*)
let agent_A_model form () =
  let s0 = letlazy (fun () -> flip 0.5) in
  let s1 = letlazy (fun () -> flip 0.5) in
  let state = function S_b0 -> s0 () | S_b1 -> s1 () in
  if not (language form state) then fail () else
  let action = an_action () in
  (utility state action, action)
;;

(* Max element of an non-empty array *)
(*
let argmax arr =
  let rec loop i (imax,vmax) =
    if i >= Array.length arr then imax 
    else let curr = arr.(i) in
    loop (succ i) (if curr >= vmax then (i,curr) else (imax,vmax))
  in loop 1 (0,arr.(0))
;;
*)

let ut1 = function F_b1 -> true | _ -> false;;

let 
  [(0.25, V (-0., A 2)); (0.25, V (-10., A 3)); (0.25, V (-10., A 1));
   (0.25, V (-20., A 0))] =
  exact_reify (agent_A_model ut1);;

let
  [(0.32, V (-0., A 2)); (0.28, V (-10., A 3)); (0.28, V (-10., A 1));
   (0.36, V (-20., A 0))] =
  sample_rejection (random_selector 17) 100 (agent_A_model ut1);;

(* Compute the max expected utility and the corresponding action
   The input is (utility * action) pV
   We compute weighted average of  utilities with the probabilities, 
   find actions
   We return 0, 1 or more maxima (0 maxima if the table is empty)
*)
let argmax plans =
  let pl = List.fold_left (fun pm (p,V (u,a)) ->
    PMap.insert_with (fun (x1,y1) (x2,y2) -> (x1+.x2, y1+.y2)) 
      a (u *. p, p) pm) (PMap.empty) plans in
  PMap.foldi (fun a (u,p) -> 
    let u = u /. p in
    function [] -> [(u,a)]
      | ((umax,_)::_) as acc -> 
	  if u > umax then [(u,a)] else
	  if u = umax then (u,a)::acc else acc) pl [];;

let [(-0., A 2)] = argmax (exact_reify (agent_A_model ut1));;

let inner_model inferencer form () =
 argmax (inferencer (agent_A_model form));;

(* The outer model *)
let agent_S_ideal_state = 
  function S_b1 -> true | _ -> false;; (* S knows 2 people come to the party *)

let agent_S_model inferencer () =
  let formvs = dist [(1.,[F_b1; F_b0]); (1.,[F_b1]);
		     (1.,[F_b0]); (1.,[]);] in
  let form fv = List.mem fv formvs in
  let action_A = match inner_model inferencer form () with
  | [(_,x)] -> x
  | [] -> A (uniform 4) (* If the hearer module produced nothing
			     assume the hearer will do something random *)
  | lst -> let len = List.length lst in (* several maxima, choose one *)
           let p = 1. /. float_of_int len in
           dist (List.map (fun (_,x) -> (p,x)) lst) 
  in (utility agent_S_ideal_state action_A, formvs)
;;

let [(1., V (-0., [F_b1])); (1., V (-10., [F_b1; F_b0])); (1., V (-10., [F_b0]));
     (1., V (-20., []))] =
exact_reify (agent_S_model (fun m -> exact_reify m));;

let
[(0.25, V (-0., [F_b1; F_b0])); (0.25, V (-0., [F_b1]));
 (0.25, V (-0., [F_b0])); (0.25, V (-0., [])); 
 (0.5, V (-10., [F_b1; F_b0]));
 (0.5, V (-10., [F_b1])); (0.5, V (-10., [F_b0])); (0.5, V (-10., []));
 (0.25, V (-20., [F_b1; F_b0])); (0.25, V (-20., [F_b1]));
 (0.25, V (-20., [F_b0])); (0.25, V (-20., []))] =
exact_reify (agent_S_model (fun m -> sample_rejection dist_selector 1 m));;

(* Since rejection sampling of 1 just returns a random action, it doesn't matter
   what the speaker says.
*)

let 
[(0.25390625, V (-0., [F_b1; F_b0])); (0.26171875, V (-0., [F_b1]));
 (0.25, V (-0., [F_b0])); (0.24609375, V (-0., []));
 (0.5078125, V (-10., [F_b1; F_b0])); (0.5, V (-10., [F_b1]));
 (0.5, V (-10., [F_b0])); (0.4921875, V (-10., []));
 (0.23828125, V (-20., [F_b1; F_b0])); (0.23828125, V (-20., [F_b1]));
 (0.25, V (-20., [F_b0])); (0.26171875, V (-20., []))] =
exact_reify (agent_S_model (fun m -> sample_rejection dist_selector 2 m));;

(* But when the hearer uses two samples, it pays, by 1%, to tell the truth. *)


(* More complex model: some-all game *)
type statev  = S_b0 | S_b1 | S_conj
type formv   = F_all | F_some | F_notall | F_none
;;

let utility state (A bought_food) = 
    let people_came = 
      (if state S_b0 then 1 else 0) +
      (if state S_b1 then 2 else 0) in
    - 10.0 *. float_of_int (abs(bought_food - people_came))
;;


let int_of_bit = function true -> 1 | false -> 0
;;

let language form state =
  (if (int_of_bit (form F_all) +
      int_of_bit (form F_some) +
      int_of_bit (form F_notall) +
      int_of_bit (form F_none) > 1) then state S_conj else true)
  &&
  let people_came = 
    (if state S_b0 then 1 else 0) +
      (if state S_b1 then 2 else 0) in
  (if form F_all then people_came == 3 else true) &&
  (if form F_some then people_came > 0 else true) &&
  (if form F_notall then people_came < 3 else true) &&
  (if form F_none then people_came == 0 else true);;

let agent_A_model form () =
  let s0 = letlazy (fun () -> flip 0.5) in
  let s1 = letlazy (fun () -> flip 0.5) in
  let sc = letlazy (fun () -> flip 0.5) in
  let state = function S_b0 -> s0 () | S_b1 -> s1 () | S_conj -> sc () in
  if not (language form state) then fail () else
  let action = an_action () in
  (utility state action, action)
;;

let ut1 = function F_some -> true | _ -> false;;

let
[(0.25, V (-0., A 3)); (0.25, V (-0., A 2)); (0.25, V (-0., A 1));
 (0.25, V (-10., A 3)); (0.5, V (-10., A 2)); (0.25, V (-10., A 1));
 (0.25, V (-10., A 0)); (0.25, V (-20., A 3)); (0.25, V (-20., A 1));
 (0.25, V (-20., A 0)); (0.25, V (-30., A 0))] =
  exact_reify (agent_A_model ut1);;

let inner_model inferencer form () =
 argmax (inferencer (agent_A_model form));;

(* Note the weighted utility *)
let [(-6.66666666666666696, A 2)] = argmax (exact_reify (agent_A_model ut1));;

let ut2 = function F_some -> true | F_notall -> true | _ -> false;;

let 
[(0.125, V (-0., A 2)); (0.125, V (-0., A 1)); (0.125, V (-10., A 3));
 (0.125, V (-10., A 2)); (0.125, V (-10., A 1)); (0.125, V (-10., A 0));
 (0.125, V (-20., A 3)); (0.125, V (-20., A 0))] =
exact_reify (agent_A_model ut2);; 

let [(-5., A 2); (-5., A 1)] = argmax (exact_reify (agent_A_model ut2));;

(* The outer model *)
let agent_S_ideal_state = 
  function S_b1 -> true | _ -> false;; (* S knows 2 people come to the party *)

let all_forms = 
  let bit0 n = n mod 2 == 1 in
  let bit1 n = n / 2 mod 2 == 1 in
  let bit2 n = n / 4 mod 2 == 1 in
  let bit3 n = n / 8 mod 2 == 1 in
  let rec loop i =
    if i > 15 then [] else
   ((if bit0 i then [F_all] else []) @
      (if bit1 i then [F_some] else []) @
      (if bit2 i then [F_notall] else []) @
      (if bit3 i then [F_none] else []) @ []) :: loop (succ i)
  in loop 0;;

let agent_S_model inferencer () =
  let formvs = dist (List.map (fun v -> (1., v)) all_forms) in 
  let form fv = List.mem fv formvs in
  let action_A = match inner_model inferencer form () with
  | [(_,x)] -> x
  | [] -> A (uniform 4) (* If the hearer module produced nothing
			     assume the hearer will do something random *)
  | lst -> let len = List.length lst in (* several maxima, choose one *)
           let p = 1. /. float_of_int len in
           dist (List.map (fun (_,x) -> (p,x)) lst) 
  in (utility agent_S_ideal_state action_A, formvs)
;;

let
[(0.25, V (-0., [F_some; F_none]));
 (0.25, V (-0., [F_some; F_notall; F_none]));
 (0.5, V (-0., [F_some; F_notall])); (1., V (-0., [F_some]));
 (0.25, V (-0., [F_all; F_none]));
 (0.25, V (-0., [F_all; F_notall; F_none]));
 (0.25, V (-0., [F_all; F_notall]));
 (0.25, V (-0., [F_all; F_some; F_none]));
 (0.25, V (-0., [F_all; F_some; F_notall; F_none]));
 (0.25, V (-0., [F_all; F_some; F_notall])); (0.5, V (-0., []));
 (1., V (-10., [F_notall])); (0.5, V (-10., [F_some; F_none]));
 (0.5, V (-10., [F_some; F_notall; F_none]));
 (0.5, V (-10., [F_some; F_notall])); (0.5, V (-10., [F_all; F_none]));
 (0.5, V (-10., [F_all; F_notall; F_none]));
 (0.5, V (-10., [F_all; F_notall]));
 (0.5, V (-10., [F_all; F_some; F_none]));
 (0.5, V (-10., [F_all; F_some; F_notall; F_none]));
 (0.5, V (-10., [F_all; F_some; F_notall])); (1., V (-10., [F_all; F_some]));
 (1., V (-10., [F_all])); (0.5, V (-10., [])); (1., V (-20., [F_none]));
 (1., V (-20., [F_notall; F_none])); (0.25, V (-20., [F_some; F_none]));
 (0.25, V (-20., [F_some; F_notall; F_none]));
 (0.25, V (-20., [F_all; F_none]));
 (0.25, V (-20., [F_all; F_notall; F_none]));
 (0.25, V (-20., [F_all; F_notall]));
 (0.25, V (-20., [F_all; F_some; F_none]));
 (0.25, V (-20., [F_all; F_some; F_notall; F_none]));
 (0.25, V (-20., [F_all; F_some; F_notall]))] =
exact_reify (agent_S_model (fun m -> exact_reify m));;

let [(-0., [F_some])] =
argmax (exact_reify (agent_S_model (fun m -> exact_reify m)));;

let [(-8.90625, [F_some])] =
argmax (exact_reify 
(agent_S_model (fun m -> sample_rejection dist_selector 2 m)));;

(* Asymmetric utility *)

let utility state (A bought_food) = 
    let people_came = 
      (if state S_b0 then 1 else 0) +
      (if state S_b1 then 2 else 0) in
    - 10.0 *. float_of_int (
       let diff = bought_food - people_came in
       if diff = 0 then 0 else
       if diff > 0 then diff else 10*(-diff))
;;

let agent_A_model form () =
  let s0 = letlazy (fun () -> flip 0.5) in
  let s1 = letlazy (fun () -> flip 0.5) in
  let sc = letlazy (fun () -> flip 0.5) in
  let state = function S_b0 -> s0 () | S_b1 -> s1 () | S_conj -> sc () in
  if not (language form state) then fail () else
  let action = an_action () in
  (utility state action, action)
;;

let ut1 = function F_some -> true | _ -> false;;

let
[(0.25, V (-0., A 3)); (0.25, V (-0., A 2)); (0.25, V (-0., A 1));
 (0.25, V (-10., A 3)); (0.25, V (-10., A 2)); (0.25, V (-20., A 3));
 (0.25, V (-100., A 2)); (0.25, V (-100., A 1)); (0.25, V (-100., A 0));
 (0.25, V (-200., A 1)); (0.25, V (-200., A 0)); (0.25, V (-300., A 0))] =
  exact_reify (agent_A_model ut1);;

let inner_model inferencer form () =
 argmax (inferencer (agent_A_model form));;

(* Note the weighted utility *)
let [(-10., A 3)] = argmax (exact_reify (agent_A_model ut1));;

let ut2 = function F_some -> true | F_notall -> true | _ -> false;;

let 
[(0.125, V (-0., A 2)); (0.125, V (-0., A 1)); (0.125, V (-10., A 3));
 (0.125, V (-10., A 2)); (0.125, V (-20., A 3)); (0.125, V (-100., A 1));
 (0.125, V (-100., A 0)); (0.125, V (-200., A 0))] =
exact_reify (agent_A_model ut2);; 

let [(-5., A 2)] = argmax (exact_reify (agent_A_model ut2));;

(* The outer model *)
let agent_S_ideal_state = 
  function S_b1 -> true | _ -> false;; (* S knows 2 people come to the party *)

let agent_S_model inferencer () =
  let formvs = dist (List.map (fun v -> (1., v)) all_forms) in 
  let form fv = List.mem fv formvs in
  let action_A = match inner_model inferencer form () with
  | [(_,x)] -> x
  | [] -> A (uniform 4) (* If the hearer module produced nothing
			     assume the hearer will do something random *)
  | lst -> let len = List.length lst in (* several maxima, choose one *)
           let p = 1. /. float_of_int len in
           dist (List.map (fun (_,x) -> (p,x)) lst) 
  in (utility agent_S_ideal_state action_A, formvs)
;;

(*
exact_reify (agent_S_model (fun m -> exact_reify m));;
*)

let [(-0., [F_notall]); (-0., [F_some; F_notall])] =
argmax (exact_reify (agent_S_model (fun m -> exact_reify m)));;

let [(-60.078125, [F_some]); (-60.078125, [])] =
argmax (exact_reify 
(agent_S_model (fun m -> sample_rejection dist_selector 2 m)));;

let [(-5.31250000000000089, [F_all])] =
argmax (sample_importance (random_selector 17) 100 
(agent_S_model (fun m -> sample_rejection dist_selector 10 m)));;

let [(-15.9212239583333304, [F_some])] =
argmax (sample_importance (random_selector 17) 1000 
(agent_S_model (fun m -> sample_rejection dist_selector 10 m)));;

let [(-0., [F_notall])] =
argmax (exact_reify
(agent_S_model (fun m -> sample_importance dist_selector 2 m)));;