(*
Coordination games and nested inference. Planning as inference
specifically
Bounded-Rational Theory of Mind for Conversational Implicature
Agent S tries to induce agent A to take a specific action. Agent S
speaks to agent A and so communicates information that agent A uses to
make decision. Agent S needs a model of agent's A mind so to predict
how agent A can understand a particular utterance and how this
understanding leads to a specific action.
This example is intentionally very simple. We assume the language of
agents is a single boolean formula (to be called the language
formula) over two sorts of propositional variables: form variables
and state variables. State variables describe the state of the
world. They are `hidden' in a sense they cannot be communicated
directly. Rather, in his utterance the speaker directly specifies a
complete assignment to form variables (informally, in the natural
language utterance, we can take the set of form variables is the set
of all words in the natural language; a spoken phrase gives the
assignment to the form variables, by setting the variables
corresponding to the spoken words to true and the rest to false.)
The state variables, the result of the communication, can be deduced
by finding out which variables satisfy the language formula given a
particular assignment to form variables. Generally, several assignments
to state variables may satisfy the language formula.
We assume a mapping from state variables and actions to
utilities (which are real numbers). We assume that agent A takes the
action that has the maximum expected utility in the set of the worlds
that satisfies the language formula given the utterance (the
assignments to form varaibles). We assume agent S knows agent A's
utility function. The agent S goal is to produce the utterance that
maximizes the expected utility of a particular desired action.
The nuance comes from the fact that computing the set of state
variables that satisfy the language formula given the utterance is
hard: it is the SAT problem. Therefore, agent A uses a particular
_approximate_ SAT-solving algorithm; whose details thus influence the
decision process of S. We assume that S knows the SAT-solving
algorithm of A precisely. Thus S must do nested inference:
determining the assignment to form variables relies on the the
result of nested inference: determining mathematical expectation for
the utility of a certain action.
NOTE: explain this as a coordination game. Agents share the same goal,
the same utility function and aim to maximize the common utility.
This particular file implements the following example of scalar
implicature. Suppose a speaker and a hearer are arranging a party.
The speaker knows exactly how many of the invited people are coming to
the party and needs to tell the hearer a headcount so that the hearer
prepares an appropriate amount of food -- having too much food is bad,
but having too little food is much worse. The speaker may say the
precise number, a rough estimate such as (`all' (meaning all the
invited people), `some', `none'), or a more precise estimate (`some but not
all'). A more precise headcount report is more informative -- and so
may be harder to interpret. The speaker therefore has to balance the
benefits of precision with the risk of mis-interpretation. To estimate
these factors, the speaker needs to model the decision procedure of
the (bounded-rational) hearer: for each possible utterance, the
speaker should evaluate its likely interpretations by the hearer and
estimate how much food the hearer would buy. The speaker would chose
to say the phrase that induces the hearer to buy as much food as
needed, but not too much more.
Because we express probabilistic models and inference algorithms in
the same language, using the same primitive for random choice, it is
straightforward to represent how the speaker's probabilistic model
includes the hearer's decision making, in particular inference
algorithms the hearer might use.
*)
open Ptypes
open ProbM;;
type statev = S_b0 | S_b1
type formv = F_b0 | F_b1
type action = A of int (* How many pieces of food to buy: 0..3 *)
;;
type utility = float
;;
(* The value of the state variable in the current world *)
(* let vs state statev = List.member statev state;; *)
let utility state (A bought_food) =
let people_came =
(if state S_b0 then 1 else 0) +
(if state S_b1 then 2 else 0) in
- 10.0 *. float_of_int (abs(bought_food - people_came))
;;
let language form state =
form F_b0 == state S_b0 &&
form F_b1 == state S_b1;;
(* A random action *)
(* Since we do planning by inference, all `probabilities' are 1!
*)
let an_action () =
dist [(1.,A 0); (1.,A 1); (1.,A 2); (1.,A 3)];;
(* Agent A: accepts the assignment to form variables and
determines how much food to buy.
It chooses the action with the highest utility, or fails
*)
let agent_A_model form () =
let s0 = letlazy (fun () -> flip 0.5) in
let s1 = letlazy (fun () -> flip 0.5) in
let state = function S_b0 -> s0 () | S_b1 -> s1 () in
if not (language form state) then fail () else
let action = an_action () in
(utility state action, action)
;;
(* Max element of an non-empty array *)
(*
let argmax arr =
let rec loop i (imax,vmax) =
if i >= Array.length arr then imax
else let curr = arr.(i) in
loop (succ i) (if curr >= vmax then (i,curr) else (imax,vmax))
in loop 1 (0,arr.(0))
;;
*)
let ut1 = function F_b1 -> true | _ -> false;;
let
[(0.25, V (-0., A 2)); (0.25, V (-10., A 3)); (0.25, V (-10., A 1));
(0.25, V (-20., A 0))] =
exact_reify (agent_A_model ut1);;
let
[(0.32, V (-0., A 2)); (0.28, V (-10., A 3)); (0.28, V (-10., A 1));
(0.36, V (-20., A 0))] =
sample_rejection (random_selector 17) 100 (agent_A_model ut1);;
(* Compute the max expected utility and the corresponding action
The input is (utility * action) pV
We compute weighted average of utilities with the probabilities,
find actions
We return 0, 1 or more maxima (0 maxima if the table is empty)
*)
let argmax plans =
let pl = List.fold_left (fun pm (p,V (u,a)) ->
PMap.insert_with (fun (x1,y1) (x2,y2) -> (x1+.x2, y1+.y2))
a (u *. p, p) pm) (PMap.empty) plans in
PMap.foldi (fun a (u,p) ->
let u = u /. p in
function [] -> [(u,a)]
| ((umax,_)::_) as acc ->
if u > umax then [(u,a)] else
if u = umax then (u,a)::acc else acc) pl [];;
let [(-0., A 2)] = argmax (exact_reify (agent_A_model ut1));;
let inner_model inferencer form () =
argmax (inferencer (agent_A_model form));;
(* The outer model *)
let agent_S_ideal_state =
function S_b1 -> true | _ -> false;; (* S knows 2 people come to the party *)
let agent_S_model inferencer () =
let formvs = dist [(1.,[F_b1; F_b0]); (1.,[F_b1]);
(1.,[F_b0]); (1.,[]);] in
let form fv = List.mem fv formvs in
let action_A = match inner_model inferencer form () with
| [(_,x)] -> x
| [] -> A (uniform 4) (* If the hearer module produced nothing
assume the hearer will do something random *)
| lst -> let len = List.length lst in (* several maxima, choose one *)
let p = 1. /. float_of_int len in
dist (List.map (fun (_,x) -> (p,x)) lst)
in (utility agent_S_ideal_state action_A, formvs)
;;
let [(1., V (-0., [F_b1])); (1., V (-10., [F_b1; F_b0])); (1., V (-10., [F_b0]));
(1., V (-20., []))] =
exact_reify (agent_S_model (fun m -> exact_reify m));;
let
[(0.25, V (-0., [F_b1; F_b0])); (0.25, V (-0., [F_b1]));
(0.25, V (-0., [F_b0])); (0.25, V (-0., []));
(0.5, V (-10., [F_b1; F_b0]));
(0.5, V (-10., [F_b1])); (0.5, V (-10., [F_b0])); (0.5, V (-10., []));
(0.25, V (-20., [F_b1; F_b0])); (0.25, V (-20., [F_b1]));
(0.25, V (-20., [F_b0])); (0.25, V (-20., []))] =
exact_reify (agent_S_model (fun m -> sample_rejection dist_selector 1 m));;
(* Since rejection sampling of 1 just returns a random action, it doesn't matter
what the speaker says.
*)
let
[(0.25390625, V (-0., [F_b1; F_b0])); (0.26171875, V (-0., [F_b1]));
(0.25, V (-0., [F_b0])); (0.24609375, V (-0., []));
(0.5078125, V (-10., [F_b1; F_b0])); (0.5, V (-10., [F_b1]));
(0.5, V (-10., [F_b0])); (0.4921875, V (-10., []));
(0.23828125, V (-20., [F_b1; F_b0])); (0.23828125, V (-20., [F_b1]));
(0.25, V (-20., [F_b0])); (0.26171875, V (-20., []))] =
exact_reify (agent_S_model (fun m -> sample_rejection dist_selector 2 m));;
(* But when the hearer uses two samples, it pays, by 1%, to tell the truth. *)
(* More complex model: some-all game *)
type statev = S_b0 | S_b1 | S_conj
type formv = F_all | F_some | F_notall | F_none
;;
let utility state (A bought_food) =
let people_came =
(if state S_b0 then 1 else 0) +
(if state S_b1 then 2 else 0) in
- 10.0 *. float_of_int (abs(bought_food - people_came))
;;
let int_of_bit = function true -> 1 | false -> 0
;;
let language form state =
(if (int_of_bit (form F_all) +
int_of_bit (form F_some) +
int_of_bit (form F_notall) +
int_of_bit (form F_none) > 1) then state S_conj else true)
&&
let people_came =
(if state S_b0 then 1 else 0) +
(if state S_b1 then 2 else 0) in
(if form F_all then people_came == 3 else true) &&
(if form F_some then people_came > 0 else true) &&
(if form F_notall then people_came < 3 else true) &&
(if form F_none then people_came == 0 else true);;
let agent_A_model form () =
let s0 = letlazy (fun () -> flip 0.5) in
let s1 = letlazy (fun () -> flip 0.5) in
let sc = letlazy (fun () -> flip 0.5) in
let state = function S_b0 -> s0 () | S_b1 -> s1 () | S_conj -> sc () in
if not (language form state) then fail () else
let action = an_action () in
(utility state action, action)
;;
let ut1 = function F_some -> true | _ -> false;;
let
[(0.25, V (-0., A 3)); (0.25, V (-0., A 2)); (0.25, V (-0., A 1));
(0.25, V (-10., A 3)); (0.5, V (-10., A 2)); (0.25, V (-10., A 1));
(0.25, V (-10., A 0)); (0.25, V (-20., A 3)); (0.25, V (-20., A 1));
(0.25, V (-20., A 0)); (0.25, V (-30., A 0))] =
exact_reify (agent_A_model ut1);;
let inner_model inferencer form () =
argmax (inferencer (agent_A_model form));;
(* Note the weighted utility *)
let [(-6.66666666666666696, A 2)] = argmax (exact_reify (agent_A_model ut1));;
let ut2 = function F_some -> true | F_notall -> true | _ -> false;;
let
[(0.125, V (-0., A 2)); (0.125, V (-0., A 1)); (0.125, V (-10., A 3));
(0.125, V (-10., A 2)); (0.125, V (-10., A 1)); (0.125, V (-10., A 0));
(0.125, V (-20., A 3)); (0.125, V (-20., A 0))] =
exact_reify (agent_A_model ut2);;
let [(-5., A 2); (-5., A 1)] = argmax (exact_reify (agent_A_model ut2));;
(* The outer model *)
let agent_S_ideal_state =
function S_b1 -> true | _ -> false;; (* S knows 2 people come to the party *)
let all_forms =
let bit0 n = n mod 2 == 1 in
let bit1 n = n / 2 mod 2 == 1 in
let bit2 n = n / 4 mod 2 == 1 in
let bit3 n = n / 8 mod 2 == 1 in
let rec loop i =
if i > 15 then [] else
((if bit0 i then [F_all] else []) @
(if bit1 i then [F_some] else []) @
(if bit2 i then [F_notall] else []) @
(if bit3 i then [F_none] else []) @ []) :: loop (succ i)
in loop 0;;
let agent_S_model inferencer () =
let formvs = dist (List.map (fun v -> (1., v)) all_forms) in
let form fv = List.mem fv formvs in
let action_A = match inner_model inferencer form () with
| [(_,x)] -> x
| [] -> A (uniform 4) (* If the hearer module produced nothing
assume the hearer will do something random *)
| lst -> let len = List.length lst in (* several maxima, choose one *)
let p = 1. /. float_of_int len in
dist (List.map (fun (_,x) -> (p,x)) lst)
in (utility agent_S_ideal_state action_A, formvs)
;;
let
[(0.25, V (-0., [F_some; F_none]));
(0.25, V (-0., [F_some; F_notall; F_none]));
(0.5, V (-0., [F_some; F_notall])); (1., V (-0., [F_some]));
(0.25, V (-0., [F_all; F_none]));
(0.25, V (-0., [F_all; F_notall; F_none]));
(0.25, V (-0., [F_all; F_notall]));
(0.25, V (-0., [F_all; F_some; F_none]));
(0.25, V (-0., [F_all; F_some; F_notall; F_none]));
(0.25, V (-0., [F_all; F_some; F_notall])); (0.5, V (-0., []));
(1., V (-10., [F_notall])); (0.5, V (-10., [F_some; F_none]));
(0.5, V (-10., [F_some; F_notall; F_none]));
(0.5, V (-10., [F_some; F_notall])); (0.5, V (-10., [F_all; F_none]));
(0.5, V (-10., [F_all; F_notall; F_none]));
(0.5, V (-10., [F_all; F_notall]));
(0.5, V (-10., [F_all; F_some; F_none]));
(0.5, V (-10., [F_all; F_some; F_notall; F_none]));
(0.5, V (-10., [F_all; F_some; F_notall])); (1., V (-10., [F_all; F_some]));
(1., V (-10., [F_all])); (0.5, V (-10., [])); (1., V (-20., [F_none]));
(1., V (-20., [F_notall; F_none])); (0.25, V (-20., [F_some; F_none]));
(0.25, V (-20., [F_some; F_notall; F_none]));
(0.25, V (-20., [F_all; F_none]));
(0.25, V (-20., [F_all; F_notall; F_none]));
(0.25, V (-20., [F_all; F_notall]));
(0.25, V (-20., [F_all; F_some; F_none]));
(0.25, V (-20., [F_all; F_some; F_notall; F_none]));
(0.25, V (-20., [F_all; F_some; F_notall]))] =
exact_reify (agent_S_model (fun m -> exact_reify m));;
let [(-0., [F_some])] =
argmax (exact_reify (agent_S_model (fun m -> exact_reify m)));;
let [(-8.90625, [F_some])] =
argmax (exact_reify
(agent_S_model (fun m -> sample_rejection dist_selector 2 m)));;
(* Asymmetric utility *)
let utility state (A bought_food) =
let people_came =
(if state S_b0 then 1 else 0) +
(if state S_b1 then 2 else 0) in
- 10.0 *. float_of_int (
let diff = bought_food - people_came in
if diff = 0 then 0 else
if diff > 0 then diff else 10*(-diff))
;;
let agent_A_model form () =
let s0 = letlazy (fun () -> flip 0.5) in
let s1 = letlazy (fun () -> flip 0.5) in
let sc = letlazy (fun () -> flip 0.5) in
let state = function S_b0 -> s0 () | S_b1 -> s1 () | S_conj -> sc () in
if not (language form state) then fail () else
let action = an_action () in
(utility state action, action)
;;
let ut1 = function F_some -> true | _ -> false;;
let
[(0.25, V (-0., A 3)); (0.25, V (-0., A 2)); (0.25, V (-0., A 1));
(0.25, V (-10., A 3)); (0.25, V (-10., A 2)); (0.25, V (-20., A 3));
(0.25, V (-100., A 2)); (0.25, V (-100., A 1)); (0.25, V (-100., A 0));
(0.25, V (-200., A 1)); (0.25, V (-200., A 0)); (0.25, V (-300., A 0))] =
exact_reify (agent_A_model ut1);;
let inner_model inferencer form () =
argmax (inferencer (agent_A_model form));;
(* Note the weighted utility *)
let [(-10., A 3)] = argmax (exact_reify (agent_A_model ut1));;
let ut2 = function F_some -> true | F_notall -> true | _ -> false;;
let
[(0.125, V (-0., A 2)); (0.125, V (-0., A 1)); (0.125, V (-10., A 3));
(0.125, V (-10., A 2)); (0.125, V (-20., A 3)); (0.125, V (-100., A 1));
(0.125, V (-100., A 0)); (0.125, V (-200., A 0))] =
exact_reify (agent_A_model ut2);;
let [(-5., A 2)] = argmax (exact_reify (agent_A_model ut2));;
(* The outer model *)
let agent_S_ideal_state =
function S_b1 -> true | _ -> false;; (* S knows 2 people come to the party *)
let agent_S_model inferencer () =
let formvs = dist (List.map (fun v -> (1., v)) all_forms) in
let form fv = List.mem fv formvs in
let action_A = match inner_model inferencer form () with
| [(_,x)] -> x
| [] -> A (uniform 4) (* If the hearer module produced nothing
assume the hearer will do something random *)
| lst -> let len = List.length lst in (* several maxima, choose one *)
let p = 1. /. float_of_int len in
dist (List.map (fun (_,x) -> (p,x)) lst)
in (utility agent_S_ideal_state action_A, formvs)
;;
(*
exact_reify (agent_S_model (fun m -> exact_reify m));;
*)
let [(-0., [F_notall]); (-0., [F_some; F_notall])] =
argmax (exact_reify (agent_S_model (fun m -> exact_reify m)));;
let [(-60.078125, [F_some]); (-60.078125, [])] =
argmax (exact_reify
(agent_S_model (fun m -> sample_rejection dist_selector 2 m)));;
let [(-5.31250000000000089, [F_all])] =
argmax (sample_importance (random_selector 17) 100
(agent_S_model (fun m -> sample_rejection dist_selector 10 m)));;
let [(-15.9212239583333304, [F_some])] =
argmax (sample_importance (random_selector 17) 1000
(agent_S_model (fun m -> sample_rejection dist_selector 10 m)));;
let [(-0., [F_notall])] =
argmax (exact_reify
(agent_S_model (fun m -> sample_importance dist_selector 2 m)));;