{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE GADTs #-}

-- Deriving non-strict non-determinism Monad with explicit sharing: MP a
-- Free term representation
--
-- We implement a small step semantics, rewriting a term in the
-- monadic language into another term. We can watch the progress
-- of re-writing. The re-writing is non-deterministic,
-- and we use a MonadPlus m to express (or, observe) the non-determinism.
-- Therefore, the step function has the type:
--   step:: MP a -> m (MP a)
-- The monad m could be the state monad for the sake of generating 
-- variable names.
--
-- We use the equality laws of non-strict non-determinism with sharing
-- discussed elsewhere. In this code, we orient the laws and apply
-- them in the `right' sequence (which avoids non-productive applications
-- of the laws).

-- This code is defined for ease of reasoning, not for convenience or
-- performance. The user is required to name let-bound variables uniquely.
-- Free variables are identified by lists of integers.
-- The user should give names like [n] where n is a unique integer.
-- The names that are longer lists are built internally,
-- to ensure uniqueness when we need to name the components
-- (some rules build new let-expressions dynamically)

module Deriving1 where

import Control.Monad
import Text.Show.Functions

-- Free term representation of the lazy sharing monad
data MP a where
  Return  :: a -> MP a
  (:>>=)  :: MP b  -> (b -> MP a) -> MP a
  MZero   :: MP a
  MPlus   :: MP a -> MP a -> MP a
  Let     :: (Var U, LetLHS) -> MP a -> MP a
  Shared  :: Var U -> MP U	-- reference to a `free' variable
  Canon   :: U -> MP U		-- make a value canonical

-- The left-hand side of Let is one of the following
data LetLHS = LHS_unexamined (MP U) | LHS_value U | LHS_normal (MP U)
	    deriving Show

newtype Var a = Var [Int] deriving (Eq, Show)

-- The universe of our data. For simplicity, we use
-- the Universal type for now

-- Lists with non-deterministic components
data BP = Nil | BP (MP U) (MP U)	-- pair with non-det components
  deriving Show

data U = UB Bool 
       | UL BP
       | UF (U -> U)
       | UA (MP U)			-- monadic actions are values too
 deriving Show

instance Show a => Show (MP a) where
 show (Return x) = "Ret " ++ show x
 show (Return _ :>>= k)    = "(Ret " ++ " >>=...)"
 show (( _ :>>= _) :>>= k) = "(bind " ++ " >>=...)"
 show (MZero :>>= k)       = "(mzero " ++ " >>=...)"
 show (MPlus _ _ :>>= k)   = "(mplus " ++ " >>=...)"
 show (Let _ _ :>>= k)     = "(let " ++ " >>=...)"
 show (Shared n :>>= k)    = "(ref " ++ show n ++ " >>=...)"
 show MZero          = "mzero"
 show (MPlus e1 e2)  = "(" ++ show e1 ++ "," ++ show e2 ++ ")"
 show (Let e b)      = "Let " ++ show e ++ " in " ++ show b
 show (Shared n)     = "Ref " ++ show n
 show (Canon x)      = "Canon " ++ show x


-- Various classification predicates
-- All canonical values are normal values
-- All normal values are values
-- The difference is solely due to the fact the values like BP may
-- contain monadic actions or references to free variables.
-- Normal values may contain or be variable references.
-- Just values may contain non-deterministic actions
-- Only normal values are substitutable. We can't substitute
-- values that contain non-deterministic components.

-- Checking if a value is canonical
canonicalV :: U -> Bool
canonicalV (UB _) = True
canonicalV (UL Nil) = True
canonicalV (UL (BP h t)) = canonicalA h && canonicalA t
canonicalV (UF _) = True  -- abstractions are always canonical
canonicalV (UA _) = True  -- actions are too canonical

-- Checking if an action represents a canonical value
canonicalA :: MP U -> Bool
canonicalA (Return x) = canonicalV x
canonicalA _          = False

-- Extract a non-canonical component from a non-canonical value
-- This is used during canonicalization
nonCanonical :: U -> Maybe (MP U, MP U -> U)
nonCanonical (UL (BP (Return h) t)) | Just (c,k) <- nonCanonical h
	 = Just (c, (\c' -> (UL (BP (Return (k c')) t))))
nonCanonical (UL (BP (Return h) (Return t))) | Just (c,k) <- nonCanonical t
	 = Just (c, (\c' -> (UL (BP (Return h) (Return (k c'))))))
nonCanonical (UL (BP (Return _) (Return _))) = Nothing
nonCanonical (UL (BP (Return h) t)) = Just (t, \t -> (UL (BP (Return h) t)))
nonCanonical (UL (BP h t)) = Just (h, \h -> (UL (BP h t)))
nonCanonical _ = Nothing

-- If a value has immediate sub-components that _may_ be non-deterministic, 
-- extract them. We do not check if the components are really non-deterministic.
-- This is the essence of (lazy) normalization in our other code.
-- In this code, only BP may have such components.
nonDet :: U -> Maybe ((MP U, MP U), (MP U, MP U) -> U)
nonDet (UL (BP h t)) = Just ((h,t), \ (h,t) -> (UL (BP h t)))
nonDet _ = Nothing

instance Monad MP where
    return = Return
    (>>=)  = (:>>=)

instance MonadPlus MP where
    mzero = MZero
    mplus = MPlus


-- Things to prove
-- satisfaction of monad and monad-plus laws

-- One-step reduction
-- The function is defined only on non-values:
-- Shared n and (Return v) where v is normal
-- are outside of the domain of step.

step :: MonadPlus m => MP U -> m (MP U)

-- Monadic laws
step (Return x :>>= k) = return (k x)
step ((m1 :>>= k1) :>>= k2) = return (m1 :>>= (\x -> k1 x :>>= k2))

-- MPlus laws
step (MZero :>>= k) = mzero
step ((MPlus m1 m2) :>>= k) = 
    return (m1 :>>= k) `mplus` return (m2 :>>= k)

-- `Observing' (or, reflecting) non-determinism in a MonadPlus m
step MZero           = mzero
step (MPlus m1 m2)   = return m1 `mplus` return m2

-- Canonicalization laws
step (Canon x) | canonicalV x = return (Return x)
step (Canon x) | Just (c,k) <- nonCanonical x
  = return (c :>>= (Canon . k . Return))

-- The Let laws

-- Let-percolation: let-bind law
step ((Let (n,e) b) :>>= k) = return (Let (n,e) (b :>>= k))


-- The following three rules implement the substitution rule: 
-- let x = e in b --> b[x:=e] if e is normal 
-- We substitute lazily, only when demanded. That's
-- why we avoid traversing the right-hand-side of binds. We traverse
-- values only when they are returned. We never substitute in the
-- left-hand side of let. 	

-- The ignore-val rule, canonical values don't contain any variable references
step (Let (n,e) (Return x)) | canonicalV x = return (Return x)

-- The var rule. By construction, v is a normal value (substitutable)
step (Let (n,LHS_normal v) (Shared n')) | n == n' =
   return v

-- The val-demand rule. By construction, v is a normal value (substitutable)
step (Let (n,LHS_normal v) ((Shared n') :>>= k)) | n == n' =
   return $ (Let (n,LHS_normal v) (v :>>= k))

-- The HNF rule, specifically
--         let_ (return (c e1 e2)) f =
--           let_ e1 (\x -> let_ e2 (\y -> f (c x y)))
--           where x and y are fresh and c is a binary constructor
step (Let (n,LHS_value v) m) | Nothing <- nonDet v =
   return (Let (n,LHS_normal (Return v)) m)

step (Let (Var l,LHS_value v) m) | Just ((c1,c2),k) <- nonDet v =
    let n1 = Var (3:l) in
    let n2 = Var (4:l) in
    return (Let (n1,LHS_unexamined c1)
	    (Let (n2,LHS_unexamined c2)
	     (Let (Var l, LHS_normal (Return (k (Shared n1, Shared n2)))) m)))

-- Evaluate the left-hand-side of Let if the variable is demanded
-- The result of evaluating e is not necessarily a normal value
-- The demand-var rule
-- step (Let (n,LHS_unexamined e) (Shared n')) | n == n' =
--    return (e :>>= (\v -> (Let (n,LHS_value v) (Shared n'))))
{-
-- The demand rule, unoptimized
step (Let (n,LHS_unexamined e) ((Shared n') :>>= k)) | n == n' =
   return (e :>>= (\v -> (Let (n,LHS_value v) ((Shared n') :>>= k))))
-}

-- The optimized rule. The only difference is the case analysis on e:
-- since we have to evaluate e anyway, it is OK to examine it, to see
-- if it is shared.
step (Let (n,LHS_unexamined e) (Shared n')) | n == n' =
   case e of 
      Shared n'' -> return (Let (n,LHS_normal (Shared n'')) (Shared n'))
      _ -> return (e :>>= (\v -> (Let (n,LHS_value v) (Shared n'))))

-- The demand rule
step (Let (n,LHS_unexamined e) ((Shared n') :>>= k)) | n == n' =
   case e of 
      Shared n'' -> 
         return (Let (n,LHS_normal (Shared n'')) (Shared n') :>>= k)
      _ -> return (e :>>= (\v -> (Let (n,LHS_value v) ((Shared n') :>>= k))))

-- The ignore-var rule
step (Let (n,e) (Shared n')) | n /= n' =
   return (Shared n')

-- The let-commute rule
step (Let (n,e) ((Shared n') :>>= k)) | n /= n' =
   return ((Shared n') :>>= (\v -> Let (n,e) (k v)))

-- If it is not clear if the variable is demanded, eval the body, 1 step
-- This is just the congruence
step (Let (n,e) b) = step b >>= (\b' -> return (Let (n,e) b'))

step x = error $ "Not handled: " ++ show x

-- Run step repeatedly a few times on a given expression.
-- We wrap the expression into Canon.

observe :: Int -> MP U -> [MP U]
observe n m = observe' n (m :>>= Canon)
 where
 observe' 0 x = return x
 observe' _ m | canonicalA m = return m
 observe' n m = step m >>= observe' (n-1)

-- For debugging: given an expression, return the length of the
-- Let-prefix. That is, the number of nested Let at the beginning of the
-- expression. This number is the size of the `memo' table.
let_prefix_length :: MP U -> Int
let_prefix_length (Let _ e) = succ $ let_prefix_length e
let_prefix_length _ = 0

-- We wrap the expression into Canon, and tell the result as well
-- the size of the longest let-prefix. The latter is the estimate of
-- the size of the Memo table.

observed :: MP U -> [(MP U,Int)]
observed m = observe' 0 (m :>>= Canon)
 where
 observe' n m | canonicalA m = return (m,n)
 observe' n m = step m >>= observe' (max n (let_prefix_length m))


-- Tests

-- convenience functions
let_ n e f = Let (var,LHS_unexamined e) (f (Shared var))
 where var = Var [n]

true, false, coin :: MP U

true  = return (UB True)
false = return (UB False)

notU m = m >>= \ (UB x) -> return $ UB (not x)

coin  = true `mplus` false

bool :: MP U -> Bool
bool (Return (UB x)) = x

test1 = (==) [True,False] $
	map bool $ observe 5 coin

test1' = (==) [False,True] $
         map bool $ observe 5 (notU coin)


list :: MP U -> [Bool]
list (Return (UL Nil)) = []
list (Return (UL (BP (Return (UB x)) t))) = x: list t
list x = error $ "Unexpected item in canonical list: " ++ show x

listbb :: MP U -> [[Bool]]
listbb (Return (UL Nil)) = []
listbb (Return (UL (BP x t))) = list x: listbb t
listbb x = error $ "Unexpected item in listbb: " ++ show x

cons x y = return (UL (BP x y))
nil = return (UL Nil)

pair x y = cons x . cons y $ nil

test2 = (==) [[True,True],[True,False],[False,True],[False,False]] $
	map list $ observe 15 $ pair coin coin

dup x = let_ 100 x (\xv -> pair xv xv)
dupn x = let_ 1000 x (\xv -> pair xv xv)


test3 = (==) [[True,True],[False,False]] $
	map list $ observe 15 $ dup coin

dupnot x = let_ 101 x (\x -> pair (notU x) (notU x))
test_dupnot = (==) [[False,False],[True,True]] $
    map list $ observe 45 $ dupnot coin

-- More illustration of differences between let, let_ and bind
testp0 = (==) [[True,True],[True,False],[False,True],[False,False]] $
    map list $ observe 35 $ let x = coin in pair x x
testp1 = (==) [[True,True],[False,False]] $
    map list $ observe 35 $ do x <- coin; pair (return x) (return x)
testp2 = (==) [[True,True],[False,False]] $
    map list $ observe 35 $ let_ 103 coin (\x -> pair x x)

nhead p = p >>= \x -> case x of
		       UL (BP m1 m2) -> m1
		       UL Nil -> mzero
		       x -> error $ "head: unexpected: " ++ show x
ntail p = p >>= \x -> case x of
		       UL (BP m1 m2) -> m2
		       UL Nil -> mzero
		       x -> error $ "tail: unexpected: " ++ show x

nrepeat x = cons x (nrepeat x)
nrepeat_ n x = let_ n x (\y -> cons y (nrepeat_ (n+1) y))

-- test4_ really needs laziness
test4 = (==) [True] $
    map bool $ observe 35 $ nhead (nhead (nrepeat (nrepeat true)))
test4_ = (==) [True] $
    map bool $ observe 75 $ nhead (nhead (nrepeat_ 300 (nrepeat_ 400 true)))


test5 = (==) [[True,True],[True,False],[False,True],[False,False]] $
	map list $ observe 75 $
	nrepeat coin :>>= \l' -> let l = return l' in
				 pair (nhead l) (nhead $ ntail l)

test5_ = (==) [[True,True],[False,False]] $
	map list $ observe 75 $
	nrepeat_ 300 coin :>>= \l' -> let l = return l' in
				      pair (nhead l) (nhead $ ntail l)

-- Infinite list of booleans
a_list x = nil `mplus` (cons x (a_list x))

test7 = (==) [[[],[]],
	      [[True],[True]],
	      [[True,True],[True,True]],
	      [[True,True,True],[True,True,True]],
	      [[True,True,True,True],[True,True,True,True]]] $
	take 5 $ map listbb $ observe 205 $ (dup (a_list coin))

test7' = (==) [[True,True],[False,False]] $
        map list $ observe 205 $ 
        let_ 101 (a_list coin) (\x -> 
	pair (nhead $ ntail $ ntail $ ntail x)
	     (nhead $ ntail $ ntail $ ntail x))

test7'' = (==) [[True,True],[False,False]] $
        map list $ observe 205 $ 
        let_ 101 (a_list coin) (\x -> 
	pair (nhead  x)
	     (nhead x))


test61 = (==) [[True,True]] $
	 map list $ observe 15 $ 
	 let_ 100 undefined (\x -> pair (const true x) (const true x))

test62 = (==) [[True,True]] $
	 map list $ observe 15 $ 
	 let_ 100 (pair undefined undefined) 
		   (\x -> pair (const true x) (const true x))

test66 = (==) [[True,True]] $
	 map list $ observe 35 $ 
	 let_ 102 (pair true undefined) 
		  (\x -> dup (nhead x))

test66div = (==) [[True,True]] $	-- this, of course, diverges
	 map list $ observe 35 $ 
	 let_ 102 (pair undefined true) 
		  (\x -> dup (nhead x))


-- A few basic tests of laziness

testlc1 = (==) [True] $
	  map bool $ observe 25 $ let_ 101 (pair true mzero) (\x -> nhead x)

testlc2 = (==) [True] $
	  map bool $ observe 25 $ let_ 101 (pair true undefined) (\x -> nhead x)

testlc3 = (==) [True] $			-- only one solution is obtained
	  map bool $ observe 25 $ let_ 101 coin (\x -> true)

nnull l = l >>= \x -> case x of 
			     (UL Nil) -> true
			     (UL (BP _ _)) -> false

test8 = (==) [False] $
	map bool $ observe 25 $ nnull (nrepeat_ 300 (undefined))

-- testing nested let

testd2 = (==) [[True,True],[False,False]] $
	 map list $ observe 35 $ 
         let_ 101 (let_ 201 true (\x -> coin)) (\y -> pair y y)

testd21 = (==) [[True,True],[False,False]] $
	  map list $ observe 35 $ 
          let_ 101 (let_ 201 coin id) (\y -> pair y y)

test_dup2 = (==) [[[True,True],[True,True]],[[False,False],[False,False]]] $
	    map listbb $ observe 175 $ dupn (dup coin)

test_dup2' = (==) [[[True,True],[True,True]],[[False,False],[False,False]]] $
	 map listbb $ observe 175 $
	 let dup' m = do x <- m; pair (return x) (return x)
	 in dup' (dup' coin)


test9 = (==) [[[True,True],[True,True]],
	      [[True,False],[True,False]],
	      [[False,True],[False,True]],
	      [[False,False],[False,False]]] $
	map listbb $ observe 75  $
	let_ 101 coin (\x -> 
	let_ 1001 coin (\y -> 
	pair (pair x y) (pair x y)))

-- Testing that components of shared values are shared, too

-- test10 and test10' should give the same results

pairs, pairs2 :: MP U

pairs = pair coin coin
pairs2 = pairs `mplus` pairs

heads xs = let_ 104 xs (\xs -> pair (nhead xs) (nhead xs))

test10 = (==) [[True,True],[False,False]] $
	 map list $ observe 75 $ 
	 heads (pair coin undefined)

heads' xs = let_ 107 xs (\xs -> dup (nhead xs))

test10' = (==) [[True,True],[False,False]] $
	 map list $ observe 75 $ 
	 heads' (pair coin undefined)

test12 = (==) [[True,True],[False,False]] $
	 map list $ observe 75 $ 
	 let_ 101 pairs (\x -> pair (nhead x) (nhead x))

test12' = (==) [[True,True],[False,False]] $
	 map list $ observe 35 $ 
	 let_ 102 pairs (\x -> let_ 103 (nhead x) (\y -> pair y y))

test12'' = (==) [[True,True],[False,False],[True,True],[False,False]] $
	 map list $ observe 75 $ 
	 let_ 101 pairs2 (\x -> pair (nhead x) (nhead x))

-- Again, bind vs let_
test101 = (==) [[True,True],[True,False],[False,True],[False,False]] $
	  map list $ observe 75 $
	  do x <- pair coin undefined
	     pair (nhead (return x)) (nhead (return x))

test102 = (==) [[True,True],[False,False]] $
	  map list $ observe 75 $
	  let_ 101 (pair coin undefined) (\x -> pair (nhead x) (nhead x))


main :: IO ()
main = mapM_ (\ (i,t) -> if t then return () else error ("test " ++ show i)) .
       zip [1..] $
       [test1, test1', test2, test3, test_dupnot,
	testp0, testp1, testp2,
	test4, test4_,
	test5, test5_,
	test7, test7',
	test61, test62, test66,
	testlc1, testlc2, testlc3,
	test8,
	testd2, testd21, test_dup2, test_dup2',
	test9,
	test10, test10', test12, test12', test12'', test101, test102 
	{-- , test11,
	tex1_f1, tex1_f1', tex1_f2, tex1_f2', test_ho, test_ho',
	test_sorted_lazy, test_sorted, test_sorted' -}
       ]


complete l 0 = nil
complete l n = let_ l (complete (10+l) (pred n)) (\t -> cons t t)

test_complete_tree = 
    [ (depth, get_memo_size $ complete 100 depth) | depth <- [1..5]]
 where
 get_memo_size = snd . head . observed
-- Originally:
-- [(1,1),(2,4),(3,11),(4,26),(5,57)]
-- When the demand-rules are optimized, the results are better, linear:
-- [(1,1),(2,4),(3,7),(4,10),(5,13)]