(* Hand-written reference code for the Stencil challenge and the unit tests *) (* The code is based on the problem posed by Takayuki Muranushi. The code below properly handles the edge cases. *) (* Specification Signal processing or finite-element algorithms can often be stated as element-wise computations on shifted arrays. The following is the running example in the challenge. w = a * S[1] a b = a - w + S[-1] w Here |a| is a global input array, |b| is a global output array, and |w| is a working array. The operation |S[n]| shifts the argument vector by |n| (to the left, if |n| is positive). All arithmetic operations on vectors (addition, multiplication, subtraction) are assumed elementwise. Global arrays are shared or distributed throughout a supercomputer; reading or writing them requires accessing off-the-chip memory or the inter-node communication. *) let filler = 0.;; (* The filler for the values out of the array: *) (* halo values *) let spec_1step : int -> float array -> float array -> unit = fun n a b -> for i=0 to n-1 do let a j = if j >= 0 && j < n then a.(j) else filler in let w j = a j *. a (j+1) in b.(i) <- a i -. w i +. w (i-1) done ;; (* sample arrays *) let a1 = Array.init 10 (fun i -> 1. +. 0.1 *. float_of_int i);; let t1 = let b1 = Array.make (Array.length a1) (-100.0) in spec_1step (Array.length a1) a1 b1; b1 ;; (* Check the result *) let true = t1 = [|-0.100000000000000089; 0.880000000000000115; 0.96; 1.04000000000000026; 1.12; 1.19999999999999929; 1.2799999999999998; 1.36000000000000032; 1.44000000000000061; 5.32|] ;; (* Repeat spec_1step twice: apply spec_1step to the input array a producing b. Then apply spec_1step to b producing c. *) let t20 = let n = Array.length a1 in let b1 = Array.make n (-100.0) in spec_1step n a1 b1; let c1 = Array.make n (-100.0) in spec_1step n b1 c1; c1 ;; (* Unit test *) let true = t20 = [|-0.0119999999999999968; -0.052800000000000083; 0.806399999999999895; 0.873599999999999932; 0.940800000000001191; 1.00799999999999979; 1.0751999999999986; 1.14239999999999919; -4.26240000000000219; 12.9808000000000039|] ;; (* Hand-written optimized code for the two-step computation w1 = a * S[1] a wm = a - w1 + S[-1] w1 w2 = wm * S[1] wm b = wm - w2 + S[-1] w2 The challenge is to produce the code automatically, by applying the stencil optimization to the above specification. *) let optimized_2step n a b = let a_m2 = ref filler in let a_m1 = ref filler in let a_0 = ref a.(0) in let a_1 = ref a.(1) in let a_2 = ref a.(2) in let w1_m2 = ref (!a_m2 *. !a_m1) in let w1_m1 = ref (!a_m1 *. !a_0) in let w1_0 = ref (!a_0 *. !a_1) in let w1_1 = ref (!a_1 *. !a_2) in let wm_m1 = ref (!a_m1 -. !w1_m1 +. !w1_m2) in let wm_0 = ref (!a_0 -. !w1_0 +. !w1_m1) in let wm_1 = ref (!a_1 -. !w1_1 +. !w1_0) in let w2_m1 = ref (!wm_m1 *. !wm_0) in let w2_0 = ref (!wm_0 *. !wm_1) in b.(0) <- !wm_0 -. !w2_0 +. !w2_m1; a_1 := !a_2; a_2 := a.(3); w1_0 := !w1_1; w1_1 := (!a_1 *. !a_2); wm_0 := !wm_1; wm_1 := (!a_1 -. !w1_1 +. !w1_0); w2_m1 := !w2_0; w2_0 := (!wm_0 *. !wm_1); b.(1) <- !wm_0 -. !w2_0 +. !w2_m1; for i = 2 to (n-3) do a_1 := !a_2; a_2 := a.(i+2); w1_0 := !w1_1; w1_1 := (!a_1 *. !a_2); wm_0 := !wm_1; wm_1 := (!a_1 -. !w1_1 +. !w1_0); w2_m1 := !w2_0; w2_0 := (!wm_0 *. !wm_1); b.(i) <- !wm_0 -. !w2_0 +. !w2_m1; done; a_1 := !a_2; a_2 := filler; w1_0 := !w1_1; w1_1 := (!a_1 *. !a_2); wm_0 := !wm_1; wm_1 := (!a_1 -. !w1_1 +. !w1_0); w2_m1 := !w2_0; w2_0 := (!wm_0 *. !wm_1); b.(n-2) <- !wm_0 -. !w2_0 +. !w2_m1; a_1 := !a_2; a_2 := filler; w1_0 := !w1_1; w1_1 := (!a_1 *. !a_2); wm_0 := !wm_1; wm_1 := (!a_1 -. !w1_1 +. !w1_0); w2_m1 := !w2_0; w2_0 := (!wm_0 *. !wm_1); b.(n-1) <- !wm_0 -. !w2_0 +. !w2_m1; ;; let t25 = let b1 = Array.make (Array.length a1) (-100.0) in optimized_2step (Array.length a1) a1 b1; b1 ;; let true = t20 = t25;; print_endline "\nAll done\n";;