(* Gaussian Elimination for floating-point matrices *) (* Generating Gaussian Elimination (GE) codes has been the subject of the paper Jacques Carette and Oleg Kiselyov, Multi-stage programming with functors and monads: Eliminating abstraction overhead from generic code. Science of Computer Programming, 2008, N9. doi:10.1016/j.scico.2008.09.008 We attempt here to build a GE generation framework in direct style, without either monads or functors. We turn the unstaged GE code in the file ge-unstaged.ml into a code generator. The generator should produce the code like that in ge-unstaged.ml. We do not aim here at the full generality of the Carette's et al paper. We deal with only a couple of aspects: the choice of pivoting, whether to compute the determinant, and the choice of the output (e.g., whether to output the determinant or the rank or do not). Nevertheless, we have achieved desiderata of the previous approach: -- the generator can build functions of different types: if we wish to receive the U factor, the determinant and the rank, the result type of the generated function is a triple of a float matrix, a float and an integer. If we are interested only in the U factor, the generated code returns only the value of the type float matrix. To achieve this polymorphism, we use a simple form of type-indexed values, reminiscent of Zhe Yang's encodings. -- The choice of whether to compute the determinant or not is done at the generation time, based on the desired output and on the details of the algorithm. For example, determinant should be computed if it is part of the output. The determinant should also be computed when doing GE on an integer matrix, since the magnitude of the determinant is used in row reductions. We don't handle integer matrices at the moment, but we easily can. We emphasize that some aspects of the generated GE algorithm are _functions_ of some other aspects; it is the generator itself (rather than the user) that evaluates this `function' -- which incorporates domain-specific knowledge. -- If the generator decided that determinant should not be computed, the generated code contains no traces of the determinant (save for a few extra "();" statements that compile to no code). The generated code does not even contain declarations for the variables that accumulate the sign and the magnitude of the determinant. We emphasize that the flexibility of the generator (the ability to add the computation of the determinant) comes at no expense of the performance of the generated code. *) (* ------------------------------------------------------------------------ *) (* A bit of scaffolding *) open Delimcc;; (* the let-insertion combinator *) (* Given a code value e (which is a general future-stage _computation_), the result of (genlet p e) is a code value that is a future-stage _value_. At the point where the prompt p is set we insert a future stage `let' binding `e' to a fresh variable. The result of (genlet p e) is then the code that contains exactly that variable. *) let genlet p e = shift p (fun k -> ..)>.);; (* ------------------------------------------------------------------------ *) (* Aspects of GE *) (* Determinant aspect *) type 'c det = {det_computes : bool; (* true if det is being computed *) det_flip_sign : ('c,unit) code; det_accum : ('c,float) code -> ('c,unit) code; det_set_zero : ('c,unit) code; det_fin : unit -> ('c,float) code} ;; (* The instance of the aspect that computes no determinant *) let make_det_nodet p = {det_computes = false; det_flip_sign = .<()>.; det_accum = (fun _ -> .<()>.); det_set_zero = .<()>.; det_fin = (fun () -> failwith "Determinant was not computed") };; (* This instance of the aspect does compute the determinant *) let make_det_compute_det p = let det_sign = genlet p .. in (* Accumulate sign and magnitude *) let det_magn = genlet p .. in (* of the determinant *) {det_computes = true; det_flip_sign = .<.~det_sign := - !(.~det_sign)>.; det_accum = (fun x -> .<.~det_magn := !(.~det_magn) *. .~x>.); det_set_zero = .<.~det_magn := 0.0>.; det_fin = (fun () -> ..) };; type 'a container2dfromvector = {arr:('a array); n:int; m:int} ;; (* Environment of the generator *) type ('c,'result) env = {env_a : ('c,float array) code; env_n : ('c,int) code; (* number of rows *) env_m : ('c,int) code; (* number of columns *) env_r : ('c,int ref) code; (* rank, used internally *) env_p : ('c, 'result) code Delimcc.prompt; env_pivot : ('c,'result) env -> (* See gfind_pivot_row *) ('c, int) code -> ('c, int) code -> ('c, ((int * int) * float) option) code; env_det : 'c det };; (* Choice of the output *) type ('c,'result,'out) outchoice = {oc_init : ('c,'result) env -> ('c,'result) env; oc_fin : ('c,'result) env -> ('c,'out) code; };; let out_U_factor = {oc_init = (fun x -> x); oc_fin = (fun env -> .<{arr = .~(env.env_a); n= .~(env.env_n); m= .~(env.env_m)}>.)};; let out_rank = {oc_init = (fun x -> x); oc_fin = (fun env -> ..)};; let out_det = {oc_init = (fun env -> if env.env_det.det_computes then env (* det is already being computed *) else {env with env_det = make_det_compute_det env.env_p}); oc_fin = (fun env -> env.env_det.det_fin ())};; let out_tuple t1 t2 = {oc_init = (fun env -> t2.oc_init (t1.oc_init env)); oc_fin = (fun env -> .<(.~(t1.oc_fin env), .~(t2.oc_fin env))>.)};; let out_triple t1 t2 t3 = {oc_init = (fun env -> t3.oc_init (t2.oc_init (t1.oc_init env))); oc_fin = (fun env -> .<(.~(t1.oc_fin env), .~(t2.oc_fin env), .~(t3.oc_fin env))>.)};; (* ------------------------------------------------------------------------ *) (* Generating GE *) (* The following are generating extensions of the corresponding functions in ge-unstaged.ml We merely wrap the body of the functions in brackets, and add escapes. *) let gswap a ic jc = (* Swap two elements of a vector *) ..;; (* Swap the row r with the row i in the non-yet examined portion of the matrix, rectangular block (r,c)-(n,m). We do not touch the elements to the left of the column c because they are all zeros. Because we know the layout of the matrix, we can avoid 2D index computations. *) let gswap_rows env (r,c) i = let a = env.env_a and n = env.env_n and m = env.env_m in .. ..) done>.;; (* In a (n,m) matrix, swap the column c with the column j. Because we know the layout of the matrix, we can avoid 2D index computations. *) let gswap_cols env c j = let a = env.env_a and n = env.env_n and m = env.env_m in .. ..); loop (col_c + .~m) (col_j + .~m) end in loop .~c .~j>. ;; (* Row pivoting *) (* Search the non-yet examined portion of the cmatrix, rectangular *) (* block (r,c)-(n,m), for the element with the max abs value. *) (* For row pivoting, we only search the part of the c-th column, from *) (* row r down. *) (* Remember the value of that element and its location. *) let gfind_pivot_row env r cc = let a = env.env_a and n = env.env_n and m = env.env_m in . if abs_float oldpivot < abs_float cur then pivot := Some ((i,c), cur) | None -> pivot := Some ((i,c),cur) done; !pivot end>.;; (* The generator itself. Its structure is very close to the structure of the code it generates, see the function ge in ge-unstaged.ml *) let gge outchoice = . let r = ref 0 in (* current row index, 0-based *) let c = ref 0 in (* current column index, 0-based *) let a = Array.copy (a_orig.arr) in (* to avoid clobbering A, save it *) let m = a_orig.m in (* the number of columns *) let n = a_orig.n in (* the number of rows *) .~(let p = new_prompt () in push_prompt p (fun () -> let env = {env_p = p; env_a = ..; env_n = ..; env_m = ..; env_r = ..; env_pivot = gfind_pivot_row; env_det = make_det_nodet p} in let env = outchoice.oc_init env in .. ..) in (* if we found a pivot, swap the current column with the pivot column, and swap the current row with the pivot row. After the swap, a[r,c] element of the matrix is the pivot. Swapping two rows or two columns changes the sign of the det *) let piv_val = (match pivot with | Some ((piv_r, piv_c),piv_val) -> if piv_c <> !c then begin .~(gswap_cols env .. ..); .~(env.env_det.det_flip_sign) (* flip the sign of the det *) end; if piv_r <> !r then begin .~(gswap_rows env (..,..) ..); .~(env.env_det.det_flip_sign) (* flip the sign of the det *) end; Some piv_val | None -> None) in (* now do the row-reduction over the (r,c)-(n,m) block *) (match piv_val with | Some a_rc -> begin for ii = !r+1 to n-1 do let cur = a.(ii*m + !c) in if not (cur = 0.0) then begin for j = !c+1 to m-1 do a.(ii*m+j) <- a.(ii*m+j) -. a.(!r*m+j) *. (cur /. a_rc) done; a.(ii*m+ !c) <- 0.0 end; done; .~(env.env_det.det_accum ..); r := !r + 1 (* advance the rank only if pivot > 0*) end | None -> .~(env.env_det.det_set_zero)); c := !c + 1 done; (* Final result *) .~(outchoice.oc_fin env) end>.)) >.;; let test_UDR = gge (out_triple out_U_factor out_det out_rank);; let test_U = gge (out_U_factor);; (* The output shows no signs of computing the determinant. There is no even a declaration for determinant variables! The result type is just a matrix rather than a triple. *) (* val test_U : ('a, float container2dfromvector -> float container2dfromvector) code = . let r_2 = (ref 0) in let c_3 = (ref 0) in let a_4 = (Array.copy a_orig_1.arr) in let m_5 = a_orig_1.m in let n_6 = a_orig_1.n in while (((! c_3) < m_5) && ((! r_2) < n_6)) do let pivot_7 = let c_8 = (! c_3) in let pivot_9 = (ref (None)) in for i_10 = (! r_2) to (n_6 - 1) do let cur_11 = a_4.((i_10 * m_5) + c_8) in if (not (cur_11 = 0.0)) then (match (! pivot_9) with | Some (_, oldpivot_12) -> if ((abs_float oldpivot_12) < (abs_float cur_11)) then (pivot_9 := (Some ((i_10, c_8), cur_11))) | None -> (pivot_9 := (Some ((i_10, c_8), cur_11)))) done; (! pivot_9) in let piv_val_13 = (match pivot_7 with | Some ((piv_r_14, piv_c_15), piv_val_16) -> if (piv_c_15 <> (! c_3)) then begin let end_vector_23 = (n_6 * m_5) in let rec loop_24 = fun col_c_25 -> fun col_j_26 -> if (col_j_26 < end_vector_23) then begin let i_27 = col_c_25 and j_28 = col_j_26 in let t_29 = a_4.(i_27) in a_4.(i_27) <- a_4.(j_28); a_4.(j_28) <- t_29; (loop_24 (col_c_25 + m_5) (col_j_26 + m_5)) end in (loop_24 (! c_3) piv_c_15); () end; if (piv_r_14 <> (! r_2)) then begin let row_r_17 = ((! r_2) * m_5) in let row_i_18 = (piv_r_14 * m_5) in for k_19 = (! c_3) to (m_5 - 1) do let i_20 = (row_r_17 + k_19) and j_21 = (row_i_18 + k_19) in let t_22 = a_4.(i_20) in a_4.(i_20) <- a_4.(j_21); a_4.(j_21) <- t_22 done; () end; (Some (piv_val_16)) | None -> (None)) in (match piv_val_13 with | Some (a_rc_30) -> for ii_31 = ((! r_2) + 1) to (n_6 - 1) do let cur_32 = a_4.((ii_31 * m_5) + (! c_3)) in if (not (cur_32 = 0.0)) then begin for j_33 = ((! c_3) + 1) to (m_5 - 1) do a_4.((ii_31 * m_5) + j_33) <- (a_4.((ii_31 * m_5) + j_33) -. (a_4.(((! r_2) * m_5) + j_33) *. (cur_32 /. a_rc_30))) done; a_4.((ii_31 * m_5) + (! c_3)) <- 0.0 end done; (); (r_2 := ((! r_2) + 1)) | None -> ()); (c_3 := ((! c_3) + 1)) done; {arr = a_4; n = n_6; m = m_5}>. *) (* tests *) let fv0 = {arr=Array.make 1 1.0; n=1; m=1} let fv1 = {arr=Array.of_list [ 1. ; 2.; 3.; 4. ; 13.; 5.; (-1.); 3.; 0.]; n=3; m=3} let fv2 = {arr=Array.of_list [ 1. ; 2.; 3.; 0.; 4. ; 13.; 5.; 0.; (-1.); 3.; 0.; 0.;]; n=3; m=4} let fv3 = {arr=Array.of_list [ 1. ; 2.; 3.; 4. ; 13.; 5.; (-1.); 3.; 0.; 0. ; 0.; 0.;]; n=4; m=3} let fv4 = {arr=Array.of_list [ 0.; 2.; 3.; 0.; 10.; 5.; 0.; 3.; 0.]; n=3; m=3} ;; let ge_UDR = .! test_UDR;; assert (ge_UDR fv0 = ({arr = [|1.0|]; n = 1; m = 1}, 1., 1) );; assert (ge_UDR fv1 = ({arr = [|4.; 13.; 5.; 0.; 6.25; 1.25; 0.; 0.; 2.|]; n=3; m=3}, 50., 3) );; assert (ge_UDR fv2 = ({arr = [|4.; 13.; 5.; 0.; 0.; 6.25; 1.25; 0.; 0.; 0.; 2.; 0.|]; n=3; m=4}, 50., 3) );; assert (ge_UDR fv3 = ({arr = [|4.; 13.; 5.; 0.; 6.25; 1.25; 0.; 0.; 2.; 0.; 0.; 0.|]; n=4; m=3}, 50., 3) );; assert (ge_UDR fv4 = ({arr = [|0.; 10.; 5.; 0.; 0.; 2.; 0.; 0.; 0.|]; n=3; m=3}, 0., 2) );;