(* Code accompanying the paper Relating FFTW and Split-Radix Oleg Kiselyov and Walid Taha Embedded Software and Systems, First International Conference, ICESS 2004, Hangzhou, China, December 9-10, 2004, Revised Selected Papers LNCS 3605, pp. 488-493 http://dx.doi.org/10.1007/11535409_71 This self-contained MetaOCaml code derives radix-2 FFT kernels with the number of additions and multiplications *exactly* matching those in the kernels produced by FFTW. A slight change in the generation algorithm produces mixed-radix FFT kernels. Copyright, Oleg Kiselyov and Walid Taha, 2004. *) (* An attempt at a better FFT Use parsing in frequency rather than in time Formulas: The forward transform from an N-sample sequence x[j], j=0..N-1 to an N-sample sequence xf[k], k=0..N-1 is (1) xf[k] = SUM{ x[j] * w^(j*k), j=0..N-1 }, k=0..N-1 where w_N = exp(-2*PI*I/N) is the N-th primitive root of unity. For the inverse transform, w_N=exp(+2*PI*I/N) (like FFTW, we omit the scaling factor for the inverse transform). We assume that N is an even number, N=2*N2 We re-write (1) as two equations: (2a) xf[2k] = SUM{ x[j] * w^(2*j*k), j=0..N-1 }, k=0..N2-1 = SUM{ x[j] * w^(2*j*k), j=0..N2-1} + SUM{ x[j+N2] * w^(2*(j+N2)*k), j=0..N2-1} = SUM{ x[j] * w2^(j*k), j=0..N2-1} + SUM{ x[j+N2] * w2^(j*k), j=0..N2-1} w^(2*N2*k) = w^(N*k) = 1^k = 1 Here w2 = w^2 = w_N2 = FFT{ x[j] + x[j+N2], j=0..N2-1 } (2b) xf[2k+1] = SUM{ x[j] * w^(2*j*k+j), j=0..N-1 }, k=0..N2-1 = SUM{ x[j] * w^(2*j*k+j), j=0..N2-1 } + SUM{ x[j+N2] * w^(2*(j+N2)*k+(j+N2)), j=0..N2-1 } = SUM{ x[j]*w^j * w2^(j*k), j=0..N2-1 } + SUM{-x[j+N2]*w^j * w2^(j*k), j=0..N2-1 } w^N2 = exp(-PI*I) = -1 (the same for the inverse FFT) = FFT{ (x[j] - x[j+N2])*w^j, j=0..N2-1 } So, an N-point FFT reduces to two N/2-point FFTs. \$Id: fft-neo.ml,v 1.13 2004/08/31 07:52:36 oleg Exp \$ *) (* ------------ unstaged nonmonadic list-based representation ------------ *) let pi = 4.0 *. atan(1.0) type dir = Forward | Inverse let w dir n j = (* exp(-2PI dir/N)^j *) let theta = ((float_of_int ((if dir = Forward then -2 else 2)* j)) *. pi) /. (float_of_int n) in ((cos theta), (sin theta)) (* complex arithm *) let add_u ((r1,i1), (r2, i2)) = ((r1 +. r2), (i1 +. i2)) let sub_u ((r1,i1), (r2, i2)) = ((r1 -. r2), (i1 -. i2)) let mult_u ((r1, i1), (r2, i2)) = let rp = (r1 *. r2) -. (i1 *. i2) in let ip = (r1 *. i2) +. (r2 *. i1) in (rp, ip) (* multiplex evens and odds. They are of the same size *) let rec merge_u = function ([], []) -> [] | (y0::y0rest,y1::y1rest) -> y0::y1::(merge_u (y0rest,y1rest)) let cleave_list l = (* split a non-empty list l into two equal halves *) let rec take_drop n l acc = if n = 0 then (List.rev acc, l) else take_drop (n-1) (List.tl l) ((List.hd l)::acc) in take_drop ((List.length l) / 2) l [] (* given list l, size n, return (l1+l2, (li-l2)*w^j) see the title comments *) let rec split_u dir l = let n = List.length l in let rec comb j = function (x::xs, y::ys) -> let (a,b) = comb (j+1) (xs,ys) in ((add_u (x,y))::a, (mult_u ((sub_u (x,y)), w dir n j))::b) | _ -> ([],[]) in comb 0 (cleave_list l) let rec fft_u dir (l) = if (List.length l = 1) then l else let (p0,p1) = split_u dir l in let y0 = fft_u dir p0 in let y1 = fft_u dir p1 in merge_u (y0,y1) let gen_u = fft_u (* List of size n with (1,0) at position l *) let rec impulse n l = if n = 0 then [] else let t = if l = 0 then (1.0,0.0) else (0.0,0.0) in t::(impulse (n-1) (l-1)) let rec unpair = function ((x,y)::tail) -> x::y::(unpair tail) | [] -> [] (* Determine the max abs difference (the inf-norm) of two lists of pairs. *) let inf_norm l1 l2 = let rec scan pos loc diff = function ((x::tailx),(y::taily)) -> let curr_diff = abs_float (x -. y) in if curr_diff >= diff then scan (succ pos) pos curr_diff (tailx,taily) else scan (succ pos) loc diff (tailx,taily) | ([],[]) -> (loc,diff) in scan 0 (-1) 0.0 (l1,l2) (* Tests *) let () = assert ((7,0.0) = inf_norm (List.map (fun x -> 4.0 *. x) (unpair (impulse 4 1 ))) (unpair ( fft_u Inverse (fft_u Forward (impulse 4 1)) )) ) let () = assert ((15,0.0) = inf_norm (List.map (fun x -> 8.0 *. x) (unpair (impulse 8 1 ))) (unpair ( fft_u Inverse (fft_u Forward (impulse 8 1)) )) ) let () = assert ((31,0.0) = inf_norm (List.map (fun x -> 16.0 *. x) (unpair (impulse 16 1 ))) (unpair ( fft_u Inverse (fft_u Forward (impulse 16 1)) )) ) (* --- unstaged monadic let-binding list-based representation ------- *) (* monad *) let (ret,bind, y_m, run) = let ret a s k = k s a in let bind a f s k = a s (fun s' x -> f x s' k) in (* Y combinator *) let rec y_m f = f (fun x s k -> y_m f x s k) in (* convert code list to list code *) let lift l = List.fold_right (fun (x,y) z -> (x, y) :: z) l [] in let run f = f [] (fun s x -> x) in (ret,bind, y_m, run) (* Monadic split *) let rec split_mu dir l = let n = List.length l in let rec comb j = function (x::xs, y::ys) -> bind (comb (j+1) (xs,ys)) (fun (a,b) -> ret ((add_u (x,y))::a, (mult_u ((sub_u (x,y)), w dir n j))::b)) | _ -> ret ([],[]) in comb 0 (cleave_list l) let fft_mu dir f l = if List.length l = 1 then ret l else bind (split_mu dir l) (fun (p0,p1) -> bind (f p0) (fun y0 -> bind (f p1) (fun y1 -> ret (merge_u (y0,y1))))) let gen_mu dir nums = run ((y_m (fft_mu dir)) nums) (* Tests *) let () = assert ((7,0.0) = inf_norm (List.map (fun x -> 4.0 *. x) (unpair (impulse 4 1 ))) (unpair ( gen_mu Inverse (gen_mu Forward (impulse 4 1)) )) ) let () = assert ((15,0.0) = inf_norm (List.map (fun x -> 8.0 *. x) (unpair (impulse 8 1 ))) (unpair ( gen_mu Inverse (gen_mu Forward (impulse 8 1)) )) ) let () = assert ((31,0.0) = inf_norm (List.map (fun x -> 16.0 *. x) (unpair (impulse 16 1 ))) (unpair ( gen_mu Inverse (gen_mu Forward (impulse 16 1)) )) ) (* --- staged monadic let-binding array-based representation ----*) (* Monadic library. See the EMSOFT paper *) let (retS,retN,bind,y_sm,run) = let retS a = fun s k -> k s a in let retN a = fun s k -> ..)>. in let bind a f = fun s k -> a s (fun s' b -> f b s' k) in let rec y_sm f = f (fun x s k -> y_sm f x s k) in let run f (abstract_simple, concretize) size nums = (* Write the results into the array *) let array_write l arr = let rec lfta l m = match l with [] -> arr | (c::tail) -> let sm = m+1 and ssm = m+2 in let (x,y) = concretize c in .<((.~arr).(m) <- .~x; (.~arr).(sm) <- .~y; (.~(lfta tail (ssm))))>. in lfta l 0 in (* Read from an array *) let array_read arr n = let rec gl m l = if m = n then retS l else let sm = m+1 in bind (abstract_simple (.<(.~arr).(m)>., .<(.~arr).(sm)>.)) (fun c -> gl (m+2) (l @ [c])) in gl 0 [] in let run_basic m = m [] (fun s x -> array_write x nums) in run_basic (bind (array_read nums size) f) in (retS,retN,bind,y_sm,run) (* Standard auxiliary monadic operators *) let liftM f = fun x -> retS (f x) let compM a b = fun x -> bind (b x) (fun nx -> a nx) let liftcM op (x,y) = bind (op x) (fun nx -> bind (op y) (fun ny -> retS (nx,ny))) let w_s dir n j = let fj = if (dir = Forward) then (float_of_int (-2 * j)) else (float_of_int (2 * j)) in let fn = (float_of_int n) in let theta = (fj *. pi) /. fn in let ct = cos theta in let st = sin theta in (.., ..) let add_s ((r1,i1), (r2, i2)) = ((.<.~r1 +. .~r2>.), (.<.~i1 +. .~i2>.)) let sub_s ((r1,i1), (r2, i2)) = ((.<.~r1 -. .~r2>.), (.<.~i1 -. .~i2>.)) let mult_s ((r1, i1), (r2, i2)) = let rp = .<(.~r1 *. .~r2) -. (.~i1 *. .~i2)>. in let ip = .<(.~r1 *. .~i2) +. (.~r2 *. .~i1)>. in (rp, ip) (* Staged Monadic split *) let split_sm dir l = let n = List.length l in let rec comb j = function (x::xs, y::ys) -> bind (comb (j+1) (xs,ys)) (fun (a,b) -> retS ((add_s (x,y))::a, (mult_s ((sub_s (x,y)), w_s dir n j))::b)) | _ -> retS ([],[]) in comb 0 (cleave_list l) let fft_sm dir f l = if List.length l = 1 then ret l else bind (split_sm dir l) (fun (p0,p1) -> bind (f p0) (fun y0 -> bind (f p1) (fun y1 -> retS (merge_u (y0,y1))))) let gen_marr dir size nums = run (y_sm (fft_sm dir)) ((liftcM retN),fun x -> x) size nums (* To see the code . .~(gen_marr Forward (4 * 2) ..)>. *) let test_marr dir lst = let n = List.length lst in let arr = Array.make (n * 2) 0.0 in let _ = List.fold_left (fun n (a,b) -> (arr.(n) <- a; arr.(n+1) <-b; n+2)) 0 lst in let rec tolist arr i = if i = Array.length arr then [] else (arr.(i),arr.(i+1))::(tolist arr (i+2)) in let tranc = . .~(gen_marr dir (n * 2) ..)>. in let tran = (.!tranc) (arr) in tolist tran 0 (* Tests *) let () = assert ((7,0.0) = inf_norm (List.map (fun x -> 4.0 *. x) (unpair (impulse 4 1 ))) (unpair (test_marr Inverse (test_marr Forward (impulse 4 1))) )) let () = assert ((10, 5.11946041115152184e-12) = inf_norm (List.map (fun x -> 8.0 *. x) (unpair (impulse 8 1 ))) (unpair (test_marr Inverse (test_marr Forward (impulse 8 1))) )) let () = assert ((18, 7.34878824459883617e-12) = inf_norm (List.map (fun x -> 16.0 *. x) (unpair (impulse 16 1 ))) (unpair (test_marr Inverse (test_marr Forward (impulse 16 1))) )) (* --- staged monadic let-binding array-based representation ----*) (* --- Abstract interpretation ----*) (* We note that FFT is a linear, and a uniform transform. Therefore, all complex expressions have a grammar * ``` + Thus the abstraction domain is a pair: known_complex_factor, unknown_piece_of_code Unknown pieces of complex code are represented as pairs of real code. Furthermore, in FFT, all factors are roots of unity. So, we can represent them as pairs of integers (j,n) where abs(j) < n A pair (j,n) represents exp(2PI*I j/n) When performing additions of two pieces of code, we may need to do some let-binding. *) (* Rational arithmetics *) type rat = int * int (* j/n *) let rat_one = (0,1) (* type newfloats_aa = Code of rat * (float) code * (float) code *) (* *) (* The abstraction domain *) type 'a newfloats_aa = Code of rat * ('a,float) code * ('a,float) code (* *) let rec gcd a b = if a > b then gcd b a else if a = 1 then 1 else if a = 0 then b else gcd (b-a) a let rec rationalize (j,n) = if j = 0 then rat_one else if j < 0 then rationalize (n+j,n) else if j >= n then rationalize (j-n,n) else let k = gcd j n in if k = 1 then (j,n) else (j/k, n/k) let add_rat (j1,n1) (j2,n2) = rationalize (j1*n2+j2*n1, n1*n2) let sub_rat (j1,n1) (j2,n2) = rationalize (j1*n2-j2*n1, n1*n2) let min_rat ((j1,n1) as r1) ((j2,n2) as r2) = if j1*n2 > j2*n1 then r2 else r1 let str_rat (j,n) = "("^ (string_of_int j) ^","^ (string_of_int n) ^")" (* rotate rat by 90 or 180 degrees to bring it to the 1 quadrant Return the rotation factor and the new rat. Their sum is the original rat. *) let rec rotate_to_QI ((j,n) as f) = if 2*j >= n then let (rf,newf) = rotate_to_QI (sub_rat f (1,2)) in (add_rat rf (1,2),newf) else if 4*j >= n then ((1,4),sub_rat f (1,4)) else (rat_one,f) (* This is a semi-concretization function that forces the multiplication up to trivail factors, +/- 1, +/- I *) let force_mult (Code (f,rx,ix)) = let (newf,(j,n)) = rotate_to_QI f in let theta = float_of_int (2*j) *. pi /. float_of_int n in let c = cos theta in let s = sin theta in let () = assert (c > 0.0 && s > 0.0) in bind (retN ..) (fun rcs -> bind (retN ..) (fun ics -> ret (Code (newf,rcs,ics)))) let w_aa dir n j = match dir with Forward -> rationalize (-j,n) | Inverse -> rationalize (j,n) (* Multiply an abstract value by a known factor: a root of unity *) let mult_aa (Code (f,re,im)) ru = Code (add_rat f ru, re,im) let neg_aa c = mult_aa c (1,2) (* second root of unity of order 2 is -1 *) (* Perform (rx,ix) +/- _complex_fy*(ry,iy) and set the resulting factor to fx Return a pair: fx* (x + complex_fy*y), fx*(x - complex_fy*y) This is where many simplifications happen *) let rec do_linear fx rx ix ((j,n) as fy) ry iy = if 2*j >= n then (* fy in III and IV quadrants: reflect *) bind (do_linear fx rx ix (sub_rat fy (1,2)) ry iy) (fun (p,m) -> retS (m,p)) else match fy with (* x + y *) (0,1) -> bind (retN .<.~rx +. .~ry>.) (fun re -> bind (retN .<.~ix +. .~iy>.) (fun im -> bind (retN .<.~rx -. .~ry>.) (fun re1 -> bind (retN .<.~ix -. .~iy>.) (fun im1 -> ret (Code (fx,re,im),Code (fx,re1,im1)))))) |(1,4) -> (* x + I*y *) bind (retN .<.~rx -. .~iy>.) (fun re -> bind (retN .<.~ix +. .~ry>.) (fun im -> bind (retN .<.~rx +. .~iy>.) (fun re1 -> bind (retN .<.~ix -. .~ry>.) (fun im1 -> ret (Code (fx,re,im),Code (fx,re1,im1) ))))) |(j,8) -> (* x + (cos pi/4 + I*sin pi/4)*y *) let cs = sin (pi /. 4.0) in bind (retN ..) (fun rcs -> bind (retN ..) (fun ics -> do_linear fx rx ix (rationalize (j-1,8)) rcs ics)) |(j,n) -> bind (force_mult (Code (fy,ry,iy))) (fun (Code (fy,ry,iy)) -> do_linear fx rx ix fy ry iy) (* Decide, if multiplication by f is trivial. See the table above for the execution of trivial multiplication *) let rec trivial_mult ((j,n) as f) = if 2*j >= n then (* f in III and IV quadrants: reflect *) trivial_mult (rationalize (j-n/2,n)) else match f with (0,1) -> true (* multiplication by 1 *) |(1,4) -> true (* multiplication by I *) | _ -> false (* This one is monadic: it may need to let-bind something Note: the add_sub_aa function is not symmetric! We need to compute fx* +/- fy* There are two ways to proceed: (1) let e1 = fx* and e2 = fy* in e1 +/- e2 (2) fx*( +/- (fy/fx)* ) The second way is better when either the multiplication by fx is trivial or by (fy/fx) is trivial Otherwise, the first way is better The second method is performed by do_linear *) let rec add_sub_aa ((Code (fx,rx,ix)) as x) ((Code (fy,ry,iy)) as y) = let fyx = (sub_rat fy fx) in if (trivial_mult fx) || (trivial_mult fyx) then do_linear fx rx ix fyx ry iy else bind (force_mult x) (fun x -> bind (force_mult y) (fun y -> add_sub_aa x y)) (* Staged Monadic split *) let split_aa dir l = (* let () = print_string "\nBefore split: "; List.iter (fun (Code (fx,_,_)) ->Printf.printf "%s " (str_rat fx)) l in *) let n = List.length l in let rec comb j = function (x::xs, y::ys) -> bind (add_sub_aa x y) (fun (xpy,xmy) -> bind (comb (j+1) (xs,ys)) (fun (a,b) -> retS (xpy::a, (mult_aa xmy (w_aa dir n j))::b))) | _ -> retS ([],[]) in comb 0 (cleave_list l) let fft_aa dir f l = if List.length l = 1 then ret l else bind (split_aa dir l) (fun (p0,p1) -> bind (f p0) (fun y0 -> bind (f p1) (fun y1 -> retS (merge_u (y0,y1))))) let gen_aa dir size nums = run (y_sm (fft_aa dir)) ((fun p -> (bind (liftcM retN p) (fun (x,y) -> retS (Code (rat_one, x, y))))), (* here we assert c is a simple piece of code!*) (* And it is because the last stage of *) (* FFT is only a+b and a-b *) fun (Code ((0,1),x,y)) -> (x,y)) size nums (* To see the code . .~(gen_aa Forward (4 * 2) ..)>.;; *) let test_aa dir lst = let n = List.length lst in let arr = Array.make (n * 2) 0.0 in let _ = List.fold_left (fun n (a,b) -> (arr.(n) <- a; arr.(n+1) <-b; n+2)) 0 lst in let rec tolist arr i = if i = Array.length arr then [] else (arr.(i),arr.(i+1))::(tolist arr (i+2)) in let tranc = . .~(gen_aa dir (n * 2) ..)>. in let tran = (.!tranc) (arr) in tolist tran 0 (* Tests *) let () = assert ((7,0.0) = inf_norm (List.map (fun x -> 4.0 *. x) (unpair (impulse 4 1 ))) (unpair (test_aa Inverse (test_aa Forward (impulse 4 1))) )) let () = assert ((10, 5.11946041115152184e-12) = inf_norm (List.map (fun x -> 8.0 *. x) (unpair (impulse 8 1 ))) (unpair (test_aa Inverse (test_aa Forward (impulse 8 1))) )) let () = assert ((18, 9.90851845017459709e-12) = inf_norm (List.map (fun x -> 16.0 *. x) (unpair (impulse 16 1 ))) (unpair (test_aa Inverse (test_aa Forward (impulse 16 1))) )) (* Sample of the genereated code: . .~(gen_aa Forward (16 * 2) ..)>.;; - : ('a, float array -> float array) code = . let x1_3601 = (x_3600).(0) in let y1_3602 = (x_3600).(1) in let x1_3603 = (x_3600).(2) in let y1_3604 = (x_3600).(3) in let x1_3605 = (x_3600).(4) in let y1_3606 = (x_3600).(5) in let x1_3607 = (x_3600).(6) in let y1_3608 = (x_3600).(7) in let x1_3609 = (x_3600).(8) in let y1_3610 = (x_3600).(9) in let x1_3611 = (x_3600).(10) in let y1_3612 = (x_3600).(11) in let x1_3613 = (x_3600).(12) in let y1_3614 = (x_3600).(13) in let x1_3615 = (x_3600).(14) in let y1_3616 = (x_3600).(15) in let x1_3617 = (x_3600).(16) in let y1_3618 = (x_3600).(17) in let x1_3619 = (x_3600).(18) in let y1_3620 = (x_3600).(19) in let x1_3621 = (x_3600).(20) in let y1_3622 = (x_3600).(21) in let x1_3623 = (x_3600).(22) in let y1_3624 = (x_3600).(23) in let x1_3625 = (x_3600).(24) in let y1_3626 = (x_3600).(25) in let x1_3627 = (x_3600).(26) in let y1_3628 = (x_3600).(27) in let x1_3629 = (x_3600).(28) in let y1_3630 = (x_3600).(29) in let x1_3631 = (x_3600).(30) in let y1_3632 = (x_3600).(31) in let (t_3633) = (x1_3601 +. x1_3617) in let (t_3634) = (y1_3602 +. y1_3618) in let (t_3635) = (x1_3601 -. x1_3617) in let (t_3636) = (y1_3602 -. y1_3618) in let (t_3637) = (x1_3603 +. x1_3619) in let (t_3638) = (y1_3604 +. y1_3620) in let (t_3639) = (x1_3603 -. x1_3619) in let (t_3640) = (y1_3604 -. y1_3620) in let (t_3641) = (x1_3605 +. x1_3621) in let (t_3642) = (y1_3606 +. y1_3622) in let (t_3643) = (x1_3605 -. x1_3621) in let (t_3644) = (y1_3606 -. y1_3622) in let (t_3645) = (x1_3607 +. x1_3623) in let (t_3646) = (y1_3608 +. y1_3624) in let (t_3647) = (x1_3607 -. x1_3623) in let (t_3648) = (y1_3608 -. y1_3624) in let (t_3649) = (x1_3609 +. x1_3625) in let (t_3650) = (y1_3610 +. y1_3626) in let (t_3651) = (x1_3609 -. x1_3625) in let (t_3652) = (y1_3610 -. y1_3626) in let (t_3653) = (x1_3611 +. x1_3627) in let (t_3654) = (y1_3612 +. y1_3628) in let (t_3655) = (x1_3611 -. x1_3627) in let (t_3656) = (y1_3612 -. y1_3628) in let (t_3657) = (x1_3613 +. x1_3629) in let (t_3658) = (y1_3614 +. y1_3630) in let (t_3659) = (x1_3613 -. x1_3629) in let (t_3660) = (y1_3614 -. y1_3630) in let (t_3661) = (x1_3615 +. x1_3631) in let (t_3662) = (y1_3616 +. y1_3632) in let (t_3663) = (x1_3615 -. x1_3631) in let (t_3664) = (y1_3616 -. y1_3632) in let (t_3665) = ((t_3633) +. (t_3649)) in let (t_3666) = ((t_3634) +. (t_3650)) in let (t_3667) = ((t_3633) -. (t_3649)) in let (t_3668) = ((t_3634) -. (t_3650)) in let (t_3669) = ((t_3637) +. (t_3653)) in let (t_3670) = ((t_3638) +. (t_3654)) in let (t_3671) = ((t_3637) -. (t_3653)) in let (t_3672) = ((t_3638) -. (t_3654)) in let (t_3673) = ((t_3641) +. (t_3657)) in let (t_3674) = ((t_3642) +. (t_3658)) in let (t_3675) = ((t_3641) -. (t_3657)) in let (t_3676) = ((t_3642) -. (t_3658)) in let (t_3677) = ((t_3645) +. (t_3661)) in let (t_3678) = ((t_3646) +. (t_3662)) in let (t_3679) = ((t_3645) -. (t_3661)) in let (t_3680) = ((t_3646) -. (t_3662)) in let (t_3681) = ((t_3665) +. (t_3673)) in let (t_3682) = ((t_3666) +. (t_3674)) in let (t_3683) = ((t_3665) -. (t_3673)) in let (t_3684) = ((t_3666) -. (t_3674)) in let (t_3685) = ((t_3669) +. (t_3677)) in let (t_3686) = ((t_3670) +. (t_3678)) in let (t_3687) = ((t_3669) -. (t_3677)) in let (t_3688) = ((t_3670) -. (t_3678)) in let (t_3689) = ((t_3681) +. (t_3685)) in let (t_3690) = ((t_3682) +. (t_3686)) in let (t_3691) = ((t_3681) -. (t_3685)) in let (t_3692) = ((t_3682) -. (t_3686)) in let (t_3693) = ((t_3683) -. (t_3688)) in let (t_3694) = ((t_3684) +. (t_3687)) in let (t_3695) = ((t_3683) +. (t_3688)) in let (t_3696) = ((t_3684) -. (t_3687)) in let (t_3697) = ((t_3667) -. (t_3676)) in let (t_3698) = ((t_3668) +. (t_3675)) in let (t_3699) = ((t_3667) +. (t_3676)) in let (t_3700) = ((t_3668) -. (t_3675)) in let (t_3701) = ((t_3671) -. (t_3680)) in let (t_3702) = ((t_3672) +. (t_3679)) in let (t_3703) = ((t_3671) +. (t_3680)) in let (t_3704) = ((t_3672) -. (t_3679)) in let (t_3705) = (0.707106781187 *. ((t_3703) -. (t_3704))) in let (t_3706) = (0.707106781187 *. ((t_3703) +. (t_3704))) in let (t_3707) = ((t_3699) -. (t_3706)) in let (t_3708) = ((t_3700) +. (t_3705)) in let (t_3709) = ((t_3699) +. (t_3706)) in let (t_3710) = ((t_3700) -. (t_3705)) in let (t_3711) = (0.707106781187 *. ((t_3701) -. (t_3702))) in let (t_3712) = (0.707106781187 *. ((t_3701) +. (t_3702))) in let (t_3713) = ((t_3697) +. (t_3711)) in let (t_3714) = ((t_3698) +. (t_3712)) in let (t_3715) = ((t_3697) -. (t_3711)) in let (t_3716) = ((t_3698) -. (t_3712)) in let (t_3717) = ((t_3635) -. (t_3652)) in let (t_3718) = ((t_3636) +. (t_3651)) in let (t_3719) = ((t_3635) +. (t_3652)) in let (t_3720) = ((t_3636) -. (t_3651)) in let (t_3721) = ((t_3639) -. (t_3656)) in let (t_3722) = ((t_3640) +. (t_3655)) in let (t_3723) = ((t_3639) +. (t_3656)) in let (t_3724) = ((t_3640) -. (t_3655)) in let (t_3725) = ((t_3643) -. (t_3660)) in let (t_3726) = ((t_3644) +. (t_3659)) in let (t_3727) = ((t_3643) +. (t_3660)) in let (t_3728) = ((t_3644) -. (t_3659)) in let (t_3729) = ((t_3647) -. (t_3664)) in let (t_3730) = ((t_3648) +. (t_3663)) in let (t_3731) = ((t_3647) +. (t_3664)) in let (t_3732) = ((t_3648) -. (t_3663)) in let (t_3733) = (0.707106781187 *. ((t_3727) -. (t_3728))) in let (t_3734) = (0.707106781187 *. ((t_3727) +. (t_3728))) in let (t_3735) = ((t_3719) -. (t_3734)) in let (t_3736) = ((t_3720) +. (t_3733)) in let (t_3737) = ((t_3719) +. (t_3734)) in let (t_3738) = ((t_3720) -. (t_3733)) in let (t_3739) = ((0.382683432365 *. (t_3723)) -. (0.923879532511 *. (t_3724))) in let (t_3740) = ((0.923879532511 *. (t_3723)) +. (0.382683432365 *. (t_3724))) in let (t_3741) = ((0.923879532511 *. (t_3731)) -. (0.382683432365 *. (t_3732))) in let (t_3742) = ((0.382683432365 *. (t_3731)) +. (0.923879532511 *. (t_3732))) in let (t_3743) = ((t_3739) +. (t_3741)) in let (t_3744) = ((t_3740) +. (t_3742)) in let (t_3745) = ((t_3739) -. (t_3741)) in let (t_3746) = ((t_3740) -. (t_3742)) in let (t_3747) = ((t_3737) -. (t_3744)) in let (t_3748) = ((t_3738) +. (t_3743)) in let (t_3749) = ((t_3737) +. (t_3744)) in let (t_3750) = ((t_3738) -. (t_3743)) in let (t_3751) = ((t_3735) +. (t_3745)) in let (t_3752) = ((t_3736) +. (t_3746)) in let (t_3753) = ((t_3735) -. (t_3745)) in let (t_3754) = ((t_3736) -. (t_3746)) in let (t_3755) = (0.707106781187 *. ((t_3725) -. (t_3726))) in let (t_3756) = (0.707106781187 *. ((t_3725) +. (t_3726))) in let (t_3757) = ((t_3717) +. (t_3755)) in let (t_3758) = ((t_3718) +. (t_3756)) in let (t_3759) = ((t_3717) -. (t_3755)) in let (t_3760) = ((t_3718) -. (t_3756)) in let (t_3761) = ((0.923879532511 *. (t_3721)) -. (0.382683432365 *. (t_3722))) in let (t_3762) = ((0.382683432365 *. (t_3721)) +. (0.923879532511 *. (t_3722))) in let (t_3763) = ((0.382683432365 *. (t_3729)) -. (0.923879532511 *. (t_3730))) in let (t_3764) = ((0.923879532511 *. (t_3729)) +. (0.382683432365 *. (t_3730))) in let (t_3765) = ((t_3761) +. (t_3763)) in let (t_3766) = ((t_3762) +. (t_3764)) in let (t_3767) = ((t_3761) -. (t_3763)) in let (t_3768) = ((t_3762) -. (t_3764)) in let (t_3769) = ((t_3759) -. (t_3768)) in let (t_3770) = ((t_3760) +. (t_3767)) in let (t_3771) = ((t_3759) +. (t_3768)) in let (t_3772) = ((t_3760) -. (t_3767)) in let (t_3773) = ((t_3757) +. (t_3765)) in let (t_3774) = ((t_3758) +. (t_3766)) in let (t_3775) = ((t_3757) -. (t_3765)) in let (t_3776) = ((t_3758) -. (t_3766)) in (x_3600).(0) <- (t_3689); (x_3600).(1) <- (t_3690); (x_3600).(2) <- (t_3749); (x_3600).(3) <- (t_3750); (x_3600).(4) <- (t_3709); (x_3600).(5) <- (t_3710); (x_3600).(6) <- (t_3771); (x_3600).(7) <- (t_3772); (x_3600).(8) <- (t_3695); (x_3600).(9) <- (t_3696); (x_3600).(10) <- (t_3753); (x_3600).(11) <- (t_3754); (x_3600).(12) <- (t_3715); (x_3600).(13) <- (t_3716); (x_3600).(14) <- (t_3775); (x_3600).(15) <- (t_3776); (x_3600).(16) <- (t_3691); (x_3600).(17) <- (t_3692); (x_3600).(18) <- (t_3747); (x_3600).(19) <- (t_3748); (x_3600).(20) <- (t_3707); (x_3600).(21) <- (t_3708); (x_3600).(22) <- (t_3769); (x_3600).(23) <- (t_3770); (x_3600).(24) <- (t_3693); (x_3600).(25) <- (t_3694); (x_3600).(26) <- (t_3751); (x_3600).(27) <- (t_3752); (x_3600).(28) <- (t_3713); (x_3600).(29) <- (t_3714); (x_3600).(30) <- (t_3773); (x_3600).(31) <- (t_3774); (x_3600)>. *) (* .!{Trx.run_gcc with compiler_flags="-g -O2"} *) (* Generating code files. let path = "/local/rap/oleg/fft-2/fft-output/aa" in let gencode n = let nr = 2*n in let code = . .~(gen_aa Forward nr ..); 0>. in let code_str = Trx.C.toC code in let chan = open_out (path ^ (string_of_int n) ^ ".c") in let () = output_string chan code_str in close_out chan in List.map gencode [4;8] ;; *) (* ----------------------------------------------------------------------- *) (* Avoiding initial assignments from array values to variables *) (* array references become duplicated *) (* convert input array to list format *) let gen_ab dir size nums = run (y_sm (fft_aa dir)) ((fun p -> (bind (liftcM retS p) (* The only difference is retS here *) (fun (x,y) -> retS (Code (rat_one, x, y))))), (* here we assert c is a simple piece of code!*) (* And it is because the last stage of *) (* FFT is only a+b and a-b *) fun (Code ((0,1),x,y)) -> (x,y)) size nums (* To see the code . .~(gen_ab Forward (4 * 2) ..)>.;; *) let test_ab dir lst = let n = List.length lst in let arr = Array.make (n * 2) 0.0 in let _ = List.fold_left (fun n (a,b) -> (arr.(n) <- a; arr.(n+1) <-b; n+2)) 0 lst in let rec tolist arr i = if i = Array.length arr then [] else (arr.(i),arr.(i+1))::(tolist arr (i+2)) in let tranc = . .~(gen_ab dir (n * 2) ..)>. in let tran = (.!tranc) (arr) in tolist tran 0 (* Tests *) let () = assert ((7,0.0) = inf_norm (List.map (fun x -> 4.0 *. x) (unpair (impulse 4 1 ))) (unpair (test_ab Inverse (test_ab Forward (impulse 4 1))) )) let () = assert ((10, 5.11946041115152184e-12) = inf_norm (List.map (fun x -> 8.0 *. x) (unpair (impulse 8 1 ))) (unpair (test_ab Inverse (test_ab Forward (impulse 8 1))) )) let () = assert ((18, 9.90851845017459709e-12) = inf_norm (List.map (fun x -> 16.0 *. x) (unpair (impulse 16 1 ))) (unpair (test_ab Inverse (test_ab Forward (impulse 16 1))) )) (* ----------------------------------------------------------------------- *) (* Using complex multiplication with three multiplies *) (* if x = a + ib and y = c + id *) (* we compute t1 = (a+b)*(c-d) *) (* t2 = a*d *) (* t3 = b*c *) (* so that *) (* Re(x*y) = t1 + t2 - t3 *) (* Im(x*y) = t2 + t3 *) (* 3 multipllications and 5 additions *) (* This is a semi-concretization function that forces the multiplication up to trivial factors, +/- 1, +/- I *) let force_mult_ac (Code (f,rx,ix)) = let (newf,(j,n)) = rotate_to_QI f in let theta = float_of_int (2*j) *. pi /. float_of_int n in let c = cos theta in let s = sin theta in let cs = c +. s in let () = assert (c > 0.0 && s > 0.0) in bind (retN ..) (fun t1 -> bind (retN ..) (fun t2 -> bind (retN ..) (fun t3 -> bind (retN .<.~t1 +. .~t2 -. .~t3>.) (fun rcs -> bind (retN .<.~t2 +. .~t3>.) (fun ics -> ret (Code (newf,rcs,ics))))))) (* Perform (rx,ix) +/- _complex_fy*(ry,iy) and set the resulting factor to fx Return a pair: fx* (x + complex_fy*y), fx*(x - complex_fy*y) This is where many simplifications happen *) let rec do_linear_ac fx rx ix ((j,n) as fy) ry iy = if 2*j >= n then (* fy in III and IV quadrants: reflect *) bind (do_linear_ac fx rx ix (sub_rat fy (1,2)) ry iy) (fun (p,m) -> retS (m,p)) else match fy with (* x + y *) (0,1) -> bind (retN .<.~rx +. .~ry>.) (fun re -> bind (retN .<.~ix +. .~iy>.) (fun im -> bind (retN .<.~rx -. .~ry>.) (fun re1 -> bind (retN .<.~ix -. .~iy>.) (fun im1 -> ret (Code (fx,re,im),Code (fx,re1,im1)))))) |(1,4) -> (* x + I*y *) bind (retN .<.~rx -. .~iy>.) (fun re -> bind (retN .<.~ix +. .~ry>.) (fun im -> bind (retN .<.~rx +. .~iy>.) (fun re1 -> bind (retN .<.~ix -. .~ry>.) (fun im1 -> ret (Code (fx,re,im),Code (fx,re1,im1) ))))) |(j,8) -> (* x + (cos pi/4 + I*sin pi/4)*y *) let cs = sin (pi /. 4.0) in bind (retN ..) (fun rcs -> bind (retN ..) (fun ics -> do_linear_ac fx rx ix (rationalize (j-1,8)) rcs ics)) |(j,n) -> bind (force_mult_ac (Code (fy,ry,iy))) (fun (Code (fy,ry,iy)) -> do_linear_ac fx rx ix fy ry iy) (* This one is monadic: it may need to let-bind something Note: the add_sub_ac function is not symmetric! We need to compute fx* +/- fy* There are two ways to proceed: (1) let e1 = fx* and e2 = fy* in e1 +/- e2 (2) fx*( +/- (fy/fx)* ) The second way is better when either the multiplication by fx is trivial or by (fy/fx) is trivial Otherwise, the first way is better The second method is performed by do_linear *) let rec add_sub_ac ((Code (fx,rx,ix)) as x) ((Code (fy,ry,iy)) as y) = let fyx = (sub_rat fy fx) in if (trivial_mult fx) || (trivial_mult fyx) then do_linear_ac fx rx ix fyx ry iy else bind (force_mult_ac x) (fun x -> bind (force_mult_ac y) (fun y -> add_sub_ac x y)) (* Staged Monadic split *) let split_ac dir l = (* let () = print_string "\nBefore split: "; List.iter (fun (Code (fx,_,_)) ->Printf.printf "%s " (str_rat fx)) l in *) let n = List.length l in let rec comb j = function (x::xs, y::ys) -> bind (add_sub_ac x y) (fun (xpy,xmy) -> bind (comb (j+1) (xs,ys)) (fun (a,b) -> retS (xpy::a, (mult_aa xmy (w_aa dir n j))::b))) | _ -> retS ([],[]) in comb 0 (cleave_list l) let fft_ac dir f l = if List.length l = 1 then ret l else bind (split_ac dir l) (fun (p0,p1) -> bind (f p0) (fun y0 -> bind (f p1) (fun y1 -> retS (merge_u (y0,y1))))) let gen_ac dir size nums = run (y_sm (fft_ac dir)) ((fun p -> (bind (liftcM retN p) (fun (x,y) -> retS (Code (rat_one, x, y))))), (* here we assert c is a simple piece of code!*) (* And it is because the last stage of *) (* FFT is only a+b and a-b *) fun (Code ((0,1),x,y)) -> (x,y)) size nums (* To see the code . .~(gen_ac Forward (4 * 2) ..)>.;; *) let test_ac dir lst = let n = List.length lst in let arr = Array.make (n * 2) 0.0 in let _ = List.fold_left (fun n (a,b) -> (arr.(n) <- a; arr.(n+1) <-b; n+2)) 0 lst in let rec tolist arr i = if i = Array.length arr then [] else (arr.(i),arr.(i+1))::(tolist arr (i+2)) in let tranc = . .~(gen_ac dir (n * 2) ..)>. in let tran = (.!tranc) (arr) in tolist tran 0 (* Tests *) let () = assert ((7,0.0) = inf_norm (List.map (fun x -> 4.0 *. x) (unpair (impulse 4 1 ))) (unpair (test_ac Inverse (test_ac Forward (impulse 4 1))) )) let () = assert ((10, 5.11946041115152184e-12) = inf_norm (List.map (fun x -> 8.0 *. x) (unpair (impulse 8 1 ))) (unpair (test_ac Inverse (test_ac Forward (impulse 8 1))) )) let () = assert ((2, 6.30464569439936895e-11) = inf_norm (List.map (fun x -> 16.0 *. x) (unpair (impulse 16 1 ))) (unpair (test_ac Inverse (test_ac Forward (impulse 16 1))) )) (* Sample of the genereated code: . .~(gen_ac Forward (16 * 2) ..)>.;; . let (t_3857) = (x_3856).(0) in let (t_3858) = (x_3856).(1) in let (t_3859) = (x_3856).(2) in let (t_3860) = (x_3856).(3) in let (t_3861) = (x_3856).(4) in let (t_3862) = (x_3856).(5) in let (t_3863) = (x_3856).(6) in let (t_3864) = (x_3856).(7) in let (t_3865) = (x_3856).(8) in let (t_3866) = (x_3856).(9) in let (t_3867) = (x_3856).(10) in let (t_3868) = (x_3856).(11) in let (t_3869) = (x_3856).(12) in let (t_3870) = (x_3856).(13) in let (t_3871) = (x_3856).(14) in let (t_3872) = (x_3856).(15) in let (t_3873) = (x_3856).(16) in let (t_3874) = (x_3856).(17) in let (t_3875) = (x_3856).(18) in let (t_3876) = (x_3856).(19) in let (t_3877) = (x_3856).(20) in let (t_3878) = (x_3856).(21) in let (t_3879) = (x_3856).(22) in let (t_3880) = (x_3856).(23) in let (t_3881) = (x_3856).(24) in let (t_3882) = (x_3856).(25) in let (t_3883) = (x_3856).(26) in let (t_3884) = (x_3856).(27) in let (t_3885) = (x_3856).(28) in let (t_3886) = (x_3856).(29) in let (t_3887) = (x_3856).(30) in let (t_3888) = (x_3856).(31) in let (t_3889) = ((t_3857) +. (t_3873)) in let (t_3890) = ((t_3858) +. (t_3874)) in let (t_3891) = ((t_3857) -. (t_3873)) in let (t_3892) = ((t_3858) -. (t_3874)) in let (t_3893) = ((t_3859) +. (t_3875)) in let (t_3894) = ((t_3860) +. (t_3876)) in let (t_3895) = ((t_3859) -. (t_3875)) in let (t_3896) = ((t_3860) -. (t_3876)) in let (t_3897) = ((t_3861) +. (t_3877)) in let (t_3898) = ((t_3862) +. (t_3878)) in let (t_3899) = ((t_3861) -. (t_3877)) in let (t_3900) = ((t_3862) -. (t_3878)) in let (t_3901) = ((t_3863) +. (t_3879)) in let (t_3902) = ((t_3864) +. (t_3880)) in let (t_3903) = ((t_3863) -. (t_3879)) in let (t_3904) = ((t_3864) -. (t_3880)) in let (t_3905) = ((t_3865) +. (t_3881)) in let (t_3906) = ((t_3866) +. (t_3882)) in let (t_3907) = ((t_3865) -. (t_3881)) in let (t_3908) = ((t_3866) -. (t_3882)) in let (t_3909) = ((t_3867) +. (t_3883)) in let (t_3910) = ((t_3868) +. (t_3884)) in let (t_3911) = ((t_3867) -. (t_3883)) in let (t_3912) = ((t_3868) -. (t_3884)) in let (t_3913) = ((t_3869) +. (t_3885)) in let (t_3914) = ((t_3870) +. (t_3886)) in let (t_3915) = ((t_3869) -. (t_3885)) in let (t_3916) = ((t_3870) -. (t_3886)) in let (t_3917) = ((t_3871) +. (t_3887)) in let (t_3918) = ((t_3872) +. (t_3888)) in let (t_3919) = ((t_3871) -. (t_3887)) in let (t_3920) = ((t_3872) -. (t_3888)) in let (t_3921) = ((t_3889) +. (t_3905)) in let (t_3922) = ((t_3890) +. (t_3906)) in let (t_3923) = ((t_3889) -. (t_3905)) in let (t_3924) = ((t_3890) -. (t_3906)) in let (t_3925) = ((t_3893) +. (t_3909)) in let (t_3926) = ((t_3894) +. (t_3910)) in let (t_3927) = ((t_3893) -. (t_3909)) in let (t_3928) = ((t_3894) -. (t_3910)) in let (t_3929) = ((t_3897) +. (t_3913)) in let (t_3930) = ((t_3898) +. (t_3914)) in let (t_3931) = ((t_3897) -. (t_3913)) in let (t_3932) = ((t_3898) -. (t_3914)) in let (t_3933) = ((t_3901) +. (t_3917)) in let (t_3934) = ((t_3902) +. (t_3918)) in let (t_3935) = ((t_3901) -. (t_3917)) in let (t_3936) = ((t_3902) -. (t_3918)) in let (t_3937) = ((t_3921) +. (t_3929)) in let (t_3938) = ((t_3922) +. (t_3930)) in let (t_3939) = ((t_3921) -. (t_3929)) in let (t_3940) = ((t_3922) -. (t_3930)) in let (t_3941) = ((t_3925) +. (t_3933)) in let (t_3942) = ((t_3926) +. (t_3934)) in let (t_3943) = ((t_3925) -. (t_3933)) in let (t_3944) = ((t_3926) -. (t_3934)) in let (t_3945) = ((t_3937) +. (t_3941)) in let (t_3946) = ((t_3938) +. (t_3942)) in let (t_3947) = ((t_3937) -. (t_3941)) in let (t_3948) = ((t_3938) -. (t_3942)) in let (t_3949) = ((t_3939) -. (t_3944)) in let (t_3950) = ((t_3940) +. (t_3943)) in let (t_3951) = ((t_3939) +. (t_3944)) in let (t_3952) = ((t_3940) -. (t_3943)) in let (t_3953) = ((t_3923) -. (t_3932)) in let (t_3954) = ((t_3924) +. (t_3931)) in let (t_3955) = ((t_3923) +. (t_3932)) in let (t_3956) = ((t_3924) -. (t_3931)) in let (t_3957) = ((t_3927) -. (t_3936)) in let (t_3958) = ((t_3928) +. (t_3935)) in let (t_3959) = ((t_3927) +. (t_3936)) in let (t_3960) = ((t_3928) -. (t_3935)) in let (t_3961) = (0.707106781187 *. ((t_3959) -. (t_3960))) in let (t_3962) = (0.707106781187 *. ((t_3959) +. (t_3960))) in let (t_3963) = ((t_3955) -. (t_3962)) in let (t_3964) = ((t_3956) +. (t_3961)) in let (t_3965) = ((t_3955) +. (t_3962)) in let (t_3966) = ((t_3956) -. (t_3961)) in let (t_3967) = (0.707106781187 *. ((t_3957) -. (t_3958))) in let (t_3968) = (0.707106781187 *. ((t_3957) +. (t_3958))) in let (t_3969) = ((t_3953) +. (t_3967)) in let (t_3970) = ((t_3954) +. (t_3968)) in let (t_3971) = ((t_3953) -. (t_3967)) in let (t_3972) = ((t_3954) -. (t_3968)) in let (t_3973) = ((t_3891) -. (t_3908)) in let (t_3974) = ((t_3892) +. (t_3907)) in let (t_3975) = ((t_3891) +. (t_3908)) in let (t_3976) = ((t_3892) -. (t_3907)) in let (t_3977) = ((t_3895) -. (t_3912)) in let (t_3978) = ((t_3896) +. (t_3911)) in let (t_3979) = ((t_3895) +. (t_3912)) in let (t_3980) = ((t_3896) -. (t_3911)) in let (t_3981) = ((t_3899) -. (t_3916)) in let (t_3982) = ((t_3900) +. (t_3915)) in let (t_3983) = ((t_3899) +. (t_3916)) in let (t_3984) = ((t_3900) -. (t_3915)) in let (t_3985) = ((t_3903) -. (t_3920)) in let (t_3986) = ((t_3904) +. (t_3919)) in let (t_3987) = ((t_3903) +. (t_3920)) in let (t_3988) = ((t_3904) -. (t_3919)) in let (t_3989) = (0.707106781187 *. ((t_3983) -. (t_3984))) in let (t_3990) = (0.707106781187 *. ((t_3983) +. (t_3984))) in let (t_3991) = ((t_3975) -. (t_3990)) in let (t_3992) = ((t_3976) +. (t_3989)) in let (t_3993) = ((t_3975) +. (t_3990)) in let (t_3994) = ((t_3976) -. (t_3989)) in let (t_3995) = (1.30656296488 *. ((t_3979) -. (t_3980))) in let (t_3996) = (0.382683432365 *. (t_3980)) in let (t_3997) = (0.923879532511 *. (t_3979)) in let (t_3998) = (((t_3995) +. (t_3996)) -. (t_3997)) in let (t_3999) = ((t_3996) +. (t_3997)) in let (t_4000) = (1.30656296488 *. ((t_3987) -. (t_3988))) in let (t_4001) = (0.923879532511 *. (t_3988)) in let (t_4002) = (0.382683432365 *. (t_3987)) in let (t_4003) = (((t_4000) +. (t_4001)) -. (t_4002)) in let (t_4004) = ((t_4001) +. (t_4002)) in let (t_4005) = ((t_3998) +. (t_4003)) in let (t_4006) = ((t_3999) +. (t_4004)) in let (t_4007) = ((t_3998) -. (t_4003)) in let (t_4008) = ((t_3999) -. (t_4004)) in let (t_4009) = ((t_3993) -. (t_4006)) in let (t_4010) = ((t_3994) +. (t_4005)) in let (t_4011) = ((t_3993) +. (t_4006)) in let (t_4012) = ((t_3994) -. (t_4005)) in let (t_4013) = ((t_3991) +. (t_4007)) in let (t_4014) = ((t_3992) +. (t_4008)) in let (t_4015) = ((t_3991) -. (t_4007)) in let (t_4016) = ((t_3992) -. (t_4008)) in let (t_4017) = (0.707106781187 *. ((t_3981) -. (t_3982))) in let (t_4018) = (0.707106781187 *. ((t_3981) +. (t_3982))) in let (t_4019) = ((t_3973) +. (t_4017)) in let (t_4020) = ((t_3974) +. (t_4018)) in let (t_4021) = ((t_3973) -. (t_4017)) in let (t_4022) = ((t_3974) -. (t_4018)) in let (t_4023) = (1.30656296488 *. ((t_3977) -. (t_3978))) in let (t_4024) = (0.923879532511 *. (t_3978)) in let (t_4025) = (0.382683432365 *. (t_3977)) in let (t_4026) = (((t_4023) +. (t_4024)) -. (t_4025)) in let (t_4027) = ((t_4024) +. (t_4025)) in let (t_4028) = (1.30656296488 *. ((t_3985) -. (t_3986))) in let (t_4029) = (0.382683432365 *. (t_3986)) in let (t_4030) = (0.923879532511 *. (t_3985)) in let (t_4031) = (((t_4028) +. (t_4029)) -. (t_4030)) in let (t_4032) = ((t_4029) +. (t_4030)) in let (t_4033) = ((t_4026) +. (t_4031)) in let (t_4034) = ((t_4027) +. (t_4032)) in let (t_4035) = ((t_4026) -. (t_4031)) in let (t_4036) = ((t_4027) -. (t_4032)) in let (t_4037) = ((t_4021) -. (t_4036)) in let (t_4038) = ((t_4022) +. (t_4035)) in let (t_4039) = ((t_4021) +. (t_4036)) in let (t_4040) = ((t_4022) -. (t_4035)) in let (t_4041) = ((t_4019) +. (t_4033)) in let (t_4042) = ((t_4020) +. (t_4034)) in let (t_4043) = ((t_4019) -. (t_4033)) in let (t_4044) = ((t_4020) -. (t_4034)) in (x_3856).(0) <- (t_3945); (x_3856).(1) <- (t_3946); (x_3856).(2) <- (t_4011); (x_3856).(3) <- (t_4012); (x_3856).(4) <- (t_3965); (x_3856).(5) <- (t_3966); (x_3856).(6) <- (t_4039); (x_3856).(7) <- (t_4040); (x_3856).(8) <- (t_3951); (x_3856).(9) <- (t_3952); (x_3856).(10) <- (t_4015); (x_3856).(11) <- (t_4016); (x_3856).(12) <- (t_3971); (x_3856).(13) <- (t_3972); (x_3856).(14) <- (t_4043); (x_3856).(15) <- (t_4044); (x_3856).(16) <- (t_3947); (x_3856).(17) <- (t_3948); (x_3856).(18) <- (t_4009); (x_3856).(19) <- (t_4010); (x_3856).(20) <- (t_3963); (x_3856).(21) <- (t_3964); (x_3856).(22) <- (t_4037); (x_3856).(23) <- (t_4038); (x_3856).(24) <- (t_3949); (x_3856).(25) <- (t_3950); (x_3856).(26) <- (t_4013); (x_3856).(27) <- (t_4014); (x_3856).(28) <- (t_3969); (x_3856).(29) <- (t_3970); (x_3856).(30) <- (t_4041); (x_3856).(31) <- (t_4042); (x_3856)>. *) (* Generating code files. let path = "/local/rap/oleg/fft-2/fft-output/ac" in let gencode n = let nr = 2*n in let code = . .~(gen_ac Forward nr ..); 0>. in let code_str = Trx.C.toC code in let chan = open_out (path ^ (string_of_int n) ^ ".c") in let () = output_string chan code_str in close_out chan in List.map gencode [4;8;16;32;64;128;256] *) (* ----------------------------------------------------------------------- *) (* Using complex multiplication with three multiplies *) (* A more optimal way! *) (* if x = a + ib and y = c + id *) (* we compute t1 = a*(c+d) *) (* t2 = d*(b+a) *) (* t3 = c*(b-a) *) (* so that *) (* Re(x*y) = t1 - t2 *) (* Im(x*y) = t1 + t3 *) (* 3 multipllications and 5 additions *) (* Note that if 'x' is a known factor, we can pre-compute a+b and a-b at compile time, so at run time we'll have only three multiplies and three additions *) (* This is a semi-concretization function that forces the multiplication up to trivial factors, +/- 1, +/- I *) let force_mult_ad (Code (f,rx,ix)) = let (newf,(j,n)) = rotate_to_QI f in let theta = float_of_int (2*j) *. pi /. float_of_int n in let c = cos theta in let s = sin theta in let spc = s +. c in let smc = s -. c in let () = assert (c > 0.0 && s > 0.0 ) in bind (retN ..) (fun t1 -> bind (retN ..) (fun t2 -> if smc > 0.0 then bind (retN ..) (fun t3 -> bind (retN .<.~t1 -. .~t2>.) (fun rcs -> bind (retN .<.~t1 +. .~t3>.) (fun ics -> ret (Code (newf,rcs,ics))))) else let cms = c -. s in (* keep all the constants positive *) bind (retN ..) (fun nt3 -> bind (retN .<.~t1 -. .~t2>.) (fun rcs -> bind (retN .<.~t1 -. .~nt3>.) (fun ics -> ret (Code (newf,rcs,ics))))) )) (* Perform (rx,ix) +/- _complex_fy*(ry,iy) and set the resulting factor to fx Return a pair: fx* (x + complex_fy*y), fx*(x - complex_fy*y) This is where many simplifications happen *) let rec do_linear_ad fx rx ix ((j,n) as fy) ry iy = if 2*j >= n then (* fy in III and IV quadrants: reflect *) bind (do_linear_ad fx rx ix (sub_rat fy (1,2)) ry iy) (fun (p,m) -> retS (m,p)) else match fy with (* x + y *) (0,1) -> bind (retN .<.~rx +. .~ry>.) (fun re -> bind (retN .<.~ix +. .~iy>.) (fun im -> bind (retN .<.~rx -. .~ry>.) (fun re1 -> bind (retN .<.~ix -. .~iy>.) (fun im1 -> ret (Code (fx,re,im),Code (fx,re1,im1)))))) |(1,4) -> (* x + I*y *) bind (retN .<.~rx -. .~iy>.) (fun re -> bind (retN .<.~ix +. .~ry>.) (fun im -> bind (retN .<.~rx +. .~iy>.) (fun re1 -> bind (retN .<.~ix -. .~ry>.) (fun im1 -> ret (Code (fx,re,im),Code (fx,re1,im1) ))))) |(j,8) -> (* x + (cos pi/4 + I*sin pi/4)*y *) let cs = sin (pi /. 4.0) in bind (retN ..) (fun rcs -> bind (retN ..) (fun ics -> do_linear_ad fx rx ix (rationalize (j-1,8)) rcs ics)) |(j,n) -> bind (force_mult_ad (Code (fy,ry,iy))) (fun (Code (fy,ry,iy)) -> do_linear_ad fx rx ix fy ry iy) (* This one is monadic: it may need to let-bind something Note: the add_sub_ad function is not symmetric! We need to compute fx* +/- fy* There are two ways to proceed: (1) let e1 = fx* and e2 = fy* in e1 +/- e2 (2) fx*( +/- (fy/fx)* ) The second way is better when either the multiplication by fx is trivial or by (fy/fx) is trivial Otherwise, the first way is better The second method is performed by do_linear *) let rec add_sub_ad ((Code (fx,rx,ix)) as x) ((Code (fy,ry,iy)) as y) = let fyx = (sub_rat fy fx) in if (trivial_mult fx) || (trivial_mult fyx) then do_linear_ad fx rx ix fyx ry iy else bind (force_mult_ad x) (fun x -> bind (force_mult_ad y) (fun y -> add_sub_ad x y)) (* Staged Monadic split *) let split_ad dir l = (* let () = print_string "\nBefore split: "; List.iter (fun (Code (fx,_,_)) ->Printf.printf "%s " (str_rat fx)) l in *) let n = List.length l in let rec comb j = function (x::xs, y::ys) -> bind (add_sub_ad x y) (fun (xpy,xmy) -> bind (comb (j+1) (xs,ys)) (fun (a,b) -> retS (xpy::a, (mult_aa xmy (w_aa dir n j))::b))) | _ -> retS ([],[]) in comb 0 (cleave_list l) let fft_ad dir f l = if List.length l = 1 then ret l else bind (split_ad dir l) (fun (p0,p1) -> bind (f p0) (fun y0 -> bind (f p1) (fun y1 -> retS (merge_u (y0,y1))))) let gen_ad dir size nums = run (y_sm (fft_ad dir)) ((fun p -> (bind (liftcM retN p) (fun (x,y) -> retS (Code (rat_one, x, y))))), (* here we assert c is a simple piece of code!*) (* And it is because the last stage of *) (* FFT is only a+b and a-b *) fun (Code ((0,1),x,y)) -> (x,y)) size nums (* To see the code . .~(gen_ad Forward (4 * 2) ..)>.;; *) let test_ad dir lst = let n = List.length lst in let arr = Array.make (n * 2) 0.0 in let _ = List.fold_left (fun n (a,b) -> (arr.(n) <- a; arr.(n+1) <-b; n+2)) 0 lst in let rec tolist arr i = if i = Array.length arr then [] else (arr.(i),arr.(i+1))::(tolist arr (i+2)) in let tranc = . .~(gen_ad dir (n * 2) ..)>. in let tran = (.!tranc) (arr) in tolist tran 0 (* Tests *) let () = assert ((7,0.0) = inf_norm (List.map (fun x -> 4.0 *. x) (unpair (impulse 4 1 ))) (unpair (test_ad Inverse (test_ad Forward (impulse 4 1))) )) let () = assert ((10, 5.11946041115152184e-12) = inf_norm (List.map (fun x -> 8.0 *. x) (unpair (impulse 8 1 ))) (unpair (test_ad Inverse (test_ad Forward (impulse 8 1))) )) let () = assert ((2, 2.12345696581905941e-11) = inf_norm (List.map (fun x -> 16.0 *. x) (unpair (impulse 16 1 ))) (unpair (test_ad Inverse (test_ad Forward (impulse 16 1))) )) (* Sample of the genereated code: . .~(gen_ad Forward (16 * 2) ..)>.;; . let (t_20463) = (x_20462).(0) in let (t_20464) = (x_20462).(1) in let (t_20465) = (x_20462).(2) in let (t_20466) = (x_20462).(3) in let (t_20467) = (x_20462).(4) in let (t_20468) = (x_20462).(5) in let (t_20469) = (x_20462).(6) in let (t_20470) = (x_20462).(7) in let (t_20471) = (x_20462).(8) in let (t_20472) = (x_20462).(9) in let (t_20473) = (x_20462).(10) in let (t_20474) = (x_20462).(11) in let (t_20475) = (x_20462).(12) in let (t_20476) = (x_20462).(13) in let (t_20477) = (x_20462).(14) in let (t_20478) = (x_20462).(15) in let (t_20479) = (x_20462).(16) in let (t_20480) = (x_20462).(17) in let (t_20481) = (x_20462).(18) in let (t_20482) = (x_20462).(19) in let (t_20483) = (x_20462).(20) in let (t_20484) = (x_20462).(21) in let (t_20485) = (x_20462).(22) in let (t_20486) = (x_20462).(23) in let (t_20487) = (x_20462).(24) in let (t_20488) = (x_20462).(25) in let (t_20489) = (x_20462).(26) in let (t_20490) = (x_20462).(27) in let (t_20491) = (x_20462).(28) in let (t_20492) = (x_20462).(29) in let (t_20493) = (x_20462).(30) in let (t_20494) = (x_20462).(31) in let (t_20495) = ((t_20463) +. (t_20479)) in let (t_20496) = ((t_20464) +. (t_20480)) in let (t_20497) = ((t_20463) -. (t_20479)) in let (t_20498) = ((t_20464) -. (t_20480)) in let (t_20499) = ((t_20465) +. (t_20481)) in let (t_20500) = ((t_20466) +. (t_20482)) in let (t_20501) = ((t_20465) -. (t_20481)) in let (t_20502) = ((t_20466) -. (t_20482)) in let (t_20503) = ((t_20467) +. (t_20483)) in let (t_20504) = ((t_20468) +. (t_20484)) in let (t_20505) = ((t_20467) -. (t_20483)) in let (t_20506) = ((t_20468) -. (t_20484)) in let (t_20507) = ((t_20469) +. (t_20485)) in let (t_20508) = ((t_20470) +. (t_20486)) in let (t_20509) = ((t_20469) -. (t_20485)) in let (t_20510) = ((t_20470) -. (t_20486)) in let (t_20511) = ((t_20471) +. (t_20487)) in let (t_20512) = ((t_20472) +. (t_20488)) in let (t_20513) = ((t_20471) -. (t_20487)) in let (t_20514) = ((t_20472) -. (t_20488)) in let (t_20515) = ((t_20473) +. (t_20489)) in let (t_20516) = ((t_20474) +. (t_20490)) in let (t_20517) = ((t_20473) -. (t_20489)) in let (t_20518) = ((t_20474) -. (t_20490)) in let (t_20519) = ((t_20475) +. (t_20491)) in let (t_20520) = ((t_20476) +. (t_20492)) in let (t_20521) = ((t_20475) -. (t_20491)) in let (t_20522) = ((t_20476) -. (t_20492)) in let (t_20523) = ((t_20477) +. (t_20493)) in let (t_20524) = ((t_20478) +. (t_20494)) in let (t_20525) = ((t_20477) -. (t_20493)) in let (t_20526) = ((t_20478) -. (t_20494)) in let (t_20527) = ((t_20495) +. (t_20511)) in let (t_20528) = ((t_20496) +. (t_20512)) in let (t_20529) = ((t_20495) -. (t_20511)) in let (t_20530) = ((t_20496) -. (t_20512)) in let (t_20531) = ((t_20499) +. (t_20515)) in let (t_20532) = ((t_20500) +. (t_20516)) in let (t_20533) = ((t_20499) -. (t_20515)) in let (t_20534) = ((t_20500) -. (t_20516)) in let (t_20535) = ((t_20503) +. (t_20519)) in let (t_20536) = ((t_20504) +. (t_20520)) in let (t_20537) = ((t_20503) -. (t_20519)) in let (t_20538) = ((t_20504) -. (t_20520)) in let (t_20539) = ((t_20507) +. (t_20523)) in let (t_20540) = ((t_20508) +. (t_20524)) in let (t_20541) = ((t_20507) -. (t_20523)) in let (t_20542) = ((t_20508) -. (t_20524)) in let (t_20543) = ((t_20527) +. (t_20535)) in let (t_20544) = ((t_20528) +. (t_20536)) in let (t_20545) = ((t_20527) -. (t_20535)) in let (t_20546) = ((t_20528) -. (t_20536)) in let (t_20547) = ((t_20531) +. (t_20539)) in let (t_20548) = ((t_20532) +. (t_20540)) in let (t_20549) = ((t_20531) -. (t_20539)) in let (t_20550) = ((t_20532) -. (t_20540)) in let (t_20551) = ((t_20543) +. (t_20547)) in let (t_20552) = ((t_20544) +. (t_20548)) in let (t_20553) = ((t_20543) -. (t_20547)) in let (t_20554) = ((t_20544) -. (t_20548)) in let (t_20555) = ((t_20545) -. (t_20550)) in let (t_20556) = ((t_20546) +. (t_20549)) in let (t_20557) = ((t_20545) +. (t_20550)) in let (t_20558) = ((t_20546) -. (t_20549)) in let (t_20559) = ((t_20529) -. (t_20538)) in let (t_20560) = ((t_20530) +. (t_20537)) in let (t_20561) = ((t_20529) +. (t_20538)) in let (t_20562) = ((t_20530) -. (t_20537)) in let (t_20563) = ((t_20533) -. (t_20542)) in let (t_20564) = ((t_20534) +. (t_20541)) in let (t_20565) = ((t_20533) +. (t_20542)) in let (t_20566) = ((t_20534) -. (t_20541)) in let (t_20567) = (0.707106781187 *. ((t_20565) -. (t_20566))) in let (t_20568) = (0.707106781187 *. ((t_20565) +. (t_20566))) in let (t_20569) = ((t_20561) -. (t_20568)) in let (t_20570) = ((t_20562) +. (t_20567)) in let (t_20571) = ((t_20561) +. (t_20568)) in let (t_20572) = ((t_20562) -. (t_20567)) in let (t_20573) = (0.707106781187 *. ((t_20563) -. (t_20564))) in let (t_20574) = (0.707106781187 *. ((t_20563) +. (t_20564))) in let (t_20575) = ((t_20559) +. (t_20573)) in let (t_20576) = ((t_20560) +. (t_20574)) in let (t_20577) = ((t_20559) -. (t_20573)) in let (t_20578) = ((t_20560) -. (t_20574)) in let (t_20579) = ((t_20497) -. (t_20514)) in let (t_20580) = ((t_20498) +. (t_20513)) in let (t_20581) = ((t_20497) +. (t_20514)) in let (t_20582) = ((t_20498) -. (t_20513)) in let (t_20583) = ((t_20501) -. (t_20518)) in let (t_20584) = ((t_20502) +. (t_20517)) in let (t_20585) = ((t_20501) +. (t_20518)) in let (t_20586) = ((t_20502) -. (t_20517)) in let (t_20587) = ((t_20505) -. (t_20522)) in let (t_20588) = ((t_20506) +. (t_20521)) in let (t_20589) = ((t_20505) +. (t_20522)) in let (t_20590) = ((t_20506) -. (t_20521)) in let (t_20591) = ((t_20509) -. (t_20526)) in let (t_20592) = ((t_20510) +. (t_20525)) in let (t_20593) = ((t_20509) +. (t_20526)) in let (t_20594) = ((t_20510) -. (t_20525)) in let (t_20595) = (0.707106781187 *. ((t_20589) -. (t_20590))) in let (t_20596) = (0.707106781187 *. ((t_20589) +. (t_20590))) in let (t_20597) = ((t_20581) -. (t_20596)) in let (t_20598) = ((t_20582) +. (t_20595)) in let (t_20599) = ((t_20581) +. (t_20596)) in let (t_20600) = ((t_20582) -. (t_20595)) in let (t_20601) = (0.382683432365 *. ((t_20585) +. (t_20586))) in let (t_20602) = (1.30656296488 *. (t_20586)) in let (t_20603) = (0.541196100146 *. (t_20585)) in let (t_20604) = ((t_20601) -. (t_20602)) in let (t_20605) = ((t_20601) +. (t_20603)) in let (t_20606) = (0.923879532511 *. ((t_20593) +. (t_20594))) in let (t_20607) = (1.30656296488 *. (t_20594)) in let (t_20608) = (0.541196100146 *. (t_20593)) in let (t_20609) = ((t_20606) -. (t_20607)) in let (t_20610) = ((t_20606) -. (t_20608)) in let (t_20611) = ((t_20604) +. (t_20609)) in let (t_20612) = ((t_20605) +. (t_20610)) in let (t_20613) = ((t_20604) -. (t_20609)) in let (t_20614) = ((t_20605) -. (t_20610)) in let (t_20615) = ((t_20599) -. (t_20612)) in let (t_20616) = ((t_20600) +. (t_20611)) in let (t_20617) = ((t_20599) +. (t_20612)) in let (t_20618) = ((t_20600) -. (t_20611)) in let (t_20619) = ((t_20597) +. (t_20613)) in let (t_20620) = ((t_20598) +. (t_20614)) in let (t_20621) = ((t_20597) -. (t_20613)) in let (t_20622) = ((t_20598) -. (t_20614)) in let (t_20623) = (0.707106781187 *. ((t_20587) -. (t_20588))) in let (t_20624) = (0.707106781187 *. ((t_20587) +. (t_20588))) in let (t_20625) = ((t_20579) +. (t_20623)) in let (t_20626) = ((t_20580) +. (t_20624)) in let (t_20627) = ((t_20579) -. (t_20623)) in let (t_20628) = ((t_20580) -. (t_20624)) in let (t_20629) = (0.923879532511 *. ((t_20583) +. (t_20584))) in let (t_20630) = (1.30656296488 *. (t_20584)) in let (t_20631) = (0.541196100146 *. (t_20583)) in let (t_20632) = ((t_20629) -. (t_20630)) in let (t_20633) = ((t_20629) -. (t_20631)) in let (t_20634) = (0.382683432365 *. ((t_20591) +. (t_20592))) in let (t_20635) = (1.30656296488 *. (t_20592)) in let (t_20636) = (0.541196100146 *. (t_20591)) in let (t_20637) = ((t_20634) -. (t_20635)) in let (t_20638) = ((t_20634) +. (t_20636)) in let (t_20639) = ((t_20632) +. (t_20637)) in let (t_20640) = ((t_20633) +. (t_20638)) in let (t_20641) = ((t_20632) -. (t_20637)) in let (t_20642) = ((t_20633) -. (t_20638)) in let (t_20643) = ((t_20627) -. (t_20642)) in let (t_20644) = ((t_20628) +. (t_20641)) in let (t_20645) = ((t_20627) +. (t_20642)) in let (t_20646) = ((t_20628) -. (t_20641)) in let (t_20647) = ((t_20625) +. (t_20639)) in let (t_20648) = ((t_20626) +. (t_20640)) in let (t_20649) = ((t_20625) -. (t_20639)) in let (t_20650) = ((t_20626) -. (t_20640)) in (x_20462).(0) <- (t_20551); (x_20462).(1) <- (t_20552); (x_20462).(2) <- (t_20617); (x_20462).(3) <- (t_20618); (x_20462).(4) <- (t_20571); (x_20462).(5) <- (t_20572); (x_20462).(6) <- (t_20645); (x_20462).(7) <- (t_20646); (x_20462).(8) <- (t_20557); (x_20462).(9) <- (t_20558); (x_20462).(10) <- (t_20621); (x_20462).(11) <- (t_20622); (x_20462).(12) <- (t_20577); (x_20462).(13) <- (t_20578); (x_20462).(14) <- (t_20649); (x_20462).(15) <- (t_20650); (x_20462).(16) <- (t_20553); (x_20462).(17) <- (t_20554); (x_20462).(18) <- (t_20615); (x_20462).(19) <- (t_20616); (x_20462).(20) <- (t_20569); (x_20462).(21) <- (t_20570); (x_20462).(22) <- (t_20643); (x_20462).(23) <- (t_20644); (x_20462).(24) <- (t_20555); (x_20462).(25) <- (t_20556); (x_20462).(26) <- (t_20619); (x_20462).(27) <- (t_20620); (x_20462).(28) <- (t_20575); (x_20462).(29) <- (t_20576); (x_20462).(30) <- (t_20647); (x_20462).(31) <- (t_20648); (x_20462)>. *) (* Generating code files. let path = "/local/rap/oleg/fft-2/fft-output/ad" in let gencode n = let nr = 2*n in let code = . .~(gen_ad Forward nr ..); 0>. in let code_str = Trx.C.toC code in let chan = open_out (path ^ (string_of_int n) ^ ".c") in let () = output_string chan code_str in close_out chan in List.map gencode [4;8;16;32;64;128;256] *) ```