!=========================================================================!
! file pwcr_fft.f90                                                       !
!=========================================================================!
!      IMPROVED CYCLIC REDUCTION FOR SOLVING QUEUEING PROBLEMS            !
!                     by D.A. Bini and B. Meini                           !
!               (bini@dm.unipi.it    meini@dm.unipi.it)                   !
!          Fortran 90 Program version 1.0, January 30 1997                !
!                         FFT subroutines                                 !
!=========================================================================!
! This file contains a set of subroutines needed in order                 !
! to perform computations involving the Discrete Fourier Transform (DFT). !
! More precisely the following subroutines are reported:                  !
!                                                                         !
! fillroots : compute the roots of 1                                      !
! ifft1     : compute the inverse DFT                                     !
! fft1      : compute the DFT                                             !
! iffts1    : compute the inverse DFT of the DFT of a real vector         !
!             (Problem 2)                                                 !
! ffts1     : compute the DFT of a real vector (Problem 1)                !
! iffts2    : solve Problem 4                                             !
! ffts2     : solve Problem 3                                             !
! twiddle   : scale a complex vector                                      !
! itwiddle  : scale a complex vector                                      !
! ftb1      : compute the block DFT of a real block vector (Problem 1 for !
!             matrix polynomials)                                         !
! ftb2      : solve Problem 3 for matrix polynomials                      !
! iftb1     : compute the block inverse DFT of the DFT of a real block    !
!             vector (Problem 2 for matrix polynomials)                   !
! iftb2     : solve Problem 4 for matrix polynomials                      !
!=========================================================================!
!                      SUBROUTINE FILLROOTS                               !
!=========================================================================!
! Compute the n-th roots of 1 if they have not been yet computed before,  !
! otherwise return.                                                       !
! The real and the imaginary parts of the roots are stored in the vectors !
!  wr, wi, respectively. These vectors are put in a common block.         !
!=========================================================================!
!  Included in                                                            !
!  Structured Markov Chains Solver      [  SMCSolver  ]                   !
!  Version 1.2 - Nov 2006                                                 !
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

subroutine fillroots(n)
  use smc_tools
  use roots
  implicit none
  real(dp)                                  :: pi, pi2
  integer                                   :: i, j, k, m, mi1, n
  pi= 6.28318530717958647692528676656d0/n
  if(allocated(wr)) k=size(wr,1)
  if(.not.allocated(wwr))then
     allocate(wwr(n),stat=info)
     if(info/=0)return
     allocate(wwi(n),stat=info)
     if(info/=0)return
     allocate(wr(n),stat=info)
     if(info/=0)return
     allocate(wi(n),stat=info)
     if(info/=0)return
     loop1 : do i=1,n
	pi2=(i-1)*pi
	wwr(i)=cos(pi2)
	wwi(i)=sin(pi2)
     end do loop1
     wr=wwr
     wi=wwi
     return
  end if

  if(n<=k)then
     return
  end if
  if (allocated(wr)) deallocate(wr)
  if (allocated(wi)) deallocate(wi)
  allocate(wr(n),stat=info)
  if(info/=0)return
  allocate(wi(n),stat=info)
  if(info/=0)return
  m=n/k
  loop2 : do i=1,k
     mi1=m*(i-1)
     wr(mi1+1)=wwr(i)
     wi(mi1+1)=wwi(i)
     loop3 : do j=2,m
	wr(mi1+j)=cos(pi*(mi1+j-1))
	wi(mi1+j)=sin(pi*(mi1+j-1))
     end do loop3
  end do loop2
  if (allocated(wwr)) deallocate(wwr)
  if (allocated(wwi)) deallocate(wwi)
  allocate(wwr(n),stat=info)
  if(info/=0)return
  allocate(wwi(n),stat=info)
  if(info/=0)return
  wwr=wr
  wwi=wi
end subroutine fillroots




!========================================================================!
!                      SUBROUTINE IFFT1                                  !
!========================================================================!
! Compute the Inverse Discrete Fourier Transform  of the vector having   !
! real parts stored in the vector x and imaginary parts stored in the    !
! vector y. On output x and y contain the real and the imaginary parts   !
! of the transformed vector, respectively.                               !
! The algorithm is an adaptation of the split-radix FFT by               !
! Dhuamel-Hollman (Sorensen et al. IEEE trans.                           !
! on Acoustics Speech and Signal Processing, ASSP-34, 1986).             !
!========================================================================!
subroutine  ifft1(x,y) 
  use smc_tools
  use roots
  use fft_interface, only : fillroots
  implicit none
  real(dp), dimension(:)  :: x,y
  real(dp)                :: r1, r2, s1, s2, s3, ss1, &
       ss3, cc1,  cc3, xt, yt, un
  integer                 :: i, j, k, m, n, nmax, n2, &
       n4, ne, na, is, id,      &
       i0, i1, i2, i3, n1 ,na3
  n=size(x,1)
  m=log(n*1.d0)/log(2.d0)
  if(2**m<n)m=m+1
  n2=2*n
  call fillroots(n)
  nmax=size(wr,1)
  loop1 : do  k=1,m-1
     n2=n2/2
     n4=n2/4
     ne=nmax/n2  +1
     na=1
     loop2 : do j=1,n4
	na3=3*(na-1)+1
	cc1=wr(na)
	ss1=wi(na)
	cc3=wr(na3)
	ss3=wi(na3)
	na=j*(ne-1)+1
	is=j
	id=2*n2
40	loop3 : do i0=is,n-1,id
	   i1=i0+n4
	   i2=i1+n4
	   i3=i2+n4
	   r1=x(i0)-x(i2)
	   x(i0)=x(i0)+x(i2)
	   r2=x(i1)-x(i3)
	   x(i1)=x(i1)+x(i3)
	   s1=y(i0)-y(i2)
	   y(i0)=y(i0)+y(i2)
	   s2=y(i1)-y(i3)
	   y(i1)=y(i1)+y(i3)
	   s3=r1-s2
	   r1=r1+s2
	   s2=r2-s1
	   r2=r2+s1
	   x(i2)=r1*cc1-s2*ss1
	   y(i2)=-s2*cc1-r1*ss1
	   x(i3)=s3*cc3+r2*ss3
	   y(i3)=r2*cc3-s3*ss3
	end do loop3
	is=2*id-n2+j
	id=4*id
	if (is<n) goto 40
     end do loop2
  end do loop1
  is=1
  id=4
50 loop6 : do  i0=is,n,id
     i1=i0+1
     r1=x(i0)
     x(i0)=r1+x(i1)
     x(i1)=r1-x(i1)
     r1=y(i0)
     y(i0)=r1+y(i1)
     y(i1)=r1-y(i1)
  end do loop6
  is=2*id-1
  id=4*id
  if(is<n)goto 50
  ! ----DIGIT REVERSE COUNTER  -----------------
  j=1
  n1=n-1
  loop14 : do  i=1,n1
     if(i>=j)goto 101
     xt=x(j)
     yt=y(j)
     y(j)=y(i)
     y(i)=yt
     x(j)=x(i)
     x(i)=xt
101  k=n/2
102  if (k>=j)goto 103
     j=j-k
     k=k/2
     goto 102
103  j=j+k
  end do loop14
  un=1.d0/n
  x=x*un
  y=y*un
end subroutine ifft1



!========================================================================!
!                      SUBROUTINE FFT1                                   !
!========================================================================!
! Compute the Discrete Fourier Transform  of the vector having           !
! real parts stored in the vector x and imaginary parts stored in the    !
! vector y. On output x and y contain the real and the imaginary parts   !
! of the transformed vector, respectively.                               !
! The algorithm is an adaptation of the split-radix FFT by               !
! Dhuamel-Hollman (Sorensen et al. IEEE trans.                           !
! on Acoustics Speech and Signal Processing, ASSP-34, 1986).             !
!========================================================================!
subroutine  fft1(x,y) 
  use smc_tools
  use roots
  use fft_interface, only : fillroots
  implicit none
  real(dp), dimension(:) :: x,y
  real(dp)               :: r1, r2, r3, s2, ss1, ss3, &
       cc1, cc3, xt, yt, s1
  integer                :: i, j, k, m, n, nmax, n1,  &
       n2, n4, ne, na, is, id,   &
       i0, i1, i2, i3, na3
  n=size(x,1)
  m=log(n*1.d0)/log(2.d0)
  if(2**m<n)m=m+1
  n2=2*n
  call fillroots(n)
  nmax=size(wr,1)
  ! -------------DIGIT REVERSE COUNTER ---------------
  j=1
  n1=n-1
  loop104: do i=1,n1
     if(i>=j)goto 101
     xt=x(j)
     yt=y(j)
     y(j)=y(i)
     y(i)=yt
     x(j)=x(i)
     x(i)=xt
101  k=n/2
102  if (k>=j)goto 103
     j=j-k
     k=k/2
     goto 102
103  j=j+k
  end do loop104
  ! ---------------LENGTH TWO BUTTERFLIES--------------
  is=1
  id=4
70 loop60 : do i0=is,n,id
     i1=i0+1
     r1=x(i0)
     x(i0)=r1+x(i1)
     x(i1)=r1-x(i1)
     r1=-y(i0)
     y(i0)=-r1+y(i1)
     y(i1)=-r1-y(i1)
  end do loop60
  is=2*id-1
  id=4*id
  if (is<n) goto 70
  !----------------------------------------------------
  n2=2 
  loop10 : do  k=2,m
     n2=2*n2
     n4=n2/4
     ne=nmax/n2
     na=1
     loop20 : do  j=1,n4
	na3=3*(na-1)+1
	cc1=wr(na)
	ss1=wi(na)
	cc3=wr(na3)
	ss3=wi(na3)
	na=j*ne+1
	is=j
	id=2*n2
40	loop30 : do  i0=is,n-1,id
	   i1=i0+n4
	   i2=i1+n4
	   i3=i2+n4
	   r1=x(i2)*cc1-y(i2)*ss1
	   s1=-y(i2)*cc1-x(i2)*ss1
	   r2=x(i3)*cc3-y(i3)*ss3
	   s2=-y(i3)*cc3-x(i3)*ss3
	   r3=r1+r2
	   r2=r1-r2
	   r1=s1+s2
	   s2=s1-s2
	   x(i2)=x(i0)-r3
	   x(i0)=x(i0)+r3
	   x(i3)=x(i1)-s2
	   x(i1)=x(i1)+s2
	   y(i2)=y(i0)+r1
	   y(i0)=y(i0)-r1
	   y(i3)=y(i1)-r2
	   y(i1)=y(i1)+r2
	end do loop30
        is=2*id-n2+j
        id=4*id
        if(is<n)goto 40
     end do loop20
  end do loop10
end subroutine fft1



!========================================================================!
!                      SUBROUTINE FFTS1                                  !
!========================================================================!
! Compute the Discrete Fourier Transform (DFT) of the real vector u      !
! of size n, by means of an fft of half the size.                        !
! On output the vector v contains the components of the transform        !
! in such a way that v(j)+I*v(n-j+1) is the j-th component of the        !
! transformed vector, j=2,...,n/2, where I**2=-1. Moreover v(1) and      !
! v(n/2+1) coincide with  the first and the (n/2+1)-st components        !
! of the transformed vector, respectively.                               !
!========================================================================!
subroutine ffts1(u,v)
  use smc_tools
  use roots
  use fft_interface, only : fillroots,fft1
  implicit none
  real(dp), dimension(:)  :: u, v
  real(dp), dimension(:), allocatable  :: zr, zi
  real(dp)                             :: uq, x, y
  integer                              :: i, l, km, n, ln, nmax
  n=size(u,1)
  ln=log(n*1.d0)/log(2.d0)
  if(2**ln<n)ln=ln+1
  call fillroots(n)
  nmax=size(wr,1)
  km=log(nmax*1.d0)/log(2.d0)
  if(2**km<nmax)km=km+1
  if(allocated(zr)) deallocate(zr)
  allocate(zr(n/2),stat=info)
  if(info/=0)return
  if(allocated(zi)) deallocate(zi)
  allocate(zi(n/2),stat=info)
  if(info/=0)return
  loop20 : do l=0,n/2-1
     zr(l+1)=u(2*l+1)
     zi(l+1)=u(2*l+2)
  end do loop20
  call fft1(zr,zi)
  v(1)=zr(1)+zi(1)
  v(n/2+1)=zr(1)-zi(1)
  uq=1.d0/2.d0
  loop30 : do  l=2,n/2
     i=2**(km-ln)*(l-1)+1
     x=wr(i)*(zr(l)-zr(n/2-l+2))
     y=wi(i)*(zi(l)+zi(n/2-l+2))
     v(n-l+2)=(zi(l)-zi(n/2-l+2)-x+y)*uq
     x=wr(i)*(zi(l)+zi(n/2-l+2))
     y=wi(i)*(zr(l)-zr(n/2-l+2))
     v(l)=(zr(l)+zr(n/2-l+2)+x+y)*uq
  end do loop30
  if (allocated(zi)) deallocate(zi)
  if (allocated(zr)) deallocate(zr)
end subroutine ffts1



!========================================================================!
!                      SUBROUTINE FFTS2                                  !
!========================================================================!
! Compute the odd components of the DFT of size 2*n of the real          !
! vector u having the last n components zero.                            !
! On output, the vector v is such that v(j)+I*v(n/2+j) is the            !
! 2*j-1 component of the transform, j=1,...,n/2.                         !
! This subroutine provides the solution of Problem 3.                    !
!========================================================================!
subroutine ffts2(u,v)
  use smc_tools
  use roots
  use fft_interface, only : fillroots, fft1,twiddle
  implicit none
  real(dp), dimension(:)  :: u, v
  real(dp), dimension(:), allocatable  :: zr, zi
  real(dp)                         :: uq, x, y
  integer                          :: i, l, km, n, ln, nmax
  n=size(u,1)
  ln=log(n*1.d0)/log(2.d0)
  if(2**ln<n)ln=ln+1
  call fillroots(2*n)
  nmax=size(wr,1)
  km=log(nmax*1.d0)/log(2.d0)
  if(2**km<nmax)km=km+1
  if(allocated(zr))deallocate(zr)
  allocate(zr(n/2),stat=info)
  if(info/=0)return
  if(allocated(zi))deallocate(zi)
  allocate(zi(n/2),stat=info)
  if(info/=0)return
  uq=1.d0/2.d0      
  loop20 : do l=0,n/2-1
     zr(l+1)=u(2*l+1)
     zi(l+1)=u(2*l+2)
  end do loop20
  call twiddle(zr,zi)
  call fft1(zr,zi)
  loop10 : do  l=1,n/2
     i=2**(km-ln-1)*(2*l-1)+1
     x=wr(i)*(zr(l)-zr(n/2-l+1))
     y=wi(i)*(zi(l)+zi(n/2-l+1))               
     v(n/2+l)=(zi(l)-zi(n/2-l+1)-x+y)*uq
     x=wr(i)*(zi(l)+zi(n/2-l+1))
     y=wi(i)*(zr(l)-zr(n/2-l+1))
     v(l)=(zr(l)+zr(n/2-l+1)+x+y)*uq
  end do loop10
  if (allocated(zr)) deallocate(zr)
  if (allocated(zi)) deallocate(zi)
end subroutine ffts2

!========================================================================!
!                      SUBROUTINE TWIDDLE                                !
!========================================================================!
! Scale the vector of components zr(j)+I*zi(j), j=1,...,n,  by           !
! multiplication of the factors 1,w,...,w**(n-1), where w=Exp(I*Pi/n)    !
! is the principal 2n-th root of 1.                                      !
! On output the real and imaginary parts of the scaled vector are        !
! stored in zr and zi, respectively.                                     !
!========================================================================!
subroutine twiddle(zr,zi)
  use smc_tools
  use roots
  use fft_interface, only :  fillroots
  implicit none
  real(dp), dimension(:) :: zr,zi
  real(dp)               :: x, y, z
  integer                :: i, l, km, n, ln, nmax
  n=size(zr,1)
  ln=log(n*1.d0)/log(2.d0)
  if(2**ln<n)ln=ln+1
  call fillroots(2*n)
  nmax=size(wr,1)
  km=log(nmax*1.d0)/log(2.d0)
  if(2**km<nmax)km=km+1
  loop10 : do  l=1,n
     i=2**(km-ln-1)*(l-1)+1
     x=wr(i)*zr(l)
     y=wi(i)*zi(l)
     z=(wr(i)+wi(i))*(zr(l)+zi(l))
     zr(l)=x-y
     zi(l)=z-x-y
  end do loop10
  return
end subroutine twiddle





!========================================================================!
!                      SUBROUTINE IFFTS1                                 !
!========================================================================!
! Compute the Inverse Discrete Fourier Transform (IDFT) of the vector    !
! u of size n which is the DFT of a real vector, by means of an ifft of  !
! half the size.                                                         !
! The input vector u is such that u(j)+I*u(n-j+1) is the j-th component  !
! of the vector to be transformed, j=2,...,n/2. Moreover u(1) and        !
! u(n/2+1) coincide with the first and the (n/2+1)-st components,        !
! respectively.                                                          !
! On output the vector v contains the (real) components of the IDFT      !
! This subroutine provides the solution of Problem 2.                    !
!========================================================================!
subroutine iffts1(u,v)
  use smc_tools
  use roots
  use fft_interface, only : fillroots, ifft1
  implicit none
  real(dp), dimension(:) :: u, v
  real(dp), dimension(:), allocatable  :: zr, zi
  real(dp)                         :: dr
  integer                          :: l, km, n, ln, nmax, nm, &
       k1, k2, mm, ml
  n=size(u,1)
  ln=log(n*1.d0)/log(2.d0)
  if(2**ln<n)ln=ln+1
  call fillroots(n)
  nmax=size(wr,1)
  km=log(nmax*1.d0)/log(2.d0)
  if(2**km<nmax)km=km+1
  if(allocated(zr))deallocate(zr)
  allocate(zr(n/2),stat=info)
  if(info/=0)return
  if(allocated(zi))deallocate(zi)
  allocate(zi(n/2),stat=info)
  if(info/=0)return
  mm=2**(km-ln)
  nm=n/2
  dr=1.0d0/2.0d0
  loop20 : do  l=1,nm-1
     ml=l*mm+1
     k1=l+1
 !    k2=k1+nm
     zr(k1)=u(k1)+u(nm-l+1)+wr(ml)*(-u(nm+k1)-u(n-l+1))
     zr(k1)=(zr(k1)+wi(ml)*(-u(nm-l+1)+u(k1)))*dr
     zi(k1)=-u(nm+k1)+u(n-l+1)+wr(ml)*(u(k1)-u(nm-l+1))
     zi(k1)=(zi(k1)+wi(ml)*(u(nm+k1)+u(n-l+1)))*dr
  end do loop20
  zr(1)=(u(1)+u(nm+1))*dr
  zi(1)=(u(1)-u(nm+1))*dr
  call ifft1(zr,zi)
  loop30 : do  l=1,nm
     v(l*2)=zi(l)
     v(2*l-1)=zr(l)
  end do loop30
  if (allocated(zr)) deallocate(zr)
  if (allocated(zi)) deallocate(zi)
end subroutine iffts1


!========================================================================!
!                      SUBROUTINE IFFTS2                                 !
!========================================================================!
! Compute the IDFT of size 2*n of a vector which is the DFT of a         !
! real vector. On input, the vector u contains the odd components        !
! of the vector to be transformed, more precisely u(j)+I* u(n/2+j)       !
! is the (2*j-1)-st component of the vector to be transformed,           !
! j=1,...,n/2.                                                           !
! The vector v contains the (real) IDFT of size n of the even            !
! components of the vector to be transformed.                            !
! On output the vector t contains the (real) IDFT.                       !
! This subroutine provides the solution of Problem 4.                    !
!========================================================================!
subroutine iffts2(u,v,t)
  use smc_tools
  use roots
  use fft_interface, only : fillroots, ifft1,itwiddle
  implicit none
  real(dp), dimension(:) :: u, v,  t
  real(dp), dimension(:), allocatable  :: zr, zi
  real(dp)                     :: dr
  integer                      :: l, km, n, ln, nmax, &
       n2, mm, nm, k1, k2, ml
  n=size(u,1)
  ln=log(n*1.d0)/log(2.d0)
  if(2**ln<n)ln=ln+1
  call fillroots(2*n)
  nmax=size(wr,1)
  km=log(nmax*1.d0)/log(2.d0)
  if(2**km<nmax)km=km+1
  if(allocated(zr))deallocate(zr)
  allocate(zr(n/2),stat=info)
  if(info/=0)return
  if(allocated(zi))deallocate(zi)
  allocate(zi(n/2),stat=info)
  if(info/=0)return
  mm=2**(km-ln-1)
  nm=n/2
  !n2=2*n
  dr=1.0d0/2.0d0
  loop20 : do  l=1,nm
     ml=(2*l-1)*mm+1
     k1=2*l
!     k2=k1+nm
     zr(l)=u(l)+u(nm-l+1)+wr(ml)*(-u(nm+l)-u(n-l+1))
     zr(l)=(zr(l)+wi(ml)*(-u(nm-l+1)+u(l)))*dr
     zi(l)=-u(n-l+1)+u(nm+l)+wr(ml)*(u(l)-u(nm-l+1))
     zi(l)=(zi(l)+wi(ml)*(u(nm+l)+u(n-l+1)))*dr
  end do loop20
  call IFFT1(zr,zi)
  call itwiddle(zr,zi)
  loop40 : do l=1,nm
     t(2*l-1)=(v(2*l-1)+zr(l))*dr
     t(2*(nm+l)-1)=(v(2*l-1)-zr(l))*dr
     t(2*l)=(v(2*l)+zi(l))*dr
     t(2*(l+nm))=(v(2*l)-zi(l))*dr
  end do loop40
  if (allocated(zr)) deallocate (zr)
  if (allocated(zi)) deallocate(zi)
end subroutine iffts2


!========================================================================!
!                      SUBROUTINE ITWIDDLE                               !
!========================================================================!
! Scale the vector of components zr(j)+I*zi(j), j=1,...,n,  by           !
! multiplication of the factors 1,w,...,w**(n-1), where w=Exp(-I*Pi/n)   !
! On output the real and imaginary parts of the scaled vector are        !
! stored in zr and zi, respectively.                                     !
!========================================================================!
subroutine itwiddle(zr,zi)
  use smc_tools
  use roots
  use fft_interface, only : fillroots
  implicit none
  real(dp), dimension(:)  :: zr, zi
  real(dp)                ::  x,y,z
  integer                 ::  i, l, km, n, ln, nmax
  n=size(zr,1)
  ln=log(n*1.d0)/log(2.d0)
  if(2**ln<n)ln=ln+1
  call fillroots(2*n)
  nmax=size(wr,1)
  km=log(nmax*1.d0)/log(2.d0)
  if(2**km<nmax)km=km+1
  loop10 : do l=1,n
     i=2**(km-ln-1)*(l-1)+1
     x=wr(i)*zr(l)
     y=-wi(i)*zi(l)
     z=(wr(i)-wi(i))*(zr(l)+zi(l))
     zr(l)=x-y
     zi(l)=z-x-y
  end do loop10
end subroutine itwiddle



!========================================================================!
!                      SUBROUTINE FTB1                                   !
!========================================================================!
! Compute the Discrete Fourier Transform (DFT) of size n of the input    !
! block vector a having size (nb x nb x n). That is for i,j=1,...,nb     !
! compute the DFT of the vector a(i,j,:) by means of the subroutine FFTS1!
! On output the (nb x nb x n) vector ta is such that ta(i,j,:) is the    !
! DFT of the vector a(i,j,:), i,j=1,...,nb, arranged as the output of    !
! the subroutine FFTS1.                                                  !
!========================================================================!
subroutine ftb1(a,ta)
  use smc_tools
  use fft_interface, only : ffts1
  implicit none
  real(dp), dimension(:,:,:) :: a, ta
  real(dp), allocatable, dimension(:)  :: sa,st
  integer                              :: n,nb,i,j
  n=size(a,3)
  nb=size(a,1)
  if(n==2)then
     ta(:,:,1)=a(:,:,1)+a(:,:,2)
     ta(:,:,2)=a(:,:,1)-a(:,:,2)
     return
  endif
  if(allocated(sa))deallocate(sa)
  allocate(sa(n),stat=info)
  if(info/=0)return
  if(allocated(st))deallocate(st)
  allocate(st(n),stat=info)
  if(info/=0)return
  loopi : do i=1,nb
     loopj : do j=1,nb
	sa(:)=a(i,j,:)
	call ffts1(sa,st)
	ta(i,j,:)=st(:)
     end do loopj
  end do loopi
  if (allocated(sa)) deallocate(sa)
  if (allocated(st)) deallocate(st)
end subroutine ftb1


!========================================================================!
!                      SUBROUTINE FTB2                                   !
!========================================================================!
! Compute the odd components of the DFT of size 2*n of the real          !
! block vector a of size (nb x nb x 2n), having the last n block         !
! components zero. The input variable a contains only the first n        !
! block components.                                                      !
! On output, the block vector ta is such that v(i,j,k)+I*v(i,j,n/2+k)    !
! is the 2*k-1 component of the block transform, k=1,...,n/2,            !
! i,j=1,...,nb.                                                          !
! This subroutine provides the solution of Problem 3 for matrix          !
! polynomials                                                            !
!========================================================================!
subroutine ftb2(a,ta)
  use smc_tools
  use fft_interface, only : ffts2
  implicit none
  real(dp), dimension(:,:,:) :: a, ta
  real(dp), dimension(:), allocatable :: sa,st
  integer                             :: n,nb,i,j
  n=size(a,3)
  nb=size(a,1)
  if(n==2)then
     ta(:,:,1)=a(:,:,1)
     ta(:,:,2)=a(:,:,2)
     return
  endif
  if(allocated(sa))deallocate(sa)
  allocate(sa(n),stat=info)
  if(info/=0)return
  if(allocated(st))deallocate(st)
  allocate(st(n),stat=info)
  if(info/=0)return
  loopi : do i=1,nb
     loopj : do j=1,nb
        sa(:)=a(i,j,:)
        call ffts2(sa,st)
        ta(i,j,:)=st(:)
     end do loopj
  end do loopi
  if (allocated(sa)) deallocate(sa)
  if (allocated(st)) deallocate(st)
end subroutine ftb2



!========================================================================!
!                      SUBROUTINE IFTB1                                  !
!========================================================================!
! Compute the block IDFT of size n of the block vector ta of size        !
! (nb x nb xn) which is the DFT of a real block vector.                  !
! The input block vector ta is such that ta(i,j,k)+I*ta(i,j,n-k+1) is the!
! k-th component of the vector to be transformed, k=2,...,n/2,           !
! i,j=1,...,nb. Moreover ta(i,j,1) and ta(i,j, n/2+1) coincide with the  !
! first and the (n/2+1)-st components, respectively, i,j=1,...,nb.       !
! On output the block vector a contains the (real) components of the IDFT!
!========================================================================!
subroutine iftb1(ta,a)
  use smc_tools
  use fft_interface, only : iffts1
  implicit none
  real(dp), dimension(:,:,:) :: a, ta
  real(dp), dimension(:), allocatable :: sa,st
  real(dp)                            :: oneh
  integer                             :: n,nb,i,j
  n=size(ta,3)
  nb=size(ta,1)
  if(n==2)then
     oneh=1.0d0/2.0d0
     a(:,:,1)=(ta(:,:,1)+ta(:,:,2))*oneh
     a(:,:,2)=(ta(:,:,1)-ta(:,:,2))*oneh
     return
  endif
  if(allocated(st))deallocate(st)
  allocate(st(n),stat=info)
  if(info/=0)return
  if(allocated(sa))deallocate(sa)
  allocate(sa(n),stat=info)
  if(info/=0)return
  loopi : do i=1,nb
     loopj : do j=1,nb
	st(:)=ta(i,j,:) 
	call iffts1(st,sa)
	a(i,j,:)=sa(:)
     end do loopj
  end do loopi
  if (allocated(sa)) deallocate(sa)
  if (allocated(st)) deallocate(st)
end subroutine iftb1



!========================================================================!
!                      SUBROUTINE IFTB2                                  !
!========================================================================!
! Compute the block IDFT of size 2*n of a block vector ta of size        !
! (nb x nb x n) which is the block DFT of a real block vector.           !
! On input, the block vector ta contains the odd block components of the !
! block vector to be transformed, more precisely                         !
! ta(i,j,k)+I* ta(i,j,n/2+k) is the (2*k-1)-st component                 !
! of the vector to be transformed, k=1,...,n/2, i,j=1...,nb.             !
! The vector a contains the (real) IDFT of size n of the even            !
! components of the vector to be transformed.                            !
! On output the block vector a contains the block (real) IDFT.           !
! This subroutine provides the solution of Problem 4 for matrix          !
! polynomials.                                                           !
!========================================================================!
subroutine iftb2(ta,a)
  use smc_tools
  use fft_interface, only : iffts2
  implicit none
  real(dp), dimension(:,:,:), allocatable :: a,aux 
   real(dp), dimension(:,:,:) ::           ta
  real(dp), dimension(:), allocatable     :: sa, st, t
  real(dp)                            :: oneh
  integer                                     :: i,j,n,nb
  n=size(ta,3)
  nb=size(ta,1)
if(allocated(aux))deallocate(aux)
  allocate(aux(nb,nb,2*n),stat=info)
if(info/=0)return
  if(n==2)then
     oneh=1.0d0/2.0d0
     aux(:,:,1)=(a(:,:,1)+ta(:,:,1))*oneh
     aux(:,:,2)=(a(:,:,2)+ta(:,:,2))*oneh
     aux(:,:,3)=(a(:,:,1)-ta(:,:,1))*oneh
     aux(:,:,4)=(a(:,:,2)-ta(:,:,2))*oneh
     deallocate(a)
     allocate(a(nb,nb,2*n),stat=info)
     if(info/=0)return
     a=aux
     return
  endif
  if(allocated(st))deallocate(st)
  allocate(st(n),stat=info)
     if(info/=0)return
  if(allocated(sa))deallocate(sa)
  allocate(sa(n),stat=info)
     if(info/=0)return
  if(allocated(t))deallocate(t)
  allocate(t(2*n),stat=info)
     if(info/=0)return
  loopi : do i=1,nb
     loopj : do j=1,nb
	st(:)=ta(i,j,:)
	sa(:)=a(i,j,:)
	call iffts2(st,sa,t)
	aux(i,j,:)=t(:)
     end do loopj
  end do loopi
  if (allocated(a)) deallocate(a)
  allocate(a(nb,nb,2*n),stat=info)
  if(info/=0)return
  a=aux
  deallocate(st,sa,t,aux)
end subroutine iftb2
