!=========================================================================!
! file pwcr_sub.f90                                                       !
!=========================================================================!
!      IMPROVED CYCLIC REDUCTION FOR SOLVING QUEUEING PROBLEMS            !
!                     by D.A. Bini and B. Meini                           !
!            (dario.bini@unipi.it    beatrice.meini@unipi.it)             !
!           Fortran 90 Program version 1.0, March 30, 1997              !
!                          SUBROUTINES                                    !
!=========================================================================!
! The following subroutines constitute a package for the solution of      !
! the matrix equation                                                     !
!                A_1+X A_2+X^2 A_3+....X^(m-1) A_m-X=O             (1)    !
! where X is the (nb x nb) unknown matrix and the (nb x nb)               !
! nonnegative matrices A_i, i=1,2,..., are such that  A_1+A_2+...+A_m     !
! is column stochastic.                                                   !
! The method used in the subroutines is based on the cyclic reduction     !
! technique expressed in functional form and generates a sequence of      !
! approximations converging quadratically to the solution of (1).         !
! Unlike the method of Bini-Meini, SIMAX 1996, here the cyclic            !
! reduction step is implemented in a point-wise style. That is, all       !
! the intermediate matrix power series are evaluated at the set of        ! 
! Fourier points. This allows us to keep to the minimum value the         !
! size of the Fourier transforms involved in the subroutines with a       !
! consequent improvement of the performance of our algorithm.             !
!=========================================================================!
! This package is made up by the following subroutines                    !
!  pwcr (Point-Wise Cyclic Reduction): approximate the solution of        !
!             (1) by means of cyclic reduction.                           !
!  computeG : approximate the solution of (1) if m=2.                     !
!  residual : compute the 1-norm of the residual                          !
!             G-A_1+G A_2+G^2 A_3+....G^(m-1) A_m,                        !
!             where G is an approximation of the solution X of (1).       !
!  schur    : execute one step of point-wise cyclic reduction by          !
!             computing the Schur complement (formulae (7) and (8) in the !
!             paper)                                                      !
!  test     : auxiliary subroutine used by schur                          !
!  sc1p     : auxiliary subroutine used by schur                          !
!  sc2p     : auxiliary subroutine used by schur                          !
!  scc2p    : auxiliary subroutine used by schur                          !
!  sc1      : auxiliary subroutine used by schur                          !
!  sc2      : auxiliary subroutine used by schur                          !
!  scc2     : auxiliary subroutine used by schur                          !
!  prodc    : compute complex matrix product by performing 3 real         !
!             matrix multiplications.                                     !
!  means    : auxiliary subroutine used by test                           !
!  pmeans   : auxiliary subroutine used by test                           !
!  solver   : auxiliary subroutine for solving real linear systems        !
!  solvec   : auxiliary subroutine for solving complex linear systems     !
!=========================================================================!
! This package makes use also of auxiliary routines, for performing       !
! FFT computation, which have been collected in the separate file         !
! pwcr_fft.f90;  the requested interfaces have been collected in the file !
! fft_int.f90.                                                            !
! Moreover, LAPACK and LAPACK95 subroutines and interfaces are used       !
!=========================================================================!
!
!=========================================================================!
!                          SUBROUTINE PWCR                                !
!=========================================================================!
! This subroutine computes the matrix X solving the equation              !
!                A_1+XA_2+X^2A_3+....X^(m-1)A_m-X=O                       !
! where A_1+A_2+...+A_m is a column stochastic kxk matrix,                !
! by means of the method of pointwise cyclic reduction                    !
!=========================================================================!
! Input varables:                                                         !
!    a   : array associated with the blocks A_i,  i=1,m                 !
!    eps : error bound used for checking the stop condition at each       !
!          step of the cyclic reduction.                                  !
!=========================================================================!
! Output variables:                                                       !
!    g   : array associated with the approximation of the solution X    !
!          of (1)                                                         ! 
!    err : 1-norm of the left-hand side of (1) where X is replaced by  g  !
!=========================================================================!
! Interfaces used: schur_interface, pwcr_interface                        !
!=========================================================================!
! Subroutines used: schur, computeG, residual, pmeans                     !
!=========================================================================!
!  Included in                                                            !
!  Structured Markov Chains Solver      [  SMCSolver  ]                   !
!  Version 2.1 - June  2009                                               !
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!


SUBROUTINE PWCR(a,eps,g,err,maxit,intpmax)
  USE schur_interface
  USE pwcr_interface, ONLY : computeG, residual
  use smc_tools
!  use smc_int, only: drftmg1
  use roots
!  use ponte_f_f, only: fdrift
  IMPLICIT NONE
  REAL(dp), DIMENSION(:,:,:):: a
  REAL(dp), DIMENSION(:,:) :: g
  REAL(dp) :: eps,err

  REAL(dp), DIMENSION(:,:,:), ALLOCATABLE :: ae, ao, hae, hao
  REAL(dp), DIMENSION(:,:), ALLOCATABLE   :: a1
  REAL(dp), DIMENSION(:), ALLOCATABLE     :: mean, hmean, a1s
  REAL(dp)                            :: stoc, a0norm
  INTEGER                                     :: m, n, nb, j, &
       l, l1, ln, step
  integer :: maxit,intpmax, intpmaxl

!  logical ::  ldoshift

  intpmaxl=256*2**intpmax

  !10 FORMAT(1X,"*",\)
10 FORMAT("*")
  if (debug) write(*,*) "pwcr:  Dimensions A =",size(a,1), size(a,2),size(a,3),& 
       "  eps=",eps," err=",err," maxit=",maxit," intpmax=",intpmax
  !-------------------------------------
  ! Prepare the input
  !-------------------------------------
  m=SIZE(a,3)
  nb=SIZE(a,1)
!  if(allocated(ae)) deallocate(ae)
!  ALLOCATE(ae(nb,nb,m),stat=info)
!  if (info/=0) then
!        if(debug) write(*,*) "pwcr: info=",info
!        info=1000
!        return
!  endif
!  do j=1,m
!     ae(:,:,j)=transpose(a(:,:,j))
!  end do
!  fdrift= drftmg1(ae)
!  deallocate(ae)
  l1=(m+1)/2
  n=l1
  ln=LOG(1.0d0*n)/LOG(2.0d0)
  IF(2**ln<n)n=2**(ln+1)

  if(allocated(ae)) deallocate(ae)
  if(allocated(ao)) deallocate(ao)
  if(allocated(hae)) deallocate(hae)
  if(allocated(hao)) deallocate(hao)
  if(allocated(a1)) deallocate(a1)
  if(allocated(a1s)) deallocate(a1s)
  ALLOCATE(ae(nb,nb,n),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "pwcr: info=",info
        info=1000
        return
  endif
  ALLOCATE(ao(nb,nb,n),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "pwcr: info=",info
        info=1000
        return
  endif
  ALLOCATE(hae(nb,nb,n),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "pwcr: info=",info
        info=1000
        return
  endif
  ALLOCATE(hao(nb,nb,n),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "pwcr: info=",info
        info=1000
        return
  endif
  ALLOCATE(a1(nb,nb),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "pwcr: info=",info
        info=1000
        return
  endif
  ALLOCATE(a1s(nb),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "pwcr: info=",info
        info=1000
        return
  endif
  if(allocated(mean)) deallocate(mean)
  allocate(mean(nb),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "pwcr: info=",info
        info=1000
        return
  endif
  if(allocated(hmean)) deallocate(hmean)
  allocate(hmean(nb),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "pwcr: info=",info
        info=1000
        return
  endif

  ae=0.0d0
  ao=0.0d0
  hae=0.0d0
  hao=0.0d0
  loop1 : DO l=1,l1-1 
     ao(:,:,l)=a(:,:,2*l)
     ae(:,:,l)=a(:,:,2*l-1)
  END DO loop1
  IF(2*l1==m)THEN  
     ao(:,:,l1)=a(:,:,m)
     ae(:,:,l1)=a(:,:,m-1)
  ELSE 
     ae(:,:,l1)=a(:,:,m)
  ENDIF
  DO l=1,nb
     stoc=SUM(ao(:,l,:))+SUM(ae(:,l,:))
     IF(1.0d0-stoc>eps)THEN
if(debug)  WRITE(*,*)"pwcr: The matrix A is not stochastic"
        info=5000
        return
     ENDIF
  END DO
  hae(:,:,1:n-1)=ae(:,:,2:n)
  hao=ao
  a1(:,:)=ae(:,:,1)
  loop6 : DO j=1,nb
     a1s(j)=SUM(a1(:,j))
  END DO loop6
  !---------------------------------
  ! Start Pointwise Cyclic Reduction
  !---------------------------------
  step=0
  n=SIZE(ae,3)
  a0norm=1
  pwcrstage : DO WHILE(n>1.and. a0norm>eps) 
     step=step+1
     CALL pmeans(ae,ao,hae,hao,mean,hmean)
     CALL schur(ae,ao,hae,hao,mean,hmean,a1s,a0norm,eps,intpmaxl)
     n=SIZE(ae,3)
     if(verb)then
          write(wout,*) "--iter=",step," Check=",a0norm," Interp. points=", n
          call print_it
          else
          write(wout,10)
          call print_it_nolf
       end if
        if (step==maxit) then
          write(wout,*)"Reached the maximum number of iterations in PWCR"
          call print_it
          exit
        endif
  END DO pwcrstage
  CALL computeG(a1,a1s,ae,ao,hae,hao,eps,g)
  CALL residual(a,g,err)
  g=transpose(g)
  if (allocated(ae)) deallocate(ae)
  if (allocated(ao)) deallocate(ao)
  if (allocated(hae)) deallocate(hae)
  if (allocated(hao)) deallocate(hao)
  if (allocated(a1)) deallocate(a1)
  if (allocated(a1s)) deallocate(a1s)
  if (allocated(wr)) deallocate(wr)
  if (allocated(wi)) deallocate(wi)
  if (allocated(wwr)) deallocate(wwr)
  if (allocated(wwi)) deallocate(wwi)
END SUBROUTINE PWCR



!=========================================================================!
!                          SUBROUTINE COMPUTEG                            !
!=========================================================================!
! This subroutine computes the matrix X solving the equation              !
!                A_1+XA_2+X^2A_3+....X^(m-1)A_m-X=O                       !
! in the case where m=2.                                                  !
!=========================================================================!
! Input variables:                                                        !
!    a1  : array associated with the block A_1                          !
!    a1s : array associated with the vector (1,1,...,1)A1               !
!    ae  : array associated with the block coefficients of the series   !
!          \phi_{even}, i.e.,  A_1, A_3, ...                              !
!    ao  : array associated with the block coefficients of the series   !
!          \phi_{odd}, i.e,   A_2, A_4, ...                               !
!    hae : array associated withthe block coefficients of the series    !
!          \hat\phi_{even}                                                !
!    hao : array associated with the block coefficients of the series   !
!          \hat \phi_{odd}                                                !
!    eps : error bound used for checking the stop condition.              !
!=========================================================================!
! Output variables:                                                       !
!    g   : array associated with the  approximation of  the solution    !
!          X of (1)                                                       ! 
!=========================================================================!
! Interfaces used: lapack90_interfaces                                    !
!=========================================================================!
! Subroutines used: la_gesv                                               !
!=========================================================================!
SUBROUTINE computeG(a1,a1s,ae,ao,hae,hao,eps,g)
  USE f95_lapack
  use smc_tools
  IMPLICIT NONE
  REAL(dp), DIMENSION(:,:,:) :: ae, ao, hae, hao
  REAL(dp), DIMENSION(:,:)    :: a1, g
  REAL(dp), DIMENSION(:)       :: a1s
  REAL(dp)                                :: s, eps
  INTEGER                                 :: nb, i
 REAL(dp), DIMENSION(:,:),allocatable    :: r
  LOGICAL :: stoc
  INTRINSIC size
  nb=SIZE(ae,1)

  stoc=.TRUE.
  loop50 : DO i=1,nb
     s=a1s(i)+SUM(hao(:,i,1))
     IF(1.0d0-s>eps)stoc=.FALSE.
  END DO loop50
  IF(stoc)THEN
     g(:,:)=-TRANSPOSE(hao(:,:,1))
     loop10 : DO i=1,nb
        g(i,i)=g(i,i)+1.0d0
     END DO loop10
     a1=TRANSPOSE(a1)
     CALL la_gesv(g, a1, info=info)
     if (info/=0) then
        if(debug) write(*,*) "computeG:la_gesv info=",info
        info=2000
        return
     endif
     g=TRANSPOSE(a1)
     RETURN
  ENDIF
  if (allocated(r)) deallocate(r)
  ALLOCATE(r(nb,nb),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "computeG: info=",info
        info=1000
        return
  endif
  g=-TRANSPOSE(ao(:,:,1))
  loop100 : DO i=1,nb
     g(i,i)=g(i,i)+1.0d0
  END DO loop100
  r=TRANSPOSE(ae(:,:,1))
  CALL la_gesv(g, r,info=info)
  if (info/=0) then
        if(debug) write(*,*) "computeG:la_gesv info=",info
        info=2000
        return
  endif
  r=TRANSPOSE(r)
  g=-hao(:,:,1)-MATMUL(hae(:,:,1),r)
  g=TRANSPOSE(g)
  loop200 : DO i=1,nb
     g(i,i)=g(i,i)+1.0d0
  END DO loop200
  a1=TRANSPOSE(a1)
  CALL la_gesv(g, a1,info=info)  
  if (info/=0) then
        if(debug) write(*,*) "computeG:la_gesv info=",info
        info=2000
        return
  endif
  g=TRANSPOSE(a1) 
if(allocated(r))deallocate(r)
END SUBROUTINE computeG

!=========================================================================!
!                          SUBROUTINE RESIDUAL                            !
!=========================================================================!
! This subroutine computes the residual ERR, i.e., the 1-norm             !
!             ERR = || A_1+GA_2+G^2A_3+....G^(m-1)A_m-G ||                !
! where G is an approximation of the solution X of (1)                    !
!=========================================================================!
! Input variables:                                                        !
!    a   : array associated with the blocks A_1, A_2,..., A_m           !
!    g   : array associated with the approximation of the solution      !
!        X of (1)                                                         !
!=========================================================================!
! Output variables:                                                       !
!    err : the seeked residual                                            ! 
!=========================================================================!
SUBROUTINE residual(a,g,err)
  use smc_tools
  IMPLICIT NONE
  REAL(dp), DIMENSION(:,:,:):: a
  REAL(dp), DIMENSION(:,:)  :: g
  REAL(dp)                            :: err, s
  REAL(dp), DIMENSION(:,:), ALLOCATABLE   :: r
   INTEGER                                     :: n, nb, i
  INTRINSIC size 
  nb=SIZE(a,1)
  n=SIZE(a,3)
  if(allocated(r)) deallocate(r)
  ALLOCATE(r(nb,nb),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "residual: info=",info
        info=1000
        return
  endif
  r=a(:,:,n)
  loop1 : DO i=n-1,1,-1
     r=MATMUL(g,r)+a(:,:,i)
  END DO loop1
  r=ABS(r-g)
  err=0.0d0
  loop2 : DO i=1,nb
     s=SUM(r(:,i))
     IF(s>err) err=s
  END DO loop2
  if (allocated(r)) deallocate(r)
END SUBROUTINE residual

!=========================================================================!
!                          SUBROUTINE SCHUR                               !
!=========================================================================!
! This subroutine performs one step of cyclic reduction by computing      !
! the even and the odd components of the matrix series \phi^{(n+1)} and   !
! \hat\phi^{(n+1)} (compare formulae (7), (8) in the paper) defining the  !
! Schur complement in the cyclic reduction process.                       !
! The subroutine :                                                        !
! - interpolates the series \phi_{even}, \phi_{odd}, \hat\phi_{even},     !
!   \hat\phi_{odd}, numerically truncated to polynomials of degree n-1,   !
!    at the n-th roots of 1,                                              !
! - performes convolutions according to formula (7) of the paper          !
! - computes the matrix polynomials interpolating these values            !
! - checks the accuracy of the result; if the result is not accurate then !
!   doubles the number of interpolation points and repeats; otherwise     !
!   outputs the block coefficients of the matrix power series.            !
!=========================================================================!
! Input variables:                                                        !
!    ae  : array associated with the block coefficients of the series   !
!          \phi_{even}, i.e.,  A_1, A_3, ...                              !
!    ao  : array associated with the block coefficients of the series   !
!          \phi_{odd}, i.e,   A_2, A_4, ...                               !
!    hae : array associated withthe block coefficients of the series    !
!          \hat\phi_{even}                                                !
!    hao : array associated with the block coefficients of the series   !
!          \hat \phi_{odd}                                                !
!    mean :  array associated with the vector (1,...,1)\phi'(1),        !
!            for the current function \phi                                !
!    hmean:  array associated with the vector (1,...,1)\hat\phi'(1) for !
!            the current function   \hat\phi                              !
!    a1s  :  array associated with the vector (1,...,1)\phi(0), for     !
!            the initial function \phi                                    !
!    eps  : error bound used for checking the stop condition.             !
!=========================================================================!
! Output variables: new values for AE, AO, HAE, HAO, MEAN, HMEAN          !
!=========================================================================!
! Interfaces used:   fft_interface, schur_interface                       !
!=========================================================================!
! Subroutines used: ftb1, iftb1, ftb2, iftb2, sc1p, sc2p, scc2p, sc1, sc2 !
!           scc2, means, test                                             !
!=========================================================================!
SUBROUTINE schur(ae,ao,hae,hao,mean,hmean,a1s,a0norm,eps,intpmaxl)
  use smc_tools
  USE fft_interface
  USE schur_interface, ONLY : sc1, sc2p, sc1p, sc2, scc2p, scc2, &
       means, test
  IMPLICIT NONE
  REAL(dp), allocatable, DIMENSION(:,:,:) :: ae, ao, hae, hao
  REAL(dp), DIMENSION(:)  :: mean, hmean, a1s
  REAL(dp)                            :: eps
  REAL(dp), DIMENSION(:,:,:), ALLOCATABLE :: tae, tao, thae, thao, &
       a1, a2, a3, a4  
  REAL(dp)                           :: s1, s2, a0norm
  INTEGER                                     :: l,nn, nb, n, nradd, intpmaxl
  LOGICAL                                     :: answer, hanswer
  nb=SIZE(ae,1)
  n=SIZE(ae,3)
  if(allocated(tae)) deallocate(tae)
  if(allocated(tao)) deallocate(tao)
  if(allocated(a1)) deallocate(a1)
  if(allocated(a2)) deallocate(a2)
  if(allocated(a3)) deallocate(a3)
  if(allocated(a4)) deallocate(a4)
  if(allocated(thae)) deallocate(thae)
  if(allocated(thao)) deallocate(thao)
  ALLOCATE(tae(nb,nb,n),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "shur: info=",info
        info=1000
        return
  endif
  ALLOCATE(tao(nb,nb,n),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "shur: info=",info
        info=1000
        return
  endif
  ALLOCATE(a1(nb,nb,n),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "shur: info=",info
        info=1000
        return
  endif
  ALLOCATE(a2(nb,nb,n),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "shur: info=",info
        info=1000
        return
  endif
  ALLOCATE(a3(nb,nb,n),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "shur: info=",info
        info=1000
        return
  endif
  ALLOCATE(thae(nb,nb,n),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "shur: info=",info
        info=1000
        return
  endif
  ALLOCATE(thao(nb,nb,n),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "shur: info=",info
        info=1000
        return
  endif
  ALLOCATE(a4(nb,nb,n),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "shur: info=",info
        info=1000
        return
  endif

  CALL ftb1(ae,tae)
  CALL ftb1(ao,tao)
  CALL sc1p(tae,tao,a1)
  CALL sc2p(tae,tao,a1,a2)
  CALL iftb1(a2,a3)
  CALL ftb1(hae,thae)
  CALL ftb1(hao,thao)
  CALL scc2p(thae,thao,a1,a2)
  CALL iftb1(a2,a4)
  CALL means(tae,tao,thae,mean,hmean)
  nn=n
  nradd=0
100 nradd=nradd+1
  ! check the accuracy of the matrix coefficients
  CALL test(a3,mean,eps,answer)
  CALL test(a4,hmean,eps,hanswer)
  IF(answer.OR.hanswer) THEN ! double the number of interpolation points
     IF(nradd>1)THEN
        n=nn/2
        if(allocated(a1))deallocate(a1)
        allocate(a1(nb,nb,n),stat=info)
        if (info/=0) then
             if(debug) write(*,*) "shur: info=",info
             info=1000
             return
        endif
        a1=ae
        if (allocated(ae)) DEALLOCATE(ae)
        ALLOCATE(ae(nb,nb,nn),stat=info)
        if (info/=0) then
             if(debug) write(*,*) "shur: info=",info
             info=1000
             return
        endif
        ae(:,:,1:n)=a1
        ae(:,:,n+1:nn)=0.0d0
        a1=ao
        if (allocated(ao)) DEALLOCATE(ao)
        ALLOCATE(ao(nb,nb,nn),stat=info)
        if (info/=0) then
             if(debug) write(*,*) "shur: info=",info
             info=1000
             return
        endif
        ao(:,:,1:n)=a1
        ao(:,:,n+1:nn)=0.0d0
        a1=hae
        if (allocated(hae)) DEALLOCATE(hae)
        ALLOCATE(hae(nb,nb,nn),stat=info)
        if (info/=0) then
             if(debug) write(*,*) "shur: info=",info
             info=1000
             return
        endif
        hae(:,:,1:n)=a1
        hae(:,:,n+1:nn)=0.0d0
        a1=hao
        if (allocated(hao)) DEALLOCATE(hao)
        ALLOCATE(hao(nb,nb,nn),stat=info)
        if (info/=0) then
             if(debug) write(*,*) "shur: info=",info
             info=1000
             return
        endif
        hao(:,:,1:n)=a1
        hao(:,:,n+1:nn)=0.0d0
        if (allocated(tae)) deallocate(tae)
        if (allocated(tao)) deallocate(tao)
        if (allocated(thae)) deallocate(thae)
        if (allocated(thao)) deallocate(thao)
        if (allocated(a1)) deallocate(a1)
        if (allocated(a2)) deallocate(a2)
        allocate(tae(nb,nb,nn),stat=info)
        if (info/=0) then
             if(debug) write(*,*) "shur: info=",info
             info=1000
             return
        endif
        allocate(tao(nb,nb,nn),stat=info)
        if (info/=0) then
             if(debug) write(*,*) "shur: info=",info
             info=1000
             return
        endif
        allocate(thae(nb,nb,nn),stat=info)
        if (info/=0) then
             if(debug) write(*,*) "shur: info=",info
             info=1000
             return
        endif
        allocate(thao(nb,nb,nn),stat=info)
        if (info/=0) then
             if(debug) write(*,*) "shur: info=",info
             info=1000
             return
        endif
        allocate(a1(nb,nb,nn),stat=info)
        if (info/=0) then
             if(debug) write(*,*) "shur: info=",info
             info=1000
             return
        endif
        allocate(a2(nb,nb,nn),stat=info)
        if (info/=0) then
             if(debug) write(*,*) "shur: info=",info
             info=1000
             return
        endif
     ENDIF
     CALL ftb2(ae,tae)
     CALL ftb2(ao,tao)
     CALL sc1(tae,tao,a1)
     CALL sc2(tae,tao,a1,a2)
     CALL iftb2(a2,a3)
     CALL ftb2(hae,thae)
     CALL ftb2(hao,thao)
     CALL scc2(thae,thao,a1,a2)
     CALL iftb2(a2,a4)
     nn=2*nn
     if (nn/2==intpmaxl) then
        write(wout,*) "Reached the maximum number of interpolation points in PWCR"
        call print_it
        goto 51
     end if
     GOTO 100 
  END IF
51 continue
  if(allocated(ae)) DEALLOCATE(ae)
  if (allocated(ao)) DEALLOCATE(ao)
  if (allocated(hae)) DEALLOCATE(hae)
  if (allocated(hao)) DEALLOCATE(hao)
  n=nn/2
  ALLOCATE(ae(nb,nb,n),stat=info)
  if (info/=0) then
             if(debug) write(*,*) "shur: info=",info
             info=1000
             return
  endif
  ALLOCATE(ao(nb,nb,n),stat=info)
  if (info/=0) then
             if(debug) write(*,*) "shur: info=",info
             info=1000
             return
  endif
  ALLOCATE(hae(nb,nb,n),stat=info)
  if (info/=0) then
             if(debug) write(*,*) "shur: info=",info
             info=1000
             return
  endif
  ALLOCATE(hao(nb,nb,n),stat=info)
  if (info/=0) then
             if(debug) write(*,*) "shur: info=",info
             info=1000
             return
  endif
  loop1 : DO l=1,nn/2
     ae(:,:,l)=a3(:,:,2*l-1)
     ao(:,:,l)=a3(:,:,2*l)
     hae(:,:,l)=a4(:,:,2*l)
     hao(:,:,l)=a4(:,:,2*l-1)
  END DO loop1
  loop2 : DO l=1,nb
     s1=a1s(l)+SUM(hae(:,l,:))+SUM(hao(:,l,:))
     s2=SUM(ae(:,l,:))+SUM(ao(:,l,:))
     s1=1.d0/s1
     s2=1.d0/s2
     ae(:,l,:)=ae(:,l,:)*s2
     ao(:,l,:)=ao(:,l,:)*s2
     hae(:,l,:)=hae(:,l,:)*s1
     hao(:,l,:)=hao(:,l,:)*s1
  END DO loop2
  a0norm=maxval(sum(abs(ae(:,:,1)),dim=2))
  if(allocated(tao)) deallocate(tao)
  if(allocated(a1)) deallocate(a1)
  if(allocated(a2)) deallocate(a2)
  if(allocated(a3)) deallocate(a3)
  if(allocated(a4)) deallocate(a4)
  if(allocated(thae)) deallocate(thae)
  if(allocated(thao)) deallocate(thao)

END SUBROUTINE schur


!=========================================================================!
!                          SUBROUTINE TEST                                !
!=========================================================================!
! This subroutine applies the test (12)  in the paper and outputs         !
! answer=.true. if the number of interpolation point must be doubled      !
! (failure of the test), otherwise outputs answer=.false.                 !
!=========================================================================!
! Input variables                                                         !
!   u   :  array associated with the matrix coefficients of the         !
!          polynomial interpolating \phi (or \hat\phi) at the root of 1.  !
!   mean:  array associated with the exact value of (1,...,1)\phi'(1),  !
!          or (1,...,1)\hat\phi'(1),                                      !
!   eps :  error bound                                                     !
!=========================================================================!
! Output variables                                                        !
!   answer                                                                !
!=========================================================================!
SUBROUTINE test(u, mean, eps, answer)
  use smc_tools
  IMPLICIT NONE
  REAL(dp),  DIMENSION(:,:,:) :: u
  REAL(dp),  DIMENSION(:)     :: mean
  REAL(dp)                    :: eps, v, err
  INTEGER                     :: j, l, n, nb
  LOGICAL                     ::  answer
  INTRINSIC size
  n=SIZE(u,3)
  nb=SIZE(u,1)
  answer=.FALSE.
  err=0.d0
  loop10 : DO  j=1,nb
     v=0.d0
     loop20 : DO  l=2,n
        v=v+SUM(u(:,j,l))*(l-1)
     END DO loop20
     IF(ABS(v-mean(j))>eps)answer=.TRUE.
     IF(err<ABS(v-mean(j)))err=ABS(v-mean(j))
  END DO loop10
END SUBROUTINE test



!=========================================================================!
!                          SUBROUTINE SC1P                                !
!=========================================================================!
! This subroutine computes the values of the function                     !
!                     \phi_{even}(I-\phi_{odd})^{-1}                      !
! (compare formula (8) of the paper) at the n-th root of 1, where         !
! the matrix series \phi_{even}, \phi_{odd} are numerically truncated     !
! to polynomials of degree n-1.                                           !
!=========================================================================!
! Input variables                                                         !
!   tae  :  array associated with the matrix values of \phi_{even} at   !
!           the roots of 1                                                !
!   tao  :  array associated with the matrix values of \phi_{odd} at    !
!           the roots of 1                                                !
!=========================================================================!
! Output variables                                                        !
!   a    :  array associated with the matrix values of                  !
!           \phi_{even}(I-\phi_{odd})^{-1} at the roots of 1              !
!=========================================================================!
! Used interfaces:  schur_interface                                       !
!=========================================================================!
! Used subroutines: solver, solvec                                        !
!=========================================================================!
SUBROUTINE sc1p(tae,tao,a)
  use smc_tools
  USE schur_interface, ONLY : solver, solvec
  IMPLICIT NONE
  REAL(dp), DIMENSION(:,:,:) :: tao, tae, a
  REAL(dp), ALLOCATABLE, DIMENSION(:,:)   :: rmat, termr
  COMPLEX(dp), ALLOCATABLE, DIMENSION(:,:):: cmat, termc
  INTEGER                                 :: i, n, nb, l, nm1
  n=SIZE(tae,3)
  nb=SIZE(tae,1)
  if(allocated(rmat))deallocate(rmat)
  ALLOCATE(rmat(nb,nb),stat=info)
  if (info/=0) then
             if(debug) write(*,*) "scp1: info=",info
             info=1000
             return
  endif
  if(allocated(termr))deallocate(termr)
  ALLOCATE(termr(nb,nb),stat=info)
  if (info/=0) then
             if(debug) write(*,*) "scp1: info=",info
             info=1000
             return
  endif
  if(allocated(cmat))deallocate(cmat)
  ALLOCATE(cmat(nb,nb),stat=info)
  if (info/=0) then
             if(debug) write(*,*) "scp1: info=",info
             info=1000
             return
  endif
  if(allocated(termc))deallocate(termc)
  ALLOCATE(termc(nb,nb),stat=info)
  if (info/=0) then
             if(debug) write(*,*) "scp1: info=",info
             info=1000
             return
  endif
  rmat(:,:)=-tao(:,:,1)
  loopi1 : DO i=1,nb
     rmat(i,i)=rmat(i,i)+1.0d0
  END DO loopi1
  termr(:,:)=tae(:,:,1)
  CALL solver(rmat,termr)
  a(:,:,1)=termr(:,:)
  nm1=n/2+1
  rmat(:,:)=-tao(:,:,nm1)
  loopi2 : DO i=1,nb
     rmat(i,i)=rmat(i,i)+1.0d0
  END DO loopi2
  termr(:,:)=tae(:,:,nm1)
  CALL solver(rmat,termr)
  a(:,:,nm1)=termr
  IF(n.EQ.2)THEN
     if (allocated(rmat)) deallocate(rmat)
     if (allocated(termr)) deallocate(termr)
     if (allocated(cmat)) deallocate(cmat)
     if (allocated(termc)) deallocate(termc)
     RETURN
  ENDIF
  loopl : DO l=1,n/2-1
     cmat(:,:)=-tao(:,:,l+1)-(0.0d0,1.0d0)*tao(:,:,n-l+1)
     loopi : DO i=1,nb
        cmat(i,i)=cmat(i,i)+1.0d0
     END DO loopi
     termc(:,:)=tae(:,:,l+1)+(0.0d0,1.0d0)*tae(:,:,n-l+1)
     CALL solvec(cmat,termc)
     a(:,:,l+1)=REAL(termc(:,:))
     a(:,:,n-l+1)=REAL((0.0d0,-1.0d0)*termc(:,:))
  END DO loopl
  if (allocated(rmat)) deallocate(rmat)
  if (allocated(termr)) deallocate(termr)
  if (allocated(cmat)) deallocate(cmat)
  if (allocated(termc)) deallocate(termc)
END SUBROUTINE sc1p


!=========================================================================!
!                          SUBROUTINE SC2P                                !
!=========================================================================!
! This subroutine computes the values of the function \phi in the         !
! left-hand side of (7) in the paper, at the n-th root of 1, where        !
! the matrix series \phi_{even}, \phi_{odd} are numerically truncated     !
! to polynomials of degree n-1.                                           !
!=========================================================================!
! Input variables                                                         !
!   tae :  array associated with the matrix values of \phi_{even} at    !
!          the roots of 1                                                 !
!   tao :  array associated with the matrix values of \phi_{odd} at     !
!          the roots of 1                                                 !
!   a1  : the value computed by the subroutine SC1P                       !
!=========================================================================!
! Output variables                                                        !
!   a2  :  array associated with the matrix values of the function      !
!          \phi of (7) in the paper, at the roots of 1                    !
!=========================================================================!
! Used interfaces:  schur_interface,   fft_interface                      !
!=========================================================================!
! Used subroutines: prodc, fillroots                                      !
!=========================================================================!
SUBROUTINE sc2p(tae,tao,a1,a2)
  use smc_tools
  use roots
  USE schur_interface, ONLY : prodc
  USE fft_interface, ONLY   : fillroots
  IMPLICIT NONE
  REAL(dp), DIMENSION(:,:,:) :: tao, tae, a1, a2
  REAL(dp), ALLOCATABLE, DIMENSION(:,:)   :: rmat, cmat
  INTEGER                                     :: n, nb, l, nm1, &
       lm, mm, nmax
  n=SIZE(tae,3)
  nb=SIZE(tae,1)
  if(allocated(rmat)) deallocate(rmat)
  ALLOCATE(rmat(nb,nb),stat=info)
  if (info/=0) then
             if(debug) write(*,*) "scp2: info=",info
             info=1000
             return
  endif
  if(allocated(cmat)) deallocate(cmat)
  ALLOCATE(cmat(nb,nb),stat=info)
  if (info/=0) then
             if(debug) write(*,*) "scp2: info=",info
             info=1000
             return
  endif
  rmat=MATMUL(a1(:,:,1),tae(:,:,1))
  a2(:,:,1)=tao(:,:,1)+rmat(:,:)
  nm1=n/2+1
  rmat=MATMUL(a1(:,:,nm1),tae(:,:,nm1))
  a2(:,:,nm1)=-tao(:,:,nm1)+rmat(:,:)
  IF(n.EQ.2)THEN
     deallocate(rmat,cmat)
     RETURN
  ENDIF
  CALL fillroots(n)
  nmax=SIZE(wr,1)
  mm=nmax/n
  loopl : DO l=1,n/2-1
     lm=mm*l+1
     CALL prodc(a1,tae,l+1,n-l+1,rmat,cmat)
     a2(:,:,l+1)=wr(lm)*tao(:,:,l+1)-wi(lm)*tao(:,:,n-l+1)
     a2(:,:,l+1)=a2(:,:,l+1)+rmat(:,:)
     a2(:,:,n-l+1)=wr(lm)*tao(:,:,n-l+1)+wi(lm)*tao(:,:,l+1)
     a2(:,:,n-l+1)=a2(:,:,n-l+1)+cmat(:,:)
  END DO loopl
  if (allocated(rmat)) deallocate(rmat)
  if (allocated(cmat)) deallocate(cmat)
END SUBROUTINE sc2p


!=========================================================================!
!                          SUBROUTINE SCC2P                               !
!=========================================================================!
! This subroutine computes the values of the function \hat\phi in the     !
! left-hand side of (7) in the paper, at the n-th root of 1, where        !
! the matrix series \hat\phi_{even}, \hat\phi_{odd} are numerically       !
!truncated  to polynomials of degree n-1.                                 !
!=========================================================================!
! Input variables                                                         !
!   thae :  array associated with the matrix values of                  !
!           \hat\phi_{even} at the roots of 1                             !
!   thao :  array associated with the matrix values of \hat\phi_{odd}   !
!           at the roots of 1                                             !
!   a1   :  array associated with the value computed by the subroutine  !
!           SC1P                                                          !
!=========================================================================!
! Output variables                                                        !
!   a2  :  array associated with the matrix values of the function      !
!          \hat\phi of (7) at the roots of 1                              !
!=========================================================================!
! Used interfaces:  schur_interface                                       !
!=========================================================================!
! Used subroutines: prodc                                                 !
!=========================================================================!
SUBROUTINE scc2p(thae,thao,a1,a2)
  use smc_tools
  USE schur_interface, ONLY : prodc
  IMPLICIT NONE
  REAL(dp),DIMENSION(:,:,:) :: thao, thae, a1, a2
  REAL(dp),ALLOCATABLE,DIMENSION(:,:) :: rmat, cmat
  INTEGER :: n,nb, l, nm1
  n=SIZE(thae,3)
  nb=SIZE(thae,1)
  if(allocated(rmat)) deallocate(rmat)
  ALLOCATE(rmat(nb,nb),stat=info)
  if (info/=0) then
             if(debug) write(*,*) "scc2p: info=",info
             info=1000
             return
  endif
  if(allocated(cmat)) deallocate(cmat)
  ALLOCATE(cmat(nb,nb),stat=info)
  if (info/=0) then
             if(debug) write(*,*) "scc2p: info=",info
             info=1000
             return
  endif
  rmat=MATMUL(a1(:,:,1),thae(:,:,1))
  a2(:,:,1)=thao(:,:,1)+rmat(:,:)
  nm1=n/2+1
  rmat=MATMUL(a1(:,:,nm1),thae(:,:,nm1))
  a2(:,:,nm1)=thao(:,:,nm1)+rmat(:,:)
  IF(n.EQ.2)THEN
     if (allocated(rmat)) deallocate(rmat)
     if (allocated(cmat)) deallocate(cmat)
     RETURN
  ENDIF
  loopl : DO l=1,n/2-1
     CALL prodc(a1,thae,l+1,n-l+1,rmat,cmat)
     a2(:,:,l+1)=thao(:,:,l+1)+rmat(:,:)
     a2(:,:,n-l+1)=thao(:,:,n-l+1)+cmat(:,:)
  END DO loopl
     if (allocated(rmat)) deallocate(rmat)
     if (allocated(cmat)) deallocate(cmat)
END SUBROUTINE scc2p

!=========================================================================!
!                          SUBROUTINE SC1                                 !
!=========================================================================!
! This subroutine computes the values of the function                     !
!                     \phi_{even}(I-\phi_{odd})^{-1}                      !
! (compare formula (7) of the paper) at the odd-numbered roots of 1       !
!=========================================================================!
! Input variables                                                         !
!   tae  :  array associated with the matrix values of \phi_{even}      !
!           at the odd-numbered roots of 1                                !
!   tao  :  array associated with the matrix values of \phi_{odd}       !
!           at the odd-numbered roots of 1                                !
!=========================================================================!
! Output variables                                                        !
!   a    :  array associated with the matrix values of                  !
!           \phi_{even}(I-\phi_{odd})^{-1} at the odd-numbered roots of 1 !
!=========================================================================!
! Used interfaces:  schur_interface                                       !
!=========================================================================!
! Used subroutines:  solvec                                               !
!=========================================================================!
SUBROUTINE sc1(tae,tao,a)
  use smc_tools
  USE schur_interface, ONLY : solvec
  IMPLICIT NONE
  REAL(dp), DIMENSION(:,:,:)  :: tae, tao, a
  COMPLEX(dp), ALLOCATABLE, DIMENSION(:,:) :: cmat, termc
  INTEGER                                      :: n, nb, num, i, l
  n=SIZE(tae,3)
  nb=SIZE(tae,1)
  if(allocated(cmat))deallocate(cmat)
  ALLOCATE(cmat(nb,nb),stat=info)
  if (info/=0) then
             if(debug) write(*,*) "sc1: info=",info
             info=1000
             return
  endif
  if(allocated(termc))deallocate(termc)
  ALLOCATE(termc(nb,nb),stat=info)
  if (info/=0) then
             if(debug) write(*,*) "sc1: info=",info
             info=1000
             return
  endif
  num=n/2
  loopl : DO l=1,num
     cmat=-tao(:,:,l)-(0.0d0,1.0d0)*tao(:,:,num+l)
     loopi : DO i=1,nb
        cmat(i,i)=cmat(i,i)+1.0d0
     END DO loopi
     termc=tae(:,:,l)+(0.0d0,1.0d0)*tae(:,:,num+l)
     CALL solvec(cmat,termc)
     a(:,:,l)=REAL(termc(:,:))
     a(:,:,num+l)=REAL((0.0d0,-1.0d0)*termc(:,:))
  END DO loopl
  if (allocated(cmat)) deallocate(cmat)
  if (allocated(termc)) deallocate(termc)
END SUBROUTINE sc1

!=========================================================================!
!                          SUBROUTINE SC2                                 !
!=========================================================================!
! This subroutine computes the values of the function \phi in the         !
! left-hand side of (7) in the paper, at the odd-numbered roots of 1      !
!=========================================================================!
! Input variables                                                         !
!   tae  :  array associated with the matrix values of \phi_{even}      !
!           at the odd-numbered roots of 1                                !
!   tao  :  array associated with the matrix values of \phi_{odd}       !
!           at the odd-numbered roots of 1                                !
!   a1   : array associated with the value computed by the subroutine   !
!           sc1                                                           !
!=========================================================================!
! Output variables                                                        !
!   a2  : array associated with the matrix values of the function       !
!         \phi of (7) at the odd-numbered roots of 1                      !
!=========================================================================!
! Used interfaces:  schur_interface,   fft_interface                      !
!=========================================================================!
! Used subroutines: prodc, fillroots                                      !
!=========================================================================!
SUBROUTINE sc2(tae,tao,a1,a2)
  use smc_tools
  use roots
  USE fft_interface, ONLY   : fillroots
  USE schur_interface, ONLY : prodc
  IMPLICIT NONE
  REAL(dp),  DIMENSION(:,:,:) :: tae, tao, a1, a2
  REAL(dp), ALLOCATABLE, DIMENSION(:,:)   :: rmat, cmat
  INTEGER                                     :: n, nb, num, l, lm, &
       mm, nmax
  n=SIZE(tae,3)
  nb=SIZE(tae,1)
  if(allocated(cmat))deallocate(cmat)
  ALLOCATE(cmat(nb,nb),stat=info)
  if (info/=0) then
             if(debug) write(*,*) "sc2: info=",info
             info=1000
             return
  endif
  if(allocated(rmat))deallocate(rmat)
  ALLOCATE(rmat(nb,nb),stat=info)
  if (info/=0) then
             if(debug) write(*,*) "sc2: info=",info
             info=1000
             return
  endif
  num=n/2
  CALL fillroots(2*n)
  nmax=SIZE(wr,1)
  mm=nmax/(2*n)
  loopl : DO l=1,num
     lm=mm*(2*l-1)+1
     CALL prodc(a1,tae,l,num+l,rmat,cmat)
     a2(:,:,l)=wr(lm)*tao(:,:,l)-wi(lm)*tao(:,:,num+l)
     a2(:,:,l)=a2(:,:,l)+rmat(:,:)
     a2(:,:,num+l)=wr(lm)*tao(:,:,num+l)+wi(lm)*tao(:,:,l)
     a2(:,:,num+l)=a2(:,:,num+l)+cmat(:,:)
  END DO loopl
  if (allocated(rmat)) deallocate(rmat)
  if (allocated(cmat)) deallocate(cmat)
END SUBROUTINE sc2


!=========================================================================!
!                          SUBROUTINE SCC2                                !
!=========================================================================!
! This subroutine computes the values of the function \hat\phi in the     !
! left-hand side of (7) in the paper, at the odd-numbered  roots of 1     !
!=========================================================================!
! Input variables                                                         !
!   thae :  array associated with the matrix values of                  !
!           \hat\phi_{even} at the roots of 1                             !
!   thao :  array associated with the matrix values of \hat\phi_{odd}   !
!           at the roots of 1                                             !
!   a1   :  array associated with the value computed by the subroutine  !
!           SC1                                                           
!=========================================================================!
! Output variables                                                        !
!   a2  :   array associated with the matrix values of the function     !
!           \hat\phi of (7) at the  odd-numbered roots of 1               !
!=========================================================================!
! Used interfaces:  schur_interface                                       !
!=========================================================================!
! Used subroutines: prodc                                                 !
!=========================================================================!
SUBROUTINE scc2(thae, thao, a1, a2)
  use smc_tools
  USE schur_interface, ONLY : prodc
  IMPLICIT NONE
  REAL(dp),  DIMENSION(:,:,:) :: thae, thao, a1, a2
  REAL(dp), ALLOCATABLE, DIMENSION(:,:)   :: rmat, cmat
  INTEGER                                     :: n, nb, l, num
  nb=SIZE(thae,1)
  n=SIZE(thae,3)
  if(allocated(rmat))deallocate(rmat)
  ALLOCATE(rmat(nb,nb),stat=info)
  if (info/=0) then
             if(debug) write(*,*) "scc2: info=",info
             info=1000
             return
  endif
  if(allocated(cmat))deallocate(cmat)
  ALLOCATE(cmat(nb,nb),stat=info)
  if (info/=0) then
             if(debug) write(*,*) "scc2: info=",info
             info=1000
             return
  endif
  num=n/2
  loopl : DO l=1,num
     CALL prodc(a1,thae,l,num+l,rmat,cmat)
     a2(:,:,l)=thao(:,:,l)+rmat(:,:)
     a2(:,:,num+l)=thao(:,:,num+l)+cmat(:,:)
  END DO loopl
  if (allocated(rmat)) deallocate(rmat)
  if (allocated(cmat)) deallocate(cmat)
END SUBROUTINE scc2


!=========================================================================!
!                          SUBROUTINE PRODC                               !
!=========================================================================!
! This subroutine computes the matrix product                             !
!  rmat+I*cmat = (a(:,:,ir)+I*a(:,:,ic))* (b(:,:,ir)+I*b(:,:,ic))         !
! by using the algorithm that performs three real matrix multiplications  !    
!=========================================================================!
SUBROUTINE prodc(a, b, ir, ic, rmat, cmat)
  use smc_tools
  IMPLICIT NONE
  REAL(dp),  DIMENSION(:,:,:) :: a, b
  REAL(dp),  DIMENSION(:,:)   :: rmat, cmat
  REAL(dp), allocatable, DIMENSION(:,:)   :: aux
  INTEGER                                     :: ir,ic,nb
  nb=SIZE(a,1)
  if(allocated(aux))deallocate(aux)
  ALLOCATE(aux(nb,nb),stat=info)
  if (info/=0) then
             if(debug) write(*,*) "prodc: info=",info
             info=1000
             return
  endif
  aux(:,:)=a(:,:,ir)+a(:,:,ic)
  rmat(:,:)=b(:,:,ir)+b(:,:,ic)
  cmat=MATMUL(aux,rmat)
  aux=MATMUL(a(:,:,ic),b(:,:,ic))
  rmat=MATMUL(a(:,:,ir),b(:,:,ir))
  cmat=cmat-rmat-aux
  rmat=rmat-aux
  if (allocated(aux)) deallocate(aux)
END SUBROUTINE prodc

!=========================================================================!
!                          SUBROUTINE MEANS                               !
!=========================================================================!
! This subroutine computes the mean values mean=(1,...,1)\phi'(1) and     !
! hmean=(1,...,1)\hat\phi'(1) by means of formulae (11) in the paper      !
!=========================================================================!
! Input variables:                                                        !
!   tae  :  array associated with the matrix values of \phi_{even}      !
!           at the odd-numbered roots of 1                                !
!   tao  :  array associated with the matrix values of \phi_{odd}       !
!           at the odd-numbered roots of 1                                !
!   thae :  array associated with the matrix values of                  !
!           \hat\phi_{even} at the roots of 1                             !
!   mean :  array associated withthe value \alpha^{(n)} in the          !
!           formula (11)                                                  !
!   hmean: array associated with the value \hat\alpha^{(n)} in the      !
!          formula (11)                                                   !
!=========================================================================!
! Output variables                                                        !
!   mean : array associated with the value \alpha^{(n+1)} in the        !
!          formula (11)                                                   !
!   hmean: array associated with the value \hat\alpha^{(n+1)} in the    !
!          formula (11)                                                   !
!=========================================================================!
! Used interfaces:  schur_interface                                       !
!=========================================================================!
! Used subroutines: solver                                                !
!=========================================================================!

SUBROUTINE means(tae,tao,thae,mean,hmean)
  use smc_tools
  USE schur_interface, ONLY : solver
  IMPLICIT NONE
  REAL(dp), DIMENSION(:,:,:) :: tae,tao, thae
  REAL(dp), DIMENSION(:)     :: mean,hmean
  REAL(dp),ALLOCATABLE,DIMENSION(:,:)   :: w,v
  REAL(dp)                          :: oneh
  INTEGER                                   :: nb, i
  nb=SIZE(tae,1)
  if(allocated(w))deallocate(w)
  ALLOCATE(w(nb,nb),stat=info)
  if (info/=0) then
             if(debug) write(*,*) "means: info=",info
             info=1000
             return
  endif
  if(allocated(v))deallocate(v)
  ALLOCATE(v(nb,1),stat=info)
  if (info/=0) then
             if(debug) write(*,*) "means: info=",info
             info=1000
             return
  endif
  oneh=1.d0/2.d0
  v(:,1)=1.d0-mean
  w(:,:)=-tao(:,:,1)
  loop1 : DO i=1,nb
     w(i,i)=w(i,i)+1.d0
  END DO loop1
  CALL solver(w,v)
  mean=(mean+1.d0-MATMUL(TRANSPOSE(tae(:,:,1)),v(:,1)))*oneh
  hmean=(hmean-MATMUL(TRANSPOSE(thae(:,:,1)),v(:,1)))*oneh
  if (allocated(v)) deallocate(v)
  if (allocated(w)) deallocate(w)

END SUBROUTINE means

!=========================================================================!
!                          SUBROUTINE PMEANS                              !
!=========================================================================!
! This subroutine computes the mean values mean=(1,...,1)\phi'(1) and     !
! hmean=(1,...,1)\hat\phi'(1)                                             !
!=========================================================================!
! Input variables:                                                        !
!   ae :array associated with the matrix coefficients of \phi_{even}    !
!   ao :array associated with ehe matrix coefficients of \phi_{odd}     !
!   hae:array associated with the matrix coefficients of \hat\phi_{even}!
!   hao:array associated with the matrix coefficients of \hat\phi_{odd} !
!=========================================================================!
! Output variables                                                        !
!   mean : array associated with the value \alpha^{(0)} in the          !
!          formula (11)                                                   !
!   hmean: array associated with the value \hat\alpha^{(0)} in the      !
!          formula (11)                                                   !
!=========================================================================!
SUBROUTINE pmeans(ae, ao, hae, hao, mean, hmean)
  use smc_tools
  IMPLICIT NONE
  REAL(dp), DIMENSION(:,:,:) :: ae,ao, hae, hao
  REAL(dp),DIMENSION(:) :: mean,hmean
  REAL(dp) :: sp, sd
  INTEGER :: n, nb, j, l, l2
  INTRINSIC size
  nb=SIZE(ae,1)
  n=SIZE(ae,3)
  mean=0.0d0
  hmean=0.0d0
  loop10 : DO j=1,nb
     loop20 : DO l=1,n
        l2=l+l
        sp=SUM(ae(:,j,l))
        sd=SUM(ao(:,j,l))
        mean(j)=mean(j)+sp*(l2-2)+sd*(l2-1)
     END DO loop20
  END DO loop10
  loop30 : DO j=1,nb
     loop40 : DO l=1,n
        l2=l+l
        sp=SUM(hae(:,j,l))
        sd=SUM(hao(:,j,l))
        hmean(j)=hmean(j)+sd*(l2-2)+sp*(l2-1)
     END DO loop40
  END DO loop30
END SUBROUTINE pmeans


!=========================================================================!
!                          SUBROUTINE SOLVER                              !
!=========================================================================!
! This subroutine solves the real linear system x rmat = termr, where     !
! rmat is a square matrix, termr is a matrix                              !
!=========================================================================!
! Input variables:                                                        !
!   rmat  : array associated with the matrix rmat                       !
!   termr :  array associated with the vector termr                     !
!=========================================================================!
! Output variables:                                                       !
!  termr: the solution x of the system                                    !
!=========================================================================!
! Used interfaces:  lapack90_interfaces                                   !
!=========================================================================!
! Used subroutines: la_gesv                                               !
!=========================================================================!
SUBROUTINE solver(rmat, termr)
  use smc_tools
  USE f95_lapack
  IMPLICIT NONE
  REAL(dp),  DIMENSION(:,:) :: rmat,termr
  rmat=TRANSPOSE(rmat)
  IF(SIZE(termr,2)>1) termr=TRANSPOSE(termr)
  CALL la_gesv(rmat,termr,info=info)
  if (info/=0) then
        if(debug) write(*,*) "solver:la_gesv info=",info
        info=2000
        return
  endif
  IF(SIZE(termr,2)>1) termr=TRANSPOSE(termr)
END SUBROUTINE solver



!=========================================================================!
!                          SUBROUTINE SOLVEC                              !
!=========================================================================!
! This subroutine solves the complex linear system x cmat = termc, where  !
! cmat is a square matrix, termc is a matrix                              !
!=========================================================================!
! Input variables:                                                        !
!   cmat  : array associated with the matrix cmat                       !
!   termc : array associated with the vector termc                      !
!=========================================================================!
! Output variables:                                                       !
!  termc: array associated with the solution X of the system            !
!=========================================================================!
! Used interfaces:  lapack90_interfaces                                   !
!=========================================================================!
! Used subroutines: la_gesv                                               !
!=========================================================================!
SUBROUTINE solvec(cmat, termc)    
  use smc_tools
  USE f95_lapack
  IMPLICIT NONE
  COMPLEX(dp),  DIMENSION(:,:) :: cmat,termc
  cmat=TRANSPOSE(cmat)
  termc=TRANSPOSE(termc)
  CALL la_gesv(cmat,termc,info=info)
  if (info/=0) then
        if(debug) write(*,*) "solvec:la_gesv info=",info
        info=2000
        return
  endif
  termc=TRANSPOSE(termc)
END SUBROUTINE solvec

SUBROUTINE SPWCR(a,ds,eps,g,err,maxit,intpmax)
  USE schur_interface
  USE pwcr_interface, ONLY : scomputeG, residual
  use smc_tools
  use smc_int
  use roots
!  use ponte_f_f, only: fdrift
  IMPLICIT NONE
  REAL(dp), DIMENSION(:,:,:):: a
  REAL(dp), DIMENSION(:,:) :: g
  REAL(dp) :: eps,err
  integer :: ds

  REAL(dp), DIMENSION(:,:,:), ALLOCATABLE :: ae, ao, hae, hao,sa
  REAL(dp), DIMENSION(:,:), ALLOCATABLE   :: a1,aa1
  REAL(dp), DIMENSION(:), ALLOCATABLE     :: a1s, uu,vv
  REAL(dp)                            :: stoc, a0norm,drift,tau
  INTEGER                                     :: m, n, nb, j, &
       l, l1, ln, step, i
  integer :: maxit,intpmax, intpmaxl, dual=-1
! dual va messo in input 

  intpmaxl=256*2**intpmax

!20 FORMAT(1X,"*",\)
20 FORMAT("*")
if (debug)  write(*,*) "spwcr:  Dimensions A =",size(a,1), size(a,2),size(a,3),&
"   eps=",eps," err=",err," maxit=",maxit," intpmax=",intpmax

  !-------------------------------------
  ! Prepare the input
  !-------------------------------------
  m=SIZE(a,3)
  nb=SIZE(a,1)
  if(allocated(aa1)) deallocate(aa1)
  ALLOCATE(aa1(nb,nb),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "spwcr: info=",info
        info=1000
        return
  endif
  if(allocated(uu)) deallocate(uu)
  if(allocated(vv)) deallocate(vv)
  ALLOCATE(uu(nb),vv(nb),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "spwcr: info=",info
        info=1000
        return
  endif



  if(allocated(sa)) deallocate(sa)
  ALLOCATE(sa(nb,nb,m),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "spwcr: info=",info
        info=1000
        return
  endif
  sa=a
  do j=1,m
     sa(:,:,j)=transpose(sa(:,:,j))
  end do

  DO l=1,nb
     stoc=SUM(sa(l,:,:))
     IF(1.0d0-stoc>eps)THEN
if(debug)  WRITE(*,*)"spwcr: The matrix A is not stochastic"
        info=5000
        return
     ENDIF
  END DO

call mg_startup(sa,ds,dual,tau,uu,vv,drift)

if (ds/=2) then
   aa1=sa(:,:,1)
end if
!  call shift(sa,aa1,drift) 
  call shift(sa,ds,tau,uu,vv,drift,sa)
if(debug) write(*,*) "spwcr: after shift tau=",tau," drift=",drift," uu=",uu,"  vv=",vv," ds=",ds

!  fdrift=drift
  do j=1,m
     sa(:,:,j)=transpose(sa(:,:,j))
  end do
  if (ds==2) then
     aa1=sa(:,:,1)
  end if

!  if(verb)then 
!         write(wout,*)"drift=",drift
!         call print_it
!  endif

!  do j=1,m
!     a(:,:,j)=transpose(a(:,:,j))
!     sa(:,:,j)=transpose(sa(:,:,j))
!  end do
  l1=(m+1)/2
  n=l1
  ln=LOG(1.0d0*n)/LOG(2.0d0)
  IF(2**ln<n)n=2**(ln+1)
  
  if(allocated(ae)) deallocate(ae)
  if(allocated(ao)) deallocate(ao)
  if(allocated(hae)) deallocate(hae) 
  if(allocated(hao)) deallocate(hao)
  if(allocated(a1)) deallocate(a1)
  if(allocated(a1s)) deallocate(a1s)
  ALLOCATE(ae(nb,nb,n),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "spwcr: info=",info
        info=1000
        return
  endif
  ALLOCATE(ao(nb,nb,n),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "spwcr: info=",info
        info=1000
        return
  endif
  ALLOCATE(hae(nb,nb,n),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "spwcr: info=",info
        info=1000
        return
  endif
  ALLOCATE(hao(nb,nb,n),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "spwcr: info=",info
        info=1000
        return
  endif
  ALLOCATE(a1(nb,nb),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "spwcr: info=",info
        info=1000
        return
  endif
  ALLOCATE(a1s(nb),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "spwcr: info=",info
        info=1000
        return
  endif

  ae=0.0d0
  ao=0.0d0
  hae=0.0d0
  hao=0.0d0
  loop1 : DO l=1,l1-1
     ao(:,:,l)=sa(:,:,2*l)
     ae(:,:,l)=sa(:,:,2*l-1)
  END DO loop1
  IF(2*l1==m)THEN
     ao(:,:,l1)=sa(:,:,m)
     ae(:,:,l1)=sa(:,:,m-1)
  ELSE
     ae(:,:,l1)=sa(:,:,m)
  ENDIF
  hae(:,:,1:n-1)=ae(:,:,2:n)
  hao=ao

  if(ds==2)then
     a1=ae(:,:,1)
  else
     a1=transpose(aa1)
  end if

  !---------------------------------
  ! Start Pointwise Cyclic Reduction
  !---------------------------------
 step=0
  n=SIZE(ae,3)
  a0norm=1
  pwcrstage : DO WHILE(n>1.and. a0norm>eps)
     step=step+1
     CALL sschur(ae,ao,hae,hao,a0norm,eps,intpmaxl)
     n=SIZE(ae,3)
     if(verb)then
        write(wout,*) "--iter=",step," Check=",a0norm," Interp. points=", n
        call print_it
        else
        write(wout,20)
        call print_it_nolf
     end if
        if (step==maxit) then
          write(wout,*)"Reached the maximum number of iterations in PWCR"
          call print_it
          exit
        endif
  END DO pwcrstage
  CALL scomputeG(a1,hao,g)
  if(drift>0.and.ds==2) then
     do i=1,nb
        do j=1,nb
           g(i,j)=g(i,j)+tau*uu(j)*vv(i)
        end do
     end do
  end if

  CALL residual(a,g,err)
  g=transpose(g)
  if (allocated(ae)) deallocate(ae)
  if (allocated(ao)) deallocate(ao)
  if (allocated(hae)) deallocate(hae)
  if (allocated(hao)) deallocate(hao)
  if (allocated(a1)) deallocate(a1)
  if (allocated(a1s)) deallocate(a1s)
  if (allocated(sa)) deallocate(sa)
  if (allocated(wr)) deallocate(wr)
  if (allocated(wi)) deallocate(wi)
  if (allocated(wwr)) deallocate(wwr)
  if (allocated(wwi)) deallocate(wwi)
  if(allocated(uu)) deallocate(uu)
  if(allocated(vv)) deallocate(vv)
END SUBROUTINE SPWCR


SUBROUTINE ScomputeG(a1,hao,g)
  USE f95_lapack
  use smc_tools
  IMPLICIT NONE
  REAL(dp), DIMENSION(:,:,:) ::  hao
  REAL(dp), DIMENSION(:,:)    :: a1, g
  INTEGER                                 :: nb, i

  nb = size(g,2)
  g(:,:)=-TRANSPOSE(hao(:,:,1))
  loop10 : DO i=1,nb
     g(i,i)=g(i,i)+1.0d0
  END DO loop10
  a1=TRANSPOSE(a1)
  CALL la_gesv(g, a1, info=info)
  if (info/=0) then
        if(debug) write(*,*) "scomputeG:la_gesv info=",info
        info=2000
        return
  endif
  g=TRANSPOSE(a1)

END SUBROUTINE scomputeG



SUBROUTINE sschur(ae,ao,hae,hao,a0norm,eps,intpmaxl)
  use smc_tools
  USE fft_interface
  USE schur_interface, ONLY : sc1, sc2p, sc1p, sc2, scc2p, scc2,stest
  IMPLICIT NONE
  REAL(dp), allocatable, DIMENSION(:,:,:) :: ae, ao, hae, hao

  REAL(dp)                            :: eps
  REAL(dp), DIMENSION(:,:,:), ALLOCATABLE :: tae, tao, thae, thao, &
       a1, a2, a3, a4
  REAL(dp)                           :: s1, s2, a0norm, anorm
  INTEGER                                     :: l,nn, nb, n, nradd,intpmaxl
  LOGICAL                                     :: answer, hanswer
  nb=SIZE(ae,1)
  n=SIZE(ae,3)
  if(allocated(tae)) deallocate(tae)
  if(allocated(tao)) deallocate(tao)
  if(allocated(a1)) deallocate(a1)
  if(allocated(a2)) deallocate(a2)
  if(allocated(a3)) deallocate(a3)
  if(allocated(a4)) deallocate(a4)
  if(allocated(thae)) deallocate(thae)
  if(allocated(thao)) deallocate(thao)
  ALLOCATE(tae(nb,nb,n),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "sschur: info=",info
        info=1000
        return
  endif
  ALLOCATE(tao(nb,nb,n),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "sschur: info=",info
        info=1000
        return
  endif
  ALLOCATE(a1(nb,nb,n),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "sschur: info=",info
        info=1000
        return
  endif
  ALLOCATE(a2(nb,nb,n),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "sschur: info=",info
        info=1000
        return
  endif
  ALLOCATE(a3(nb,nb,n),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "sschur: info=",info
        info=1000
        return
  endif
  ALLOCATE(thae(nb,nb,n),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "sschur: info=",info
        info=1000
        return
  endif
  ALLOCATE(thao(nb,nb,n),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "sschur: info=",info
        info=1000
        return
  endif
  ALLOCATE(a4(nb,nb,n),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "sschur: info=",info
        info=1000
        return
  endif

  CALL ftb1(ae,tae)
  CALL ftb1(ao,tao)
  CALL sc1p(tae,tao,a1)
  CALL sc2p(tae,tao,a1,a2)
  CALL iftb1(a2,a3)
  CALL ftb1(hae,thae)
  CALL ftb1(hao,thao)
  CALL scc2p(thae,thao,a1,a2)
  CALL iftb1(a2,a4)
  nn=n
  nradd=0
100 nradd=nradd+1
 if (debug)  write(*,*)"sschur: interp point",nn
  ! check the accuracy of the matrix coefficients
  CALL stest(a3,eps,answer)
  CALL stest(a4,eps,hanswer)
  IF(answer.OR.hanswer) THEN ! double the number of interpolation points
     IF(nradd>1)THEN
        n=nn/2
        if(allocated(a1))deallocate(a1)
        allocate(a1(nb,nb,n),stat=info)
        if (info/=0) then
            if(debug) write(*,*) "sschur: info=",info
            info=1000
            return
        endif
        a1=ae
        if(allocated(ae)) DEALLOCATE(ae)
        ALLOCATE(ae(nb,nb,nn),stat=info)
        if (info/=0) then
            if(debug) write(*,*) "sschur: info=",info
            info=1000
            return
        endif
        ae(:,:,1:n)=a1
        ae(:,:,n+1:nn)=0.0d0
        a1=ao
        if (allocated(ao)) DEALLOCATE(ao)
        ALLOCATE(ao(nb,nb,nn),stat=info)
        if (info/=0) then
            if(debug) write(*,*) "sschur: info=",info
            info=1000
            return
        endif
        ao(:,:,1:n)=a1
        ao(:,:,n+1:nn)=0.0d0
        a1=hae
        if (allocated(hae)) DEALLOCATE(hae)
        ALLOCATE(hae(nb,nb,nn),stat=info)
        if (info/=0) then
            if(debug) write(*,*) "sschur: info=",info
            info=1000
            return
        endif
        hae(:,:,1:n)=a1
        hae(:,:,n+1:nn)=0.0d0
        a1=hao
        if (allocated(hao)) DEALLOCATE(hao)
        ALLOCATE(hao(nb,nb,nn),stat=info)
        if (info/=0) then
            if(debug) write(*,*) "sschur: info=",info
            info=1000
            return
        endif
        hao(:,:,1:n)=a1
        hao(:,:,n+1:nn)=0.0d0
        if (allocated(tae)) deallocate(tae)
        if (allocated(tao)) deallocate(tao)
        if (allocated(thae)) deallocate(thae)
        if (allocated(thao)) deallocate(thao)
        if (allocated(a1)) deallocate(a1)
        if (allocated(a2)) deallocate(a2)
        allocate(tae(nb,nb,nn),stat=info)
        if (info/=0) then
            if(debug) write(*,*) "sschur: info=",info
            info=1000
            return
        endif
        allocate(tao(nb,nb,nn),stat=info)
        if (info/=0) then
            if(debug) write(*,*) "sschur: info=",info
            info=1000
            return
        endif
        allocate(thae(nb,nb,nn),stat=info)
        if (info/=0) then
            if(debug) write(*,*) "sschur: info=",info
            info=1000
            return
        endif
        allocate(thao(nb,nb,nn),stat=info)
        if (info/=0) then
            if(debug) write(*,*) "sschur: info=",info
            info=1000
            return
        endif
        allocate(a1(nb,nb,nn),stat=info)
        if (info/=0) then
            if(debug) write(*,*) "sschur: info=",info
            info=1000
            return
        endif
        allocate(a2(nb,nb,nn),stat=info)
        if (info/=0) then
            if(debug) write(*,*) "sschur: info=",info
            info=1000
            return
        endif
     ENDIF
     CALL ftb2(ae,tae)
     CALL ftb2(ao,tao)
     CALL sc1(tae,tao,a1)
     CALL sc2(tae,tao,a1,a2)
     CALL iftb2(a2,a3)
     CALL ftb2(hae,thae)
     CALL ftb2(hao,thao)
     CALL scc2(thae,thao,a1,a2)
     CALL iftb2(a2,a4)
     nn=2*nn
     if (nn/2==intpmaxl) then
        write(wout,*) "Reached the maximum number of interpolation points in PWCR"
        call print_it
        goto 51
     end if
     GOTO 100
  END IF
51 continue
  if(allocated(ae)) DEALLOCATE(ae)
  if (allocated(ao)) DEALLOCATE(ao)
  if (allocated(hae)) DEALLOCATE(hae)
  if (allocated(hao)) DEALLOCATE(hao)
  n=nn/2
!   n=nn/4
  ALLOCATE(ae(nb,nb,n),stat=info)
  if (info/=0) then
            if(debug) write(*,*) "sschur: info=",info
            info=1000
            return
  endif
  ALLOCATE(ao(nb,nb,n),stat=info)
  if (info/=0) then
            if(debug) write(*,*) "sschur: info=",info
            info=1000
            return
  endif
  ALLOCATE(hae(nb,nb,n),stat=info)
  if (info/=0) then
            if(debug) write(*,*) "sschur: info=",info
            info=1000
            return
  endif
  ALLOCATE(hao(nb,nb,n),stat=info)
  if (info/=0) then
            if(debug) write(*,*) "sschur: info=",info
            info=1000
            return
  endif
  loop1 : DO l=1,nn/2
!  loop1 : DO l=1,nn/4
     ae(:,:,l)=a3(:,:,2*l-1)
     ao(:,:,l)=a3(:,:,2*l)
     hae(:,:,l)=a4(:,:,2*l)
     hao(:,:,l)=a4(:,:,2*l-1)
  END DO loop1
  a0norm=maxval(sum(abs(ae(:,:,1)),dim=2))
  anorm=maxval(sum(abs(ae(:,:,2:n)),dim=2))
  anorm=max(anorm, maxval(sum(abs(ao(:,:,2:n)),dim=2)))
  a0norm=min(a0norm,anorm)
  if(allocated(tao)) deallocate(tao)
  if(allocated(a1)) deallocate(a1)
  if(allocated(a2)) deallocate(a2)
  if(allocated(a3)) deallocate(a3)
  if(allocated(a4)) deallocate(a4)
  if(allocated(thae)) deallocate(thae)
  if(allocated(thao)) deallocate(thao)

END SUBROUTINE sschur

SUBROUTINE stest(u, eps, answer)
  use smc_tools
  IMPLICIT NONE
  REAL(dp),  DIMENSION(:,:,:) :: u
  REAL(dp)                    :: eps, err
  LOGICAL                     ::  answer
  integer                     :: n, nb
  INTRINSIC size
  n=SIZE(u,3)
  nb=SIZE(u,1)
  answer=.FALSE.
  err=0.d0
  err=maxval(abs(u(:,:,n/2+1:n)))
  if(err>eps*n)answer=.true.
end SUBROUTINE stest


