!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!  Structured Markov Chains Solver      [  SMCSolver  ]       !
!  Dario Bini, Beatrice Meini, Sergio Steffe'                 !
!  bini@dm.unipi.it, meini@dm.unipi.it, steffe@dm.unipi.it    !
!  Dipartimento di Matematica "Leonida Tonelli"               !
!  Largo Pontecorvo 5                                         !
!  56127 Pisa                                                 !
!  Italy                                                      !
!  Version 2.1 - June  2009                                   !
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!
!
!

function drftmg1(a)
  use smc_tools
  use smc_int, only: gth

  implicit none
  real(dp),dimension(:,:,:)::a
  real(dp)::drftmg1
  !
  real(dp),dimension(:,:),allocatable::s
  real(dp),dimension(:),allocatable:: w,p
  integer:: i,m,n
   m=size(a,1)
  n=size(a,3)


  if (allocated(s)) deallocate(s)
  if (allocated(p)) deallocate(p)
    if (allocated(w)) deallocate(w)
    allocate(s(m,m),p(m),w(m),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "is: info=",info
        info=1000
        return
  endif
  s=sum(a,dim=3)

  call gth(s,w)
  s=0.d0
  do i=2,n
     s=s+(i-1)*a(:,:,i)
  end do
  p=sum(s,dim=2)
  drftmg1=dot_product(p,w)-1.d0
  if (allocated(s)) deallocate(s)
  if (allocated(p)) deallocate(p)
end function drftmg1



  subroutine drft(am1, a0, a1, v, drift)
    ! compute the drift and the left dominant eigenvector v of Am1+A0+A1, where
    ! Am1,A0,A1 are nonnegative and their sum is stochastic
    use smc_int, only: gth
    use smc_tools
    implicit none
    ! arguments
    real(dp),intent(in),dimension(:,:) :: am1,a0,a1
    real(dp),intent(out),dimension(:)  :: v
    real(dp),intent(out)               :: drift
    ! local variables
    real(dp),allocatable,dimension(:)  :: p
    real(dp),dimension(:,:),allocatable :: a
    integer :: m
    m=size(a0,1)
    if (allocated(a)) deallocate(a) 
    if (allocated(p)) deallocate(p) 
    allocate(a(m,m),p(m),stat=info)
    if (info/=0) then
        if(debug) write(*,*) "drft: info=",info
        info=1000
        return
    endif
    a=am1+a0+a1
    call gth(a,v)
    a=a0+2*a1
    p=sum(a,dim=2);
    drift=dot_product(p,v)-1.d0;
    if (allocated(a)) deallocate(a)
    if (allocated(p)) deallocate(p)
  end subroutine drft

  subroutine gth(a,p)
    ! Compute the probability invariant vector p of the stochastic matrix A
    ! by using the gth trick  
    use smc_tools
    use f95_lapack, only: la_gesv
    implicit none
    ! arguments
    real(dp),intent(in),dimension(:,:) :: a
    real(dp),intent(out),dimension(:)  :: p
    ! local variables
    real(dp),dimension(:,:),allocatable :: aa
    real(dp) :: m, s
    integer  :: n, i, k, ii, j
!
    n=size(a,1)
    if (allocated(aa)) deallocate(aa) 
    allocate(aa(n,n),stat=info)
    if (info/=0) then
        if(debug) write(*,*) "gth: info=",info
        info=1000
        return
    endif
    aa=-a
    do i=1,n
       aa(i,i)=aa(i,i)+1.0d0
    end do
    do k=1,n-1
       do ii=k+1,n
          if (abs(aa(k,k))<epsilon(1.d0)) then
             if(verb)then
                write(wout,*)"Zero diagonal entry in gth. La_gesv used"
                call print_it
             end if
             goto 50
          end if
          m=aa(ii,k)/aa(k,k)
          aa(ii,k)=m
          do j=k+1,n
             if (ii /= j) aa(ii,j)=aa(ii,j)-m*aa(k,j)
          end do
       end do
       ! diagonal adjustment
       do ii=k+1,n
          aa(ii,ii)=-sum(aa(ii,k+1:ii-1))-sum(aa(ii,ii+1:n));
       end do
    end do
    p(n)=1
    do ii=n-1,1,-1
       s=0
       do j=ii+1,n
          s=s+aa(j,ii)*p(j);
       end do
       p(ii)=-s
    end do
    p=p/sum(p)
    return
50 continue
    aa=-a
    do i=1,n
       aa(i,i)=aa(i,i)+1.0d0
    end do
    aa =transpose(aa)
    aa(n,:)=1.d0
    p=0.d0
    p(n)=1.d0
    call la_gesv(aa,p)
    if (allocated(aa)) deallocate(aa)
  end subroutine gth


subroutine bgth(v,a,b)
  ! Solve the system Ax=b where A is an M-matrix and v=Ae, e=(1,...,1)
  ! by using the generalization of the GTH trick
  ! Input:  A, v and b
  ! Output: b
    use smc_tools
    implicit none
    real(dp),intent(in),dimension(:,:) :: a
    real(dp),intent(inout),dimension(:,:) :: b
    real(dp),intent(inout),dimension(:)   :: v
    ! local variables
    real(dp),dimension(:), allocatable :: alfa
    real(dp),dimension(:,:),allocatable:: p, y
    integer:: m, n, i, j, k
    real(dp) :: ak
    n=size(a,1)
    m=size(b,2)
    if (allocated(alfa)) deallocate(alfa)
    if (allocated(p)) deallocate(p)
    allocate(alfa(n),p(n,n),stat=info)
    if (info/=0) then
        if(debug) write(*,*) "bgth: info=",info
        info=1000
        return
    endif
    p=-a
    ! LU factorization
    do k=1,n-1
       alfa(k)=v(k)+sum(p(k,k+1:n))
       ak=1.d0/alfa(k)
       Do i=k+1,n
          Do j=k+1,n
             If(i/=j)then
                p(i,j)=p(i,j)+p(i,k)*p(k,j)*ak
             End If
          End Do
       End Do
       Do j=k+1,n
          v(j)=v(j)+v(k)*p(j,k)*ak
       End Do
    End do
    alfa(n)=v(n)
    alfa=1.d0/alfa
    ! Solving LY=B
    allocate(y(n,m),stat=info)
    if (info/=0) then
        if(debug) write(*,*) "bgth: info=",info
        info=1000
        return
    endif
    ak=alfa(1)
    y(1,:)=b(1,:)*ak
    Do k=2,n
       ak=alfa(k)
       y(k,:)=(b(k,:)+matmul(p(k,1:k-1),y(1:k-1,:)))*ak
    End Do
    ! Solving UB=Y
    b(n,:)=y(n,:)
    Do k=n-1,1,-1
       ak=alfa(k)
       b(k,:)=y(k,:)+Matmul(p(k,k+1:n),b(k+1:n,:))*ak
    End Do
    if (allocated(alfa)) deallocate(alfa)
    if (allocated(p)) deallocate(p)
  end subroutine bgth

  subroutine crqbd(Am1, A0, A1,  G, R, U, doshift, dogth,  drift, nerror, maxit)
! doshift= 0=basic, 1=shift acc., 2=tau shift 3=double shift  4=diag adj
! compute the minimal solutions of the equations
!   Am1 + A0 G + A1 G^2   = G
!   A1  + R A0 + R^2 Am1  = R
!   A0 + Am1(I-U)^(-1) A1 = U
!   by using (shifted) cyclic reduction
! Input variables
!   Am1, A0, A1
!   doshift:  if .true. the shift acceleration is performed
!   dogth:    if .true. the diagonal adjustment technique is applied
!   verb: (global)   if .true. an error estimate is printed at each step
!   debug (global)   if .true. some printings are performed on the console
! Output variables:
!   G, R, U (optional)
!   drift (optional)
    use smc_tools
    use smc_int, only: drft, bgth, compute_tau, taushift
    USE f95_lapack, ONLY: la_getrf, la_getri, la_gesv
!    use ponte_f_f, only: fdrift
    implicit none
    ! arguments
    real(dp),intent(in),dimension(:,:) :: Am1, A0, A1
    logical, intent(in), optional  ::  dogth
    integer, intent(in), optional  :: doshift
    real(dp),intent(out), dimension(:,:) :: G
    real(dp),intent(out),optional, dimension(:,:) ::  R, U
    real(dp),intent(out),optional ::drift
    integer,optional,intent(in) :: nerror
    integer, optional,intent(in) :: maxit
    ! parameters
    !real(dp),parameter :: eps=epsilon(1.d0)
    !integer, parameter :: nitmax=50  
    ! local variables
    integer :: m, i, k, nitmax
    real(dp),dimension(:,:),allocatable :: tam1, ta0, ta1, tha, aux, auxbig, gg
    real(dp),dimension(:),allocatable   :: v,w,vgth,uu
    real(dp) :: f, err, ldrift, eps,tau
    logical :: ldogth
    integer :: ldoshift
    integer,dimension(:),allocatable:: ipiv
    real(dp),dimension(:,:,:),allocatable:: ama,tama

!10 FORMAT(1X,"*",\)
10 FORMAT("*")
    m=size(a0,1)
    if (debug)  write(*,*) "crqbd:  Dimension AM1 =",size(am1,1), size(am1,2)
    if (debug)  write(*,*) "crqbd:  Dimension A0 =",size(a0,1), size(a0,2)
    if (debug)  write(*,*) "crqbd:  Dimension A1 =",size(a1,1), size(a1,2) 
!   defaults parameters if missing
    if ( .not. present(nerror)) then
          eps=epsilon(1.d0) 
       else 
          eps=epsilon(1.d0) * 10.0d0 ** nerror
    endif
    if ( .not. present(maxit)) then
          nitmax = 50
       else
          nitmax = maxit
    endif 
    if (debug)  write(*,*) "crqbd: eps=",eps," nitmax=",nitmax
!   from here nitmax and eps are the values of the parameters
 
    if (allocated(tam1)) deallocate(tam1)
    if (allocated(ta0)) deallocate(ta0)
    if (allocated(ta1)) deallocate(ta1)
    if (allocated(tha)) deallocate(tha)
    if (allocated(auxbig)) deallocate(auxbig)
    if (allocated(aux)) deallocate(aux)
    if (allocated(gg)) deallocate(gg)
    if (allocated(v)) deallocate(v)
    if (allocated(ipiv)) deallocate(ipiv)
    if (allocated(w)) deallocate(w)
    if (allocated(vgth)) deallocate(vgth)
    allocate(tam1(m,m),ta0(m,m),ta1(m,m),tha(m,m),auxbig(m,2*m),aux(m,m), &
      gg(m,m),stat=info)
    if (info/=0) then
        if(debug) write(*,*) "crqbd: info=",info
        info=1000
        return
    endif
    allocate(v(m),uu(m),ipiv(m),w(m),vgth(m),stat=info)
    if (info/=0) then
        if(debug) write(*,*) "crqbd: info=",info
        info=1000
        return
    endif
    ! -1- initialize

    ldoshift= 1;ldogth=.false.
    if(present(doshift))then
       ldoshift=doshift
       if(present(dogth))then
          ldogth=dogth
          if(dogth.and.doshift/=0)then
             if(debug) write(*,*)"crqbd: Warning: doshift and dogth cannot be both .true."
             ldogth=.false.
          end if
       else
          ldoshift=1
          ldogth=.false.
       end if
    end if

    call drft(Am1,A0,A1,v,ldrift)
!    fdrift=ldrift
    if(present(drift))drift=ldrift
    if(present(dogth).and. .not.present(doshift))then
       ldogth=dogth
       if(dogth)then
         ldoshift=0
      else
         ldoshift=1
      end if
    end if


    tam1=-Am1; ta0=-A0; ta1=-A1
    do i=1,m
       ta0(i,i)=1+ta0(i,i)
    end do
!! Am1+A0+A1 is row stochastic
    !-2.1- shift of 1
    if(ldoshift==1)then
       f=1.d0/m
       if(ldrift<0) then  
          v=sum(tam1,dim=2)
          do i=1,m
             tam1(i,:)=tam1(i,:)-v(i)*f
          end do
          v=sum(ta1,dim=2)
          do i=1,m
             ta0(i,:)=ta0(i,:)+v(i)*f
          end do
       else
          w=matmul(transpose(a1),v)
          do i=1,m
             ta1(:,i)=ta1(:,i)+w(i)
          end do
          w=matmul(transpose(am1),v)
          do i=1,m
             ta0(:,i)=ta0(:,i)-w(i)
          end do
       end if
    end if
    tha=ta0

    !-2.2- shift of tau
    if(ldoshift==2)then
      if(allocated(ama)) deallocate(ama)
      if(allocated(tama)) deallocate(tama)
       allocate (ama(m,m,3),tama(m,m,3),stat=info)
       if (info/=0) then
           if(debug) write(*,*) "crqbd: info=",info
           info=1000
            return
        endif
       ama(:,:,1)=am1;ama(:,:,2)=a0;ama(:,:,3)=a1
       call compute_tau(ama,tau,uu,v)
       if(debug) write(*,*)"tau=",tau
       ama=-ama
       do i=1,m
          ama(i,i,2)=ama(i,i,2)+1.d0
       end do
       call taushift(ama,tau,uu,v,drift,tama)
       tam1=tama(:,:,1)
       ta0=tama(:,:,2)
       ta1=tama(:,:,3)
       tha=ta0       
    end if

    !-2.3- shift double
    if(ldoshift==3)then
       f=1.d0/m
       if(ldrift<0) then  
          v=sum(tam1,dim=2)
          do i=1,m
             tam1(i,:)=tam1(i,:)-v(i)*f
          end do
          v=sum(ta1,dim=2)
          do i=1,m
             ta0(i,:)=ta0(i,:)+v(i)*f
          end do
       else
          w=matmul(transpose(a1),v)
          do i=1,m
             ta1(:,i)=ta1(:,i)+w(i)
          end do
          w=matmul(transpose(am1),v)
          do i=1,m
             ta0(:,i)=ta0(:,i)-w(i)
          end do
       end if

       allocate (ama(m,m,3),tama(m,m,3),stat=info)
    if (info/=0) then
        if(debug) write(*,*) "crqbd: info=",info
        info=1000
        return
    endif
       ama(:,:,1)=am1;ama(:,:,2)=a0;ama(:,:,3)=a1
       call compute_tau(ama,tau,uu,v)
       if(debug) write(*,*)"tau=",tau
       ama(:,:,1)=tam1;ama(:,:,2)=ta0;ama(:,:,3)=ta1
       call taushift(ama,tau,uu,v,drift,tama)
       tam1=tama(:,:,1)
       ta0=tama(:,:,2)
       ta1=tama(:,:,3)
       tha=ta0
     end if

    !-3- cr iteration
    do k=1,nitmax
       auxbig(1:m,1:m)=tam1
       auxbig(1:m,m+1:2*m)=ta1
       aux=ta0
       if(ldogth)then
          vgth = -sum(tam1+ta1,dim=2)
          call bgth(vgth,aux,auxbig)
       else
          call la_gesv(aux,auxbig)
       end if
       aux=matmul(ta1,auxbig(1:m,1:m))
       ta0=ta0-aux-matmul(tam1,auxbig(1:m,m+1:2*m))
       tha=tha-aux
       tam1=-matmul(tam1,auxbig(1:m,1:m))
       ta1=-matmul(ta1,auxbig(1:m,m+1:2*m))
       !-3.1- check stop 
       if(ldoshift/=0)then
          err = maxval(sum(abs(ta1),dim=1))*maxval(sum(abs(tam1),dim=1))
       else
          if(ldrift<0) then
             err = maxval(-sum(ta1,dim=1))
          else
             err = maxval(-sum(tam1,dim=1))
          end if
       end if
       if(verb)then
          write(wout,*)"--iter=",k,"  Check  ",err
          call print_it
          else
          write(wout,10)
          call print_it_nolf
       end if
       if(err<eps)exit
    end do

 
! compute the solutions
    if(k>= nitmax) then 
           write(wout,*)"Reached the maximum number of iterations in CR"
           call print_it
           endif
    aux=tha
    call la_getrf(aux,ipiv)
    call la_getri(aux,ipiv)
    gg=matmul(aux,am1)

    aux=a0+matmul(a1,gg)
    if(present(u)) u=aux
    
    g=gg
    if(present(R))  then
       aux=-aux
       do i=1,m
          aux(i,i)=aux(i,i)+1
       end do
       call la_getrf(aux,ipiv)
       call la_getri(aux,ipiv)
       r=matmul(a1,aux)
    end if
    if(verb)then
!       write(wout,*)"drift=",ldrift
!        call print_it
       write(wout,*)"Flags: doshift=",ldoshift,"dogth=",ldogth
       call print_it
       write(wout,*)"G-residual=",maxval(abs(matmul(matmul(a1,g)+a0,g)+am1-g))
       call print_it
       if(present(R)) then
           write(wout,*) "R-residual=", &
                              maxval(abs(matmul(r,matmul(r,am1)+a0)+a1-r))
            call print_it
        endif
       if(present(u)) then
          ta0=u
          ta1=-u
          do i=1,m
             ta1(i,i)=1.d0+ta1(i,i)
          end do
          call la_getrf(ta1,ipiv)
          call la_getri(ta1,ipiv)
          ta1=ta0-matmul(a1,matmul(ta1,am1))-a0
          write(wout,*)"U-residual=",maxval(abs(ta1))
          call print_it
       end if
    else
! if not verbose, print something anyway ....
        write(wout,*)" "
        call print_it
        write(wout,*)"G-residual=",maxval(abs(matmul(matmul(a1,g)+a0,g)+am1-g))
        call print_it
        if(present(R)) then
            write(wout,*) "R-residual=", &
                            maxval(abs(matmul(r,matmul(r,am1)+a0)+a1-r))
            call print_it
         end if
        if(present(u)) then
          ta0=u
          ta1=-u
          do i=1,m
             ta1(i,i)=1.d0+ta1(i,i)
          end do
          call la_getrf(ta1,ipiv)
          call la_getri(ta1,ipiv)
          ta1=ta0-matmul(a1,matmul(ta1,am1))-a0
          write(wout,*)"U-residual=",maxval(abs(ta1))
          call print_it
        end if
    end if
    if (allocated(tam1)) deallocate(tam1)
    if (allocated(ta0)) deallocate(ta0)
    if (allocated(ta1)) deallocate(ta1)
    if (allocated(tha)) deallocate(tha)
    if (allocated(auxbig)) deallocate(auxbig)
    if (allocated(aux)) deallocate(aux)
    if (allocated(gg)) deallocate(gg)
    if (allocated(v)) deallocate(v)
    if (allocated(ipiv)) deallocate(ipiv)
    if (allocated(w)) deallocate(w)
    if (allocated(vgth)) deallocate(vgth)
    if(allocated(ama)) deallocate(ama)
    if(allocated(tama)) deallocate(tama)

    if(debug)  write(*,*) "crqbd: info=",info
  end subroutine crqbd



  subroutine lrqbd(Am1, A0, A1, G, R, U, doshift, dogth, drift, nerror, maxit)
! compute the minimal solutions of the equations
!   Am1 + A0 G + A1 G^2   = G
!   A1  + R A0 + R^2 Am1  = R
!   A0 + Am1(I-U)^(-1) A1 = U
!   by using (shifted) logarithmic reduction
! Input variables
!   Am1, A0, A1
!   doshift: if .true. the shift acceleration is performed
!   dogth:   if .true. the gth trick is applied
!   verb: (global) if .true. an error estimate is printed at each step
! Output variables:
!   G, R, U (optional)
!   drift (optional)
    use smc_tools
    use smc_int, only: drft, bgth, compute_tau, taushift
    USE f95_lapack, ONLY: la_getrf, la_getri, la_gesv
!    use ponte_f_f, only: fdrift
    implicit none
    ! arguments
    real(dp),dimension(:,:),intent(in) :: Am1, A0, A1
    real(dp),dimension(:,:),intent(out),optional :: G, R, U
    real(dp),intent(out),optional ::drift
    logical, optional,intent(in) :: dogth
    integer, optional,intent(in) :: doshift
    integer,optional,intent(in) :: nerror
    integer, optional,intent(in) :: maxit
    ! parameters
    !real(dp),parameter :: eps=epsilon(1.d0)
    !integer, parameter :: nitmax=50  
    ! local variables
    integer :: m, i, k, nitmax,j
    real(dp),dimension(:,:),allocatable :: tam1, ta0, ta1, aux, auxbig
    real(dp),dimension(:,:),allocatable :: ww, uu,v1, vm1
    real(dp) :: f, err, ldrift, eps, tau
    logical :: ldogth
    integer :: ldoshift
    real(dp),dimension(:),allocatable :: v,w,vgth,uu1
    real(dp),dimension(:,:,:),allocatable:: ama,tama
    integer,dimension(:),allocatable:: ipiv
!10 FORMAT(1X,"*",\)
10 FORMAT("*")
    m=size(a0,1)
    if (debug) write(*,*) "lrqbd:  Dimension AM1 =",size(am1,1), size(am1,2)
    if (debug) write(*,*) "lrqbd:  Dimension A0 =",size(a0,1), size(a0,2)
    if (debug) write(*,*) "lrqbd:  Dimension A1 =",size(a1,1), size(a1,2)
!   defaults parameters if missing
    if ( .not. present(nerror)) then 
          eps=epsilon(1.d0) 
       else 
          eps=epsilon(1.d0) * 10.0d0 ** nerror
    endif
    if ( .not. present(maxit)) then
          nitmax = 50
       else
          nitmax = maxit
    endif
   if (debug)  write(*,*) "lrqbd: eps=",eps," nitmax=",nitmax
!   from here nitmax and eps are the values of the parameters
    if (debug)  write(*,*) "lrqbd:  Iteration max =",nitmax,"  epsilon =",eps
    if (allocated(tam1)) deallocate(tam1)
    if (allocated(ta0)) deallocate(ta0)
    if (allocated(ta1)) deallocate(ta1)
    if (allocated(auxbig)) deallocate(auxbig)
    if (allocated(aux)) deallocate(aux)
    if (allocated(v)) deallocate(v)
    if (allocated(ipiv)) deallocate(ipiv)
    if (allocated(w)) deallocate(w)
    if (allocated(vgth)) deallocate(vgth)
    if (allocated(uu)) deallocate(uu)
    if (allocated(uu1)) deallocate(uu1)
    if (allocated(v1)) deallocate(v1)
    if (allocated(vm1)) deallocate(vm1)
    if (allocated(ww)) deallocate(ww)
    allocate(tam1(m,m),ta0(m,m),ta1(m,m),auxbig(m,2*m),&
       aux(m,m),stat=info)
    if (info/=0) then
        if(debug) write(*,*) "lrqbd: info=",info
        info=1000
        return
    endif
    allocate(v(m),uu1(m),ipiv(m),w(m),vgth(m),uu(m,m),v1(m,m),vm1(m,m),&
       ww(m,m),stat=info)
    if (info/=0) then
        if(debug) write(*,*) "lrqbd: info=",info
        info=1000
        return
    endif

!!$   ! -1- initialize
!!$  ldoshift=.true.;ldogth=.false.
!!$
!!$    if(present(doshift))then
!!$       ldoshift=doshift
!!$       if(present(dogth))then
!!$          ldogth=dogth
!!$          if(dogth.and.doshift)then
!!$             if(debug)  write(*,*)"lrqbd:  Warning: doshift and dogth cannot be both .true."
!!$             ldogth=.false.
!!$          end if
!!$       else
!!$          ldoshift=.true.
!!$          ldogth=.false.
!!$       end if
!!$    end if
!!$    call drft(Am1,A0,A1,v,ldrift)
!!$    fdrift=ldrift
!!$    if(present(drift))drift=ldrift
!!$    if(present(dogth).and. .not.present(doshift))then
!!$       ldogth=dogth
!!$       ldoshift=.not. dogth
!!$    end if
!!$
!!$    tam1=-Am1; ta0=-A0; ta1=-A1
!!$    do i=1,m
!!$       ta0(i,i)=1+ta0(i,i)
!!$    end do
!!$    !-2- shift
!!$    if(ldoshift)then
!!$       f=1.d0/m
!!$       if(ldrift<0) then  
!!$          v=sum(tam1,dim=2)
!!$          do i=1,m
!!$             tam1(i,:)=tam1(i,:)-v(i)*f
!!$          end do
!!$          v=sum(ta1,dim=2)
!!$          do i=1,m
!!$             ta0(i,:)=ta0(i,:)+v(i)*f
!!$          end do
!!$       else
!!$          w=matmul(transpose(a1),v)
!!$          do i=1,m
!!$             ta1(:,i)=ta1(:,i)+w(i)
!!$          end do
!!$          w=matmul(transpose(am1),v)
!!$          do i=1,m
!!$             ta0(:,i)=ta0(:,i)-w(i)
!!$          end do
!!$       end if
!!$    end if

    ! -1- initialize

    ldoshift= 1;ldogth=.false.
    if(present(doshift))then
       ldoshift=doshift
       if(present(dogth))then
          ldogth=dogth
          if(dogth.and.doshift/=0)then
             if(debug) write(*,*)"crqbd: Warning: doshift and dogth cannot be both .true."
             ldogth=.false.
          end if
       else
          ldoshift=1
          ldogth=.false.
       end if
    end if

    call drft(Am1,A0,A1,v,ldrift)
!    fdrift=ldrift
    if(present(drift))drift=ldrift
    if(present(dogth).and. .not.present(doshift))then
       ldogth=dogth
       if(dogth)then
         ldoshift=0
      else
         ldoshift=1
      end if
    end if


    tam1=-Am1; ta0=-A0; ta1=-A1
    do i=1,m
       ta0(i,i)=1+ta0(i,i)
    end do
!! Am1+A0+A1 is row stochastic
    !-2.1- shift of 1
    if(ldoshift==1)then
       f=1.d0/m
       if(ldrift<0) then  
          v=sum(tam1,dim=2)
          do i=1,m
             tam1(i,:)=tam1(i,:)-v(i)*f
          end do
          v=sum(ta1,dim=2)
          do i=1,m
             ta0(i,:)=ta0(i,:)+v(i)*f
          end do
       else
          w=matmul(transpose(a1),v)
          do i=1,m
             ta1(:,i)=ta1(:,i)+w(i)
          end do
          w=matmul(transpose(am1),v)
          do i=1,m
             ta0(:,i)=ta0(:,i)-w(i)
          end do
       end if
    end if

    !-2.2- shift of tau
    if(ldoshift==2)then
      if(allocated(ama)) deallocate(ama)
      if(allocated(tama)) deallocate(tama)
       allocate (ama(m,m,3),tama(m,m,3),stat=info)
       if (info/=0) then
           if(debug) write(*,*) "crqbd: info=",info
           info=1000
            return
        endif
       ama(:,:,1)=am1;ama(:,:,2)=a0;ama(:,:,3)=a1
       call compute_tau(ama,tau,uu1,v)
       if(debug) write(*,*)"tau=",tau
       ama=-ama
       do i=1,m
          ama(i,i,2)=ama(i,i,2)+1.d0
       end do
       call taushift(ama,tau,uu1,v,drift,tama)
       tam1=tama(:,:,1)
       ta0=tama(:,:,2)
       ta1=tama(:,:,3)
    end if

    !-2.3- shift double
    if(ldoshift==3)then
       f=1.d0/m
       if(ldrift<0) then  
          v=sum(tam1,dim=2)
          do i=1,m
             tam1(i,:)=tam1(i,:)-v(i)*f
          end do
          v=sum(ta1,dim=2)
          do i=1,m
             ta0(i,:)=ta0(i,:)+v(i)*f
          end do
       else
          w=matmul(transpose(a1),v)
          do i=1,m
             ta1(:,i)=ta1(:,i)+w(i)
          end do
          w=matmul(transpose(am1),v)
          do i=1,m
             ta0(:,i)=ta0(:,i)-w(i)
          end do
       end if

       allocate (ama(m,m,3),tama(m,m,3),stat=info)
    if (info/=0) then
        if(debug) write(*,*) "lrqbd: info=",info
        info=1000
        return
    endif
       ama(:,:,1)=am1;ama(:,:,2)=a0;ama(:,:,3)=a1
       call compute_tau(ama,tau,uu1,v)
       if(debug) write(*,*)"tau=",tau
       ama(:,:,1)=tam1;ama(:,:,2)=ta0;ama(:,:,3)=ta1
       call taushift(ama,tau,uu1,v,drift,tama)
       tam1=tama(:,:,1)
       ta0=tama(:,:,2)
       ta1=tama(:,:,3)
    end if




! LR inizialization
    aux=ta0
    call la_getrf(aux,ipiv)
    call la_getri(aux,ipiv)
    v1=-matmul(aux,ta1)
    vm1=-matmul(aux,tam1)
    ww=-matmul(vm1,v1)-matmul(v1,vm1)
    uu=0.d0
    do i=1,m
       ww(i,i)=ww(i,i)+1.d0
       uu(i,i)=1.d0
    end do
    g=vm1
    
! LR cycle
    do k=1,nitmax
       uu=matmul(uu,v1)
       aux=matmul(vm1,vm1)
       if(ldogth) vgth=sum(aux,dim=2)
       auxbig(1:m,1:m)=aux
       aux=matmul(v1,v1)
       if (ldogth) vgth=vgth+sum(aux,dim=2)
       auxbig(1:m,m+1:2*m)=aux
       aux=ww
       if(ldogth)then
          call bgth(vgth,aux,auxbig)
       else
          call la_gesv(aux,auxbig)
       end if
       vm1=auxbig(1:m,1:m)
       v1=auxbig(1:m,m+1:2*m)
       ww=matmul(vm1,v1)+matmul(v1,vm1)
       ww=-ww
       do i=1,m
          ww(i,i)=ww(i,i)+1.d0
       end do
       g = g + matmul(uu,vm1)
       err = min (maxval( sum(abs(v1),dim=2)), maxval(sum(abs(vm1),dim=2)))

       if(verb)then
          write(wout,*)"--iter=",k,"  Check  ",err
          call print_it
          else
          write(wout,10)
          call print_it_nolf
       end if

       if(err<eps)exit
    end do
! compute the solutions
    if(k>= nitmax) then 
          write(wout,*)"Reached the maximum number of iterations in LR"
          call print_it
     endif
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  if(ldoshift>0)then 
     if(drift<=0.and.ldoshift==1) g=g+1.0d0/m
     if(drift<=0.and.ldoshift==3) g=g+1.0d0/m
     if(drift>0.and.ldoshift>1) then
        do i=1,m
           do j=1,m
              g(i,j)=g(i,j)+tau*uu1(i)*v(j)
           end do
        end do
     end if
  end if
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    if(present(u)) u=a0+matmul(a1,g)
    if(present(r)) then
       aux=-a0-matmul(a1,g)
       do i=1,m
          aux(i,i)=aux(i,i)+1.d0
       end do
       call la_getrf(aux,ipiv)
       call la_getri(aux,ipiv)
       r=matmul(a1,aux)
    end if
    if(verb)then
!       write(wout,*)"drift=",ldrift
!       call print_it
       write(wout,*)"Flags:  doshift=",ldoshift,"dogth=",ldogth
       call print_it
       write(wout,*)"G-residual=",maxval(abs(matmul(matmul(a1,g)+a0,g)+am1-g))
       call print_it
       if(present(R)) then
              write(wout,*) "R-residual=", &
                  maxval(abs(matmul(r,matmul(r,am1)+a0)+a1-r)) 
               call print_it
       endif
       if(present(u)) then
          aux=-u
          do i=1,m
             aux(i,i)=1.d0+aux(i,i)
          end do
          call la_getrf(aux,ipiv)
          call la_getri(aux,ipiv)
          ta1=u-matmul(a1,matmul(aux,am1))-a0
          write(wout,*)"U-residual=",maxval(abs(ta1))
          call print_it
       end if
   else 
! if not verbose, print something anyway ....
       write(wout,*)" "
       call print_it
       write(wout,*)"G-residual=",maxval(abs(matmul(matmul(a1,g)+a0,g)+am1-g))
       call print_it
       if(present(R)) then
            write(wout,*) "R-residual=", &
                            maxval(abs(matmul(r,matmul(r,am1)+a0)+a1-r))
            call print_it
       end if
       if(present(u)) then
          aux=-u
          do i=1,m
             aux(i,i)=1.d0+aux(i,i)
          end do
          call la_getrf(aux,ipiv)
          call la_getri(aux,ipiv)
          ta1=u-matmul(a1,matmul(aux,am1))-a0
          write(wout,*)"U-residual=",maxval(abs(ta1))
          call print_it
       end if
    endif
    if (allocated(tam1)) deallocate(tam1)
    if (allocated(ta0)) deallocate(ta0)
    if (allocated(ta1)) deallocate(ta1)
    if (allocated(auxbig)) deallocate(auxbig)
    if (allocated(aux)) deallocate(aux)
    if (allocated(v)) deallocate(v)
    if (allocated(ipiv)) deallocate(ipiv)
    if (allocated(w)) deallocate(w)
    if (allocated(vgth)) deallocate(vgth)
    if (allocated(uu)) deallocate(uu)
    if (allocated(uu1)) deallocate(uu1)
    if (allocated(v1)) deallocate(v1)
    if (allocated(vm1)) deallocate(vm1)
    if (allocated(ww)) deallocate(ww)
    if(allocated(ama)) deallocate(ama)
    if(allocated(tama)) deallocate(tama)

    if(debug)  write(*,*) "lrqbd: info=",info
  end subroutine lrqbd

!=========================================================================!
!                          SUBROUTINE SHIFT                               !
!=========================================================================!
! On input: a nonnegative
! on output: shifted coefficients ta, drift
SUBROUTINE shift(a,ds,tau,uu,vv,drift,ta)
! ds=1 : one shift
! ds=2 : tau shift
! ds=3 : double shift
! if tau=-1 then tau has not been computed
  use smc_tools
  use smc_int, only: gth, compute_tau, taushift
  IMPLICIT NONE
  REAL(dp), DIMENSION(:,:,:):: a, ta
  REAL(dp), DIMENSION(:) :: uu,vv
  INTEGER :: ds
  REAL(dp)                                :: drift,tau

  REAL(dp), DIMENSION(:,:), ALLOCATABLE   :: s,aux,aux1
  REAL(dp), DIMENSION(:,:,:), ALLOCATABLE   :: ama
  REAL(dp), DIMENSION(:), ALLOCATABLE     :: p,v,w
  REAL(dp)                                :: im
  INTEGER                                 :: m,n,i,j
  n=size(a,3)
  m=size(a,1)
  im=1.d0/m
  if (allocated(s)) deallocate(s) 
  if (allocated(p)) deallocate(p) 
  if (allocated(v)) deallocate(v) 
  if (allocated(aux)) deallocate(aux) 
  if (allocated(w)) deallocate(w) 
  if (allocated(aux1)) deallocate(aux1) 
  if (allocated(ama)) deallocate(ama)
  allocate(s(m,m),p(m),v(m),aux(m,n),aux1(n,m),w(m),ama(m,m,n),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "shift: info=",info
        info=1000
        return
  endif

s=0;p=0;v=0;aux=0;aux1=0;

  ta=a
!!$! tau is not computed
!!$  if (ds>1.and.tau=-1) then
!!$     call compute_tau(a,tau,uu,vv)
!!$  end if

  if (ds==1) then
 s=sum(a,dim=3)
  call gth(s,w)
     if(drift<=0)then ! positive recurrent
        do i=1,n
           aux(:,i)=sum(ta(:,:,i),dim=2)
        end do
        v=0.d0
        do i=n-1,2,-1
           v=v+aux(:,i+1)
           p=v*im
           do j=1,m
              ta(:,j,i)=ta(:,j,i)+p
           end do
        end do
        p=sum(ta(:,:,1),dim=2)*im
        do j=1,m
           ta(:,j,1)=ta(:,j,1)-p
        end do
     else
        do i=1,n
           aux1(i,:)=matmul(w,ta(:,:,i))
        end do
        v=0
        do i=n,3,-1
           v=v+aux1(i,:)
           do j=1,m
              ta(j,:,i)=ta(j,:,i)-v
           end do
        end do
        p=aux1(1,:)
        do j=1,m
           ta(j,:,2)=ta(j,:,2)+p
        end do
     end if

     elseif(ds==2) then
        ama=a
      if(debug) write(*,*)"tau=",tau
       ama=-ama
       do i=1,m
          ama(i,i,2)=ama(i,i,2)+1.d0
       end do
       call taushift(ama,tau,uu,vv,drift,ta)
       do i=1,m
          ta(i,i,2)=ta(i,i,2)+1.d0
       end do
     else
! one shift
 s=sum(a,dim=3)
  call gth(s,w)
     if(drift<=0)then ! positive recurrent
        do i=1,n
           aux(:,i)=sum(ta(:,:,i),dim=2)
        end do
        v=0.d0
        do i=n-1,2,-1
           v=v+aux(:,i+1)
           p=v*im
           do j=1,m
              ta(:,j,i)=ta(:,j,i)+p
           end do
        end do
        p=sum(ta(:,:,1),dim=2)*im
        do j=1,m
           ta(:,j,1)=ta(:,j,1)-p
        end do
     else
        do i=1,n
           aux1(i,:)=matmul(w,ta(:,:,i))
        end do
        v=0
        do i=n,3,-1
           v=v+aux1(i,:)
           do j=1,m
              ta(j,:,i)=ta(j,:,i)-v
           end do
        end do
        p=aux1(1,:)
        do j=1,m
           ta(j,:,2)=ta(j,:,2)+p
        end do
     end if
! tau shift
     do i=1,m
        ta(i,i,2)=ta(i,i,2)-1.d0
     end do
     ama=ta
     call taushift(ama,tau,uu,vv,drift,ta)
      do i=1,m
          ta(i,i,2)=ta(i,i,2)+1.d0
       end do
     end if
  if (allocated(s)) deallocate(s)
  if (allocated(p)) deallocate(p)
  if (allocated(v)) deallocate(v)
  if (allocated(aux)) deallocate(aux)
  if (allocated(w)) deallocate(w)
  if (allocated(aux1)) deallocate(aux1)
  if (allocated(ama)) deallocate(ama)
end SUBROUTINE shift

! Transform GM1 into an MG1, by using Ramaswami dual
subroutine gm1tomg1(A, AA, v)
  use smc_tools
  use smc_int, only: gth

  implicit none
  real(dp),dimension(:,:,:):: A,AA
  real(dp),dimension(:):: v
  integer :: nba,n,i
  real(dp),dimension(:),allocatable:: vi
 real(dp),dimension(:,:),allocatable:: SumA

  nba=size(a,3)
  n=size(a,1)
  if(allocated(SumA))deallocate(SumA)
  if(allocated(vi)) deallocate(vi)
  allocate(SumA(n,n),vi(n),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "gm1tomg1: info=",info
        info=1000
        return
  endif
  SumA=sum(A(:,:,:),dim=3)
  call gth(SumA,v)
  vi=1.d0/v
  do i=1,nba
     AA(:,:,i)=transpose(A(:,:,i))
  end do
  do i=1,n
     AA(:,i,:)=AA(:,i,:)*v(i)
     AA(i,:,:)=AA(i,:,:)*vi(i)
  end do
  if(allocated(SumA))deallocate(SumA)
  if(allocated(vi)) deallocate(vi)
end subroutine gm1tomg1

! Transform GM1 into an MG1, by using Bright dual
subroutine gm1tomg1b(A, AA,tau, w)
  use smc_tools
  use smc_int, only: compute_tau

  implicit none
  real(dp),dimension(:,:,:):: A,AA
  real(dp),dimension(:):: w
  real(dp):: tau, tt
  integer :: nba,n,i
  real(dp),dimension(:),allocatable:: wi, u

  nba=size(a,3)
  n=size(a,1)

  if(allocated(u)) deallocate(u)
  if(allocated(wi)) deallocate(wi)
  allocate(u(n),wi(n),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "gm1tomg1b: info=",info
        info=1000
        return
  endif

  call compute_tau(a,tau,u,w)
  w=w/sum(w)
  if(debug) write(*,*)"gm1tomg1b:tau",tau
  
  wi=1.d0/w
  do i=1,nba
     AA(:,:,i)=transpose(A(:,:,i))
  end do
  do i=1,n
     AA(:,i,:)=AA(:,i,:)*w(i)
     AA(i,:,:)=AA(i,:,:)*wi(i)
  end do
  tt=1/tau
  do i=1,nba
     AA(:,:,i)=AA(:,:,i)*tt
     tt=tt*tau
  end do        
  if(debug) then
     write(*,*) "gm1tomg1b: stochasticity"
     do i=1,n
        write(*,*)i,sum(aa(i,:,:))
     end do
  end if
  if(allocated(wi)) deallocate(wi)
  if(allocated(u)) deallocate(u)
end subroutine gm1tomg1b


! Transform MG1 into an MG1, by using both duals
subroutine mg1tomg1(A, AA,tau, w)
  use smc_tools
  use smc_int, only: compute_tau

  implicit none
  real(dp),dimension(:,:,:):: A,AA
  real(dp),dimension(:):: w
  real(dp):: tau, tt
  integer :: nba,n,i
  real(dp),dimension(:),allocatable:: wi, u

  nba=size(a,3)
  n=size(a,1)

  if(allocated(u)) deallocate(u)
  if(allocated(wi)) deallocate(wi)
  allocate(u(n),wi(n),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "mg1tomg1: info=",info
        info=1000
        return
  endif

  call compute_tau(a,tau,w,u)
  w=w/sum(w)
  if(debug) write(*,*)"mg1tomg1:tau",tau
  
  wi=1.d0/w
  AA=A
  do i=1,n
     AA(:,i,:)=AA(:,i,:)*w(i)
     AA(i,:,:)=AA(i,:,:)*wi(i)
  end do
  tt=1/tau
  do i=1,nba
     AA(:,:,i)=AA(:,:,i)*tt
     tt=tt*tau
  end do        
  if(debug) then
     write(*,*) "mg1tomg1: stochasticity"
     do i=1,n
        write(*,*)i,sum(aa(i,:,:))
     end do
  end if
  if(allocated(wi)) deallocate(wi)
  if(allocated(u)) deallocate(u)
end subroutine mg1tomg1

subroutine rresidual(A,r,norm1)
  use smc_tools

  implicit none
  real(dp),dimension(:,:,:):: A
  real(dp),dimension(:,:):: R
  real(dp)::norm1
  real(dp),dimension(:,:),allocatable :: aux
  integer :: nba, m,i
  m=size(a,1)
  nba=size(a,3)
  if(allocated(aux)) deallocate(aux)
  allocate(aux(m,m),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "rresidual: info=",info
        info=1000
        return
  endif
  aux=a(:,:,nba)
  do i=nba-1,1,-1
     aux=a(:,:,i)+matmul(R,aux)
  end do
  aux=aux-r
  norm1=maxval(sum(abs(aux),dim=1))
  if(allocated(aux)) deallocate(aux)
end subroutine rresidual


subroutine utor(A1,u,r)
use smc_tools
USE f95_lapack, ONLY: la_gesv
implicit none 
integer :: n,i
real(dp),dimension(:,:) :: a1,u,r
real(dp),dimension(:,:), allocatable :: aux
n=size(A1,1)
if(allocated(aux))deallocate(aux)
allocate(aux(n,n),stat=info)
if (info/=0) then
        if(debug) write(*,*) "utor: info=",info
        info=1000
        return
endif
R=transpose(A1)
aux=-U
do i=1,n
aux(i,i)=aux(i,i)+1.d0
end do
aux=transpose(aux)
call la_gesv(aux,r)
r=transpose(r)
if(allocated(aux))deallocate(aux)
end subroutine utor

subroutine gtou(A0,A1,G,U)
use smc_tools
implicit none 
real(dp),dimension(:,:) :: a0,a1,g,u
U=A0+matmul(A1,G)
end subroutine gtou


subroutine qbdrres(Am1,A0,A1,r,res)
use smc_tools
implicit none 
real(dp),dimension(:,:) :: am1,a0,a1,r
real(dp)::res
res=  maxval(abs(matmul(r,matmul(r,am1)+a0)+a1-r))
end subroutine qbdrres

subroutine qbdures(Am1,A0,A1,u,res)
use smc_tools
USE f95_lapack, ONLY: la_getri, la_getrf
implicit none 
real(dp),dimension(:,:) :: am1,a0,a1,u
real(dp),dimension(:,:),allocatable :: aux
integer,dimension(:),allocatable:: ipiv
real(dp)::res
integer :: m,i
m=size(am1,1)
if (allocated(aux))deallocate(aux)
if(allocated(ipiv))deallocate(ipiv)
allocate(aux(m,m),ipiv(m),stat=info)
if (info/=0) then
        if(debug) write(*,*) "qbdures: info=",info
        info=1000
        return
endif
aux=-u
do i=1,m
aux(i,i)=aux(i,i)+1.d0
end do
call la_getrf(aux,ipiv)
call la_getri(aux,ipiv)
res=  maxval(abs(u-matmul(a1,matmul(aux,am1))-a0))
if (allocated(aux))deallocate(aux)
if(allocated(ipiv))deallocate(ipiv)
end subroutine qbdures

function binomial(n,k)
  implicit none
  integer ::  n,k , i
  real(kind(0.d0)):: rn,rk,rb,binomial
  rb=1.d0
  rn=n
  rk=k
  do i=0,k-1
     rb=((rn-i)/(rk-i))*rb
  end do
  binomial=rb
end function binomial

subroutine compute_tauqbd(a,b,c,z)
! Computes the solution different from 1 of the equation
! rho(S(z))-z=0, by means of the deflated Newton method
! z=z-nwt/(1-nwt/(z-1)), nwt=(rho(S(z))-z)/(v^TS(z)'u-1)
! where S(z)u=rho u, v^TS(z)=rho v^T
  use smc_tools
  implicit none
  integer :: n
  real(dp),intent(in),dimension(:,:)::a,b,c
  real(dp),intent(out) :: z
  real(dp),dimension(:),allocatable::u(:),u1(:),v(:),s(:,:),s1(:,:)
  real(dp)::rp,nwt,r,rq
  integer::i,j
  n=size(a,1)
if (allocated(u))deallocate(u)
if (allocated(v))deallocate(v)
if (allocated(u1))deallocate(u1)
if (allocated(s))deallocate(s)
if (allocated(s1))deallocate(s1)

  allocate(u(n),v(n),u1(n),s(n,n),s1(n,n),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "compute_tauqbd: info=",info
        info=1000
        return
  endif
  z=0.5d0
  do i=1,20
     s=a+z*b+z*z*c;
     s1=b+2*z*c;
     call spect(n,s,u,v,r)
     u1=matmul(s1,u)
     rp=0;rq=0
     do j=1,n
        rp=rp+u1(j)*v(j)
        rq=rq+u(j)*v(j)
     end do
     rp=rp/rq-1.d0
     nwt=(r-z)/rp
     z=z-nwt /(1-nwt/(z-1.d0))
     if(debug)  write(*,*)"newtontau: it=",i," z=",z
     if(abs(nwt)<1.0e-9)exit
  end do
if (allocated(u))deallocate(u)
if (allocated(v))deallocate(v)
if (allocated(u1))deallocate(u1)
if (allocated(s))deallocate(s)
if (allocated(s1))deallocate(s1)
end subroutine compute_tauqbd

subroutine compute_tau(a,z,u,v)
! Computes the solution different from 1 of the equation
! rho(S(z))-z=0, by means of the deflated Newton method
! z=z-nwt/(1-nwt/(z-1)), nwt=(rho(S(z))-z)/(v^TS(z)'u-1)
! where S(z)u=rho u, v^TS(z)=rho v^T
! S(z)=sum_i A_i z^i
! On output: z, u, v
  use smc_tools
  implicit none
  integer :: n,m
  real(dp),intent(in),dimension(:,:,:)::a
  real(dp),intent(out) :: z
  real(dp),dimension(:)::u,v
  real(dp),dimension(:),allocatable::u1(:),s(:,:),s1(:,:)
  real(dp)::rp,nwt,r,rq
  integer::i,j
  n=size(a,1);m=size(a,3)
if (allocated(u1))deallocate(u1)
if (allocated(s))deallocate(s)
if (allocated(s1))deallocate(s1)
  allocate(u1(n),s(n,n),s1(n,n),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "compute_tau: info=",info
        info=1000
        return
  endif
  z=0.5d0
  do i=1,20
     s=a(:,:,m)
     do j=m-1,1,-1
        s=s*z+a(:,:,j)
     end do
     s1=a(:,:,m)*(m-1)
     do j=m-1,2,-1
        s1=s1*z+a(:,:,j)*(j-1)
     end do
     call spect(n,s,u,v,r)
     u1=matmul(s1,u)
     rp=0;rq=0
     do j=1,n
        rp=rp+u1(j)*v(j)
        rq=rq+u(j)*v(j)
     end do
     rp=rp/rq-1.d0
     nwt=(r-z)/rp
     z=z-nwt /(1-nwt/(z-1.d0))
     if(debug)  write(*,*)"newtontau: it=",i," z=",z
     if(abs(nwt)<1.0e-9)exit
  end do
if (allocated(u1))deallocate(u1)
if (allocated(s))deallocate(s)
if (allocated(s1))deallocate(s1)
end subroutine compute_tau



subroutine spect(n,a,u,v,r)
! compute the spectral radius of A>=0 together with the right and left
! eigenvectors u,v,  by means of repeated squarings
! based on the fact that A^{2^k}-> uv^T
  use smc_tools
  implicit none
  integer :: n
  real(dp),dimension(n,n)::a,aux
  real(dp)::r,ma,mao,u(n),v(n),w(n),eps=1.0e-14
  integer::i
  mao=-100
  aux=a
  do i=1,20
     ma=maxval(abs(aux))
     aux=aux*(1.0/ma);
     aux=matmul(aux,aux)
!     if(debug) write(*,*)"err=",abs(mao-ma)
     if(abs(ma-mao)<n*eps)exit
     mao=ma
  end do
  if(debug)  write(*,*)"squaring iteration = ",i
  u=sum(aux,2)
  v=sum(aux,1)
  w=matmul(a,u)
  r=sum(w)/sum(u)
  ma=sum(u*v)
  ma=1.d0/sqrt(ma)
  u=ma*u
  v=ma*v
end subroutine spect

subroutine spect1(n,a,rho)
use f95_lapack
use smc_tools
implicit none
integer :: n
real(dp),dimension(n,n)::a,aa
real(dp),dimension(1,n)::vl,vr
real(dp),dimension(3*n)::work
real(dp),dimension(n)::wr,wi
real(dp)::rho
aa=a
call DGEEV('N','N', N, AA, N, WR, WI, VL, 1, VR,1, WORK, 3*n, info )
rho=maxval(wr**2+wi**2);
rho=sqrt(rho)
end subroutine spect1

subroutine taushift(a, tau, u, v, drift, b)
! transform an mg1 A into a tau-shifted mg1 B
! where A(tau)u=0
! u right eigenvector corresp to tau
! v left eigenvector corresp to tau
  use smc_tools
  implicit none
  real(dp),dimension(:,:,:) :: a,b
  real(dp),dimension(:) :: u,v
  real(dp):: tau, drift,t1
  integer :: m,n,i,j,k
  real(dp),dimension(:),allocatable :: uk
  
  n=size(a,1)
  m=size(a,3)
if (allocated(uk))deallocate(uk)
  allocate(uk(n),stat=info)
  if (info/=0) then
        if(debug) write(*,*) "taushift: info=",info
        info=1000
        return
  endif

    b=a
 
 if(drift<=0)then
     !shift tau to infty
      uk=0
     t1=1.d0/tau
     do k=2,m
        uk=t1*(uk+matmul(transpose(A(:,:,k-1)),v))
        do i=1,n
           do j=1,n
        b(i,j,k)=b(i,j,k)+u(i)*uk(j)
     end do
  end do
     end do
  else
    ! shift to zero
     uk=matmul(a(:,:,1),u)
        do i=1,n
           do j=1,n
     b(i,j,1)=a(i,j,1)-uk(i)*v(j)
     end do
  end do
     t1=1.d0/tau
     do k=2,m
        uk=t1*uk+matmul(A(:,:,k),u)
        do i=1,n
           do j=1,n
        b(i,j,k)=b(i,j,k)-uk(i)*v(j)
     end do
  end do
     end do
  end if
if (allocated(uk))deallocate(uk)
end subroutine taushift

subroutine mg_startup(a,ds,dual,tau,u,v,drft)
  use smc_tools
  use smc_int, only: compute_tau, drftmg1
  implicit none
  real(dp),dimension(:,:,:) :: a
  real(dp) :: tau, drft
  real(dp),dimension(:) :: u,v
  integer :: ds, dual
  tau=-1
  u=-1;v=-1
  drft= drftmg1(a)
  if(ds>1.or.dual>1)then
     call compute_tau(a,tau,u,v)
  end if
end subroutine mg_startup
