Example description
    Program f08rafe

!     F08RAF Example Program Text

!     Mark 26.2 Release. NAG Copyright 2017.

!     .. Use Statements ..
      Use nag_library, Only: dgemm, dorcsd, nag_wp, x04caf
!     .. Implicit None Statement ..
      Implicit None
!     .. Parameters ..
      Real (Kind=nag_wp), Parameter    :: one = 1.0_nag_wp
      Real (Kind=nag_wp), Parameter    :: zero = 0.0_nag_wp
      Integer, Parameter               :: nin = 5, nout = 6, recombine = 1,    &
                                          reprint = 0
!     .. Local Scalars ..
      Integer                          :: i, ifail, info, info2, j, ldu, ldu1, &
                                          ldu2, ldv, ldv1t, ldv2t, ldx, ldx11, &
                                          ldx12, ldx21, ldx22, lwork, m, n11,  &
                                          n12, n21, n22, p, q, r
!     .. Local Arrays ..
      Real (Kind=nag_wp), Allocatable  :: theta(:), u(:,:), u1(:,:), u2(:,:),  &
                                          v(:,:), v1t(:,:), v2t(:,:), w(:,:),  &
                                          work(:), x(:,:), x11(:,:), x12(:,:), &
                                          x21(:,:), x22(:,:)
      Real (Kind=nag_wp)               :: wdum(1)
      Integer, Allocatable             :: iwork(:)
!     .. Intrinsic Procedures ..
      Intrinsic                        :: cos, min, nint, sin
!     .. Executable Statements ..
      Write (nout,*) 'F08RAF Example Program Results'
      Write (nout,*)
      Flush (nout)
!     Skip heading in data file
      Read (nin,*)
      Read (nin,*) m, p, q

      r = min(min(p,q),min(m-p,m-q))

      ldx = m
      ldx11 = p
      ldx12 = p
      ldx21 = m - p
      ldx22 = m - p
      ldu = m
      ldu1 = p
      ldu2 = m - p
      ldv = m
      ldv1t = q
      ldv2t = m - q
      Allocate (x(ldx,m),u(ldu,m),v(ldv,m),theta(r),iwork(m),w(ldx,m))
      Allocate (x11(ldx11,q),x12(ldx12,m-q),x21(ldx21,q),x22(ldx22,m-q))
      Allocate (u1(ldu1,p),u2(ldu2,m-p),v1t(ldv1t,q),v2t(ldv2t,m-q))

!     Read and print orthogonal X from data file
!     (as, say, generated by a generalized singular value decomposition).

      Read (nin,*)(x(i,1:m),i=1,m)

!     Print general matrix using simple matrix printing routine x04caf.
!     ifail: behaviour on error exit
!            =0 for hard exit, =1 for quiet-soft, =-1 for noisy-soft
      ifail = 0
      Call x04caf('G','N',m,m,x,ldx,'    Orthogonal matrix X',ifail)
      Write (nout,*)

!     Compute the complete CS factorization of X:
!        X11 is stored in X(1:p,   1:q), X12 is stored in X(1:p,   q+1:m)
!        X21 is stored in X(p+1:m, 1:q), X22 is stored in X(p+1:m, q+1:m)
!        U1  is stored in U(1:p,   1:p), U2  is stored in U(p+1:m, p+1:m)
!        V1  is stored in V(1:q,   1:q), V2  is stored in V(q+1:m, q+1:m)
      x11(1:p,1:q) = x(1:p,1:q)
      x12(1:p,1:m-q) = x(1:p,q+1:m)
      x21(1:m-p,1:q) = x(p+1:m,1:q)
      x22(1:m-p,1:m-q) = x(p+1:m,q+1:m)

!     Workspace query first to get length of work array for complete CS
!     factorization routine dorcsd/f08raf.
      lwork = -1
      Call dorcsd('Yes U1','Yes U2','Yes V1T','Yes V2T','Column','Default',m,  &
        p,q,x,ldx,x(1,q+1),ldx,x(p+1,1),ldx,x(p+1,q+1),ldx,theta,u,ldu,        &
        u(p+1,p+1),ldu,v,ldv,v(q+1,q+1),ldv,wdum,lwork,iwork,info)
      lwork = nint(wdum(1))
      Allocate (work(lwork))
!     Initialize all of  u, v to zero.
      u(1:m,1:m) = zero
      v(1:m,1:m) = zero

!     This is how you might pass partitions as sub-matrices
      Call dorcsd('Yes U1','Yes U2','Yes V1T','Yes V2T','Column','Default',m,  &
        p,q,x,ldx,x(1,q+1),ldx,x(p+1,1),ldx,x(p+1,q+1),ldx,theta,u,ldu,        &
        u(p+1,p+1),ldu,v,ldv,v(q+1,q+1),ldv,work,lwork,iwork,info)
      If (info/=0) Then
        Write (nout,99999) 'Failure in DORCSD/F08RAF. info =', info
        Go To 100
      End If
!     Print Theta, U1, U2, V1T, V2T using matrix printing routine x04caf.
      Write (nout,99998) 'Components of CS factorization of X:'
      Flush (nout)
      ifail = 0
      Call x04caf('G','N',r,1,theta,r,'    Theta',ifail)
      Write (nout,*)
      Flush (nout)
!     By changes of sign the first r elements of the first row of U1 can be
!     made positive.
      Do i = 1, r
        If (u(1,i)<0.0_nag_wp) Then
          u(1:p,i) = -u(1:p,i)
          u(p+1:m,p+i) = -u(p+1:m,p+i)
          v(i,1:q) = -v(i,1:q)
          v(q+i,q+1:m) = -v(q+i,q+1:m)
        End If
      End Do
      ifail = 0
      Call x04caf('G','N',p,p,u,ldu,'    U1',ifail)
      Write (nout,*)
      Flush (nout)
      ifail = 0
      Call x04caf('G','N',m-p,m-p,u(p+1,p+1),ldu,'    U2',ifail)
      Write (nout,*)
      Flush (nout)
      ifail = 0
      Call x04caf('G','N',q,q,v,ldv,'    V1T',ifail)
      Write (nout,*)
      Flush (nout)
      ifail = 0
      Call x04caf('G','N',m-q,m-q,v(q+1,q+1),ldv,'    V2T',ifail)
      Write (nout,*)

!     And this is how you might pass partitions as separate matrices.
      Call dorcsd('Yes U1','Yes U2','Yes V1T','Yes V2T','Column','Default',m,  &
        p,q,x11,ldx11,x12,ldx12,x21,ldx21,x22,ldx22,theta,u1,ldu1,u2,ldu2,v1t, &
        ldv1t,v2t,ldv2t,work,lwork,iwork,info2)
      If (info2/=0) Then
        Write (nout,99999) 'Failure in DORCSD/F08RAF. info =', info
        Go To 100
      End If

!     Print Theta, U1, U2, V1T, V2T using matrix printing routine x04caf.
      If (reprint/=0) Then
!       By changes of sign the first r elements of the first row of U1 can be
!       made positive.
        Do i = 1, r
          If (u1(1,i)<0.0_nag_wp) Then
            u1(1:p,i) = -u1(1:p,i)
            u2(1:m-p,i) = -u2(1:m-p,i)
            v1t(i,1:q) = -v1t(i,1:q)
            v2t(i,1:m-q) = -v2t(i,1:m-q)
          End If
        End Do
        Write (nout,99998) 'Components of CS factorization of X:'
        Flush (nout)
        ifail = 0
        Call x04caf('G','N',r,1,theta,r,'    Theta',ifail)
        Write (nout,*)
        Flush (nout)
        ifail = 0
        Call x04caf('G','N',p,p,u1,ldu1,'    U1',ifail)
        Write (nout,*)
        Flush (nout)
        ifail = 0
        Call x04caf('G','N',m-p,m-p,u2,ldu2,'    U2',ifail)
        Write (nout,*)
        Flush (nout)
        ifail = 0
        Call x04caf('G','N',q,q,v1t,ldv1t,'    V1T',ifail)
        Write (nout,*)
        Flush (nout)
        ifail = 0
        Call x04caf('G','N',m-q,m-q,v2t,ldv2t,'    V2T',ifail)
        Write (nout,*)
        Flush (nout)
      End If
      If (recombine/=0) Then
!       Recombining should return the original matrix
!       Assemble Sigma_p into X
        x(1:m,1:m) = zero
        n11 = min(p,q) - r
        n12 = min(p,m-q) - r
        n21 = min(m-p,q) - r
        n22 = min(m-p,m-1) - r
!       Top Half
        Do j = 1, n11
          x(j,j) = one
        End Do
        Do j = 1, r
          x(j+n11,j+n11) = cos(theta(j))
          x(j+n11,j+n11+r+n21+n22) = -sin(theta(j))
        End Do
        Do j = 1, n12
          x(j+n11+r,j+n11+r+n21+n22+r) = -one
        End Do
!       Bottom half
        Do j = 1, n22
          x(p+j,q+j) = one
        End Do
        Do j = 1, r
          x(p+n22+j,j+n11) = sin(theta(j))
          x(p+n22+j,j+r+n21+n22) = cos(theta(j))
        End Do
        Do j = 1, n21
          x(p+n22+r+j,n11+r+j) = one
        End Do
!       multiply U * Sigma_p into w
        Call dgemm('n','n',m,m,m,one,u,ldu,x,ldx,zero,w,ldx)
!       form U * Sigma_p * V^T into u
        Call dgemm('n','n',m,m,m,one,w,ldx,v,ldv,zero,u,ldu)

        Flush (nout)
        ifail = 0
        Call x04caf('G','N',m,m,u,ldu,                                         &
          '    Recombined matrix X = U * Sigma_p * V^T',ifail)
      End If

100   Continue

99999 Format (1X,A,I4)
99998 Format (1X,A,/)
    End Program f08rafe