/* nag_dorcsd (f08rac) Example Program.
 *
 * Copyright 2014 Numerical Algorithms Group.
 *
 * Mark 24, 2013.
 */

#include <stdio.h>
#include <math.h>
#include <nag.h>
#include <nag_stdlib.h>
#include <nagf08.h>
#include <nagf16.h>
#include <nagx04.h>

int main(void)
{
  /* Scalars */
  Integer       exit_status = 0;
  Integer       pdx, pdu, pdv, pdx11, pdx12, pdx21, pdx22, pdu1, pdu2, pdv1t;
  Integer       pdv2t, pdw;
  Integer       i, j, m, p, q, n11, n12, n21, n22, r;
  Integer       recombine = 1, reprint = 0;
  double        alpha, beta;
  /* Arrays */
  double        *theta = 0, *u = 0, *u1 = 0, *u2 = 0, *v = 0, *v1t = 0, *w = 0,
                *v2t = 0, *x = 0, *x11 = 0, *x12 = 0, *x21 = 0, *x22 = 0;
  /* Nag Types */
  Nag_OrderType order;
  NagError      fail;

#ifdef NAG_COLUMN_MAJOR
#define X(I,J) x[(J-1)*pdx + I-1]
#define U(I,J) u[(J-1)*pdu + I-1]
#define V(I,J) v[(J-1)*pdv + I-1]
#define W(I,J) w[(J-1)*pdw + I-1]
#define X11(I,J) x11[(J-1)*pdx11 + I-1]
#define X12(I,J) x12[(J-1)*pdx12 + I-1]
#define X21(I,J) x21[(J-1)*pdx21 + I-1]
#define X22(I,J) x22[(J-1)*pdx22 + I-1]
  order = Nag_ColMajor;
#else
#define X(I,J) x[(I-1)*pdx + J-1]
#define U(I,J) u[(I-1)*pdu + J-1]
#define V(I,J) v[(I-1)*pdv + J-1]
#define W(I,J) w[(I-1)*pdw + J-1]
#define X11(I,J) x11[(I-1)*pdx11 + J-1]
#define X12(I,J) x12[(I-1)*pdx12 + J-1]
#define X21(I,J) x21[(I-1)*pdx21 + J-1]
#define X22(I,J) x22[(I-1)*pdx22 + J-1]
  order = Nag_RowMajor;
#endif

  INIT_FAIL(fail);

  printf("nag_dorcsd (f08rac) Example Program Results\n\n");
  fflush(stdout);
 
  /* Skip heading in data file*/
  scanf("%*[^\n] ");
  scanf("%ld%ld%ld%*[^\n] ", &m, &p, &q);

  r = MIN(MIN(p,q),MIN(m-p,m-q));

  if (!(x = NAG_ALLOC(m*m, double))||
      !(u = NAG_ALLOC(m*m, double))||
      !(v = NAG_ALLOC(m*m, double))||
      !(w = NAG_ALLOC(m*m, double))||
      !(theta = NAG_ALLOC(r, double))||
      !(x11 = NAG_ALLOC(p*q, double))||
      !(x12 = NAG_ALLOC(p*(m-q), double))||
      !(x21 = NAG_ALLOC((m-p)*q, double))||
      !(x22 = NAG_ALLOC((m-p)*(m-q), double))||
      !(u1 = NAG_ALLOC(p*p, double))||
      !(u2 = NAG_ALLOC((m-p)*(m-p), double))||
      !(v1t = NAG_ALLOC(q*q, double))||
      !(v2t = NAG_ALLOC((m-q)*(m-q), double)))
    {
      printf("Allocation failure\n");
      exit_status = -1;
      goto END;
    }
  pdx   = m;  pdu   = m;    pdv   = m;    pdw   = m;
  pdu1  = p;  pdu2  = m-p;  pdv1t = q;    pdv2t = m-q; 
#ifdef NAG_COLUMN_MAJOR
  pdx11 = p;  pdx12 = p;    pdx21 = m-p;  pdx22 = m-p;
#else
  pdx11 = q;  pdx12 = m-q;  pdx21 = q;    pdx22 = m-q;
#endif
  /* Read and print orthogonal X from data file
   * (as, say, generated by a generalized singular value decomposition).
   */
  for ( i=1; i<=m; i++)
    for (j=1;j<=m; j++)
      scanf("%lf", &X(i, j));
  /* nag_gen_real_mat_print (x04cac).
   */
  nag_gen_real_mat_print(order, Nag_GeneralMatrix, Nag_NonUnitDiag, m, m,
                         &X(1,1), pdx, "    Orthogonal matrix X", 0, &fail);
  if (fail.code != NE_NOERROR) {
    printf("Error from nag_gen_real_mat_print (x04cac).\n%s\n", fail.message);
    exit_status = 1;
    goto END;
  }
  printf("\n");
  fflush(stdout);

  /* nag_dorcsd (f08rac).
   * Compute the complete CS factorization of X:
   * X11 is stored in X(1:p,    1:q),  X12 is stored in X(1:p,   q+1:m)
   * X21 is stored in X(p+1:m,  1:q),  X22 is stored in X(p+1:m, q+1:m)
   * U1  is stored in U(1:p,    1:p),  U2  is stored in U(p+1:m, p+1:m)
   * V1  is stored in V(1:q,    1:q),  V2  is stored in V(q+1:m, q+1:m)
   */
  for (j=1;j<=p; j++) {
    for (i=1;i<=q; i++)    X11(j, i) = X(j, i);
    for (i=1;i<=m-q; i++)  X12(j, i) = X(j, i + q);
  }
  for (j=1;j<=m-p; j++) {
    for (i=1;i<=q; i++)    X21(j, i) = X(j + p, i);
    for (i=1;i<=m-q; i++)  X22(j, i) = X(j + p, i + q);
  }
  for ( i=1; i<=m; i++)
    for (j=1;j<=m; j++) {
      U(i,j) = 0.0;
      V(i,j) = 0.0;
    }

  /* This is how you might pass partitions as sub-matrices */
  nag_dorcsd(order, Nag_AllU, Nag_AllU, Nag_AllVT, Nag_AllVT, Nag_UpperMinus, 
	     m, p, q, &X(1,1), pdx, &X(1,q+1), pdx, &X(p+1,1), pdx, &X(p+1,q+1),
             pdx, theta, &U(1,1), pdu, &U(p+1,p+1), pdu, &V(1,1), pdv,
             &V(q+1,q+1), pdv, &fail);
  if (fail.code != NE_NOERROR) {
    printf("Error from nag_dorcsd (f08rac).\n%s\n", fail.message);
    exit_status = 2;
    goto END;
  }
  /* Print Theta, U1, U2, V1T, V2T
   * using matrix printing routine nag_gen_real_mat_print (x04cac).
   */
  printf("Components of CS factorization of X:\n");
  fflush(stdout);
  nag_gen_real_mat_print(Nag_ColMajor, Nag_GeneralMatrix, Nag_NonUnitDiag, 
			 r, 1, theta, r, "    Theta", 0, &fail);
  printf("\n");
  fflush(stdout);
  nag_gen_real_mat_print(order, Nag_GeneralMatrix, Nag_NonUnitDiag, 
			 p, p,  &U(1,1), pdu, "    U1", 0, &fail);
  printf("\n");
  fflush(stdout);
  nag_gen_real_mat_print(order, Nag_GeneralMatrix, Nag_NonUnitDiag, 
			 m-p, m-p, &U(p+1,p+1), pdu, "    U2", 0, &fail);
  printf("\n");
  fflush(stdout);
  nag_gen_real_mat_print(order, Nag_GeneralMatrix, Nag_NonUnitDiag, 
			 q, q, &V(1,1), pdv, "    V1T", 0, &fail);
  printf("\n");
  fflush(stdout);
  nag_gen_real_mat_print(order, Nag_GeneralMatrix, Nag_NonUnitDiag, 
			 m-q,m-q, &V(q+1,q+1), pdv, "    V2T", 0, &fail);
  printf("\n");
  fflush(stdout);


  /* And this is how you might pass partitions as separate matrices. */
  nag_dorcsd(order, Nag_AllU, Nag_AllU, Nag_AllVT, Nag_AllVT, Nag_UpperMinus,
             m, p, q, 
	     x11, pdx11, x12, pdx12, x21, pdx21, x22, pdx22, theta, 
	     u1, pdu1, u2, pdu2, v1t, pdv1t, v2t, pdv2t, &fail);
  if (fail.code != NE_NOERROR) {
    printf("Error second from nag_dorcsd (f08rac).\n%s\n", fail.message);
    exit_status = 3;
    goto END;
  }
  /* Print Theta, U1, U2, V1T, V2T
   * using matrix printing routine nag_gen_real_mat_print (x04cac).
   */
  if (reprint != 0) {
    printf("Components of CS factorization of X:\n");
    fflush(stdout);
    nag_gen_real_mat_print(Nag_ColMajor, Nag_GeneralMatrix, Nag_NonUnitDiag,
			   r, 1, theta, r, "    Theta", 0, &fail);
    nag_gen_real_mat_print(order, Nag_GeneralMatrix, Nag_NonUnitDiag, 
			   p, p, u1, pdu1, "    U1", 0, &fail);
    nag_gen_real_mat_print(order, Nag_GeneralMatrix, Nag_NonUnitDiag, 
			   m-p, m-p, u2, pdu2, "    U2", 0, &fail);
    nag_gen_real_mat_print(order, Nag_GeneralMatrix, Nag_NonUnitDiag, 
			   q, q, v1t, pdv1t, "    V1T", 0, &fail);
    nag_gen_real_mat_print(order, Nag_GeneralMatrix, Nag_NonUnitDiag, 
			   m-q, m-q, v2t, pdv2t, "    V2T", 0, &fail);
  }
  if ( recombine != 0) {
    /* Recombining should return the original matrix. 
       Assemble Sigma_p into X 
    */
    for ( i=1; i<=m; i++) 
      for (j=1;j<=m; j++) 
	X(i,j) = 0.0;
    n11 = MIN(p,q)-r;
    n12 = MIN(p,m-q)-r;
    n21 = MIN(m-p,q)-r;
    n22 = MIN(m-p,m-q)-r; 
    
    /* top half */ 
    for (j=1; j<=n11; j++) X(j,j) = 1.0;
    for (j=1; j<=r; j++) {
      X(j+n11,j+n11)           =  cos(theta[j-1]);
      X(j+n11,j+n11+r+n21+n22) = -sin(theta[j-1]);
    }
    for (j=1; j<=n12; j++) X(j+n11+r,j+n11+r+n21+n22+r) = -1.0;
    /* bottom half */
    for (j=1; j<=n22; j++) X(p+j,q+j) = 1.0;
    for (j=1; j<=r; j++) {
      X(p+n22+j,j+n11)       = sin(theta[j-1]);
      X(p+n22+j,j+r+n21+n22) = cos(theta[j-1]);
    } 
    for (j=1; j<=n21; j++) X(p+n22+r+j,n11+r+j) = 1.0;

    alpha = 1.0;
    beta = 0.0;
    /* multiply U * Sigma_p into w */
    nag_dgemm(order, Nag_NoTrans, Nag_NoTrans, m, m, m, alpha, 
	      &U(1,1), pdu, &X(1,1), pdx, beta, &W(1,1), pdw, &fail);
    /* form U * Sigma_p * V^T into u */
    nag_dgemm(order, Nag_NoTrans, Nag_NoTrans, m, m, m, alpha,
	      &W(1,1), pdw, &V(1,1), pdv, beta, &U(1,1), pdu, &fail);
    nag_gen_real_mat_print(order, Nag_GeneralMatrix, Nag_NonUnitDiag, 
	m, m, &U(1,1), pdu, "    U * Sigma_p * V^T", 0, &fail);
  }
 END:
   NAG_FREE(x);
   NAG_FREE(u);
   NAG_FREE(v);
   NAG_FREE(w);
   NAG_FREE(theta);
   NAG_FREE(x11);
   NAG_FREE(x12);
   NAG_FREE(x21);
   NAG_FREE(x22);
   NAG_FREE(u1);
   NAG_FREE(u2);
   NAG_FREE(v1t);
   NAG_FREE(v2t);
  return exit_status;
}