NAG Library Manual, Mark 30
Interfaces:  FL   CL   CPP   AD 

NAG CL Interface Introduction
Example description
/* nag_blgm_lm_describe_data (g22ybc) Example Program.
 *
 * Copyright 2024 Numerical Algorithms Group.
 *
 * Mark 30.0, 2024.
 */
/* Pre-processor includes */
#include <nag.h>
#include <stdio.h>
#include <string.h>

#define MAX_FORMULA_LEN 200
#define MAX_VNAME_LEN 200
#define MAX_PLAB_LEN 200
#define MAX_CVALUE_LEN 200

#define DAT(I, J) dat[(J)*pddat + (I)]

char *read_line(char formula[], Integer nchar);
Integer construct_labels(Integer ip, char **plab[], char *const vnames[],
                         Integer vinfo[]);
Integer fit_lm(void *hform, Nag_IncludeIntercept intcpt, Integer nobs,
               Integer mx, double x[], Integer ldx, Integer isx[], Integer ip,
               double y[], char *plab[]);

int main(void) {
  /* Integer scalar and array declarations */
  Integer i, j, ip = 0, pddat, ldx, lisx, lplab = 0, lvinfo, lvnames = 0, mx,
                nobs, nvar, sddat, sdx, lenlab;
  Integer exit_status = 0;
  Integer *isx = 0, *levels = 0, *vinfo = 0;
  Integer tvinfo[3];

  /* Nag Types */
  NagError fail;
  Nag_IncludeIntercept intcpt;

  /* Double scalar and array declarations */
  double *dat = 0, *x = 0, *y = 0;

  /* Character scalar and array declarations */
  char formula[MAX_FORMULA_LEN];
  char **vnames = 0, **plab = 0;

  /* Void pointers */
  void *hform = 0, *hddesc = 0, *hxdesc = 0;

  /* Initialize the error structure */
  INIT_FAIL(fail);

  printf("nag_blgm_lm_describe_data (g22ybc) Example Program Results\n\n");

  /* Skip heading in data file */
  scanf("%*[^\n] ");

  /* Read in size of the data matrix and number of variable labels supplied */
  scanf("%" NAG_IFMT "%" NAG_IFMT "%" NAG_IFMT "%*[^\n] ", &nobs, &nvar,
        &lvnames);

  /* Allocate memory */
  if (!(levels = NAG_ALLOC(nvar, Integer)) ||
      !(vnames = NAG_ALLOC(lvnames, char *))) {
    printf("Allocation failure\n");
    exit_status = -1;
    goto END;
  }
  for (i = 0; i < lvnames; i++)
    if (!(vnames[i] = NAG_ALLOC(MAX_VNAME_LEN, char))) {
      printf("Allocation failure\n");
      exit_status = -1;
      goto END;
    }

  /* Read in number of levels and names for the variables */
  for (i = 0; i < nvar; i++) {
    scanf("%" NAG_IFMT "", &levels[i]);
  }
  scanf("%*[^\n] ");

  if (lvnames > 0) {
    for (i = 0; i < lvnames; i++)
      scanf("%50s", vnames[i]);
    scanf("%*[^\n] ");
  }

  /* Call nag_blgm_lm_describe_data (g22ybc) to get a description of */
  /* the data matrix */
  nag_blgm_lm_describe_data(&hddesc, nobs, nvar, levels, lvnames, vnames,
                            &fail);
  if (fail.code != NE_NOERROR) {
    printf("Error from nag_blgm_lm_describe_data (g22ybc).\n%s\n",
           fail.message);
    exit_status = 1;
    goto END;
  }

  /* Read in the data matrix and response variable */
  pddat = nobs;
  sddat = nvar;
  if (!(dat = NAG_ALLOC(pddat * sddat, double)) ||
      !(y = NAG_ALLOC(nobs, double))) {
    printf("Allocation failure\n");
    exit_status = -1;
    goto END;
  }
  for (i = 0; i < nobs; i++) {
    for (j = 0; j < nvar; j++)
      scanf("%lf", &DAT(i, j));
    scanf("%lf", &y[i]);
  }
  scanf("%*[^\n] ");

  /* Read in the formula for the full model, remove comments and */
  /* call nag_blgm_lm_formula (g22yac) to parse it */
  read_line(formula, MAX_FORMULA_LEN);
  nag_blgm_lm_formula(&hform, formula, &fail);
  if (fail.code != NE_NOERROR) {
    printf("Error from nag_blgm_lm_formula (g22yac).\n%s\n", fail.message);
    exit_status = 1;
    goto END;
  }

  /* Start of constructing the design matrix ... */

  /* Call nag_blgm_optset (g22zmc) to alter the storage order of X as */
  /* nag_correg_linregm_fit uses VAROBS storage  */
  nag_blgm_optset(hform, "Storage Order = VAROBS", &fail);
  if (fail.code != NE_NOERROR) {
    printf("Error from nag_blgm_optset (g22zmc).\n%s\n", fail.message);
    exit_status = 1;
    goto END;
  }

  /* Call nag_blgm_lm_design_matrix (g22ycc) to get the size of */
  /* the design matrix */
  ldx = 0;
  sdx = 0;
  nag_blgm_lm_design_matrix(hform, hddesc, dat, pddat, sddat, &hxdesc, x, ldx,
                            sdx, &mx, &fail);
  if (fail.code != NW_ARRAY_SIZE && fail.code != NW_ALTERNATIVE) {
    printf("Error from nag_blgm_lm_design_matrix (g22ycc).\n%s\n",
           fail.message);
    exit_status = 1;
    goto END;
  }

  /* Allocate design matrix */
  ldx = mx;
  sdx = nobs;
  if (!(x = NAG_ALLOC(ldx * sdx, double))) {
    printf("Allocation failure\n");
    exit_status = -1;
    goto END;
  }

  /* Call nag_blgm_lm_design_matrix (g22ycc) to generate the design matrix */
  nag_blgm_lm_design_matrix(hform, hddesc, dat, pddat, sddat, &hxdesc, x, ldx,
                            sdx, &mx, &fail);
  if (fail.code != NE_NOERROR) {
    printf("Error from nag_blgm_lm_design_matrix (g22ycc).\n%s\n",
           fail.message);
    exit_status = 1;
    goto END;
  }
  /* ... End of constructing the design matrix */

  /* Start of getting the isx vector and information on parameter labels ... */
  /* Get size of output arrays used by nag_blgm_lm_submodel (g22ydc) */
  lvinfo = 3;
  lisx = lplab = lenlab = 0;
  nag_blgm_lm_submodel(Nag_X, hform, hxdesc, &intcpt, &ip, lisx, isx, lplab,
                       plab, lenlab, lvinfo, tvinfo, &fail);
  if (fail.code != NW_ARRAY_SIZE) {
    printf("Error from nag_blgm_lm_design_matrix (g22ydc).\n%s\n",
           fail.message);
    exit_status = 1;
    goto END;
  }

  /* Allocate output arrays (we already know that lisx = mx, but */
  /* nag_blgm_lm_submodel returns it just in case) */
  lisx = tvinfo[0];
  lvinfo = tvinfo[2];
  /* We don't need plab as we are constructing our own labels from vinfo */
  lplab = 0;
  if (!(isx = NAG_ALLOC(lisx, Integer)) ||
      !(vinfo = NAG_ALLOC(lvinfo, Integer))) {
    printf("Allocation failure\n");
    exit_status = -1;
    goto END;
  }

  /* Call nag_blgm_lm_submodel (g22ydc) to get the isx flag */
  /* and parameter labels */
  nag_blgm_lm_submodel(Nag_X, hform, hxdesc, &intcpt, &ip, lisx, isx, lplab,
                       plab, lenlab, lvinfo, vinfo, &fail);

  /* Construct some verbose labels for the parameters */
  if ((exit_status = construct_labels(ip, &plab, vnames, vinfo)))
    goto END;
  /* ... End of getting the isx vector and information on parameter labels */

  /* Fit a regression model and print the results */
  exit_status = fit_lm(hform, intcpt, nobs, mx, x, ldx, isx, ip, y, plab);

END:
  /* Call nag_blgm_handle_free (g22zac) to clean-up the g22 handles */
  nag_blgm_handle_free(&hform, &fail);
  nag_blgm_handle_free(&hddesc, &fail);
  nag_blgm_handle_free(&hxdesc, &fail);

  NAG_FREE(dat);
  NAG_FREE(x);
  NAG_FREE(y);
  NAG_FREE(levels);
  for (i = 0; i < lvnames; i++)
    NAG_FREE(vnames[i]);
  NAG_FREE(vnames);
  for (i = 0; i < ip; i++)
    NAG_FREE(plab[i]);
  NAG_FREE(plab);
  NAG_FREE(isx);
  NAG_FREE(vinfo);
  return (exit_status);
}

char *read_line(char formula[], Integer nchar) {
  /* Read in a line from stdin and remove any comments */
  char *pch;

  /* Read in the model formula */
  if (fgets(formula, nchar, stdin)) {
    /* Strip comments from formula */
    pch = strstr(formula, "::");
    if (pch)
      *pch = '\0';
    return formula;
  } else {
    return 0;
  }
}

Integer construct_labels(Integer ip, char **plab[], char *const vnames[],
                         Integer vinfo[]) {
  /* Construct labels from information held in VINFO */
  /* NB: For simplicity, the function contains no checks for buffer */
  /* overflow when manipulating character strings */

  /* Integer scalar and array declarations */
  Integer exit_status = 0;
  Integer b, i, j, k, li, vi;

  /* Character scalar and array declarations */
  char term[MAX_PLAB_LEN];
  char *this_lab = 0;

  if (!((*plab) = NAG_ALLOC(ip, char *))) {
    printf("Allocation failure\n");
    exit_status = -1;
    goto END;
  }
  for (i = 0; i < ip; i++)
    if (!((*plab)[i] = NAG_ALLOC(MAX_VNAME_LEN, char))) {
      printf("Allocation failure\n");
      exit_status = -1;
      goto END;
    }

  k = 0;
  for (j = 0; j < ip; j++) {
    b = vinfo[k];
    k++;
    this_lab = (*plab)[j];
    if (b == 0) {
      sprintf((*plab)[j], "%s", "Intercept");
    } else {
      for (i = 0; i < b; i++) {
        vi = vinfo[k] - 1;
        li = vinfo[k + 1];

        if (li != 0)
          sprintf(term, "%s (Level %" NAG_IFMT ")%c", vnames[vi], li, '\0');
        else
          strncpy(term, vnames[vi], MAX_PLAB_LEN);

        if (i == 0) {
          strncpy(this_lab, term, MAX_PLAB_LEN);
          this_lab += strlen(this_lab);
        } else {
          this_lab += sprintf(this_lab, " . %s", term);
        }

        /* We are ignoring the contrast identifier when */
        /* constructing these labels */
        k += 3;
      }
    }
  }

END:

  return (exit_status);
}

Integer fit_lm(void *hform, Nag_IncludeIntercept intcpt, Integer nobs,
               Integer mx, double x[], Integer ldx, Integer isx[], Integer ip,
               double y[], char *plab[]) {
  /* Perform a multiple linear regression using */
  /* nag_correg_linregm_fit (g02dac) */

  /* Integer scalar and array declarations */
  Integer i, rank, ldq, exit_status = 0, lcvalue, ivalue;

  /* NAG types */
  Nag_Boolean svd;
  NagError fail;
  Nag_IncludeMean mean;
  Nag_VariableType optype;

  /* Double scalar and array declarations */
  double rss, tol, df, rvalue;
  double *b = 0, *cov = 0, *h = 0, *p = 0, *q = 0, *res = 0, *se = 0, *wt = 0,
         *com_ar = 0;

  /* Character scalar and array declarations */
  char cvalue[MAX_CVALUE_LEN];

  /* Initialize the error structure */
  INIT_FAIL(fail);

  /* We are assuming un-weighted data */
  ldq = (ip + 1);
  if (!(b = NAG_ALLOC(ip, double)) || !(se = NAG_ALLOC(ip, double)) ||
      !(cov = NAG_ALLOC((ip * ip + ip) / 2, double)) ||
      !(res = NAG_ALLOC(nobs, double)) || !(h = NAG_ALLOC(nobs, double)) ||
      !(q = NAG_ALLOC(nobs * ldq, double)) ||
      !(p = NAG_ALLOC(ip * (ip + 2), double)) ||
      !(com_ar = NAG_ALLOC(ip * ip + 5 * (ip - 1), double))) {
    printf("Allocation failure\n");
    exit_status = -1;
    goto END;
  }

  /* Use suggested value for tolerance */
  tol = 0.000001;

  mean = (intcpt == Nag_Intercept) ? Nag_MeanInclude : Nag_MeanZero;

  /* Call nag_correg_linregm_fit (g02dac) to fit a regression model */
  nag_correg_linregm_fit(mean, nobs, x, ldx, mx, isx, ip, y, wt, &rss, &df, b,
                         se, cov, res, h, q, ldq, &svd, &rank, p, tol, com_ar,
                         &fail);

  /* Call nag_blgm_optget (g22znc) to get the formula for the model */
  /* being fit */
  lcvalue = MAX_CVALUE_LEN;
  nag_blgm_optget(hform, "Formula", &ivalue, &rvalue, cvalue, lcvalue, &optype,
                  &fail);
  if (fail.code != NE_NOERROR) {
    printf("Error from nag_blgm_optget (g22znc).\n%s\n", fail.message);
    exit_status = 1;
    goto END;
  }

  /* Display the results */
  printf(" Model: %s\n", cvalue);
  printf("                                Parameter   Standard\n");
  printf(" Coefficients                   Estimate      Error\n");
  printf(" ---------------------------------------------------\n");
  for (i = 0; i < ip; i++)
    printf(" %-30s %7.3f     %7.3f\n", plab[i], b[i], se[i]);
  printf(" ---------------------------------------------------\n");
  printf(" Residual sum of squares = %9.4f\n", rss);
  printf(" Degrees of freedom      = %9.0f\n", df);

END:
  NAG_FREE(h);
  NAG_FREE(res);
  NAG_FREE(wt);
  NAG_FREE(b);
  NAG_FREE(cov);
  NAG_FREE(p);
  NAG_FREE(q);
  NAG_FREE(com_ar);
  NAG_FREE(se);
  return exit_status;
}