# Source code for naginterfaces.​library.​examples.​correg.​glm_binomial_ex

#!/usr/bin/env python
"``naginterfaces.library.correg.glm_binomial`` Python Example."

# NAG Copyright 2017-2019.

# pylint: disable=invalid-name,too-many-locals,too-many-statements

import numpy as np

from naginterfaces.base import utils
from naginterfaces.library import blgm, correg, rand

def main():
    """
    Example for :func:`naginterfaces.library.correg.glm_binomial`.

    Use k-fold cross validation to estimate the true positive and
    negative rates of a prediction from a logistic regression model.

    The data used in this example was simulated.

    >>> main()
    naginterfaces.library.correg.glm_binomial Python Example Results.
    Use k-fold cross validation to estimate the true positive and
    negative rates of a prediction from a logistic regression model.
                          Observed
                --------------------------
    Predicted | Negative  Positive   Total
    --------------------------------------
    Negative  |    19         6        25
    Positive  |     3        12        15
    Total     |    22        18        40
    True Positive Rate (Sensitivity):  0.67
    True Negative Rate (Specificity):  0.86
    """
    print('naginterfaces.library.correg.glm_binomial Python Example Results.')
    print('Use k-fold cross validation to estimate the true positive and')
    print('negative rates of a prediction from a logistic regression model.')

    # Input the independent variables:
    dat = np.asarray([
        [0.0, -0.1, 1.0, 1.0],
        [0.4, -1.1, 2.0, 1.0],
        [-0.5, 0.2, 3.0, 0.0],
        [0.6, 1.1, 2.0, 0.0],
        [-0.3, -1.0, 3.0, 1.0],
        [2.8, -1.8, 1.0, 1.0],
        [0.4, -0.7, 1.0, 1.0],
        [-0.4, -0.3, 2.0, 0.0],
        [0.5, -2.6, 1.0, 0.0],
        [-1.6, -0.3, 1.0, 1.0],
        [0.4, 0.6, 1.0, 0.0],
        [-1.6, 0.0, 2.0, 1.0],
        [0.0, 0.4, 2.0, 1.0],
        [-0.1, 0.7, 2.0, 1.0],
        [-0.2, 1.8, 3.0, 1.0],
        [-0.9, 0.7, 3.0, 1.0],
        [-1.1, -0.5, 1.0, 1.0],
        [-0.1, -2.2, 3.0, 1.0],
        [-1.8, -0.5, 2.0, 1.0],
        [-0.8, -0.9, 1.0, 1.0],
        [1.9, -0.1, 2.0, 1.0],
        [0.3, 1.4, 2.0, 1.0],
        [0.4, -1.2, 3.0, 0.0],
        [2.2, 1.8, 2.0, 0.0],
        [1.4, -0.4, 1.0, 1.0],
        [0.4, 2.4, 2.0, 1.0],
        [-0.6, 1.1, 3.0, 1.0],
        [1.4, -0.6, 3.0, 1.0],
        [-0.1, -0.1, 1.0, 0.0],
        [-0.6, -0.4, 1.0, 0.0],
        [0.6, -0.2, 2.0, 1.0],
        [-1.8, -0.3, 2.0, 1.0],
        [-0.3, 1.6, 3.0, 1.0],
        [-0.6, 0.8, 1.0, 1.0],
        [0.3, -0.5, 1.0, 0.0],
        [1.6, 1.4, 2.0, 1.0],
        [-1.1, 0.6, 2.0, 1.0],
        [-0.3, 0.6, 2.0, 1.0],
        [-0.6, 0.1, 3.0, 1.0],
        [1.0, 0.6, 3.0, 1.0]
    ])

    # Input the dependent variables (y, t), with y out of t successes:
    y = np.asarray([0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0,
                    0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0,
                    1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0,
                    1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0])
    t = np.ones(len(y))

    # Describe the data:
    hddesc = utils.Handle()
    nobs = len(dat)
    levels = [1, 1, 3, 1]
    vnames = ['V1', 'V2', 'V3', 'V4']
    blgm.lm_describe_data(hddesc, nobs, levels, vnames=vnames)

    # Specify the model:
    hform = utils.Handle()
    blgm.lm_formula(hform, 'V1 + V2 + V3 + V4 + V1.V4')

    # We are using a logistic model, so the errors are assumed to be
    # Binomial (the logistic link is the default link):
    errfn = 'Binomial'

    # Generate the design matrix, x:
    hxdesc = utils.Handle()
    x = blgm.lm_design_matrix(hform, hddesc, dat, hxdesc)

    m = x.shape[1]

    # Convert the model into a form that can be passed to
    # correg.glm_binomial:
    mean, _, isx, _, _ = blgm.lm_submodel('S', hform, hxdesc, m, 0, 0)

    # The formula and data description handles are no longer required.
    # NB: a bare map() call would never invoke handle_free here, because
    # Python 3's map is lazy and its result would be discarded; use an
    # explicit loop so the handles are actually freed.
    for handle in (hform, hddesc):
        blgm.handle_free(handle)

    # Number of folds:
    k = 5

    # Not going to be using predicted standard errors:
    vfobs = False

    # Initialise the random number generator:
    seed = [42321]
    statecomm = rand.init_repeat(6, seed)

    # Counts of true/false positives/negatives; the third row and
    # column hold the marginal totals, filled in after the loop:
    cnt = np.zeros(shape=(3, 3), dtype='int')

    # Loop over each fold:
    for fold in range(1, k+1):
        # Split the data into training and validation datasets:
        # the first nt rows of x and elements of y and t contain the
        # training data, the remaining entries the validation data.
        nt = rand.kfold_xyw(k, fold, x, statecomm, y=y, w=t)

        nv = nobs - nt

        # Fit the logistic model to the training dataset:
        lres = correg.glm_binomial(
            x[:nt, :], isx, y[:nt], t[:nt], mean=mean,
        )

        # Predict the response for observations in the validation
        # dataset:
        _, _, pred, _ = correg.glm_predict(
            errfn, x[nt:, :], isx, lres.b, lres.cov, vfobs, t=t[nt:],
            mean=mean,
        )

        # Count the true/false positives/negatives; predictions with
        # fitted probability >= 0.5 are classed as positive:
        for i in range(nv):
            obs_val = int(y[nt+i])
            pred_val = 1 if pred[i] >= 0.5 else 0
            cnt[pred_val, obs_val] += 1

    # Calculate the marginals:
    cnt[-1, :] = np.sum(cnt, 0)
    cnt[:, -1] = np.sum(cnt, 1)

    # Display the results:
    print('                      Observed')
    print('            --------------------------')
    print('Predicted | Negative  Positive   Total')
    print('--------------------------------------')
    print('Negative  | {:5d}     {:5d}     {:5d}'.format(*cnt[0, :]))
    print('Positive  | {:5d}     {:5d}     {:5d}'.format(*cnt[1, :]))
    print('Total     | {:5d}     {:5d}     {:5d}'.format(*cnt[2, :]))

    if cnt[2, 1] > 0:
        print('True Positive Rate (Sensitivity): {:5.2f}'.format(
            cnt[1, 1] / float(cnt[2, 1])))
    else:
        print('True Positive Rate (Sensitivity): No positives in data')

    if cnt[2, 0] > 0:
        print('True Negative Rate (Specificity): {:5.2f}'.format(
            cnt[0, 0] / float(cnt[2, 0])))
    else:
        print('True Negative Rate (Specificity): No negatives in data')

    # Free the remaining blgm handle (again, not via a lazy map()):
    blgm.handle_free(hxdesc)

if __name__ == '__main__':
    import doctest
    import sys

    # Run the docstring examples in this module verbosely and use the
    # number of failures as the process exit status (0 == success).
    test_results = doctest.testmod(
        None, verbose=True, report=False,
        optionflags=doctest.REPORT_NDIFF,
    )
    sys.exit(test_results.failed)