Since there are a lot of computations and use of R for lectures notes on basics of binary logistic regression, I’ve put the R script into an Rmarkdown version.
Note that HSB data (N=600) where the variables are
Variable name | Definition |
---|---|
id | ID number |
sex | Gender: 1=male, 2=female |
ses | Socio-Economic Status |
sctyp | School Type: 1=public, 2=private |
hsp | High School Program |
locus | Locus of Control |
concpt | Self Concept |
mot | Motivation |
car | Career Choice |
rdg | Reading T-Score |
wrtg | Writing T-Score |
math | Math T-Score |
sci | Science T-Score |
civ | Civics T-Score |
program | 1 for academic, 0 for other |
achieve | Sum of 5 achievement test scores (which is transformed into mean) |
group | A grouping according to achieve (not used below) |
Other variables are created as needed.
This set of libraries are needed for all code to run properly
library(data.table) # may not need this
library(dplyr) # for "cut" function
library(MASS) # needed for negatiave binomial glm
library(vcd)
library(vcdExtra)
library(ggplot2)
library(ResourceSelection) # hosmer-lemshow statistic
library(pROC) # ROC curve/data
library(DescTools) # concordance index
library(car) # influence diagnostics
Set working director, read in data (a csv file), and quick check of data
setwd("C:\\Users\\cja\\Dropbox\\edps 589\\5 logistic regression")
hsb <- read.csv(file="hsb_data.csv", header=TRUE, sep=",")
names(hsb)
## [1] "id" "sex" "race" "ses" "sctyp" "hsp"
## [7] "locus" "concpt" "mot" "car" "rdg" "wrtg"
## [13] "math" "sci" "civ" "achieve" "sci_math" "type"
## [19] "xmean" "xsum" "program" "group"
head(hsb)
## id sex race ses sctyp hsp locus concpt mot car rdg wrtg math sci civ
## 1 1 2 1 1 1 3 0.29 0.88 0.67 10 33.6 43.7 40.2 39.0 40.6
## 2 4 2 1 2 1 3 0.06 0.03 0.00 15 38.9 41.1 32.7 41.7 40.6
## 3 23 2 1 1 1 3 -0.22 0.32 0.33 1 35.2 38.5 39.9 34.7 40.6
## 4 26 2 1 2 1 2 1.12 -0.74 0.67 15 31.0 41.1 36.0 36.9 45.6
## 5 29 2 1 2 1 2 -0.90 0.03 0.67 4 36.3 44.3 36.1 33.6 40.6
## 6 31 1 1 2 1 2 -0.21 -1.38 0.00 9 34.2 46.3 44.5 39.0 35.6
## achieve sci_math type xmean xsum program group
## 1 197.1 -1.2 case 39.42 197.1 0 1
## 2 195.0 9.0 case 39.00 195.0 0 1
## 3 188.9 -5.2 case 37.78 188.9 0 1
## 4 190.6 0.9 case 38.12 190.6 1 1
## 5 190.9 -2.5 case 38.18 190.9 1 1
## 6 199.6 -5.5 case 39.92 199.6 1 1
Create the mean of the 5 achievement test scores
hsb$achieve <- hsb$achieve/5
mean(hsb$achieve)
## [1] 51.98893
sd(hsb$achieve)
## [1] 8.089699
length(hsb$achieve) # sample size
## [1] 600
table(hsb$program) # 1 is academic
##
## 0 1
## 292 308
Always good to look at the data
lw1 <- loess(program ~ achieve,data=hsb) # fit loess to data
plot(hsb$achieve,jitter(hsb$program,0.1),
main="Whether in Academic Program vs Mean Achievement",
xlab="Mean of 5 Achievement Tests",
ylab="Probabilty in Academic Program",
cex=1.75,
col="black",
pch=20
)
j <- order(hsb$achieve)
lines(hsb$achieve[j],lw1$fitted[j],col="cyan",lwd=3)
text(64,.7,"Fitted Loess Curve",col="cyan")
The object j has indices for student ordered such that 1 is highest achievement
j[1:10]
## [1] 25 47 12 14 23 15 44 22 35 40
hsb$achieve[j[1:10]]
## [1] 32.94 33.74 34.98 35.30 35.36 35.42 35.48 35.50 35.96 35.96
Logit Model that we’ll be using throughout. Below fits the model to the data and adds fitted regression curve to the graph
summary( logit.mod <- glm(program ~ achieve, data=hsb, family=binomial(link="logit")) )
##
## Call:
## glm(formula = program ~ achieve, family = binomial(link = "logit"),
## data = hsb)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1417 -0.9443 0.4849 0.9583 2.1459
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -7.05477 0.69475 -10.15 <2e-16 ***
## achieve 0.13691 0.01327 10.32 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 831.35 on 599 degrees of freedom
## Residual deviance: 692.27 on 598 degrees of freedom
## AIC: 696.27
##
## Number of Fisher Scoring iterations: 3
hsb$logit.fit <- logit.mod$fitted.values
I want to add the logit model fitted values to graph, but with Rmarkdown I’ll have to redraw and add additional content
lw1 <- loess(program ~ achieve,data=hsb) # fit loess to data
plot(hsb$achieve,jitter(hsb$program,0.1),
main="Whether in Academic Program vs Mean Achievement",
xlab="Mean of 5 Achievement Tests",
ylab="Probabilty in Academic Program",
cex=1.75,
col="black",
pch=20
)
j <- order(hsb$xmean)
lines(hsb$achieve[j],lw1$fitted[j],col="cyan",lwd=3)
text(64,.7,"Fitted Loess Curve",col="cyan")
lines(hsb$achieve[j],hsb$logit.fit[j],col="blue",lwd=3)
text(64,.77,"Logit",col="blue")
This code requires the dplyr package for the cut function. This yield grouping that’s a bit better than what I put in the lecture notes, which is around page 19.
Create cut points (“levels”), give them labels (“label2”), do the cutting (hsb$grp.achieve)
levels <- c(-Inf, 40, 45, 47, 49, 51, 53, 55, 57, 60, 65, Inf)
labels <- c("<40","40-45","45-47","47-49","47-51","51-53","53-55","55-57","57-60","60-65",">65")
hsb$grp.achieve = cut(hsb$achieve, levels, labels = labels)
Take a look at hsb$grp.achieve, which is variable in HSB. Quick peek a number pre-group
table(hsb$grp.achieve)
##
## <40 40-45 45-47 47-49 47-51 51-53 53-55 55-57 57-60 60-65 >65
## 47 86 45 43 49 51 57 45 67 79 31
Using this new variable and compute various group level statistics and put them all into a data frame called “summary.p1”..
# - mean per group
(grp.mean <- aggregate(hsb$achieve,by=list(hsb$grp.achieve),FUN=mean))
## Group.1 x
## 1 <40 37.79447
## 2 40-45 42.54953
## 3 45-47 46.04622
## 4 47-49 47.88837
## 5 47-51 50.18122
## 6 51-53 52.15529
## 7 53-55 53.99193
## 8 55-57 56.03022
## 9 57-60 58.43433
## 10 60-65 62.41595
## 11 >65 66.54258
# - number of cases per group
(grp.n <- table(hsb$grp.achieve))
##
## <40 40-45 45-47 47-49 47-51 51-53 53-55 55-57 57-60 60-65 >65
## 47 86 45 43 49 51 57 45 67 79 31
# - number of non and academic students per group
(data.table <- table(hsb$grp.achieve,hsb$program))
##
## 0 1
## <40 39 8
## 40-45 68 18
## 45-47 30 15
## 47-49 27 16
## 47-51 29 20
## 51-53 29 22
## 53-55 14 43
## 55-57 16 29
## 57-60 21 46
## 60-65 16 63
## >65 3 28
# - observed proportion
(obs.p <- data.table[,2]/(data.table[,1]+data.table[,2]))
## <40 40-45 45-47 47-49 47-51 51-53 53-55 55-57
## 0.1702128 0.2093023 0.3333333 0.3720930 0.4081633 0.4313725 0.7543860 0.6444444
## 57-60 60-65 >65
## 0.6865672 0.7974684 0.9032258
# - pred prob sum
prob.sum <- aggregate(hsb$logit.fit,by=list(hsb$grp.achieve),FUN=sum)
(prob.sum <- prob.sum[,2]/grp.n)
##
## <40 40-45 45-47 47-49 47-51 51-53 53-55 55-57
## 0.1348042 0.2282907 0.3210272 0.3781267 0.4541550 0.5214715 0.5834798 0.6491954
## 57-60 60-65 >65
## 0.7196928 0.8143854 0.8854477
# - using equation on grouped data
(p.eq <- exp(-7.05477 + 0.13691*grp.mean[,2])/(1+exp(-7.05477 + 0.13691*grp.mean[,2])))
## [1] 0.1323510 0.2263017 0.3206946 0.3779231 0.4540157 0.5214397 0.5835260
## [8] 0.6493829 0.7202040 0.8161680 0.8865097
# - pred number
(pred.n <- p.eq*grp.n)
##
## <40 40-45 45-47 47-49 47-51 51-53 53-55 55-57
## 6.220499 19.461943 14.431255 16.250693 22.246768 26.593423 33.260984 29.222230
## 57-60 60-65 >65
## 48.253670 64.477269 27.481801
pred.n <- matrix(pred.n,nrow=11,ncol=1)
# - put into table like in lecture notes
summary.p19<- cbind(grp.n,grp.mean[,2],data.table[,2],obs.p,prob.sum,p.eq,pred.n)
summary.p19<- as.data.frame(summary.p19)
names(summary.p19) <- c("range # cases ","mean","# academic","obs p","sum pi","equation pi","pred freq")
summary.p19
## range # cases mean # academic obs p sum pi equation pi
## <40 47 37.79447 8 0.1702128 0.1348042 0.1323510
## 40-45 86 42.54953 18 0.2093023 0.2282907 0.2263017
## 45-47 45 46.04622 15 0.3333333 0.3210272 0.3206946
## 47-49 43 47.88837 16 0.3720930 0.3781267 0.3779231
## 47-51 49 50.18122 20 0.4081633 0.4541550 0.4540157
## 51-53 51 52.15529 22 0.4313725 0.5214715 0.5214397
## 53-55 57 53.99193 43 0.7543860 0.5834798 0.5835260
## 55-57 45 56.03022 29 0.6444444 0.6491954 0.6493829
## 57-60 67 58.43433 46 0.6865672 0.7196928 0.7202040
## 60-65 79 62.41595 63 0.7974684 0.8143854 0.8161680
## >65 31 66.54258 28 0.9032258 0.8854477 0.8865097
## pred freq
## <40 6.220499
## 40-45 19.461943
## 45-47 14.431255
## 47-49 16.250693
## 47-51 22.246768
## 51-53 26.593423
## 53-55 33.260984
## 55-57 29.222230
## 57-60 48.253670
## 60-65 64.477269
## >65 27.481801
For this we’ll use model fit to individual level data
(summary(logit.mod <- glm(program ~ achieve, data=hsb, family=binomial)))
##
## Call:
## glm(formula = program ~ achieve, family = binomial, data = hsb)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1417 -0.9443 0.4849 0.9583 2.1459
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -7.05477 0.69475 -10.15 <2e-16 ***
## achieve 0.13691 0.01327 10.32 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 831.35 on 599 degrees of freedom
## Residual deviance: 692.27 on 598 degrees of freedom
## AIC: 696.27
##
## Number of Fisher Scoring iterations: 3
Find the slope for mean achievement = 65
(p <- (subset(hsb,hsb$achieve==65)$logit.fit))
## [1] 0.8635031
exp(-7.0548 +.1369*(65))/(1 + exp(-7.0548 +.1369*(65)))
## [1] 0.8633857
# slope equals
(slope <- logit.mod$coefficients[2] * p * (1-p))
## achieve
## 0.01613753
Graph what we’re looking at
nhsb <- hsb[order(hsb$achieve),]
plot(nhsb$achieve,nhsb$logit.fit,
type='l',
col="blue",
main="Linear Interpretation at Achievement = 65",
xlab="Mean of 5 Achievement Tests",
ylab="Probility in Academic given mean Achievement",
ylim=c(0,1)
)
abline(h=p)
abline(v=65)
# to get line tangent at (65,p)
xlin <- c(55,65,70)
ylin <- c(-slope*10+p, p, slope*5+p)
lines(xlin,ylin,col="red")
text(40,.9,"p = .8635")
text(63,0.05,"x = 65")
text(50,0.7,"slope = 0.0161")
# 95% for beta
(lower <- logit.mod$coefficients[2] - 1.96*0.01327)
## achieve
## 0.1109056
(upper <- logit.mod$coefficients[2] + 1.96*0.01327)
## achieve
## 0.162924
# 95% Confidence interval for exp(beta) = odds ratio
exp(lower)
## achieve
## 1.117289
exp(upper)
## achieve
## 1.176947
Alternative, for the coefficients and odds ratios is below. These should be nearly the same.
(ci.coefficients <- confint(logit.mod, level=.95))
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) -8.4574031 -5.7302087
## achieve 0.1116148 0.1637129
(ci.odds <- exp(ci.coefficients))
## 2.5 % 97.5 %
## (Intercept) 0.0002123227 0.003246399
## achieve 1.1180821023 1.177876124
The z’s in the table of coeficients are essentially sqrt of Wald test, but we can compute them here (note .01327 is ASE).
(wald <- (logit.mod$coefficients[2]/.01327)^2)
## achieve
## 106.4532
1- pchisq(wald,1)
## achieve
## 0
The likelihood ratio tests is
(lr <- logit.mod$null.deviance-logit.mod$deviance)
## [1] 139.0819
1-pchisq(lr,1)
## [1] 0
Or more easily
anova(logit.mod, test="LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: program
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 599 831.35
## achieve 1 139.08 598 692.27 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
We’ll do this 2 ways:
Fit models to individual level data and then group – we already did the grouping. We need some additional variables/statistics.
pred.acd <- summary.p19[,7]
pred.not <- summary.p19[,1] - pred.acd
number.attend <- summary.p19[,3]
number.not <- summary.p19[,1] - number.attend
Given these values, the “test” statistics are
Xsq <-(number.attend - pred.acd)**2 / pred.acd
+ (number.not - pred.not)**2 / pred.not
## [1] 0.077652331 0.032121108 0.010581743 0.002349487 0.188686215 0.864502183
## [7] 3.995466119 0.003130108 0.270934578 0.150269415 0.076326107
(Xsq <- sum(Xsq))
## [1] 4.667689
Gsq <- number.attend* log(number.attend/pred.acd)
+ number.not * log(number.not/pred.not)
## [1] -1.7400973 1.4778868 -0.5634208 0.2518645 2.3385756 5.0008529
## [7] -7.3928766 0.2237877 2.3840133 1.5499778 -0.4780110
(Gsq <- 2*sum(Gsq))
## [1] 4.643049
These do not have sampling distributions that are approximately chi-square, but do give a sense of how well the model is doing, i.e.,
(df <- length(hsb$grp.achieve) - 2)
## [1] 598
Xsq/df
## [1] 0.007805501
Gsq/df
## [1] 0.007764296
grp.data <- cbind(number.attend,number.not,summary.p19[,2])
grp.data <- as.data.frame(grp.data)
(grp.data$ncases <- grp.data$number.attend + grp.data$number.not)
## [1] 47 86 45 43 49 51 57 45 67 79 31
grp.data$p <- grp.data$number.attend/grp.data$ncases
(logit.mod2 <- glm(p ~ V3, weight=ncases, data=grp.data, family=binomial))
##
## Call: glm(formula = p ~ V3, family = binomial, data = grp.data, weights = ncases)
##
## Coefficients:
## (Intercept) V3
## -6.9254 0.1344
##
## Degrees of Freedom: 10 Total (i.e. Null); 9 Residual
## Null Deviance: 145.4
## Residual Deviance: 10.71 AIC: 60.17
1-pchisq(logit.mod2$deviance,logit.mod2$df.residual)
## [1] 0.2959252
# Or
anova(logit.mod2,test="LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: p
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 10 145.393
## V3 1 134.68 9 10.713 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Unlike the first method, the sampling distribution of tests statistics is approximately chi-square. The fit method uses all information in data when fitting the model.
Note that the fitted probabilities are very similar to fitted ones:
# - observed proportions
(grp.data$pobs <- grp.data$number.attend/grp.data$ncases)
## [1] 0.1702128 0.2093023 0.3333333 0.3720930 0.4081633 0.4313725 0.7543860
## [8] 0.6444444 0.6865672 0.7974684 0.9032258
# - fitted probabilities
(grp.data$phat <- logit.mod2$fitted)
## 1 2 3 4 5 6 7 8
## 0.1365091 0.2305232 0.3240343 0.3804475 0.4552657 0.5214771 0.5824559 0.6472276
## 9 10 11
## 0.7170882 0.8123450 0.8828884
# - fitted counts
(grp.data$predFreq <- grp.data$phat*grp.data$ncases)
## [1] 6.41593 19.82500 14.58154 16.35924 22.30802 26.59533 33.19999 29.12524
## [9] 48.04491 64.17526 27.36954
Or a graph of these
plot(grp.data$V3,grp.data$pobs, type='p',pch=1, col="blue", lwd=2,
xlab="Achievement",
ylab="Proporitions and Predications",
main="Logit Model fit to Grouped Data")
lines(grp.data$V3,grp.data$phat, col="red")
smooth <- loess(grp.data$pobs~grp.data$V3)
lines(grp.data$V3,smooth$fit, col="purple")
legend("topleft",legend=c("Observed proportions","Fitted Probabilities","Loess"),
col=c("blue","red","purple"), lty=1,lwd=2, cex=1)
# Get min and max of explanatory variable
(min.ach <- min(hsb$achieve))
## [1] 32.94
(max.ach <- max(hsb$achieve))
## [1] 70
achieve.range <- seq(from=32, to=70, by=.01)
# Get the fitted probabilities and se for achieve values in range defined above
logit.modt <- predict(logit.mod,
whatever <- data.frame(achieve=achieve.range),
type="response",
se.fit=TRUE)
# See was we get
names(logit.modt)
## [1] "fit" "se.fit" "residual.scale"
# Compute the CI bands
upper <- logit.modt$fit + 1.96*logit.modt$se.fit
lower <- logit.modt$fit - 1.96*logit.modt$se.fit
# Fitted probabilities
lw1 <- loess(program ~ achieve,data=hsb) # fit loess to data
plot(hsb$achieve, logit.mod$fitted.values,
type='n',
main="Data (Loess) & Logit Fitted w/ 95% Bands",
xlab="Mean of sum of 5 Achievement Tests",
ylab="Probabilty in Academic Program",
cex=1.75,
pch=20
)
# Add confidence band as shaded
polygon(c(achieve.range,rev(achieve.range)) ,
c(upper,rev(lower)),
col=rgb(0,0,1,0.2),
border=NA
)
# Add loess
j <- order(hsb$xmean)
lines(hsb$achieve[j],lw1$fitted[j],col="green",lwd=3)
lines(hsb$achieve[j],logit.mod$fitted.values[j],col="blue")
legend("topleft",legend=c("Loess","Logit","95% CI"), col=c("green","blue","azure"), lty=1, cex=1)
This is form the ResourceSelection package,
hoslem.test(logit.mod$y, fitted(logit.mod), g=10)
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: logit.mod$y, fitted(logit.mod)
## X-squared = 3.9814, df = 8, p-value = 0.8588
If this is significant I worry; however, it is not significant, I want more evidence that it’s a good model for the data.
Computing sensitivity & specificity,
# -- decision tables
pred.decision <- ifelse(hsb$logit.fit >=.5,1,0)
table(hsb$program)
##
## 0 1
## 292 308
table(pred.decision)
## pred.decision
## 0 1
## 279 321
addmargins(cross <- table(pred.decision,hsb$program))
##
## pred.decision 0 1 Sum
## 0 198 81 279
## 1 94 227 321
## Sum 292 308 600
addmargins(prop.table(cross))
##
## pred.decision 0 1 Sum
## 0 0.3300000 0.1350000 0.4650000
## 1 0.1566667 0.3783333 0.5350000
## Sum 0.4866667 0.5133333 1.0000000
# Sensitivity
(sensitivity <- 227/308)
## [1] 0.737013
# - Specificity
(specificity <- 198/292)
## [1] 0.6780822
# - proportion correct
.3300+.37833
## [1] 0.70833
# - or
(198+227)/600
## [1] 0.7083333
# - or
sensitivity*(308/600) + specificity*(292/600)
## [1] 0.7083333
The concordance index using the DescTools package
Cstat(logit.mod,resp=hsb$program)
## [1] 0.768541
For the ROC curves, we use package pROC, which is also give us concordances (i.e., area under the curve)
roc1 <- roc(hsb$program ~ hsb$achieve, auc=TRUE, ci=TRUE, plot=TRUE)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
names(roc1)
## [1] "percent" "sensitivities" "specificities"
## [4] "thresholds" "direction" "cases"
## [7] "controls" "fun.sesp" "auc"
## [10] "call" "original.predictor" "original.response"
## [13] "predictor" "response" "levels"
## [16] "ci"
# - area usnder curve (i.e. concordance)
roc1$auc
## Area under the curve: 0.7685
# - concordance index (area under curve) and confidence interval for it.
roc1$ci
## 95% CI: 0.731-0.806 (DeLong)
# - a 2nd ROC plot
plot(roc1,
main="ROC Curve for Logit Model fit to HSB data (c=.77)",
col="red")
We’ll look at residuals from GLM and then look at the adjusted ones, the latter of which should be N(0,1).
# - what comes from glm
qqnorm(logit.mod$residuals)
Various way of getting standardized residuals – here they all look about the same.
# Pearson standardized residuals--- seems to be the same
resid <- rstandard(logit.mod, pearson=TRUE)
rstud <- rstudent(logit.mod, pearson=TRUE)
rdev <- rstudent(logit.mod, deviance=TRUE)
qqnorm(resid,
main="QQ plot of Pearson Standardized Residuals")
ylin <- c(-2,3)
lines(ylin,ylin)
qqnorm(rstud,
main="QQ plot of Studentized Standardized Residuals")
lines(ylin,ylin)
qqnorm(rdev, main="QQ plot of Deviance Standardized Residuals")
lines(ylin,ylin)
summary(rdev)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.151402 -0.945111 0.485526 0.007902 0.959097 2.156461
This is just a sub-set of what we could do but give a good idea of if there are some problem observations
inf <- influence.measures(logit.mod)
names(inf)
## [1] "infmat" "is.inf" "call"
summary(inf)
## Potentially influential observations of
## glm(formula = program ~ achieve, family = binomial, data = hsb) :
## NONE
## numeric(0)
You can use the above table to pick out potentially influential ones.
# --- requires car package
influencePlot(logit.mod)
## StudRes Hat CookD
## 22 2.1552903 0.005032289 0.0228079336
## 24 -0.4872975 0.005043183 0.0003202020
## 33 -0.4904528 0.005042404 0.0003245655
## 44 2.1564608 0.005031682 0.0228676846
influenceIndexPlot(logit.mod, vars=c("Cook","Studentized","hat"), id.n=5)
The influenceIndexPlot is throwing a lot of warnings, but does produce the graphic. (I have surpressed this this document)
There are many statistics that you can examine that are in the following table
# To see what is here:
head(inf$infmat)
## dfb.1_ dfb.achv dffit cov.r cook.d hat
## 1 -0.03562045 0.03344514 -0.03828352 1.0071931 0.0004639563 0.004821574
## 2 -0.03508014 0.03301247 -0.03750023 1.0073018 0.0004431245 0.004877112
## 3 -0.03328648 0.03150783 -0.03511819 1.0075578 0.0003839417 0.004992800
## 4 0.12412552 -0.11731384 0.13139260 0.9967804 0.0157254398 0.004967385
## 5 0.12376330 -0.11693913 0.13108943 0.9968166 0.0155808331 0.004962355
## 6 0.11216532 -0.10501107 0.12141093 0.9977753 0.0117353651 0.004745108