In an earlier post, I showed how to fit a logistic regression manually using iteratively reweighted least squares (IRLS). This is an alternative method using Newton-Raphson and the Fisher scoring algorithm. For further details, you can look here as well.
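For reference, the update that the code below implements is Fisher scoring. With $\mu = \operatorname{logit}^{-1}(X\beta)$ and $W = \operatorname{diag}\{\mu_i(1 - \mu_i)\}$, the score and information are

$$U(\beta) = X^\top (y - \mu), \qquad \mathcal{I}(\beta) = X^\top W X,$$

and each iteration updates $\beta^{(t+1)} = \beta^{(t)} + \mathcal{I}^{-1}(\beta^{(t)})\,U(\beta^{(t)})$. Because the logit is the canonical link for the binomial, the observed and expected information coincide, so Newton-Raphson and Fisher scoring are the same algorithm here.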
library(MLMusingR)
data(suspend)
m1 <- glm(sus ~ male + gpa * frpl + fight + frmp.c * pminor.c,
data = suspend,
family = binomial)
### extracting raw components
dat <- model.frame(m1)
fml <- formula(m1)
X <- model.matrix(fml, dat)
y <- model.response(dat, 'numeric')
k <- ncol(X)
#pp <- function(x) 1 / (1 + exp(-x)) #convert logit to pred prob
beta_0 <- rep(0, k) #initialize with zeroes
eta <- X %*% beta_0 #predicted logit
mu <- plogis(eta) #same as pp but built-in; convert logit to pred prob
vr <- (mu * (1 - mu)) #variance
wts <- diag(as.vector(vr)) #create weight matrix
d2 <- t(X) %*% wts %*% X #the Fisher information matrix
u1 <- t(X) %*% (y - mu) #the score function
iter <- 50 #max number of iterations
tol <- 1e-8 #tolerance
### now iterate
for (i in 1:iter){
#beta_1 <- beta_0 + solve(d2) %*% u1 #update beta
beta_1 <- beta_0 + chol2inv(chol(d2)) %*% u1 #update beta, inverting d2 via its Cholesky factor (more stable than solve(d2))
#print(beta_1) #show estimates
if(all(abs((beta_1 - beta_0) / beta_0) < tol)) break #stop once every relative change is below tolerance
eta <- X %*% beta_1 #predicted logit
mu <- plogis(eta) #this is just pp(eta)
vr <- mu * (1 - mu) #variance
wts <- diag(as.vector(vr)) #putting in a weight matrix
d2 <- t(X) %*% wts %*% X #information matrix
u1 <- t(X) %*% (y - mu) #score function
beta_0 <- beta_1
cat(i, "::") #show iteration number
}
1 ::2 ::3 ::4 ::5 ::6 ::7 ::
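A side note on the weight matrix: diag(as.vector(vr)) materializes a full n x n matrix, which gets expensive for large n. The information matrix can be computed without it by scaling the rows of X directly; a one-line drop-in sketch for the d2 line, using the same X and vr as above:

d2 <- crossprod(X, as.vector(vr) * X) #equals t(X) %*% wts %*% X, without forming the n x n diagonal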
### compare results
se <- sqrt(diag(solve(d2))) #get standard error, inverse of information matrix
z <- beta_1 / se
p.val <- 2 * pnorm(-abs(z)) #two-sided p-value; same as pt() with df = Inf
df <- data.frame(beta = beta_1, se = se, z = z, p = p.val)
df
beta se z p
(Intercept) -1.5922023202 0.269404299 -5.9100851 3.419311e-09
male 0.3248969875 0.099383812 3.2691138 1.078849e-03
gpa -0.7954794245 0.084849355 -9.3751971 6.904665e-21
frpl -0.5627344684 0.318873753 -1.7647563 7.760473e-02
fight 2.0780999956 0.098472087 21.1034422 7.395171e-99
frmp.c 0.0030040140 0.003189348 0.9418898 3.462491e-01
pminor.c -0.0022362792 0.002302005 -0.9714485 3.313250e-01
gpa:frpl 0.3872562271 0.109168665 3.5473204 3.891710e-04
frmp.c:pminor.c 0.0001243666 0.000106535 1.1673780 2.430578e-01
#### to compare
m2 <- update(m1, epsilon = 1e-16) #change tolerance so results match
summary(m2)$coef
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.5922023202 0.269404299 -5.9100851 3.419311e-09
male 0.3248969875 0.099383812 3.2691138 1.078849e-03
gpa -0.7954794245 0.084849355 -9.3751971 6.904665e-21
frpl -0.5627344684 0.318873753 -1.7647563 7.760473e-02
fight 2.0780999956 0.098472087 21.1034422 7.395171e-99
frmp.c 0.0030040140 0.003189348 0.9418898 3.462491e-01
pminor.c -0.0022362792 0.002302005 -0.9714485 3.313250e-01
gpa:frpl 0.3872562271 0.109168665 3.5473204 3.891710e-04
frmp.c:pminor.c 0.0001243666 0.000106535 1.1673780 2.430578e-01
## equal deviances
-2 * sum((y * log(mu)) + (1 - y) * log(1 - mu))
[1] 3331.017
deviance(m2)
[1] 3331.017
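Written out, the quantity computed above is $D = -2\sum_{i}\left[y_i \log \hat\mu_i + (1 - y_i)\log(1 - \hat\mu_i)\right]$; for binary data the saturated model's log-likelihood is zero, so the deviance is just $-2$ times the maximized log-likelihood.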
An alternative is direct maximum likelihood estimation (MLE): hand the log-likelihood to a general-purpose optimizer (optim()) and maximize it numerically.
m3 <- glm(sus ~ male + gpa * frpl, #gpa * frpl already expands to gpa + frpl + gpa:frpl
data = suspend,
family = binomial)
X <- model.matrix(m3)
## this is the function to maximize
mle <- function(b){
  pp <- plogis(X %*% b) #predicted probabilities given coefficients b
  sum(dbinom(y, 1, pp, log = TRUE)) #Bernoulli log-likelihood
}
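This is the Bernoulli log-likelihood $\ell(\beta) = \sum_i\left[y_i \log \mu_i + (1 - y_i)\log(1 - \mu_i)\right]$ with $\mu_i = \operatorname{logit}^{-1}(x_i^\top \beta)$, the same quantity whose $-2$ multiple appeared above as the deviance.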
k <- ncol(X) #number of coefficients (including the intercept)
sta <- rep(0, k) #initial values
out <- optim(par = sta, mle, #starting values and function to optimize
control = list(fnscale = -1, #-1 to maximize
reltol = 1e-15, #tolerance
maxit = 1000), #cap iterations so it doesn't keep going
hessian = TRUE) #return the Hessian at the optimum (used below for standard errors)
out$par #coefficients; tiny differences from glm() are due to optimizer tolerance
[1] -0.3712208 0.6727835 -0.9884962 -0.5433412 0.4056071
coef(m3) #using built in function
(Intercept) male gpa frpl gpa:frpl
-0.3712205 0.6727835 -0.9884965 -0.5433411 0.4056072
out$value #the log likelihood
[1] -1908.202
logLik(m3)
'log Lik.' -1908.202 (df=5)
sqrt(diag(solve(-out$hessian))) #standard errors via the observed information (negative Hessian)
[1] 0.23858787 0.09314298 0.07768979 0.29413387 0.10102125
sqrt(diag(vcov(m3)))
(Intercept) male gpa frpl gpa:frpl
0.23858792 0.09314278 0.07768976 0.29413406 0.10102131
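Since the score function t(X) %*% (y - mu) was already derived in the Newton-Raphson section, one optional refinement is to pass that analytic gradient to optim() and use a gradient-based method such as BFGS, which typically needs far fewer function evaluations than the default Nelder-Mead. A minimal sketch (not part of the original fit; score and out2 are just names introduced here), reusing the same X, y, mle, and sta:

score <- function(b){ #gradient of mle(): the score function
  pp <- plogis(X %*% b)
  as.vector(t(X) %*% (y - pp))
}
out2 <- optim(par = sta, mle, gr = score,
method = "BFGS",
control = list(fnscale = -1, reltol = 1e-15),
hessian = TRUE)
out2$par #should agree with out$par and coef(m3)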
– END