brglm/0000755000176200001440000000000015062240332011352 5ustar liggesusersbrglm/MD50000644000176200001440000000307715062240332011671 0ustar liggesusers7f1260c08c357ee0b85bbe775e91877b *DESCRIPTION a6f7e09e5ba7f23bc0aeb5f084cb47d5 *NAMESPACE 395d668b73ddb0c457cf74ff92ac674f *R/aaa.R 46d1f03bd8594faafd6b84fb0dace608 *R/brglm.R dde6f49cfeed7fcd9bb3fda14361d44a *R/brglm.control.R 65a4a2d43dc91f72a786c3fe63379d2c *R/confint.brglm.R a95372ff24ea8a1267c2ca50bc7324c5 *R/gethats.R fba57f442a1d192852d6fc4e6ab2c0dd *R/glm.control1.R cc8635f507ea9b2f8fa06721c49cebbb *R/modifications.R b2d77e5fa7043af91c7042d5dced9007 *R/plot.profile.brglm.R 6202203d79589cc1ca93e8442474ff88 *R/profile.brglm.R b8a96e9bca68064939bb7888b90e816d *R/profileObjectives.R f7897e034ca98eff78ee06174d784dd0 *R/separation.detection.R 8d904e9bf8f4577a70347d0bdfd01963 *data/lizards.rda 0da85d0216c6d25e23ae98040639f6ae *inst/CHANGES f59c5b40baec614e4c55003a1f8c78ee *inst/CITATION a9119287c77b775ee0b46052565f6e45 *inst/Jeffreys_power.R fafa5df97dfc59276cadab173e63f37b *inst/WORDLIST 2b8ac6a2edb9bebf884c2fea14170939 *man/brglm.Rd 0f00cd9100e930a825a30e1f6f298141 *man/brglm.control.Rd 347fbeed1e2e2c18b39d6f596bffd06d *man/confint.brglm.Rd 8a1b91b04e940ad2d18a04de10dc0d70 *man/gethats.Rd c81f66a78e5ba8a7bc3cd16401eb13f9 *man/glm.control1.Rd 122e76157012d6ed3144f482e58f3d7d *man/lizards.Rd 346ead79c59f01f1bb133d6ee844dbbf *man/modifications.Rd 9a830094b299539c3a37a8793d76b496 *man/plot.profile.brglm.Rd ae00efdc667673c04e11afa04760ebee *man/profile.brglm.Rd 63bc090dec0a65cff53f082afc0231d9 *man/profileObjectives.Rd dbf7f9daef6de3cb2b57ad335111ed57 *man/separation.detection.Rd b634e50417de5d3050c5f7a88e0cce9d *src/brglm_init.c f7770402b91a9459d81baebd7108ddec *src/hats.c brglm/R/0000755000176200001440000000000014040310424011546 5ustar liggesusersbrglm/R/brglm.R0000644000176200001440000003652313450666662013033 0ustar liggesusers## * 'brglm' and 'brglm.fit' were written using as basis the code ## of 'glm' and 'glm.fit', respectively. ## * 'print.brglm' is a modification of 'print.glm' ## * 'summary.brglm' is a modification of 'summary.brglm' ## * 'print.summary.brglm' is a modification of 'print.summary.glm' ## Ioannis Kosmidis [15/02/2008] ## Suggestion by Kurt Hornik to avoid a warning related to the binding ## of n which is evaluated by family$initialize if(getRversion() >= "2.15.1") globalVariables("n") `brglm` <- function (formula, family = binomial, data, weights, subset, na.action, start = NULL, etastart, mustart, offset, control.glm = glm.control1(...), model = TRUE, method = "brglm.fit", pl = FALSE, x = FALSE, y = TRUE, contrasts = NULL, control.brglm = brglm.control(...), ...) 
{ call <- match.call() if (is.character(family)) family <- get(family, mode = "function", envir = parent.frame()) if (is.function(family)) family <- family() if (is.null(family$family)) { print(family) stop("'family' not recognized") } br <- method == "brglm.fit" #################### ## More families to be implemented if (br & family$family != "binomial") stop("families other than 'binomial' are not currently implemented") #################### if (missing(data)) data <- environment(formula) mf <- match.call(expand.dots = FALSE) m <- match(c("formula", "data", "subset", "weights", "na.action", "etastart", "mustart", "offset"), names(mf), 0) mf <- mf[c(1, m)] mf$drop.unused.levels <- TRUE mf[[1]] <- as.name("model.frame") mf <- eval(mf, parent.frame()) switch(method, model.frame = return(mf), glm.fit = fit.proc <- glm.fit, brglm.fit = fit.proc <- brglm.fit, stop("invalid 'method': ", method)) #################### ## Arg control of fit.proc if (br) { formals(fit.proc)$control.brglm <- control.brglm } if (pl) formals(fit.proc)$pl <- TRUE #################### mt <- attr(mf, "terms") Y <- model.response(mf, "any") if (length(dim(Y)) == 1) { nm <- rownames(Y) dim(Y) <- NULL if (!is.null(nm)) names(Y) <- nm } Xor <- if (!is.empty.model(mt)) model.matrix(mt, mf, contrasts) else matrix(, NROW(Y), 0) Xmax <- apply(abs(Xor), 2, max) Xmax[Xmax==0] <- 1 X <- sweep(Xor, 2, Xmax, "/") weights <- as.vector(model.weights(mf)) if (!is.null(weights) && !is.numeric(weights)) stop("'weights' must be a numeric vector") offset <- as.vector(model.offset(mf)) if (!is.null(weights) && any(weights < 0)) stop("negative weights not allowed") if (!is.null(offset)) { if (length(offset) == 1) offset <- rep(offset, NROW(Y)) else if (length(offset) != NROW(Y)) stop(gettextf("number of offsets is %d should equal %d (number of observations)", length(offset), NROW(Y)), domain = NA) } mustart <- model.extract(mf, "mustart") etastart <- model.extract(mf, "etastart") par.names <- colnames(X) fit <- fit.proc(x = X, y = Y, weights = weights, start = start, etastart = etastart, mustart = mustart, offset = offset, family = family, control = control.glm, intercept = attr(mt, "intercept") > 0) if (length(offset) && attr(mt, "intercept") > 0) { fit$null.deviance <- glm.fit(x = X[, "(Intercept)", drop = FALSE], y = Y, weights = weights, offset = offset, family = family, control = control.glm, intercept = TRUE)$deviance } if (model) fit$model <- mf fit$na.action <- attr(mf, "na.action") ## Move back to the original scale if (nPars <- ncol(X)) { redundant <- if (br) fit$redundant else rep.int(0, nPars) fit$coefficients <- fit$coefficients/Xmax[!redundant] #fit$qr <- qr(sqrt(fit$weights) * Xor[, !redundant]) fit$qr <- qr(sqrt(fit$weights) * Xor) if (br) { fit$FisherInfo <- fit$FisherInfo * tcrossprod(Xmax[!redundant]) fit$control.brglm <- control.brglm } #################### ## Aliasing coefs <- rep(NA, ncol(X)) names(coefs) <- par.names coefs[!redundant] <- fit$coefficients fit$coefficients <- coefs #################### } fit$control.glm <- control.glm if (x) fit$x <- Xor if (!y) fit$y <- NULL if (br) fit$penalized.deviance <- if (all(family$link == "logit") | pl) fit$deviance - log(det(fit$FisherInfo)) else NULL fit <- c(fit, list(call = call, formula = formula, terms = mt, data = data, offset = offset, method = method, pl = pl, contrasts = attr(X, "contrasts"), xlevels = .getXlevels(mt, mf))) class(fit) <- c("brglm", "glm", "lm") fit } `brglm.fit` <- function (x, y, weights = rep(1, nobs), start = NULL, etastart = NULL, mustart = NULL, 
offset = rep(0, nobs), family = binomial(), control = glm.control(), control.brglm = brglm.control(), intercept = TRUE, pl = FALSE) { x <- as.matrix(x) nobs <- NROW(y) nvars <- ncol(x) EMPTY <- nvars == 0 if (is.null(weights)) weights <- rep.int(1, nobs) if (is.null(offset)) offset <- rep.int(0, nobs) variance <- family$variance dev.resids <- family$dev.resids aic <- family$aic linkfun <- family$linkfun dmu.deta <- family$mu.eta linkinv <- family$linkinv if (!is.function(variance) || !is.function(linkinv)) stop("'family' argument seems not to be a valid family object") if (EMPTY) { return(glm.fit(x = x, y = y, weights = weights, start = start, etastart = etastart, mustart = mustart, offset = offset, family = family, control = control, intercept = intercept)) } valideta <- family$valideta if (is.null(valideta)) valideta <- function(eta) TRUE validmu <- family$validmu if (is.null(validmu)) validmu <- function(mu) TRUE if (is.null(mustart)) { eval(family$initialize) } else { mukeep <- mustart eval(family$initialize) mustart <- mukeep } ## Suggestion by Kurt Hornik to reset the "warn" option value to ## what the user has set prior to the execution of brglm.fit warnValue <- options(warn = -1) cur.repr <- modifications(family, pl = pl) ### Find rows with zero weight nonzero.w <- which(weights!=0) y.count <- y * weights if (is.null(control.brglm$br.consts)) { wt <- weights + nvars/nobs y.adj <- (y.count + 0.5*nvars/nobs)/wt } else { wt <- weights + 2*control.brglm$br.consts y.adj <- (y.count + control.brglm$br.consts)/wt } # Find any aliased out parameters after the removal of any zero weight observations temp.fit <- glm.fit(x = x[nonzero.w,], y = y.adj[nonzero.w], weights = wt[nonzero.w], start = start, etastart = etastart[nonzero.w], mustart = mustart[nonzero.w], offset = offset[nonzero.w], family = family, control = control, intercept = intercept) redundant <- is.na(temp.fit$coefficients) # Remove columns corresponding to aliased out parameters if (any(redundant)) { x <- x[, -which(redundant), drop = FALSE] nvars <- nvars - sum(redundant) } # Refit to match the dimension of the original data set temp.fit <- glm.fit(x = x, y = y.adj, weights = wt, start = start, etastart = etastart, mustart = mustart, offset = offset, family = family, control = control, intercept = intercept) nIter <- 0 test <- TRUE x.t <- t(x) while (test & (nIter < control.brglm$br.maxit)) { nIter <- nIter + 1 ps <- temp.fit$fitted.values etas <- linkfun(ps) ww <- rep(0, nobs) ww[nonzero.w] <- temp.fit$weights[nonzero.w]/wt[nonzero.w] * weights[nonzero.w] W.X <- sqrt(ww[nonzero.w]) * x[nonzero.w, ] XWXinv <- chol2inv(chol(crossprod(W.X))) hats <- gethats(nobs, nvars, x.t, XWXinv, ww) #hats <- diag(x%*%XWXinv%*%t(ww * x)) cur.model <- cur.repr(ps) wt <- weights + hats * cur.model$at y.adj <- rep(0, nobs) y.adj[nonzero.w] <- (y.count[nonzero.w] + hats[nonzero.w] * cur.model$ar[nonzero.w])/wt[nonzero.w] temp.fit <- glm.fit(x = x, y = y.adj, weights = wt, etastart = etas, offset = offset, family = family, control = control, intercept = intercept) modscore <- t(dmu.deta(etas)/variance(ps) * x) %*% ((y.adj - ps) * wt) if (control.brglm$br.trace) { cat("Iteration:", nIter, "\n") cat("Modified scores:", modscore, "\n") } test <- sum(abs(modscore)) > control.brglm$br.epsilon } options(warnValue) temp.fit$converged <- nIter < control.brglm$br.maxit if (!temp.fit$converged) warning("Iteration limit reached") temp.fit$ModifiedScores <- c(modscore) ww <- rep(0, nobs) ww[nonzero.w] <- temp.fit$weights[nonzero.w]/wt[nonzero.w] * 
weights[nonzero.w] temp.fit$weights <- ww W.X <- sqrt(ww[nonzero.w]) * x[nonzero.w, ] temp.fit$FisherInfo <- crossprod(W.X) XWXinv <- chol2inv(chol(temp.fit$FisherInfo)) temp.fit$hats <- gethats(nobs, nvars, x.t, XWXinv, ww) temp.fit$qr <- qr(W.X) temp.fit$nIter <- nIter temp.fit$prior.weights <- weights temp.fit$y <- y temp.fit$deviance <- sum(dev.resids(temp.fit$y, temp.fit$fitted.values, temp.fit$prior.weights)) temp.fit$cur.model <- cur.model temp.fit$redundant <- redundant aic <- family$aic aic.model <- aic(y, n, ps, weights, temp.fit$deviance) temp.fit$aic <- aic.model + 2 * temp.fit$rank temp.fit } `print.brglm` <- function (x, digits = max(3, getOption("digits") - 3), ...) { if (x$method == "glm.fit" | !(nPars <- length(coef(x)))) { class(x) <- class(x)[-match("brglm", class(x))] return(print(x, digits, ...)) } cat("\nCall: ", deparse(x$call), "\n\n") if (nPars) { cat("Coefficients") if (is.character(co <- x$contrasts)) cat(" [contrasts: ", apply(cbind(names(co), co), 1, paste, collapse = "="), "]") cat(":\n") print.default(format(x$coefficients, digits = digits), print.gap = 2, quote = FALSE) } else cat("No coefficients\n\n") cat("\nDegrees of Freedom:", x$df.null, "Total (i.e. Null); ", x$df.residual, "Residual\n") if (nchar(mess <- naprint(x$na.action))) cat(" (", mess, ")\n", sep = "") if (!is.null(x$penalized.deviance)) cat("Deviance:\t ", format(round(x$deviance, digits)), "\nPenalized Deviance:", format(round(x$penalized.deviance, digits)), "\tAIC:", format(round(x$aic, digits)), "\n") else cat("Deviance:\t ", format(round(x$deviance, digits)), "\tAIC:", format(round(x$aic, digits)), "\n") invisible(x) } `print.summary.brglm` <- function (x, digits = max(3, getOption("digits") - 3), symbolic.cor = x$symbolic.cor, signif.stars = getOption("show.signif.stars"), ...) { cat("\nCall:\n") cat(paste(deparse(x$call), sep = "\n", collapse = "\n"), "\n\n", sep = "") if (length(x$aliased) == 0) { cat("\nNo Coefficients\n") } else { df <- if ("df" %in% names(x)) x[["df"]] else NULL if (!is.null(df) && (nsingular <- df[3] - df[1])) cat("\nCoefficients: (", nsingular, " not defined because of singularities)\n", sep = "") else cat("\nCoefficients:\n") coefs <- x$coefficients if (!is.null(aliased <- x$aliased) && any(aliased)) { cn <- names(aliased) coefs <- matrix(NA, length(aliased), 4, dimnames = list(cn, colnames(coefs))) coefs[!aliased, ] <- x$coefficients } printCoefmat(coefs, digits = digits, signif.stars = signif.stars, na.print = "NA", ...) 
} cat("\n(Dispersion parameter for ", x$family$family, " family taken to be ", format(x$dispersion), ")\n\n", apply(cbind(paste(format(c("Null", "Residual"), justify = "right"), "deviance:"), format(unlist(x[c("null.deviance", "deviance")]), digits = max(5, digits + 1)), " on", format(unlist(x[c("df.null", "df.residual")])), " degrees of freedom\n"), 1, paste, collapse = " "), sep = "") if (!is.null(x$penalized.deviance)) cat("Penalized deviance:", format(round(x$penalized.deviance, digits = max(5, digits + 1))), "\n") if (nchar(mess <- naprint(x$na.action))) cat(" (", mess, ")\n", sep = "") cat("AIC: ", format(x$aic, digits = max(4, digits + 1)), "\n") correl <- x$correlation if (!is.null(correl)) { p <- NCOL(correl) if (p > 1) { cat("\nCorrelation of Coefficients:\n") if (is.logical(symbolic.cor) && symbolic.cor) { print(symnum(correl, abbr.colnames = NULL)) } else { correl <- format(round(correl, 2), nsmall = 2, digits = digits) correl[!lower.tri(correl)] <- "" print(correl[-1, -p, drop = FALSE], quote = FALSE) } } } cat("\n") invisible(x) } `summary.brglm` <- function (object, dispersion = NULL, correlation = FALSE, symbolic.cor = FALSE, ...) { if (object$method == "glm.fit") return(summary.glm(object, dispersion = NULL, correlation = FALSE, symbolic.cor = FALSE, ...)) df.r <- object$df.residual if (is.null(dispersion)) dispersion <- 1 aliased <- is.na(coef(object)) p <- object$rank if (p > 0) { p1 <- 1:p Qr <- object$qr coef.p <- object$coefficients[Qr$pivot[p1]] covmat.unscaled <- chol2inv(chol(object$FisherInfo)) covmat <- dispersion * covmat.unscaled var.cf <- diag(covmat) s.err <- sqrt(var.cf) tvalue <- coef.p/s.err dn <- c("Estimate", "Std. Error") pvalue <- 2 * pnorm(-abs(tvalue)) coef.table <- cbind(coef.p, s.err, tvalue, pvalue) dimnames(coef.table) <- list(names(coef.p), c(dn, "z value", "Pr(>|z|)")) df.f <- NCOL(Qr$qr) } else { coef.table <- matrix(, 0, 4) dimnames(coef.table) <- list(NULL, c("Estimate", "Std. Error", "t value", "Pr(>|t|)")) covmat.unscaled <- covmat <- matrix(, 0, 0) df.f <- length(aliased) } keep <- match(c("call", "terms", "family", "deviance", "aic", "contrasts", "df.residual", "null.deviance", "df.null", "iter", "na.action", "penalized.deviance"), names(object), 0) ans <- c(object[keep], list(deviance.resid = residuals(object, type = "deviance"), coefficients = coef.table, aliased = aliased, dispersion = dispersion, df = c(object$rank, df.r, df.f), cov.unscaled = covmat.unscaled, cov.scaled = covmat)) if (correlation && p > 0) { dd <- sqrt(diag(covmat.unscaled)) ans$correlation <- covmat.unscaled/outer(dd, dd) ans$symbolic.cor <- symbolic.cor } class(ans) <- "summary.brglm" return(ans) } brglm/R/profile.brglm.R0000644000176200001440000000563213450666662014467 0ustar liggesusers`print.profile.brglm` <- function (x, ...) { cat("'level' was set to", attr(x, "level"), "\n") cat("Methods that apply:\n") cat("'confint' 'plot' 'pairs'\n") } `profile.brglm` <- function (fitted, gridsize = 10, stdn = 5, stepsize = 0.5, level = 0.95, which = 1:length(coef(fitted)), verbose = TRUE, zero.bound = 1e-08, scale = FALSE, ...) 
{ notNA <- !is.na(fitted$coefficients) if (level <= 0 | level >= 1) stop("invalid 'level'.") if (fitted$method == "glm.fit") { if (verbose) cat("Profiling the ordinary deviance for the supplied fit...\n") res1 <- profileModel(fitted, gridsize = gridsize, stdn = stdn, stepsize = stepsize, grid.bounds = NULL, quantile = qchisq(level, 1), objective = "ordinaryDeviance", agreement = TRUE, verbose = FALSE, trace.prelim = FALSE, which = which, profTraces = TRUE, zero.bound = zero.bound, scale = scale) res2 <- NULL } else { fitted1 <- update(fitted, method = "glm.fit") Xmat <- model.matrix(fitted)[, notNA] if (verbose) cat("Profiling the ordinary deviance for the corresponding ML fit...\n") res1 <- profileModel(fitted1, gridsize = gridsize, stdn = stdn, stepsize = stepsize, grid.bounds = NULL, quantile = qchisq(level, 1), objective = "ordinaryDeviance", agreement = TRUE, verbose = FALSE, trace.prelim = FALSE, which = which, profTraces = TRUE, zero.bound = zero.bound, scale = scale) if (fitted$pl | all(fitted$family$link == "logit")) { if (verbose) cat("Profiling the penalized deviance for the supplied fit...\n") res2 <- profileModel(fitted, gridsize = gridsize, stdn = stdn, stepsize = stepsize, grid.bounds = NULL, quantile = qchisq(level, 1), objective = "penalizedDeviance", agreement = TRUE, verbose = FALSE, trace.prelim = FALSE, which = which, profTraces = TRUE, zero.bound = zero.bound, scale = scale, X = model.matrix(fitted)[,!is.na(fitted$coefficients)]) } else { if (verbose) cat("Profiling the modified score statistic for the supplied fit...\n") res2 <- profileModel(fitted, gridsize = gridsize, stdn = stdn, stepsize = stepsize, grid.bounds = NULL, quantile = qchisq(level, 1), objective = "modifiedScoreStatistic", agreement = TRUE, verbose = FALSE, trace.prelim = FALSE, which = which, profTraces = TRUE, zero.bound = zero.bound, scale = scale, X = model.matrix(fitted)[,!is.na(fitted$coefficients)]) } } res <- list(profilesML = res1, profilesBR = res2) attr(res, "level") <- level class(res) <- "profile.brglm" res } brglm/R/aaa.R0000644000176200001440000000070214040310424012412 0ustar liggesusers.onAttach <- function(libname, pkgname) { packageStartupMessage("'brglm' will gradually be superseded by the 'brglm2' R package (https://cran.r-project.org/package=brglm2), which provides utilities for mean and median bias reduction for all GLMs.\n Methods for the detection of separation and infinite estimates in binomial-response models are provided by the 'detectseparation' R package (https://cran.r-project.org/package=detectseparation).") } brglm/R/gethats.R0000644000176200001440000000035213450666662013356 0ustar liggesusers`gethats` <- function (nobs, nvars, x.t, XWXinv, ww) { .C("hatsc", n = as.integer(nobs), p = as.integer(nvars), x = as.double(x.t), invfisherinf = as.double(XWXinv), w = as.double(ww), hat = double(nobs))$hat } brglm/R/brglm.control.R0000644000176200001440000000066313450666662014506 0ustar liggesusers`brglm.control` <- function (br.epsilon = 1e-08, br.maxit = 100, br.trace = FALSE, br.consts = NULL, ...) { if (!is.numeric(br.epsilon) || br.epsilon <= 0) stop("value of 'epsilon' must be > 0") if (!is.numeric(br.maxit) || br.maxit <= 0) stop("maximum number of iterations must be > 0") list(br.epsilon = br.epsilon, br.maxit = br.maxit, br.trace = br.trace, br.consts = br.consts) } brglm/R/glm.control1.R0000644000176200001440000000044713450666662014243 0ustar liggesusers## 'glm.control1' is a minor modification of 'glm.conrol' ## The only different is the addition of a ... 
argument ## Ioannis Kosmidis [15/02/2008] `glm.control1` <- function (epsilon = 1e-08, maxit = 25, trace = FALSE, ...) { glm.control(epsilon, maxit, trace) } brglm/R/modifications.R0000644000176200001440000000615013450666662014551 0ustar liggesusers`checkModifications` <- function (fun, Length = 100) { p <- seq(.Machine$double.neg.eps, 1 - 1e-10, length = Length) te <- fun(p) if (!is.list(te)) stop("The result should be a list of length two.") if (length(te) != 2) stop("The result should be a list of length two.") if (any(is.na(match(names(te), c("ar", "at"))))) stop("The result should be a list with elements 'ar' and'at'.") if (length(te$ar) != Length) stop("'ar' should be of the same length as 'p'") if (length(te$at) != Length) stop("'at' should be of the same length as 'p'") if (any(te$ar >= te$at)) stop("'ar' cannot take larger values than 'at'") if (any(te$ar < 0)) stop("'ar' cannot be negative") if (any(te$ar < 0)) stop("'at' cannot be negative") plot(p, te$at, ylim = c(0, 10), type = "l") points(p, te$ar, type = "l", col = "grey") drop(TRUE) } `modifications` <- function (family, pl = FALSE) { if (is.character(family)) family <- get(family, mode = "function", envir = parent.frame()) if (is.function(family)) family <- family() distr.link <- paste(family$family, family$link[1], sep = ".") distr.link <- gsub(pattern = "[(]", x = distr.link, replacement = ".") distr.link <- gsub(pattern = "[)]", x = distr.link, replacement = ".") if (pl) { out <- switch(distr.link, binomial.logit = function(p) { etas <- family$linkfun(p) list(ar = 0.5 * p/p, at = 1 * p/p) }, binomial.probit = function(p) { etas <- family$linkfun(p) list(ar = p * (1 - etas * (etas < 0)/dnorm(etas)), at = etas * ((etas >= 0) * (1 - p) - (etas < 0) * p)/dnorm(etas) + 0.5/p) }, binomial.cloglog = function(p) { etas <- family$linkfun(p) list(ar = -p/log(1 - p), at = 0.5/p) }, binomial.cauchit = function(p) { etas <- family$linkfun(p) list(ar = -2 * pi * etas * p * (etas < 0) + (p - 0.5) * (etas >= 0) + p, at = 2 * pi * etas * ((etas >= 0) - p) - (p - 0.5)/p * (etas < 0) + 1) }, NULL) if (is.null(out)) out <- match.fun("mpl.custom.family") } else { out <- switch(distr.link, binomial.logit = function(p) { etas <- family$linkfun(p) list(ar = 0.5 * p/p, at = 1 * p/p) }, binomial.probit = function(p) { etas <- family$linkfun(p) list(ar = -0.5 * p * etas * (etas < 0)/dnorm(etas) + p, at = 0.5 * etas * ((etas >= 0) - p)/dnorm(etas) + 1) }, binomial.cloglog = function(p) { etas <- family$linkfun(p) list(ar = -0.5 * p/log(1 - p) + p, at = 0.5 * p/p + 1) }, binomial.cauchit = function(p) { etas <- family$linkfun(p) list(ar = -pi * etas * p * (etas < 0) + p, at = pi * etas * ((etas >= 0) - p) + 1) }, NULL) if (is.null(out)) out <- match.fun("br.custom.family") } out } brglm/R/confint.brglm.R0000644000176200001440000000501013450666662014455 0ustar liggesusers`confint.brglm` <- function (object, parm = 1:length(coef(object)), level = 0.95, verbose = TRUE, endpoint.tolerance = 0.001, max.zoom = 100, zero.bound = 1e-08, stepsize = 0.5, stdn = 5, gridsize = 10, scale = FALSE, method = "smooth", ci.method = "union", n.interpolations = 100, ...) 
{ prof <- profile.brglm(object, gridsize = 10, stdn = stdn, stepsize = stepsize, grid.bounds = NULL, level = level, which = parm, verbose = verbose, zero.bound = zero.bound, scale = scale) ci <- confint.profile.brglm(prof, method = method, ci.method = ci.method, endpoint.tolerance = endpoint.tolerance, max.zoom = max.zoom, n.interpolations = n.interpolations, verbose = verbose) drop(ci) } `confint.profile.brglm` <- function (object, parm, level = 0.95, method = "smooth", ci.method = "union", endpoint.tolerance = 0.001, max.zoom = 100, n.interpolations = 100, verbose = TRUE, ...) { alpha <- 1 - attr(object, "level") if (!(ci.method %in% c("union", "mean"))) stop("Invalid 'ci.method'.") if (is.null(object$profilesBR)) { ci <- profConfint(object$profilesML, method = method, endpoint.tolerance = endpoint.tolerance, max.zoom = max.zoom, n.interpolations = n.interpolations, verbose = FALSE) } else { if (verbose) cat("Calculating confidence intervals for the ML fit using deviance profiles...\n") ci1 <- profConfint(object$profilesML, method = method, endpoint.tolerance = endpoint.tolerance, max.zoom = max.zoom, n.interpolations = n.interpolations, verbose = FALSE) fit <- object$profilesBR$fit if (verbose) { if (fit$pl | all(fit$family$link == "logit")) cat("Calculating confidence intervals for the BR fit using penalized likelihood profiles...\n") else cat("Calculating confidence intervals for the BR fit using modified score statistic profiles...\n") } ci2 <- profConfint(object$profilesBR, method = method, endpoint.tolerance = endpoint.tolerance, max.zoom = max.zoom, n.interpolations = n.interpolations, verbose = FALSE) ci <- switch(ci.method, union = cbind(pmin(ci1[, 1], ci2[, 1]), pmax(ci1[, 2], ci2[, 2])), mean = (ci1 + ci2)/2) } profNames <- names(object$profilesML$profiles) dimnames(ci) <- list(profNames, paste(c(alpha/2, 1 - alpha/2) * 100, "%")) attr(ci, "profileModel object") <- NULL ci } brglm/R/plot.profile.brglm.R0000644000176200001440000000336013450671366015435 0ustar liggesuserspairs.profile.brglm <- function (x, colours = 2:3, ...) { if (is.null(x$profilesBR)) { pairs(x$profilesML, colours = colours, title = "Ordinary deviance", ...) } else { pairs(x$profilesML, colours = colours, title = "Ordinary deviance", ...) getOption("device")() fit <- x$profilesBR$fit tt <- if (fit$pl | all(fit$family$link == "logit")) "Penalized deviance" else "Modified score statistic" pairs(x$profilesBR, colours = colours, title = tt, ...) } } plot.profile.brglm <- function (x, signed = FALSE, interpolate = TRUE, n.interpolations = 100, print.grid.points = FALSE, ...) { if (is.null(x$profilesBR)) { plot(x$profilesML, cis = NULL, signed = signed, interpolate = interpolate, n.interpolations = n.interpolations, print.grid.points = print.grid.points, title = "Ordinary deviance", ...) } else { plot(x$profilesML, cis = NULL, signed = signed, interpolate = interpolate, n.interpolations = n.interpolations, print.grid.points = print.grid.points, title = "Ordinary deviance", ...) getOption("device")() fit <- x$profilesBR$fit tt <- if (fit$pl | all(fit$family$link == "logit")) "Penalized deviance" else "Modified score statistic" plot(x$profilesBR, cis = NULL, signed = signed, interpolate = interpolate, n.interpolations = n.interpolations, print.grid.points = print.grid.points, title = tt, ...) 
} } brglm/R/separation.detection.R0000644000176200001440000000166014040310424016016 0ustar liggesusers`separation.detection` <- function (fit, nsteps = 30) { .Deprecated(msg = "'separation.detection' will be removed from 'brglm' at version 0.8. Comprehensive methods for the detection of infinite estimates in binomial-response models are provided by the 'detectseparation' R package (https://cran.r-project.org/package=detectseparation).") fit.class <- class(fit)[1] if (fit.class != "glm") stop("Only objects of class 'glm' are accepted.") eps <- .Machine$double.eps betasNames <- names(betas <- coef(fit)) noNA <- !is.na(betas) stdErrors <- matrix(0, nsteps, length(betas)) for (i in 1:nsteps) { suppressWarnings(temp.fit <- update(fit, control = glm.control(maxit = i, epsilon = eps))) stdErrors[i, noNA] <- summary(temp.fit)$coef[betasNames[noNA], "Std. Error"] } res <- sweep(stdErrors, 2, stdErrors[1, ], "/") colnames(res) <- names(coef(fit)) res } brglm/R/profileObjectives.R0000644000176200001440000000172313450666662015400 0ustar liggesusers`modifiedScoreStatistic` <- function (fm, X, dispersion = 1) { y <- fm$y wt <- fm$prior LP <- fm$linear family <- fm$family probs <- family$linkinv(LP) dmu.deta <- family$mu.eta variance <- family$variance we <- c(wt * dmu.deta(LP)^2/variance(probs)) W.X <- sqrt(we) * X XWXinv <- chol2inv(chol(crossprod(W.X))) hats <- diag(X %*% XWXinv %*% t(we * X)) cur.model <- modifications(family, pl = fm$pl)(probs) mod.wt <- wt + c(hats * cur.model$at) y.adj <- (y * wt + hats * cur.model$ar)/mod.wt s.star <- t(c(dmu.deta(LP)/variance(probs)) * X) %*% ((y.adj - probs) * mod.wt) t(s.star) %*% XWXinv %*% s.star } `penalizedDeviance` <- function (fm, X, dispersion = 1) { Y <- fm$y LP <- fm$linear.predictor fam <- fm$family wt <- fm$prior.weights mu <- fm$fitted.values we <- fm$weights W.X <- sqrt(we) * X (sum(fam$dev.resid(Y, mu, wt)) - log(det(crossprod(W.X))))/dispersion } brglm/data/0000755000176200001440000000000013450666662012304 5ustar liggesusersbrglm/data/lizards.rda0000644000176200001440000000067613450666662014455 0ustar liggesusersMO@*hH4cbL7=qŔ?_nHV$>3y~ī*cLccW^0PHb:^/sN@Oc~B|d!>|$9}ɾ䟥3/'49([ P+K06GDCYj+Pb.mM1ApW({I\`EG.5 // for NULL #include /* FIXME: Check these declarations against the C/Fortran source code. 
*/ /* .C calls */ extern void hatsc(void *, void *, void *, void *, void *, void *); static const R_CMethodDef CEntries[] = { {"hatsc", (DL_FUNC) &hatsc, 6}, {NULL, NULL, 0} }; void R_init_brglm(DllInfo *dll) { R_registerRoutines(dll, CEntries, NULL, NULL, NULL); R_useDynamicSymbols(dll, FALSE); } brglm/NAMESPACE0000644000176200001440000000217613450666662012610 0ustar liggesusersuseDynLib("brglm",.registration=TRUE) export(modifications, checkModifications, summary.brglm, print.summary.brglm, print.brglm, brglm, brglm.fit, brglm.control, glm.control1, separation.detection, gethats, penalizedDeviance, modifiedScoreStatistic, profile.brglm, confint.brglm, confint.profile.brglm, print.profile.brglm, plot.profile.brglm, pairs.profile.brglm) S3method(print, brglm) S3method(summary, brglm) S3method(profile, brglm) S3method(confint, brglm) S3method(confint, profile.brglm) S3method(plot, profile.brglm) S3method(pairs, profile.brglm) S3method(print, profile.brglm) S3method(print, summary.brglm) importFrom("graphics", "pairs", "plot", "points") importFrom("stats", ".getXlevels", "binomial", "coef", "dnorm", "glm.control", "glm.fit", "is.empty.model", "model.extract", "model.matrix", "model.offset", "model.response", "model.weights", "naprint", "pnorm", "printCoefmat", "qchisq", "residuals", "summary.glm", "symnum", "update") import(profileModel) brglm/inst/0000755000176200001440000000000015062234356012340 5ustar liggesusersbrglm/inst/CHANGES0000644000176200001440000000641515062234356013341 0ustar liggesusersChanges since version 0.7.2 ---------------------------- * Fixed "Lost braces" note through PR https://github.com/ikosmidis/brglm/pull/1, thanks to GeraldineGomez@GitHub. Changes since version 0.7.1 ---------------------------- * Documentation updates: Updated citation to Kosmidis & Firth (2021, Biometrika), and added pointers to it in help files. * Added deprecation warning for `separation.detection()` in light of the more comprehensive utilities provided by the 'detectseparation' R package (https://cran.r-project.org/package=detectseparation). * Updated on-load message. Changes since version 0.6.2 ---------------------------- * Documentation updates: advice on AIC and citation to Kosmidis & Firth (2020, Biometrika). * Added example on how to implement maximum penalized likelihood for logistic regression when the penalty is some power of the Jeffreys' prior (`?modifications`). Changes since version 0.6.1 ---------------------------- * Updated author and maintainer details. Changes since version 0.5.8 ---------------------------- * Removed dependence of `print.brglm()` on `print.glm()`. Instead the generic method is called (thanks to Brian Ripley for contacting me on this). Changes since version 0.5.7 ---------------------------- * Implemented a fix for correctly resetting the "warn" option value to what the user has set prior to the execution of `brglm.fit()` (thanks to Kurt Hornik for contacting me on this) * Implemented a fix to avoid a warning related to the binding of n which is evaluated by `family$initialize()` in `brglm.fit()` (thanks to Kurt Hornik for contacting me on this) Changes since version 0.5.6 ---------------------------- * Minor changes in `example(modifications)` to avoid the use of `.Call` in the functions therein (thanks to Brian Ripley for contacting me on this).
* Updated contact information. Changes since version 0.5.5 ---------------------------- * Added the `br.consts` argument to `brglm.control()` which offers some handling of the starting values to the `brglm.fit()` iteration. * Updated contact and citation information. Changes since version 0.5.4 ---------------------------- * Improved constant adjustments for starting the fitting procedure in `brglm.fit()`. * Updated the references to papers in the help files. Changes since version 0.5.3 ---------------------------- * Minor corrections to the help files. * The reported AIC was incorrect, due to a minor bug. This is now corrected. * Minor modifications to the separation.detection function. * Due to a bug, `brglm()` was mishandling observations with zero weight. This is now corrected. * Some improvements to profile and `confint()` methods for `brglm` objects. * Added citation file. Changes since version 0.5.2 ---------------------------- * Corrected a bug in `brglm()` that could cause division by zero while scaling the design matrix. * Due to a bug, `brglm.fit()` was producing errors with model fits containing more than one aliased out parameter. This is now corrected (thanks to Aleks Jakulin for spotting the issue). * Corrected a bug, that could affect the predict methods when aliased out parameters appeared (thanks to Aleks Jakulin for spotting the issue). Changes since version 0.5-1: ---------------------------- * Correction of typos (sorry!) and various improvements of the help files. brglm/inst/CITATION0000644000176200001440000000212014040310424013462 0ustar liggesusersyear <- sub("-.*", "", meta$Date) note <- sprintf("R package version %s", meta$Version) citHeader("To cite package 'brglm' in publications use at least one of the following, as appropriate. The finiteness and shrinkage properties of the reduced-bias estimator that 'brglm' computes for logistic regression are in Kosmidis & Firth (2021).") c(bibentry(bibtype = "Manual", title = "{brglm}: Bias Reduction in Binary-Response Generalized Linear Models", author = c(person("Ioannis", "Kosmidis")), year = year, note = note, url = "https://cran.r-project.org/package=brglm"), bibentry(bibtype = "article", title = "Jeffreys-prior penalty, finiteness and shrinkage in binomial-response generalized linear models", author = c(person(given = "Ioannis", family = "Kosmidis"), person(given = "David", family = "Firth")), year = 2021, journal = "Biometrika", volume = 108, number = 1, pages = "71---82", url = "https://doi.org/10.1093/biomet/asaa052")) brglm/inst/Jeffreys_power.R0000644000176200001440000000237414040310424015443 0ustar liggesusers## Ioannis Kosmidis, 11 Sep 2019 library("brglm") ## Set up custom additive modifications to the responses and totals; ## see ?brglm::modifications and the examples there for details ## .const below is the multiplier of the log-determinant of the Fisher information: ## .const = 1/2 does bias reduction ## brglm will retrieve the value of .const from the global environment here ## (not neat but it works with sufficient care!)
br.custom.family <- function(p) { list(ar = .const * p/p, at = 2 * .const * p/p) } ## Set up a custom link-glm object (essentially just copying logit ## here as we only care about modifying the penalty for logit links) mylogit <- make.link("logit") mylogit$name <- "mylogit" data("lizards") ## The reduced-bias fit is .const <- 1/2 brglm(cbind(grahami, opalinus) ~ height + diameter + light + time, family = binomial(mylogit), data=lizards) ## which is the same as what brglm does by default for logistic regression brglm(cbind(grahami, opalinus) ~ height + diameter + light + time, family = binomial(logit), data=lizards) ## Stronger penalization (e.g. 5/2) can be achieved by .const <- 5/2 brglm(cbind(grahami, opalinus) ~ height + diameter + light + time, family = binomial(mylogit), data=lizards) brglm/inst/WORDLIST0000644000176200001440000000044614040310424013520 0ustar liggesusersal behaviour Biometrika br Cai DasGupta et Fahrmeir Generalised GLIM glm grahami Heinze iteratively Jeffreys Kitagawa Konishi Konishi's Lesaffre Lewinger McCullagh modelling nd Nelder Nonsynchronous nsteps opalinus Physica pml prooduced Schemper Schoener Springer Statist Tutz Verlag Whittaker brglm/man/0000755000176200001440000000000015062234757012143 5ustar liggesusersbrglm/man/profileObjectives.Rd0000644000176200001440000000377514040310424016101 0ustar liggesusers\name{profileObjectives-brglm} \alias{profileObjectives} \alias{penalizedDeviance} \alias{modifiedScoreStatistic} \title{Objectives to be profiled} \description{ Objectives that are used in \code{\link{profile.brglm}} } \usage{ penalizedDeviance(fm, X, dispersion = 1) modifiedScoreStatistic(fm, X, dispersion = 1) } \arguments{ \item{fm}{the \bold{restricted} fit.} \item{X}{the model matrix of the fit on all parameters.} \item{dispersion}{the dispersion parameter.} } \details{ These objectives follow the specifications for objectives in the \pkg{profileModel} package and are used from \code{profile.brglm}. \code{penalizedDeviance} returns a deviance-like value corresponding to a likelihood function penalized by Jeffreys invariant prior. It has been used by Heinze & Schemper (2002) and by Bull et. al. (2002) for the construction of confidence intervals for the bias-reduced estimates in logistic regression. The \code{X} argument is the model matrix of the full (\bold{not} the restricted) fit. \code{modifiedScoreStatistic} mimics \code{\link[profileModel]{RaoScoreStatistic}} in \pkg{profileModel}, but with the ordinary scores replaced with the modified scores used for bias reduction. The argument \code{X} has the same interpretation as for \code{penalizedDeviance}. } \value{ A scalar. } \references{ Kosmidis I. and Firth D. (2021). Jeffreys-prior penalty, finiteness and shrinkage in binomial-response generalized linear models. \emph{Biometrika}, \bold{108}, 71--82. Bull, S. B., Lewinger, J. B. and Lee, S. S. F. (2007). Confidence intervals for multinomial logistic regression in sparse data. \emph{Statistics in Medicine} \bold{26}, 903--918. Heinze, G. and Schemper, M. (2002). A solution to the problem of separation in logistic regression. \emph{Statistics in Medicine} \bold{21}, 2409--2419. 
} \author{Ioannis Kosmidis, \email{ioannis.kosmidis@warwick.ac.uk}} \seealso{\code{\link[profileModel]{profileModel}}, \code{\link{profile.brglm}}.} \keyword{models} brglm/man/separation.detection.Rd0000644000176200001440000000467014040310424016540 0ustar liggesusers\name{separation.detection} \alias{separation.detection} \title{Separation Identification.} \description{ Provides a tool for identifying whether or not separation has occurred. } \usage{ separation.detection(fit, nsteps = 30) } \arguments{ \item{fit}{the result of a \code{\link{glm}} call.} \item{nsteps}{Starting from \code{maxit = 1}, the GLM is refitted for \code{maxit = 2}, \code{maxit = 3}, \ldots, \code{maxit = nsteps}. Default value is 30.} } \details{ Identifies separated cases for binomial-response GLMs, by refitting the model. At each iteration the maximum number of allowed IWLS iterations is fixed starting from 1 to \code{nsteps} (by setting \code{control = glm.control(maxit = j)}, where \code{j} takes values 1, \ldots, nsteps in \code{\link{glm}}). For each value of \code{maxit}, the estimated asymptotic standard errors are divided by the corresponding ones obtained for \code{control = glm.control(maxit = 1)}. Based on the results in Lesaffre & Albert (1989), if the sequence of ratios in any column of the resulting matrix diverges, then separation occurs and the maximum likelihood estimate for the corresponding parameter has value minus or plus infinity. } \value{ A matrix of dimension \code{nsteps} by \code{length(coef(fit))}, that contains the ratios of the estimated asymptotic standard errors. } \references{ Kosmidis I. and Firth D. (2021). Jeffreys-prior penalty, finiteness and shrinkage in binomial-response generalized linear models. \emph{Biometrika}, \bold{108}, 71--82. Lesaffre, E. and Albert, A. (1989). Partial separation in logistic discrimination. \emph{J. R. Statist. Soc. \bold{B}}, \bold{51}, 109--116. } \author{Ioannis Kosmidis, \email{ioannis.kosmidis@warwick.ac.uk}} \examples{ ## Begin Example y <- c(1,1,0,0) totals <- c(2,2,2,2) x1 <- c(1,0,1,0) x2 <- c(1,1,0,0) m1 <- glm(y/totals ~ x1 + x2, weights = totals, family = binomial()) # No warning from glm... m1 # However estimates for (Intercept) and x2 are unusually large in # absolute value... Investigate further: # separation.detection(m1,nsteps=30) # Note that the values in the column for (Intercept) and x2 diverge, # while those for x1 converge. Hence, separation has occurred and the # maximum likelihood estimate for (Intercept) is minus infinity and # for x2 is plus infinity. The signs for infinity are taken from the # signs of (Intercept) and x2 in coef(m1). ## End Example } \keyword{models} \keyword{utilities} brglm/man/brglm.control.Rd0000644000176200001440000000370714040310424015200 0ustar liggesusers\name{brglm.control} \alias{brglm.control} \title{Auxiliary for Controlling BRGLM Fitting} \description{ Auxiliary function as user interface for \code{\link{brglm}} fitting. Typically only used when calling \code{brglm} or \code{brglm.fit}. } \usage{ brglm.control(br.epsilon = 1e-08, br.maxit = 100, br.trace=FALSE, br.consts = NULL, ...)
} \arguments{ \item{br.epsilon}{positive convergence tolerance for the iteration described in \code{\link{brglm.fit}}.} \item{br.maxit}{integer giving the maximum number of iterations for the iteration in \code{\link{brglm.fit}}.} \item{br.trace}{logical indicating if output should be produced for each iteration.} \item{br.consts}{a (small) positive constant or a vector of such.} \item{\dots}{further arguments passed to or from other methods.} } \details{ If \code{br.trace=TRUE} then for each iteration the iteration number and the current value of the modified scores is \code{\link{cat}}'ed. If \code{br.consts} is specified then \code{br.consts} is added to the original binomial counts and \code{2*br.consts} to the binomial totals. Then the model is fitted to the adjusted data to provide starting values for the iteration in \code{\link{brglm.fit}}. If \code{br.consts = NULL} (default) then \code{\link{brglm.fit}} adjusts the responses and totals by half of "number of parameters"/"number of observations" and twice that, respectively. } \value{ A list with the arguments as components. } \references{ Kosmidis I. and Firth D. (2021). Jeffreys-prior penalty, finiteness and shrinkage in binomial-response generalized linear models. \emph{Biometrika}, \bold{108}, 71--82. Kosmidis, I. (2007). Bias reduction in exponential family nonlinear models. \emph{PhD Thesis}, Department of Statistics, University of Warwick. } \author{Ioannis Kosmidis, \email{ioannis.kosmidis@warwick.ac.uk}} \seealso{ \code{\link{brglm.fit}}, the fitting procedure used by \code{\link{brglm}}. } \keyword{iteration} brglm/man/gethats.Rd0000644000176200001440000000155114040310424014050 0ustar liggesusers\name{gethats} \alias{gethats} \title{Calculates the Leverages for a GLM through a C Routine} \description{ Calculates the leverages of a GLM through a C routine. It is intended to be used only within \code{\link{brglm.fit}}. } \usage{ gethats(nobs, nvars, x.t, XWXinv, ww) } \arguments{ \item{nobs}{The number of observations, i.e. \code{dim(X)[1]}.} \item{nvars}{The number of parameters, i.e. \code{dim(X)[2]}, where \code{X} is the model matrix, excluding the columns that correspond to aliased parameters.} \item{x.t}{\code{t(X)}.} \item{XWXinv}{The inverse of the Fisher information.} \item{ww}{The \sQuote{working} weights.} } \value{ A vector containing the diagonal elements of the hat matrix. } \author{Ioannis Kosmidis, \email{ioannis.kosmidis@warwick.ac.uk}} \seealso{\code{\link{hatvalues}}, \code{\link{brglm.fit}}} \keyword{regression} brglm/man/confint.brglm.Rd0000644000176200001440000001653014040310424015160 0ustar liggesusers\name{confint.brglm} \alias{confint.brglm} \alias{confint.profile.brglm} \title{Computes confidence intervals of parameters for bias-reduced estimation} \description{ Computes confidence intervals for one or more parameters when estimation is performed using \code{\link{brglm}}. The resulting confidence intervals are based on manipulation of the profiles of the deviance, the penalized deviance and the modified score statistic (see \code{\link{profileObjectives}}). } \usage{ \method{confint}{brglm}(object, parm = 1:length(coef(object)), level = 0.95, verbose = TRUE, endpoint.tolerance = 0.001, max.zoom = 100, zero.bound = 1e-08, stepsize = 0.5, stdn = 5, gridsize = 10, scale = FALSE, method = "smooth", ci.method = "union", n.interpolations = 100, ...)
\method{confint}{profile.brglm}(object, parm, level = 0.95, method = "smooth", ci.method = "union", endpoint.tolerance = 0.001, max.zoom = 100, n.interpolations = 100, verbose = TRUE, ...) } \arguments{ \item{object}{an object of class \code{"brglm"} or \code{"profile.brglm"}.} \item{parm}{either a numeric vector of indices or a character vector of names, specifying the parameters for which confidence intervals are to be estimated. The default is all parameters in the fitted model. When \code{object} is of class \code{"profile.brglm"}, \code{parm} is not used and confidence intervals are returned for all the parameters for which profiling took place.} \item{level}{the confidence level required. The default is 0.95. When \code{object} is of class \code{"profile.brglm"}, \code{level} is not used and the level attribute of \code{object} is used instead.} \item{verbose}{logical. If \code{TRUE} (default) progress indicators are printed during the progress of calculating the confidence intervals.} \item{endpoint.tolerance}{as in \code{\link[profileModel]{confintModel}}.} \item{max.zoom}{as in \code{\link[profileModel]{confintModel}}.} \item{zero.bound}{as in \code{\link[profileModel]{confintModel}}.} \item{stepsize}{as in \code{\link[profileModel]{confintModel}}.} \item{stdn}{as in \code{\link[profileModel]{confintModel}}.} \item{gridsize}{as in \code{\link[profileModel]{confintModel}}.} \item{scale}{as in \code{\link[profileModel]{confintModel}}.} \item{method}{as in \code{\link[profileModel]{confintModel}}.} \item{ci.method}{The method to be used for the construction of confidence intervals. It can take values \code{"union"} (default) and \code{"mean"} (see Details).} \item{n.interpolations}{as in \code{\link[profileModel]{confintModel}}.} \item{\dots}{further arguments to or from other methods.} } \details{ In the case of logistic regression, Heinze & Schemper (2002) and Bull et al. (2007) suggest the use of confidence intervals based on the profiles of the penalized likelihood, when estimation is performed using maximum penalized likelihood. Kosmidis (2007) illustrated that because of the shape of the penalized likelihood, confidence intervals based on the penalized likelihood could exhibit low or even zero coverage for hypothesis testing on large parameter values and also misbehave, exhibiting severe oscillation (see Brown et al., 2001); see also Kosmidis & Firth (2021) for discussion of the shrinkage implied by bias reduction and what that entails for inference. Kosmidis (2007) suggested an alternative confidence interval that is based on the union of the confidence intervals obtained by profiling the ordinary deviance for the maximum likelihood fit and by profiling the penalized deviance for the maximum penalized likelihood fit. Such confidence intervals, despite being slightly conservative, exhibit less oscillation and avoid the loss of coverage. Another possibility is to use the mean of the corresponding endpoints instead of \dQuote{union}. Yet unpublished simulation studies suggest that such confidence intervals are not as conservative as the \dQuote{union} based intervals but exhibit more oscillation, which however is not as severe as in the case of the penalized likelihood based ones. The properties of the \dQuote{union} and \dQuote{mean} confidence intervals extend to all the links that are supported by \code{\link{brglm}}, when estimation is performed using maximum penalized likelihood.
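As an illustration of how the two \code{ci.method} choices combine endpoints, the following sketch mirrors the computation performed by \code{confint.profile.brglm}; \code{ci1} and \code{ci2} are illustrative names (not objects returned by the package) for the matrices of profile-based endpoints, one row per parameter, for the maximum likelihood and the bias-reduced fit, respectively.
\preformatted{
## "union": the widest interval supported by either set of profiles
ci.union <- cbind(pmin(ci1[, 1], ci2[, 1]), pmax(ci1[, 2], ci2[, 2]))
## "mean": the average of the corresponding endpoints
ci.mean <- (ci1 + ci2)/2
}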
In the case of estimation using modified scores and for models other than logistic, where there is not an objective that is maximized, the profiles of the penalized likelihood for the construction of the \dQuote{union} and \dQuote{mean} confidence intervals can be replaced by the profiles of modified score statistic (see \code{\link{profileObjectives}}). The \code{confint} method for \code{brglm} and \code{profile.brglm} objects implements the \dQuote{union} and \dQuote{mean} confidence intervals. The method is chosen through the \code{ci.method} argument. } \value{ A matrix with columns the endpoints of the confidence intervals for the specified (or profiled) parameters. } \references{ Kosmidis I. and Firth D. (2021). Jeffreys-prior penalty, finiteness and shrinkage in binomial-response generalized linear models. \emph{Biometrika}, \bold{108}, 71--82. Brown, L. D., Cai, T. T. and DasGupta, A. (2001). Interval estimation for a binomial proportion (with discussion). \emph{Statistical Science} \bold{16}, 101--117. Bull, S. B., Lewinger, J. B. and Lee, S. S. F. (2007). Confidence intervals for multinomial logistic regression in sparse data. \emph{Statistics in Medicine} \bold{26}, 903--918. Heinze, G. and Schemper, M. (2002). A solution to the problem of separation in logistic regression. \emph{Statistics in Medicine} \bold{21}, 2409--2419. Kosmidis, I. (2007). Bias reduction in exponential family nonlinear models. \emph{PhD Thesis}, Department of Statistics, University of Warwick. } \author{Ioannis Kosmidis, \email{ioannis.kosmidis@warwick.ac.uk}} \seealso{\code{\link[profileModel]{confintModel}}, \code{\link[profileModel]{profileModel}}, \code{\link{profile.brglm}}. } \examples{ ## Begin Example 1 \dontrun{ library(MASS) data(bacteria) contrasts(bacteria$trt) <- structure(contr.sdif(3), dimnames = list(NULL, c("drug", "encourage"))) # fixed effects analyses m.glm.logit <- brglm(y ~ trt * week, family = binomial, data = bacteria, method = "glm.fit") m.brglm.logit <- brglm(y ~ trt * week, family = binomial, data = bacteria, method = "brglm.fit") p.glm.logit <- profile(m.glm.logit) p.brglm.logit <- profile(m.brglm.logit) # plot(p.glm.logit) plot(p.brglm.logit) # confidence intervals for the glm fit based on the profiles of the # ordinary deviance confint(p.glm.logit) # confidence intervals for the brglm fit confint(p.brglm.logit, ci.method = "union") confint(p.brglm.logit, ci.method = "mean") # A cloglog link m.brglm.cloglog <- update(m.brglm.logit, family = binomial(cloglog)) p.brglm.cloglog <- profile(m.brglm.cloglog) plot(p.brglm.cloglog) confint(m.brglm.cloglog, ci.method = "union") confint(m.brglm.cloglog, ci.method = "mean") ## End example } \dontrun{ ## Begin Example 2 y <- c(1, 1, 0, 0) totals <- c(2, 2, 2, 2) x1 <- c(1, 0, 1, 0) x2 <- c(1, 1, 0, 0) m1 <- brglm(y/totals ~ x1 + x2, weights = totals, family = binomial(cloglog)) p.m1 <- profile(m1) confint(p.m1, method="zoom") } } \keyword{models} \keyword{htest} brglm/man/glm.control1.Rd0000644000176200001440000000154414040310424014732 0ustar liggesusers\name{glm.control1} \alias{glm.control1} \title{Auxiliary for Controlling BRGLM Fitting} \description{ Auxiliary function as user interface for \code{\link{brglm}} fitting. Typically only used when calling \code{brglm} or \code{brglm.fit}. } \usage{ glm.control1(epsilon = 1e-08, maxit = 25, trace = FALSE, ...) 
} \arguments{ \item{epsilon}{as in \code{\link{glm.control}}.} \item{maxit}{as in \code{\link{glm.control}}.} \item{trace}{as in \code{\link{glm.control}}.} \item{\dots}{further arguments passed to or from other methods.} } \details{ The only difference with \code{\link{glm.control}} is that \code{glm.control1} supports further arguments to be passed from other methods. However, these additional arguments have no effect on the resulting list. } \author{Ioannis Kosmidis, \email{ioannis.kosmidis@warwick.ac.uk}} \keyword{iteration} brglm/man/profile.brglm.Rd0000644000176200001440000000567415062234364015176 0ustar liggesusers\name{profile.brglm} \alias{profile.brglm} \alias{print.profile.brglm} \title{Calculate profiles for objects of class 'brglm'.} \description{ Creates \code{"profile.brglm"} objects to be used for the calculation of confidence intervals and for plotting. } \usage{ \method{profile}{brglm}(fitted, gridsize = 10, stdn = 5, stepsize = 0.5, level = 0.95, which = 1:length(coef(fitted)), verbose = TRUE, zero.bound = 1e-08, scale = FALSE, ...) } \arguments{ \item{fitted}{an object of class \code{"brglm"}.} \item{gridsize}{as in \code{\link[profileModel]{profileModel}}.} \item{stdn}{as in \code{\link[profileModel]{profileModel}}.} \item{stepsize}{as in \code{\link[profileModel]{profileModel}}.} \item{level}{\code{qchisq(level,1)} indicates the range that the profiles must cover.} \item{which}{as in \code{\link[profileModel]{profileModel}}.} \item{verbose}{as in \code{\link[profileModel]{profileModel}}.} \item{zero.bound}{as in \code{\link[profileModel]{profileModel}}.} \item{scale}{as in \code{\link[profileModel]{profileModel}}.} \item{\dots}{further arguments passed to or from other methods.} } \details{ \code{profile.brglm} calculates the profiles of the appropriate objectives to be used for the construction of confidence intervals for the bias-reduced estimates (see \code{\link{confint.brglm}} for the objectives that are profiled). } \value{ An object of class \code{"profile.brglm"} with attribute \dQuote{level} corresponding to the argument \code{level}. The object supports the methods \code{\link{print}}, \code{\link{plot}}, \code{\link{pairs}} and \code{\link{confint}} and it is a list of the components: \item{profilesML}{a \code{"profileModel"} object containing the profiles of the ordinary deviance for the maximum likelihood fit corresponding to \code{fitted}.} \item{profilesBR}{\code{NULL} if \code{method = "glm.fit"} in \code{\link{brglm}}. If \code{method = "brglm.fit"} and \code{pl = TRUE}, \code{profilesBR} is a \code{"profileModel"} object containing the profiles of the penalized deviance for the parameters of \code{fitted}. If \code{method = "brglm.fit"} and \code{pl = FALSE}, \code{profilesBR} is a \code{"profileModel"} object containing the profiles of the modified score statistic (see \code{\link{profileObjectives}}) for the parameters of \code{fitted}.} } \note{ Objects of class \code{"profile.brglm"} support the methods: \describe{ \item{\code{print}}{which prints the \code{"level"} attribute of the object, as well as the supported methods.} \item{\code{confint}}{see \code{\link{confint.brglm}}.} \item{\code{plot}}{see \code{\link{plot.profile.brglm}}.} \item{\code{pairs}}{see \code{\link{plot.profile.brglm}}.} } } \author{Ioannis Kosmidis, \email{ioannis.kosmidis@warwick.ac.uk}} \seealso{\code{\link[profileModel]{profileModel}}, \code{\link{profile.brglm}}.} \examples{ # see example in 'confint.brglm'.
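## A minimal usage sketch (with hypothetical objects 'y', 'totals' and 'x';
## not run):
## m <- brglm(y/totals ~ x, weights = totals, family = binomial)
## pm <- profile(m, level = 0.95)
## confint(pm, ci.method = "union")
## plot(pm)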
} \keyword{models} brglm/man/lizards.Rd0000644000176200001440000000260013450666662014103 0ustar liggesusers\name{lizards} \docType{data} \alias{lizards} \title{Habitat Preferences of Lizards} \usage{data(lizards)} \description{ The \code{lizards} data frame has 23 rows and 6 columns. Variables \code{grahami} and \code{opalinus} are counts of two lizard species at two different perch heights, two different perch diameters, in sun and in shade, at three times of day. } \format{ This data frame contains the following columns: \describe{ \item{\code{grahami}}{count of grahami lizards} \item{\code{opalinus}}{count of opalinus lizards} \item{\code{height}}{a factor with levels \code{"<5ft"}, \code{">=5ft"}} \item{\code{diameter}}{a factor with levels \code{"<=2in"}, \code{">2in"}} \item{\code{light}}{a factor with levels \code{"sunny"}, \code{"shady"}} \item{\code{time}}{a factor with levels \code{"early"}, \code{"midday"}, \code{"late"}} } } \source{ McCullagh, P. and Nelder, J. A. (1989) \emph{Generalized Linear Models} (2nd Edition). London: Chapman and Hall. Originally from Schoener, T. W. (1970) Nonsynchronous spatial overlap of lizards in patchy habitats. \emph{Ecology} \bold{51}, 408--418. } \examples{ data(lizards) glm(cbind(grahami, opalinus) ~ height + diameter + light + time, family = binomial, data=lizards) brglm(cbind(grahami, opalinus) ~ height + diameter + light + time, family = binomial, data=lizards) } \keyword{datasets} brglm/man/brglm.Rd0000644000176200001440000003402015062235231013520 0ustar liggesusers\name{brglm} \alias{brglm} \alias{brglm.fit} \alias{print.brglm} \alias{summary.brglm} \alias{print.summary.brglm} \title{Bias reduction in Binomial-response GLMs} \description{ Fits binomial-response GLMs using the bias-reduction method developed in Firth (1993) for the removal of the leading (\eqn{\mathop{\rm O}(n^{-1})}{O(n^{-1})}) term from the asymptotic expansion of the bias of the maximum likelihood estimator. Fitting is performed using pseudo-data representations, as described in Kosmidis (2007, Chapter 5). For estimation in binomial-response GLMs, the bias-reduction method is an improvement over traditional maximum likelihood because: \describe{ \item{-}{the bias-reduced estimator is second-order unbiased and has smaller variance than the maximum likelihood estimator, and} \item{-}{the resulting estimates and their corresponding standard errors are \bold{always} finite while the maximum likelihood estimates can be infinite (in situations where complete or quasi separation occurs); see Kosmidis & Firth (2021) for the proof of finiteness in logistic regression models.} } } \usage{ brglm(formula, family = binomial, data, weights, subset, na.action, start = NULL, etastart, mustart, offset, control.glm = glm.control1(...), model = TRUE, method = "brglm.fit", pl = FALSE, x = FALSE, y = TRUE, contrasts = NULL, control.brglm = brglm.control(...), ...) brglm.fit(x, y, weights = rep(1, nobs), start = NULL, etastart = NULL, mustart = NULL, offset = rep(0, nobs), family = binomial(), control = glm.control(), control.brglm = brglm.control(), intercept = TRUE, pl = FALSE) } \arguments{ \item{formula}{as in \code{\link{glm}}.} \item{family}{as in \code{\link{glm}}. 
\code{brglm} currently supports only the \code{"binomial"} family with links \code{"logit"}, \code{"probit"}, \code{"cloglog"}, \code{"cauchit"}.} \item{data}{as in \code{\link{glm}}.} \item{weights}{as in \code{\link{glm}}.} \item{subset}{as in \code{\link{glm}}.} \item{na.action}{as in \code{\link{glm}}.} \item{start}{as in \code{\link{glm}}.} \item{etastart}{as in \code{\link{glm}}.} \item{mustart}{as in \code{\link{glm}}.} \item{offset}{as in \code{\link{glm}}.} \item{control.glm}{\code{control.glm} replaces the \code{control} argument in \code{\link{glm}} but essentially does the same job. It is a list of parameters to control \code{\link{glm.fit}}. See the documentation of \code{glm.control1} for details.} \item{control}{same as in \code{\link{glm}}. Only available to \code{brglm.fit}.} \item{intercept}{as in \code{\link{glm}}.} \item{model}{as in \code{\link{glm}}.} \item{method}{the method to be used for fitting the model. The default method is \code{"brglm.fit"}, which uses either the modified-scores approach to estimation or maximum penalized likelihood (see the \code{pl} argument below). The standard \code{\link{glm}} methods \code{"glm.fit"} for maximum likelihood and \code{"model.frame"} for returning the model frame without any fitting, are also accepted.} \item{pl}{a logical value indicating whether the model should be fitted using maximum penalized likelihood, where the penalization is done using Jeffreys invariant prior, or using the bias-reducing modified scores. It is only used when \code{method = "brglm.fit"}. The default value is \code{FALSE} (see also the Details section).} \item{x}{as in \code{\link{glm}}.} \item{y}{as in \code{\link{glm}}.} \item{contrasts}{as in \code{\link{glm}}.} \item{control.brglm}{a list of parameters for controlling the fitting process when \code{method = "brglm.fit"}. See documentation of \code{\link{brglm.control}} for details.} \item{\dots}{further arguments passed to or from other methods.} } \details{ \code{brglm.fit} is the workhorse function for fitting the model using either the bias-reduction method or maximum penalized likelihood. If \code{method = "glm.fit"}, usual maximum likelihood is used via \code{\link{glm.fit}}. The main iteration of \code{brglm.fit} consists of the following steps: \enumerate{ \item Calculate the diagonal components of the hat matrix (see \code{\link{gethats}} and \code{\link{hatvalues}}). \item Obtain the pseudo-data representation at the current value of the parameters (see \code{\link{modifications}} for more information). \item Fit a local GLM, using \code{\link{glm.fit}} on the pseudo data. \item Adjust the quadratic weights to agree with the original binomial totals. } Iteration is repeated until either the iteration limit has been reached or the sum of the absolute values of the modified scores is less than some specified positive constant (see the \code{br.maxit} and \code{br.epsilon} arguments in \code{\link{brglm.control}}). The default value (\code{FALSE}) of \code{pl}, when \code{method = "brglm.fit"}, results in estimates that are free of any \eqn{\mathop{\rm O}(n^{-1})}{O(n^{-1})} terms in the asymptotic expansion of their bias. When \code{pl = TRUE} bias-reduction is again achieved but generally not at such order of magnitude. In the case of logistic regression the value of \code{pl} is irrelevant since maximum penalized likelihood and the modified-scores approach coincide for natural exponential families (see Firth, 1993). 
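As an informal illustration (a sketch under stated assumptions, not the actual implementation in \code{brglm.fit}): for the logit link the pseudo-data representation adds \eqn{h/2} to the responses and \eqn{h} to the binomial totals, where \eqn{h} is the leverage (see \code{\link{modifications}}). A single iteration of the scheme above can then be written in plain R as follows, assuming that \code{X}, \code{y}, \code{m} and \code{beta} hold the model matrix, the successes, the binomial totals and the current coefficients (the readjustment of the quadratic weights is omitted here): \preformatted{
p <- plogis(drop(X \%*\% beta))                       # current probabilities
h <- rowSums(qr.Q(qr(sqrt(m * p * (1 - p)) * X))^2)   # leverages (hat values)
ystar <- y + h/2                                      # pseudo-responses
mstar <- m + h                                        # pseudo-totals
## fit a weighted GLM on the pseudo-data and iterate until convergence
beta <- glm.fit(X, ystar/mstar, weights = mstar, family = binomial())$coefficients
}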
For other details see the Details section in \code{\link{glm}}. } \value{ \code{\link{brglm}} returns an object of class \code{"brglm"}. A \code{"brglm"} object inherits first from \code{"glm"} and then from \code{"lm"} and is a list containing the following components: \item{coefficients}{as in \code{\link{glm}}.} \item{residuals}{as in \code{\link{glm}}.} \item{fitted.values}{as in \code{\link{glm}}.} \item{effects}{as in \code{\link{glm}}.} \item{R}{as in \code{\link{glm}}.} \item{rank}{as in \code{\link{glm}}.} \item{qr}{as in \code{\link{glm}}.} \item{family}{as in \code{\link{glm}}.} \item{linear.predictors}{as in \code{\link{glm}}.} \item{deviance}{as in \code{\link{glm}}.} \item{aic}{as in \code{\link{glm}} (see Details).} \item{null.deviance}{as in \code{\link{glm}}.} \item{iter}{as in \code{\link{glm}}.} \item{weights}{as in \code{\link{glm}}.} \item{prior.weights}{as in \code{\link{glm}}.} \item{df.residual}{as in \code{\link{glm}}.} \item{df.null}{as in \code{\link{glm}}.} \item{y}{as in \code{\link{glm}}.} \item{converged}{as in \code{\link{glm}}.} \item{boundary}{as in \code{\link{glm}}.} \item{ModifiedScores}{the vector of the modified scores for the parameters at the final iteration. If \code{pl = TRUE} they are the derivatives of the penalized likelihood at the final iteration.} \item{FisherInfo}{the Fisher information matrix evaluated at the resulting estimates. Only available when \code{method = "brglm.fit"}.} \item{hats}{the diagonal elements of the hat matrix. Only available when \code{method = "brglm.fit"}.} \item{nIter}{the number of iterations that were required until convergence. Only available when \code{method = "brglm.fit"}.} \item{cur.model}{a list with components \code{ar} and \code{at} which contains the values of the additive modifications to the responses (\code{y}) and to the binomial totals (\code{prior.weights}) at the resulting estimates (see \code{\link{modifications}} for more information). Only available when \code{method = "brglm.fit"}.} \item{model}{as in \code{\link{glm}}.} \item{call}{as in \code{\link{glm}}.} \item{formula}{as in \code{\link{glm}}.} \item{terms}{as in \code{\link{glm}}.} \item{data}{as in \code{\link{glm}}.} \item{offset}{as in \code{\link{glm}}.} \item{control.glm}{as \code{control} in the result of \code{\link{glm}}.} \item{control.brglm}{the \code{control.brglm} argument that was passed to \code{brglm}. Only available when \code{method = "brglm.fit"}.} \item{method}{the method used for fitting the model.} \item{contrasts}{as in \code{\link{glm}}.} \item{xlevels}{as in \code{\link{glm}}.} \item{pl}{logical with the same value as the \code{pl} argument passed to \code{brglm}. Only available when \code{method = "brglm.fit"}.} } \note{ 1. Supported methods for objects of class \code{"brglm"} are: \describe{ \item{\code{\link{print}}}{through \code{print.brglm}.} \item{\code{\link{summary}}}{through \code{summary.brglm}.} \item{\code{\link{coefficients}}}{inherited from the \code{"glm"} class.} \item{\code{\link{vcov}}}{inherited from the \code{"glm"} class.} \item{\code{\link{predict}}}{inherited from the \code{"glm"} class.} \item{\code{\link{residuals}}}{inherited from the \code{"glm"} class.} } and other methods that apply to objects of class \code{"glm"}. 2. A similar implementation of the bias-reduction method could be done for every GLM, following Kosmidis (2007) (see also Kosmidis and Firth, 2009). The full set of families and links will be available in a future version.
However, bias-reduction is not generally beneficial as it is in the binomial family and it could cause inflation of the variance (see Firth, 1993). 3. Basically, the differences between maximum likelihood, maximum penalized likelihood and the modified scores approach are more apparent in small sample sizes, in sparse data sets and in cases where complete or quasi-complete separation occurs. Asymptotically (as \eqn{n} goes to infinity), the three different approaches are equivalent to first order. 4. When an offset is not present in the model, the modified-scores based estimates are usually smaller in magnitude than the corresponding maximum likelihood estimates, shrinking towards the origin of the scale imposed by the link function. Thus, the corresponding estimated asymptotic standard errors are also smaller. The same is true for the maximum penalized likelihood estimates when for example, the logit (where the maximum penalized likelihood and modified-scores approaches coincide) or the probit links are used. However, generally the maximum penalized likelihood estimates do not shrink towards the origin. In terms of mean-value parameterization, in the case of maximum penalized likelihood the fitted probabilities would shrink towards the point where the Jeffreys prior is maximized or equivalently where the quadratic weights are simultaneously maximized (see Kosmidis, 2007). 5. Implementations of the bias-reduction method for logistic regressions can also be found in the \pkg{logistf} package. In addition to the obvious advantage of \code{brglm} in the range of link functions that can be used (\code{"logit"}, \code{"probit"}, \code{"cloglog"} and \code{"cauchit"}), \code{brglm} is also more efficient computationally. Furthermore, for any user-specified link function (see the Example section of \code{\link{family}}), the user can specify the corresponding pseudo-data representation to be used within \code{brglm} (see \code{\link{modifications}} for details). } \section{Warnings}{ 1. It is not advised to use methods associated with model comparison (\code{\link{add1}}, \code{\link{drop1}}, \code{\link{anova}}, etc.) on objects of class \code{"brglm"}. Model comparison when estimation is performed using the modified scores or the penalized likelihood is an on-going research topic and will be implemented as soon as it is concluded. 2. The use of Akaike's information criterion (AIC) for model selection when \code{method = "brglm.fit"} is asymptotically valid, because the log-likelihood derivatives dominate the modification (in terms of asymptotic order). } \references{ Kosmidis I. and Firth D. (2021). Jeffreys-prior penalty, finiteness and shrinkage in binomial-response generalized linear models. \emph{Biometrika}, \bold{108}, 71--82. Bull, S. B., Lewinger, J. B. and Lee, S. S. F. (2007). Confidence intervals for multinomial logistic regression in sparse data. \emph{Statistics in Medicine} \bold{26}, 903--918. Firth, D. (1992) Bias reduction, the Jeffreys prior and {GLIM}. In \emph{Advances in GLIM and statistical modelling: Proceedings of the GLIM 92 conference, Munich}, Eds. L.~Fahrmeir, B.~Francis, R.~Gilchrist and G.Tutz, pp. 91--100. New York: Springer. Firth, D. (1992) Generalized linear models and Jeffreys priors: An iterative generalized least-squares approach. In \emph{Computational Statistics I}, Eds. Y. Dodge and J. Whittaker. Heidelberg: Physica-Verlag. Firth, D. (1993). Bias reduction of maximum likelihood estimates. \emph{Biometrika} \bold{80}, 27--38. Heinze, G. and Schemper, M. 
(2002). A solution to the problem of separation in logistic regression. \emph{Statistics in Medicine} \bold{21}, 2409--2419. Kosmidis, I. (2007). Bias reduction in exponential family nonlinear models. \emph{PhD Thesis}, Department of Statistics, University of Warwick. Kosmidis, I. and Firth, D. (2009). Bias reduction in exponential family nonlinear models. \emph{Biometrika} \bold{96}, 793--804. } \author{Ioannis Kosmidis, \email{ioannis.kosmidis@warwick.ac.uk}} \seealso{\code{\link{glm}}, \code{\link{glm.fit}}} \examples{ ## Begin Example data(lizards) # Fit the GLM using maximum likelihood lizards.glm <- brglm(cbind(grahami, opalinus) ~ height + diameter + light + time, family = binomial(logit), data=lizards, method = "glm.fit") # Now the bias-reduced fit: lizards.brglm <- brglm(cbind(grahami, opalinus) ~ height + diameter + light + time, family = binomial(logit), data=lizards, method = "brglm.fit") lizards.glm lizards.brglm # Other links update(lizards.brglm, family = binomial(probit)) update(lizards.brglm, family = binomial(cloglog)) update(lizards.brglm, family = binomial(cauchit)) # Using penalized maximum likelihood update(lizards.brglm, family = binomial(probit), pl = TRUE) update(lizards.brglm, family = binomial(cloglog), pl = TRUE) update(lizards.brglm, family = binomial(cauchit), pl = TRUE) } \keyword{models} \keyword{regression} \keyword{iteration} brglm/man/plot.profile.brglm.Rd0000644000176200001440000000254614040310424016135 0ustar liggesusers\name{plot.profile.brglm} \alias{plot.profile.brglm} \alias{pairs.profile.brglm} \title{Plot methods for 'profile.brglm' objects} \description{ \code{plot.profile.brglm} plots the objects of class \code{"profileModel"} that are contained in an object of class \code{"profile.brglm"}. \code{pairs.profile.brglm} is a diagnostic tool that plots pairwise profile traces. } \usage{ \method{plot}{profile.brglm}(x, signed = FALSE, interpolate = TRUE, n.interpolations = 100, print.grid.points = FALSE, ...) \method{pairs}{profile.brglm}(x, colours = 2:3, ...) } \arguments{ \item{x}{a \code{"profile.brglm"} object.} \item{signed}{as in \code{\link[profileModel]{plot.profileModel}}.} \item{interpolate}{as in \code{\link[profileModel]{plot.profileModel}}.} \item{n.interpolations}{as in \code{\link[profileModel]{plot.profileModel}}.} \item{print.grid.points}{as in \code{\link[profileModel]{plot.profileModel}}.} \item{colours}{as in \code{\link[profileModel]{plot.profileModel}}.} \item{\dots}{further arguments passed to or from other methods.} } \details{ See Details in \code{\link[profileModel]{plot.profileModel}}. } \author{Ioannis Kosmidis, \email{ioannis.kosmidis@warwick.ac.uk}} \seealso{\code{\link[profileModel]{plot.profileModel}}, \code{\link{profile.brglm}}.} \examples{ # see example in 'confint.brglm'. } \keyword{dplot} \keyword{hplot} brglm/man/modifications.Rd0000644000176200001440000002723014040310424015243 0ustar liggesusers\name{modifications} \alias{modifications} \alias{checkModifications} \title{Additive Modifications to the Binomial Responses and Totals for Use within `brglm.fit'} \description{ Get, test and set the functions that calculate the additive modifications to the responses and totals in binomial-response GLMs, for the application of bias-reduction either via modified scores or via maximum penalized likelihood (where penalization is by Jeffreys invariant prior). 
} \usage{ modifications(family, pl = FALSE) } \arguments{ \item{family}{a family object of the form \code{binomial(link = "link")}, where \code{"link"} can be one of \code{"logit"}, \code{"probit"}, \code{"cloglog"} and \code{"cauchit"}. The usual ways of giving the family name are supported (see \code{\link{family}}).} \item{pl}{logical determining whether the function returned corresponds to modifications for the penalized maximum likelihood approach or for the modified-scores approach to bias-reduction. Default value is \code{FALSE}.} } \details{ The function returned from \code{modifications} accepts the argument \code{p}, the vector of binomial probabilities, and returns a list with components \code{ar} and \code{at}, which are the link-dependent parts of the additive modifications to the actual responses and totals, respectively. Since the resulting function is used in \code{\link{brglm.fit}}, for efficiency reasons no check is made for \code{p >= 0 | p <= 1}, for \code{length(at) == length(p)} and for \code{length(ar) == length(p)}. } \section{Construction of custom pseudo-data representations}{ If \eqn{y^*}{y*} are the pseudo-responses (pseudo-counts) and \eqn{m^*}{m*} are the pseudo-totals then we call the pair \eqn{(y^*, m^*)}{(y*, m*)} a pseudo-data representation. Both the modified-scores approach and the maximum penalized likelihood have a common property: there exists \eqn{(y^*, m^*)}{(y*, m*)} such that if we replace the actual data \eqn{(y, m)} with \eqn{(y^*, m^*)}{(y*, m*)} in the expression for the ordinary scores (first derivatives of the likelihood) of a binomial-response GLM, then we end up either with the modified scores or with the derivatives of the penalized likelihood (see Kosmidis, 2007, Chapter 5). Let \eqn{\mu} be the mean of the binomial response \eqn{y} (i.e. \eqn{\mu=mp}{\mu = m p}, where \eqn{p} is the binomial probability corresponding to the count \eqn{y}). Also, let \eqn{d} and \eqn{d'} denote the first and the second derivatives, respectively, of \eqn{\mu}{\mu} with respect to the linear predictor \eqn{\eta}{\eta} of the model, and let \eqn{w} denote the corresponding working weight. All the above are viewed as functions of \eqn{p}. The pseudo-data representations have the generic form \tabular{ll}{ pseudo-response : \tab \eqn{y^*=y + h a_r(p)}{y* = y + h a_r(p)} \cr pseudo-totals : \tab \eqn{m^*=m + h a_t(p)}{m* = m + h a_t(p)}, \cr } where \eqn{h} is the leverage corresponding to \eqn{y}. The general expressions for \eqn{a_r(p)} ("r" for "response") and \eqn{a_t(p)} ("t" for "totals") are: \emph{modified-scores approach} \tabular{l}{ \eqn{a_r(p) = d'(p)/(2w(p))} \cr \eqn{a_t(p) = 0}, \cr } \emph{maximum penalized likelihood approach} \tabular{l}{ \eqn{a_r(p) = d'(p)/w(p) + p - 0.5} \cr \eqn{a_t(p) = 0}. \cr } For supplying \eqn{(y^*, m^*)}{(y*, m*)} in \code{\link{glm.fit}} (as is done by \code{\link{brglm.fit}}), an essential requirement for the pseudo-data representation is that it should mimic the behaviour of the original responses and totals, i.e. \eqn{0 \le y^* \le m^*}{0 \le y* \le m*}. Since \eqn{h \in [0, 1]}, the requirement translates to \eqn{0 \le a_r(p) \le a_t(p)} for every \eqn{p \in (0, 1)}. However, the above definitions of \eqn{a_r(p)} and \eqn{a_t(p)} do not necessarily respect this requirement.
On the other hand, the pair \eqn{(a_r(p), a_t(p))} is not unique, in the sense that for a given link function, once the link-specific structure of the pair has been extracted, there is a whole class of equivalent pairs that can be obtained by applying only the following two rules: \itemize{ \item add and subtract the same quantity from either \eqn{a_r(p)} or \eqn{a_t(p)}. \item if a quantity is to be moved from \eqn{a_r(p)} to \eqn{a_t(p)} it first has to be divided by \eqn{-p}. } For example, in the case of penalized maximum likelihood, the pairs \eqn{(d'(p)/w(p) + p - 0.5 , 0)} and \eqn{(d'(p)/w(p) + p , 0.5/p)} are equivalent, in the sense that if the corresponding pseudo-data representations are substituted in the ordinary scores both return the same expression. So, in order to construct a pseudo-data representation that corresponds to a user-specified link function and has the property \eqn{0 \le a_r(p) \le a_t(p)} for every \eqn{p \in (0, 1)}, one merely has to pursue a simple algebraic calculation on the initial pair \eqn{(a_r(p), a_t(p))} using only the two aforementioned rules until an appropriate pair is obtained. There is always such a pair! Once the pair has been found, the following steps should be followed. \enumerate{ \item For a user-specified link function the user has to write a modification function with name "br.custom.family" or "pml.custom.family" for \code{pl=FALSE} or \code{pl=TRUE}, respectively. The function should take as argument the probabilities \code{p} and return a list of two vectors with the same length as \code{p} and with names \code{c("ar", "at")}. The result corresponds to the pair \eqn{(a_r(p), a_t(p))}. \item Check whether the custom-made modifications function is appropriate. This can be done via the function \code{\link{checkModifications}}, which has arguments \code{fun} (the function to be tested) and \code{Length} with default value \code{Length=100}. \code{Length} is to be used when the user-specified link function takes as argument a vector of values (e.g. the \code{logexp} link in \code{?family}). Then the value of \code{Length} should be the length of that vector. \item Put the function on the search path so that \code{modifications} can find it. \item \code{\link{brglm}} can now be used with the custom family as \code{\link{glm}} would be used. } } \note{ The user could also deviate from modified scores and maximum penalized likelihood and experiment with implemented (or not) links, e.g. \code{probit}, constructing their own pseudo-data representations of the aforementioned general form. This could be done by changing the link name, e.g. by \code{probitt <- make.link("probit") ; probitt$name <- "probitt"} and then setting a custom \code{br.custom.family} that does not necessarily depend on the \code{probit} link. Then, \code{brglm} could be used with \code{pl=FALSE}. A further generalization would be to completely remove the hat value \eqn{h} in the generic expression of the pseudo-data representation and have general additive modifications that depend on \eqn{p}. To do this, divide both \code{ar} and \code{at} by \code{pmax(get("hats",parent.frame()),.Machine\$double.eps)} within the custom modification function (see also Examples). } \author{Ioannis Kosmidis, \email{ioannis.kosmidis@warwick.ac.uk}} \references{ Kosmidis I. and Firth D. (2021). Jeffreys-prior penalty, finiteness and shrinkage in binomial-response generalized linear models. \emph{Biometrika}, \bold{108}, 71--82. Kosmidis, I. (2007). Bias reduction in exponential family nonlinear models.
\emph{PhD Thesis}, Department of Statistics, University of Warwick. } \seealso{\code{\link{brglm}}, \code{\link{brglm.fit}}} \examples{ ## Begin Example 1 ## logistic exposure model, following the Example in ?family. See, ## Shaffer, T. 2004. Auk 121(2): 526-540. # Definition of the link function logexp <- function(days = 1) { linkfun <- function(mu) qlogis(mu^(1/days)) linkinv <- function(eta) plogis(eta)^days mu.eta <- function(eta) days * plogis(eta)^(days-1) * binomial()$mu.eta(eta) valideta <- function(eta) TRUE link <- paste("logexp(", days, ")", sep="") structure(list(linkfun = linkfun, linkinv = linkinv, mu.eta = mu.eta, valideta = valideta, name = link), class = "link-glm") } # Here d(p) = days * p * ( 1 - p^(1/days) ) # d'(p) = (days - (days+1) * p^(1/days)) * d(p) # w(p) = days^2 * p * (1-p^(1/days))^2 / (1-p) # Initial modifications, as given from the general expressions above: br.custom.family <- function(p) { etas <- binomial(logexp(.days))$linkfun(p) # the link function argument `.days' will be detected by lexical # scoping. So, make sure that the link-function inputted arguments # have unusual names, like `.days' and that # the link function enters `brglm' as # `family=binomial(logexp(.days))'. list(ar = 0.5*(1-p)-0.5*(1-p)*exp(etas)/.days, at = 0*p/p) # so that to fix the length of at } .days <-3 # `.days' could be a vector as well but then it should have the same # length as the number of observations (`length(.days)' should be # equal to `length(p)'). In this case, `checkModifications' should # have argument `Length=length(.days)'. # # Check: \dontrun{checkModifications(br.custom.family)} # OOOPS error message... the condition is not satisfied # # After some trivial algebra using the two allowed operations, we # get new modifications: br.custom.family <- function(p) { etas <- binomial(logexp(.days))$linkfun(p) list(ar=0.5*p/p, # so that to fix the length of ar at=0.5+exp(etas)*(1-p)/(2*p*.days)) } # Check: checkModifications(br.custom.family) # It is OK. # Now, modifications(binomial(logexp(.days))) # works. # Notice that for `.days <- 1', `logexp(.days)' is the `logit' link # model and `a_r=0.5', `a_t=1'. # In action: library(MASS) example(birthwt) m.glm <- glm(formula = low ~ ., family = binomial, data = bwt) .days <- bwt$age m.glm.logexp <- update(m.glm,family=binomial(logexp(.days))) m.brglm.logexp <- brglm(formula = low ~ ., family = binomial(logexp(.days)), data = bwt) # The fit for the `logexp' link via maximum likelihood m.glm.logexp # and the fit for the `logexp' link via modified scores m.brglm.logexp ## End Example ## Begin Example 2 ## Another possible use of brglm.fit: ## Deviating from bias reducing modified-scores: ## Add 1/2 to the response of a probit model. y <- c(1,2,3,4) totals <- c(5,5,5,5) x1 <- c(1,0,1,0) x2 <- c(1,1,0,0) my.probit <- make.link("probit") my.probit$name <- "my.probit" br.custom.family <- function(p) { h <- pmax(get("hats",parent.frame()),.Machine$double.eps) list(ar=0.5/h,at=1/h) } m1 <- brglm(y/totals~x1+x2,weights=totals,family=binomial(my.probit)) m2 <- glm((y+0.5)/(totals+1)~x1+x2,weights=totals+1,family=binomial(probit)) # m1 and m2 should be the same. 
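# Additional illustrative checks (not part of the original example):
# compare the two sets of coefficients side by side
cbind(brglm = coef(m1), glm = coef(m2))
# and inspect the link-dependent parts of the default bias-reducing
# modifications; for the logit link (and pl = FALSE) these are the
# constants ar = 0.5 and at = 1, whatever the value of p
modifications(binomial("logit"))(c(0.1, 0.5, 0.9))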
## End Example ## Begin Example 3: Maximum penalized likelihood for logistic regression, # with the penalty being a power of the Jeffreys prior (`.const` below) # Setup a custom logit link mylogit <- make.link("logit") mylogit$name <- "mylogit" ## Set-up the custom family br.custom.family <- function(p) { list(ar = .const * p/p, at = 2 * .const * p/p) } data("lizards") ## The reduced-bias fit is .const <- 1/2 brglm(cbind(grahami, opalinus) ~ height + diameter + light + time, family = binomial(mylogit), data=lizards) ## which is the same as what brglm does by default for logistic regression brglm(cbind(grahami, opalinus) ~ height + diameter + light + time, family = binomial(logit), data=lizards) ## Stronger penalization (e.g. 5/2) can be achieved by .const <- 5/2 brglm(cbind(grahami, opalinus) ~ height + diameter + light + time, family = binomial(mylogit), data=lizards) ## End Example } \keyword{models} \keyword{regression} brglm/DESCRIPTION0000644000176200001440000000273715062240332013071 0ustar liggesusersPackage: brglm Type: Package Title: Bias Reduction in Binomial-Response Generalized Linear Models Version: 0.7.3 Authors@R: person(given = "Ioannis", family = "Kosmidis", role = c("aut", "cre"), email = "ioannis.kosmidis@warwick.ac.uk", comment = c(ORCID = "0000-0003-1556-0302")) URL: https://github.com/ikosmidis/brglm BugReports: https://github.com/ikosmidis/brglm/issues Description: Fit generalized linear models with binomial responses using either an adjusted-score approach to bias reduction or maximum penalized likelihood where penalization is by Jeffreys invariant prior. These procedures return estimates with improved frequentist properties (bias, mean squared error) that are always finite even in cases where the maximum likelihood estimates are infinite (data separation). Fitting takes place by fitting generalized linear models on iteratively updated pseudo-data. The interface is essentially the same as 'glm'. More flexibility is provided by the fact that custom pseudo-data representations can be specified and used for model fitting. Functions are provided for the construction of confidence intervals for the reduced-bias estimates. License: GPL (>= 2) Depends: R (>= 2.6.0), profileModel Suggests: MASS NeedsCompilation: yes Packaged: 2025-09-16 10:23:40 UTC; yiannis Author: Ioannis Kosmidis [aut, cre] (ORCID: ) Maintainer: Ioannis Kosmidis Repository: CRAN Date/Publication: 2025-09-16 10:50:02 UTC