# Double Generalized Linear Compound Poisson model # Author: Daniel A. Andersen (IMADA) ComPoissonDGLM <-function(formula, dformula, data, exposure = rep(1, nrow(data)), numclaims = NA, method = "ML", power = c(1.0001, 1.9999), GSS = TRUE, tol = 1e-3) { form1 <- update(formula, log(y1) ~ .) # Initial value of mu form2 <- update(dformula, log(di) ~ .) # Initial value of phi form3 <- update(formula, zm ~ .) # Scoring update of mu form4 <- update(dformula, zd ~ .) # Scoring update of phi #y <- eval(formula[[2]]) mf <- model.frame(formula, data = data) y <- model.response(mf) y1 <- y + !y # Modify zero values if ("tweedie" %in% rownames(installed.packages()) == FALSE) { print("Tweedie package required for the function to work") install.packages("tweedie") } library(tweedie) W <- exposure N <- numclaims diwarning <- F dispersionfit.warning <- F x <- model.matrix(formula, data = data)[, 1:ncol(model.matrix(formula, data = data))] xt <- t(x) z <- model.matrix(dformula, data = data)[, 1:ncol(model.matrix(dformula, data = data))] zt <- t(z) p <- power p.org <- p input <- list( formula = formula, dformula = dformula, data = data, exposure = exposure, numclaims = numclaims, method = method, power = p.org) a <- (p - 2) / (p - 1) a1 <- p[1]; b1 <- p[2] # Golden Section Search (GSS) limits g <- (sqrt(5) - 1) / 2 # Golden Section Search constant kalpha <- function(theta) (a[j] - 1) / a[j] * (theta / (a[j] - 1)) ^ a[j] for (q in 1:100) { if (GSS == TRUE) { lower <- a1 + (1 - g) * (b1 - a1) # GSS search limits upper <- a1 + g * (b1 - a1) est <- a1 + (b1 - a1) / 2 # Midpoint of interval if (.5 * (b1 - a1) > tol) { p <- c(lower, upper) print("Interval") print(c(a1,b1)) print("") } else { p <- est print("Estimated value") print(p) } } a <- (p - 2) / (p - 1) logvmax <- rep(-.Machine$double.xmax, length(p)) for (j in 1:length(p)) { data$y1 <- y1 initial <- lm(form1, data = data) eta <- initial$fitted.values h <- influence(initial)$hat mu <- exp(eta) part1 <- (y * y ^ (1 - p[j]) - y * mu ^ (1 - p[j])) / (1 - p[j]) part2 <- (mu ^ (2 - p[j]) - y ^ (2 - p[j])) / (2 - p[j]) di <- 2 * W * (part1 + part2) di[y == 0] <- 2 * W[y == 0] * mu[y == 0] ^ (2 - p[j]) / (2 - p[j]) if (any(di <= 0)) { diwarning <- T di[which(di <= 0)] <- 1 } data$di <- di data$W <- data$W deta <- lm(form2, data = data)$fitted.values phi <- exp(deta) for (i in 1:500) { if (i > 1) { logv1old <- logv1 mfitold <- mfit dfitold <- dfit h <- influence(mfit)$hat } if (!any(is.na(N))) { t <- y * mu ^ (1 - p[j]) / (1 - p[j]) - mu ^ (2 - p[j]) / (2 - p[j]) wdi <- (2 * W * mu ^ (2 - p[j])) / ((2 - p[j]) * (p[j] - 1) * phi) di <- -2 / wdi * (N * phi / (p[j] - 1) + W * t) + phi if (method == "ML") { wd <- (1 / phi) ^ (-2) * wdi / (2 * phi ^ 2) } if (method == "REML") { wd <- (1 / phi) ^ (-2) * max(wdi - h, 0) / (2 * phi ^ 2) di <- wdi / (wdi - h) * di } } else{ part1 <- (y * y ^ (1 - p[j]) - y * mu ^ (1 - p[j])) / (1 - p[j]) part2 <- (mu ^ (2 - p[j]) - y ^ (2 - p[j])) / (2 - p[j]) di <- 2 * W * (part1 + part2) di[y == 0] <- 2 * W[y == 0] * mu[y == 0] ^ (2 - p[j]) / (2 - p[j]) if (method == "ML") { wd <- rep(.5, length(y)) } if (method == "REML") { wd <- (1 - h ^ 2) di <- di / (1 - h) } } zd <- deta + (di - phi) * 1 / phi data$zd <- zd data$wd <- wd data$W <- W dfit <- lm(form4, data = data, weights = wd) deta <- dfit$fitted.values phi <- exp(deta) if (any(phi == Inf) || any(is.na(phi))) { dispersionfit.warning <- T dfit <- dfitold break } zm <- eta + (y - mu) / mu wm <- mu ^ (2 - p[j]) * W / phi data$zm <- zm data$wm <- wm # The code stops here ---------------------------------------- mfit <- lm(form3, data = data, weights = wm) eta <- mfit$fitted.values mu <- exp(eta) if (any(is.na(di))) { diwarning <- T mfit <- mfitold dfit <- dfitold break } if (!any(is.na(N))) { logv1 <- sum(N * (as.numeric((1 - a[j]) * log(W / phi)) + log(kalpha(-1 / y) + !y)) - lgamma(-N * a[j] + !N) + W / phi * (y * mu ^ (1 / (a[j] - 1)) * (a[j] - 1) - kalpha(mu ^ (1 / (a[j] - 1)) * (a[j] - 1)))) } else logv1 <- sum(log(dtweedie( y = y, mu = mu, power = p[j], phi = phi / W))) if (is.infinite(logv1)) { logv1 <- -9999999999 } if (method == "REML") { w <- diag(wm) logv1 <- logv1 + .5 * log(det(xt %*% w %*% x)) } if (i > 1 && logv1 - logv1old < 10e-3) { break } } logvmax[j] <- logv1old if (GSS == FALSE || length(p) == 1) { if (logvmax[j] == max(logvmax)) { modfinal <- mfitold dmodfinal <- dfitold pfinal <- p[j] afinal <- a[j] } } } if (length(p) == 1) # If GSS is used p will eventually have length 1 break else if (GSS == FALSE && p[j] == p[length(p)]) # No GSS and last value of p break else{ if (-logvmax[1] < -logvmax[2]) { b1 <- upper } else{ a1 <- lower } } } p <- pfinal a <- afinal kalpha <- function(theta) (a - 1) / a * (theta / (a - 1)) ^ a mu <- exp(modfinal$fitted.values) phi <- exp(dmodfinal$fitted.values) wm.final <- mu ^ (2 - p) * W / phi if (!any(is.na(N))) { t <- y * mu ^ (1 - p) / (1 - p) - mu ^ (2 - p) / (2 - p) wdi <- (2 * W * mu ^ (2 - p)) / ((2 - p) * (p - 1) * phi) di <- -2 / wdi * (N * phi / (p - 1) + W * t) + phi if (method == "ML") { wd.final <- (1 / phi) ^ (-2) * wdi / (2 * phi ^ 2) } if (method == "REML") { wd.final <- (1 / phi) ^ (-2) * max(wdi - h, 0) / (2 * phi ^ 2) } } else{ part1 <- (y * y ^ (1 - p) - y * mu ^ (1 - p)) / (1 - p) part2 <- (mu ^ (2 - p) - y ^ (2 - p)) / (2 - p) di <- 2 * W * (part1 + part2) di[y == 0] <- 2 * W[y == 0] * mu[y == 0] ^ (2 - p) / (2 - p) if (method == "ML") { wd.final <- rep(.5, length(y)) } if (method == "REML") { wd.final <- (1 - h ^ 2) } } zeros <- exp(-W / phi * kalpha(mu ^ (1 / (a - 1)) * (a - 1))) # Probability of zero difinal <- 2 * W * ((y ^ (2 - p)) / ((1 - p) * (2 - p)) - (y * (mu ^ (1 - p))) / (1 - p) + (mu ^ (2 - p)) / (2 - p)) devresid <- sign(y - mu) * sqrt(difinal) diresid <- sign(difinal - phi) * sqrt(2 * (log(phi / difinal) + difinal / phi - 1)) if (diwarning == T) warning("Perfect fit for some values") if (dispersionfit.warning == T) warning("Inf in dispersion fit") sd.mean <- sqrt(diag(solve(xt%*%diag(wm.final)%*%x))) sd.dispersion <- sqrt(diag(solve(zt%*%diag(wd.final)%*%z))) out <- list( input = input, L = logvmax, L.max = max(logvmax), power = p.org, power.max = p, a.max = a, sd.mean = sd.mean, sd.dispersion = sd.dispersion, di = difinal, zeros = zeros, devresid = devresid, diresid = diresid, wd.final = wd.final, wm.final = wm.final, mu = mu, phi = phi, coefficients.mean = modfinal$coefficients, coefficients.dispersion = dmodfinal$coefficients ) return(out) } # Confidence interval for power parameter conf.p <- function(object) { if ("rootSolve" %in% rownames(installed.packages()) == FALSE) { print("rootSolve package required for the function to work") install.packages("rootSolve") } library(rootSolve) L.max <- object$L.max ci <- uniroot.all( function(p) ComPoissonDGLM( object$input$formula, object$input$dformula, object$input$data, object$input$exposure, object$input$numclaims, object$input$method,power = p,GSS = F )$L - L.max + qchisq(0.95,1) / 2,interval = object$power ) p.std <- sum(abs(object$power.max-ci))/2/qnorm(0.975) return(list(p.std=p.std,ci=ci)) }