# Double Generalized Linear Compound Poisson model
# Author: Daniel A. Andersen (IMADA)

ComPoissonDGLM <-function(formula, dformula, data, 
                          exposure = rep(1, nrow(data)), numclaims = NA,
                          method = "ML", power = c(1.0001, 1.9999), 
                          GSS = TRUE, tol = 1e-3) {
    form1 <- update(formula, log(y1) ~ .)   # Initial value of mu
    form2 <- update(dformula, log(di) ~ .)  # Initial value of phi
    form3 <- update(formula, zm ~ .)        # Scoring update of mu
    form4 <- update(dformula, zd ~ .)       # Scoring update of phi
    #y <- eval(formula[[2]])
    mf <- model.frame(formula, data = data)
    y <- model.response(mf)
    y1 <- y + !y                            # Modify zero values
    if ("tweedie" %in% rownames(installed.packages()) == FALSE) {
      print("Tweedie package required for the function to work")
      install.packages("tweedie")
    }
    library(tweedie)
    W <- exposure
    N <- numclaims
    diwarning <- F
    dispersionfit.warning <- F
    x <- model.matrix(formula, data = data)[, 1:ncol(model.matrix(formula, data = data))]
    xt <- t(x)
    z <- model.matrix(dformula, data = data)[, 1:ncol(model.matrix(dformula, data = data))]
    zt <- t(z)
    
    p <- power
    p.org <- p
    input <- list( formula = formula, dformula = dformula, data = data, 
          exposure = exposure, numclaims = numclaims,
          method = method, power = p.org)
    a <- (p - 2) / (p - 1)
    
    a1 <- p[1]; b1 <- p[2]          # Golden Section Search (GSS) limits
    g <- (sqrt(5) - 1) / 2          # Golden Section Search constant
    
    kalpha <- function(theta)
      (a[j] - 1) / a[j] * (theta / (a[j] - 1)) ^ a[j]
    for (q in 1:100) {
      if (GSS == TRUE) {
        lower <- a1 + (1 - g) * (b1 - a1)   # GSS search limits
        upper <- a1 + g * (b1 - a1)
        est <- a1 + (b1 - a1) / 2           # Midpoint of interval
        if (.5 * (b1 - a1) > tol) {
          p <- c(lower, upper)
          print("Interval")
          print(c(a1,b1))
          print("")
        }
        else {
          p <- est
          print("Estimated value")
          print(p)
        }
      }
      a <- (p - 2) / (p - 1)
      logvmax <- rep(-.Machine$double.xmax, length(p))
      
      
      for (j in 1:length(p)) {
        data$y1 <- y1
        initial <- lm(form1, data = data)
        eta <- initial$fitted.values
        h <- influence(initial)$hat
        mu <- exp(eta)
        part1 <- (y * y ^ (1 - p[j]) - y * mu ^ (1 - p[j])) / (1 - p[j])
        part2 <- (mu ^ (2 - p[j]) - y ^ (2 - p[j])) / (2 - p[j])
        di <- 2 * W * (part1 + part2)
        di[y == 0] <- 2 * W[y == 0] * mu[y == 0] ^ (2 - p[j]) / (2 - p[j])
        if (any(di <= 0)) {
          diwarning <- T
          di[which(di <= 0)] <- 1
        }
        data$di <- di
        data$W <- data$W
        deta <- lm(form2, data = data)$fitted.values
        phi <- exp(deta)
        for (i in 1:500) {
          if (i > 1) {
            logv1old <- logv1
            mfitold <- mfit
            dfitold <- dfit
            h <- influence(mfit)$hat
          }
          
          if (!any(is.na(N))) {
            t <- y * mu ^ (1 - p[j]) / (1 - p[j]) - mu ^ (2 - p[j]) / (2 - p[j])
            wdi <- (2 * W * mu ^ (2 - p[j])) / ((2 - p[j]) * (p[j] - 1) *
                                                  phi)
            di <- -2 / wdi * (N * phi / (p[j] - 1) + W * t) + phi
            if (method == "ML") {
              wd <- (1 / phi) ^ (-2) * wdi / (2 * phi ^ 2)
            }
            if (method == "REML") {
              wd <- (1 / phi) ^ (-2) * max(wdi - h, 0) / (2 * phi ^ 2)
              di <- wdi / (wdi - h) * di
            }
          }
          else{
            part1 <- (y * y ^ (1 - p[j]) - y * mu ^ (1 - p[j])) / (1 - p[j])
            part2 <- (mu ^ (2 - p[j]) - y ^ (2 - p[j])) / (2 - p[j])
            di <- 2 * W * (part1 + part2)
            di[y == 0] <- 2 * W[y == 0] * mu[y == 0] ^ (2 - p[j]) / (2 - p[j])
            if (method == "ML") {
              wd <- rep(.5, length(y))
            }
            if (method == "REML") {
              wd <- (1 - h ^ 2)
              di <- di / (1 - h)
            }
            
          }
          zd <- deta + (di - phi) * 1 / phi
          data$zd <- zd
          data$wd <- wd
          data$W <- W
          dfit <- lm(form4, data = data, weights = wd)
          deta <- dfit$fitted.values
          phi <- exp(deta)
          
          if (any(phi == Inf) || any(is.na(phi))) {
            dispersionfit.warning <- T
            dfit <- dfitold
            break
          }
          
          zm <- eta + (y - mu) / mu
          wm <- mu ^ (2 - p[j]) * W / phi
          data$zm <- zm
          data$wm <- wm
          # The code stops here ----------------------------------------
          mfit <- lm(form3, data = data, weights = wm)
          eta <- mfit$fitted.values
          mu <- exp(eta)
          
          if (any(is.na(di))) {
            diwarning <- T
            mfit <- mfitold
            dfit <- dfitold
            break
          }
          if (!any(is.na(N))) {
            logv1 <- sum(N * (as.numeric((1 - a[j]) * log(W / phi)) + 
              log(kalpha(-1 / y) + !y)) - lgamma(-N * a[j] + !N) + W / phi * (y * mu ^ (1 /
              (a[j] - 1)) * (a[j] - 1) - kalpha(mu ^ (1 / (a[j] - 1)) * (a[j] - 1))))
          }
          
          else
            logv1 <- sum(log(dtweedie(
              y = y, mu = mu, power = p[j], phi = phi / W)))
          
          if (is.infinite(logv1)) {
            logv1 <- -9999999999
          }
          if (method == "REML") {
            w <- diag(wm)
            logv1 <- logv1 + .5 * log(det(xt %*% w %*% x))
          }
          
          if (i > 1 && logv1 - logv1old < 10e-3) {
            break
          }
        }
        
        logvmax[j] <- logv1old
        
        if (GSS == FALSE || length(p) == 1) {
          if (logvmax[j] == max(logvmax)) {
            modfinal <- mfitold
            dmodfinal <- dfitold
            pfinal <- p[j]
            afinal <- a[j]
          }
        }
        
      }
      
      if (length(p) == 1)     # If GSS is used p will eventually have length 1
        break
      else if (GSS == FALSE && p[j] == p[length(p)])  # No GSS and last value of p
        break
      else{
        if (-logvmax[1] < -logvmax[2]) {
          b1 <- upper
        }
        else{
          a1 <- lower
        }
      }
    }
    p <- pfinal
    a <- afinal
    
    kalpha <- function(theta)
      (a - 1) / a * (theta / (a - 1)) ^ a
    mu <- exp(modfinal$fitted.values)
    phi <- exp(dmodfinal$fitted.values)
    
    wm.final <- mu ^ (2 - p) * W / phi
    
    if (!any(is.na(N))) {
      t <- y * mu ^ (1 - p) / (1 - p) - mu ^ (2 - p) / (2 - p)
      wdi <- (2 * W * mu ^ (2 - p)) / ((2 - p) * (p - 1) * phi)
      di <- -2 / wdi * (N * phi / (p - 1) + W * t) + phi
      if (method == "ML") {
        wd.final <- (1 / phi) ^ (-2) * wdi / (2 * phi ^ 2)
      }
      if (method == "REML") {
        wd.final <- (1 / phi) ^ (-2) * max(wdi - h, 0) / (2 * phi ^ 2)
      }
    }
    else{
      part1 <- (y * y ^ (1 - p) - y * mu ^ (1 - p)) / (1 - p)
      part2 <- (mu ^ (2 - p) - y ^ (2 - p)) / (2 - p)
      di <- 2 * W * (part1 + part2)
      di[y == 0] <- 2 * W[y == 0] * mu[y == 0] ^ (2 - p) / (2 - p)
      if (method == "ML") {
        wd.final <- rep(.5, length(y))
      }
      if (method == "REML") {
        wd.final <- (1 - h ^ 2)
      }
      
    }
    
    zeros <- exp(-W / phi * kalpha(mu ^ (1 / (a - 1)) * (a - 1)))   # Probability of zero
    difinal <- 2 * W * ((y ^ (2 - p)) / ((1 - p) * (2 - p)) - (y * (mu ^ (1 - p))) / 
              (1 - p) + (mu ^ (2 - p)) / (2 - p))
    devresid <- sign(y - mu) * sqrt(difinal)
    diresid <-
      sign(difinal - phi) * sqrt(2 * (log(phi / difinal) + difinal / phi - 1))
    if (diwarning == T)
      warning("Perfect fit for some values")
    if (dispersionfit.warning == T)
      warning("Inf in dispersion fit")
    
    sd.mean <- sqrt(diag(solve(xt%*%diag(wm.final)%*%x)))
    sd.dispersion <- sqrt(diag(solve(zt%*%diag(wd.final)%*%z)))
    
    out <- list( input = input, L = logvmax, 
        L.max = max(logvmax), power = p.org, power.max = p, a.max = a, 
        sd.mean = sd.mean, sd.dispersion = sd.dispersion, di = difinal, 
        zeros = zeros, devresid = devresid, diresid = diresid, 
        wd.final = wd.final, wm.final = wm.final, mu = mu, phi = phi,
        coefficients.mean = modfinal$coefficients,
        coefficients.dispersion = dmodfinal$coefficients
      )
    return(out)
}

# Confidence interval for power parameter
conf.p <- function(object) {
  if ("rootSolve" %in% rownames(installed.packages()) == FALSE) {
    print("rootSolve package required for the function to work")
    install.packages("rootSolve")
  }
  library(rootSolve)
  L.max <- object$L.max
  ci <-
    uniroot.all(
      function(p)
        ComPoissonDGLM(
          object$input$formula, object$input$dformula, object$input$data, 
          object$input$exposure, object$input$numclaims,
          object$input$method,power = p,GSS = F
        )$L - L.max + qchisq(0.95,1) / 2,interval = object$power
    )
  p.std <- sum(abs(object$power.max-ci))/2/qnorm(0.975)
  return(list(p.std=p.std,ci=ci))
}