########################################################################################## ## Regressão Não Linear ################################################################## ########################################################################################## ## Exemplo 1 - # dia: dias após aplicação do nematicída # eclod: número de ovos eclodidos de nematóide ## Entrando com os dados lines <- " dia eclod 2 13.00 4 56.50 6 97.50 8 168.00 10 246.50 12 323.00 14 374.00 16 389.00 " ## Trasnformando em numeros e data frame da <- read.table(textConnection(lines), header=TRUE); closeAllConnections() ## Verificando o que foi lido str(da) ## Diagrama de dispersão plot(eclod~dia, da) ## Gride fino para a predição do modelo new <- data.frame(dia=seq(0,30,l=100)) ## Refazendo o diagrama para mostrar a predição fora do intervalo amostrado plot(eclod~dia, da, xlim=c(0,30), ylim=c(0,600)) ## Ajustando uma reta m0 <- lm(eclod~dia, da) lines(predict(m0, newdata=new)~new$dia, col=1) ## Ajustando um polinomio quadrático m1 <- lm(eclod~poly(dia, 2), da) lines(predict(m1, newdata=new)~new$dia, col=2) ## Ajustando um polinomio cúbico m2 <- lm(eclod~poly(dia, 3), da) lines(predict(m2, newdata=new)~new$dia, col=3) ## Ajustando um modelo não-linear logistico m3 <- nls(eclod~SSlogis(dia, Asym, xmid, scal), data=da) lines(predict(m3, newdata=new)~new$dia, col=4) #------------------------------------------------------------------------------------------ # lineares: # * em termos de ajuste: é uma aproximação local e portanto a predição fora do intervalo # das covariáveis não deve ser feita # * em termos de interpretação: é um modelo empírico, relevância teórica, interpretação em # tem termos de taxa por unidade de incremento, nos polinômios interpretação é complicada # * em termos de inferência: estimadores de MQ0/verossimilhança possuem expressão analítica # inferências em pequenas amostras são exatas #------------------------------------------------------------------------------------------ # não lineares # * em termos de ajuste: é um modelo globa e portanto a predição fora do é justificada # * em termos de interpretação: é um modelo com obtido com suporte teórico, os parâmetros # possuem significado que agrega interpretação # * em termos de inferência: estimadores de MQ0/verossimilhança usam procedimentos # numéricos (gargalo do chute inicial), inferências em pequenas amostras são aproximadas e # dependem da parametrização do modelo e dos dados #------------------------------------------------------------------------------------------ ## Entendendo o que significa os parâmetros require(rpanel) ## Escrevendo a função logistica logistica <- function(x,Asym, xmid, scal){ fx <- Asym / ( 1 + exp( ( xmid - x)/scal)) return(fx)} norm.panel <- function(panel){ curve(logistica(x, Asym = panel$Asym, xmid = panel$xmid, scal = panel$scal),0,5, ylim=c(0,2)) abline(v=panel$xmid, h=panel$Asym/2) panel } # passar os argumentos que serão fixos, abre a janelinha panel <- rp.control() # controla a média rp.slider(panel, Asym, 0, 5, initval=2, showvalue=TRUE, action=norm.panel) rp.slider(panel, xmid, -1, 1, initval=0, showvalue=TRUE, action=norm.panel) rp.slider(panel, scal, 0, 2, initval=1, showvalue=TRUE, action=norm.panel) ## Reajustando o modelo não linear especificando a função como eu quiser. m4 <- nls(eclod ~ Asym/( 1 + exp( ( xmid - dia)/scal)), start=list(Asym=400, xmid=8, scal=2),data=da) coef(m3) coef(m4) ########################################################################################## ## Alguns modelos não lineares ########################################################### ########################################################################################## ########################################################################################## ## Modelo Michaelis Menten ############################################################### ########################################################################################## A <- 10; B <- 3 curve(A*x/(B+x), 0, 50, ylim=c(0,10), col=2, lwd=3) abline(h=c(A, A/2), v=B, lty=3) ## Vendo como os parametros mudam a curva norm.panel <- function(panel){ curve(panel$A*x/(panel$B+x), 0, 50, ylim=c(0,10), col=2, lwd=3) abline(h=c(panel$A, panel$A/2), v=panel$B, lty=3) panel } panel <- rp.control() # controla a média rp.slider(panel, A, 0, 10, initval=2, showvalue=TRUE, action=norm.panel) rp.slider(panel, B, -3, 5, initval=0, showvalue=TRUE, action=norm.panel) ########################################################################################## ## Modelo Monomolecular ################################################################## ########################################################################################## C <- A/B curve(C*x/(1+x/B),col=4, lwd=2, lty=2) norm.panel <- function(panel){ curve(panel$C*x/(1+x/panel$B),ylim=c(0,10),col=4, lwd=2, lty=2) panel } panel <- rp.control() # controla a média rp.slider(panel, C, 0, 10, initval=2, showvalue=TRUE, action=norm.panel) rp.slider(panel, B, -3, 5, initval=0, showvalue=TRUE, action=norm.panel) ########################################################################################## ## Modelo logistico ###################################################################### ########################################################################################## A <- 10; B <- 25; C <- 5 curve(A/(1+exp((B-x)/C)), 0, 50, col=2, lwd=3) abline(h=c(A, A/2), v=B, lty=3) ########################################################################################## ## Modelo resposta platô ################################################################# ########################################################################################## A <- 1; B <- 0.5; x0 <- 5 curve(A+B*x*(x=x0),0, 20, col=2, lwd=3) abline(h=c(A, A+B*x0), v=x0, lty=3) ########################################################################################## ## Exemplo 2 - Menos trivial ############################################################# ########################################################################################## frango <- expand.grid(dia=2:42, sistema=factor(c("A","B"))) frango$peso <- c( 80.18145, 89.98167, 132.14629, 192.04534, 167.68245, 191.45191, 220.74227, 212.98519, 230.82651, 346.32728, 391.14474, 407.79706, 441.54167, 499.63470, 575.36996, 603.35279, 678.09090, 763.96071, 787.66652, 921.68731, 959.13005, 1069.59008, 1150.70054, 1269.26359, 1313.35194, 1419.24574, 1532.63279, 1647.94630, 1722.91144, 1832.84384, 1921.09935, 1960.50372, 2062.17519, 2204.45014, 2258.73203, 2311.79432, 2466.26338, 2505.48039, 2521.81638, 2625.00725, 2728.60234, 201.41506, 240.71230, 289.29251, 215.56332, 294.79948, 297.17629, 346.07243, 358.03428, 393.36050, 388.47739, 477.51108, 420.89742, 490.44854, 605.53948, 629.18954, 659.28526, 713.87248, 773.69469, 887.45404, 943.04904, 970.29292, 980.20056, 1142.43274, 1197.28398, 1187.79456, 1243.54212, 1340.48431, 1453.78205, 1542.45519, 1596.08595, 1702.33500, 1801.46693, 1847.62131, 1860.69871, 2018.38835, 2046.97753, 2077.06034, 2236.60287, 2238.75234, 2302.30264, 2354.35641) ## Analise descritiva require(lattice) ## Duas curvas no mesmo gráfico xyplot(peso~dia, groups=sistema, data=frango, type=c("p","smooth")) ## Uma curva em cada gráfico xyplot(peso~dia|sistema, data=frango, type=c("p","smooth")) ## Ajusntando um modelo para cada sistema nA <- nls(peso~ A/(1 + exp( (xmid - dia)/scal)), data=subset(frango, sistema=="A"), start=list(A=3000, xmid=25, scal=10)) summary(nA) ## Apenas sistema B nB <- nls(peso~ A/(1 + exp( (xmid - dia)/scal)), data=subset(frango, sistema=="B"), start=list(A=3000, xmid=25, scal=10)) summary(nB) ########################################################################################## ## Ajustnado junto - Muito mais conveniente porque aumenta a quantidade de dados ######### ########################################################################################## # fazer o ajuste das duas curvas num único nls(), estimativa do QMR é mais consistente nAB <- nls(peso~ A[sistema]/(1 + exp( (xmid[sistema] - dia)/scal[sistema])), data=frango, start=list(A=c(3200,3200),xmid=c(28,30),scal=c(8,10))) summary(nAB) ## Desenhando os ajustes comparando com os dados # fazer o gráfico dos valores ajustados/preditos new <- expand.grid(dia=0:70, sistema=factor(c("A","B"))) new$fit <- predict(nAB, newdata=new) layout(1) with(frango, plot(peso~dia, col=sistema, xlim=c(0,70), ylim=c(0,3200))) with(subset(new, sistema=="A"), lines(dia, fit)) with(subset(new, sistema=="B"), lines(dia, fit, col=2)) ## Obtendo intervalos de confiança confint.default(nAB) # baseado em normalidade assintótica confint(nAB) # baseado em perfil de verossimilhança (modelo logístico bem "linear") ## Modelo com assintota A comum entre os sistemas nAB2 <- nls(peso~ A/(1 + exp( (xmid[sistema] - dia)/scal[sistema])) , data=frango, start=list( A=c(3200), xmid=c(28,30), scal=c(8,10))) summary(nAB2) ## Testando se precisa ou não manter duas assintotas anova(nAB2,nAB) ## Os modelos são idênticos logLik(nAB) logLik(nAB2) ########################################################################################## ## Exemplo 3 - Bem menos trivial ######################################################### ########################################################################################## OD <- read.table("OD.csv", header=TRUE, sep=",", dec=",") names(OD) <- c("Ensaio", "Rep", "Tipo", "Tempo", "OD") dados = subset(OD, Ensaio == c(1,2,4)) ## Gráfico exploratório require(lattice) xyplot(OD ~ Tempo | factor(Rep) + factor(Ensaio), groups = Tipo, data=dados, type="l") ## Ignorando que tem repetição xyplot(OD ~ Tempo | factor(Ensaio), groups = Tipo, data=dados, type="p",layout=c(3,1)) ### Modelo não linear que a literatura recomenda para os dados fc.od <- function(t, A, B, C){ f.od <- A / ( 1 + (B^C / t^C)) return(f.od) } plot(fc.od(seq(0,10,l = 100), A = 0.5, B = 5, C = 5) ~ seq(0,10,l = 100), type = "l") ## Verificando o que cada parâmetro modifica na curva norm.panel <- function(panel){ curve(fc.od(x, A = panel$A, B = panel$B, C = panel$C),0,10, ylim=c(0,10)) panel } # passar os argumentos que serão fixos, abre a janelinha panel <- rp.control() # controla a média rp.slider(panel, A, -5, 5, initval=1, showvalue=TRUE, action=norm.panel,resolution=0.1) rp.slider(panel, B, -10, 10, initval=0.5, showvalue=TRUE, action=norm.panel,resolution=0.1) rp.slider(panel, C, -10, 10, initval=1, showvalue=TRUE, action=norm.panel,resolution=0.1) ########################################################################################## ## Ajustando alguns modelos para os dados de OD lembre-se que o interesse é comparar os### ## tipos ################################################################################# ## Modelo 1 - Ignora todas as condições experimentais mod1 <- nls(OD ~ A/(1 + (B/Tempo)^C), start = list(A = 0.8, B = 5, C = 1.35), algorithm = "port", data=dados) summary(mod1) xyplot(OD ~ Tempo | factor(Ensaio), data=dados, group=Tipo,type = "p", layout=c(3,1)) + as.layer(xyplot(fitted(mod1) ~ Tempo | factor(Ensaio),group=Tipo, data=dados, type = "a")) ## Modelo 2 - Vamos considerar a diferença entre os tipo mod2 <- nls(OD ~ A[Tipo]/(1 + (B[Tipo]/Tempo)^C[Tipo]), start = list(A = c(0.8,0.8), B = c(5,5), C = c(1.35,1.35)), algorithm = "port", data=dados) summary(mod2) xyplot(OD ~ Tempo | factor(Ensaio), data=dados, group=Tipo,type = "p", layout=c(3,1)) + as.layer(xyplot(fitted(mod2) ~ Tempo | factor(Ensaio),group=Tipo, data=dados, type = "a")) ## Modelo 3 - Vamos considerar apenas a diferença entre Ensaios dados$Ensaio <- as.factor(dados$Ensaio) levels(dados$Ensaio) mod3 <- nls(OD ~ A[Ensaio]/(1 + (B[Ensaio]/Tempo)^C), start = list(A = c(0.6,0.87,0.69), B = c(2.52,7.22,7.28), C = 1.75), algorithm = "port", data=dados) summary(mod3) xyplot(OD ~ Tempo | factor(Ensaio), data=dados, group=Tipo,type = "p", layout=c(3,1)) + as.layer(xyplot(fitted(mod3) ~ Tempo | factor(Ensaio),group=Tipo, data=dados, type = "a")) ## Parece mais razoável colocar desvios por Ensaio e Tipo, isso é mais fácil de fazer com a função gnls() require(nlme) mod4 <- gnls(OD ~ A/(1 + (B/Tempo)^C), params = list(A ~ Tipo + Ensaio, B ~ Tipo + Ensaio, C ~ 1), start = c(0.82,0.04,0.08,0.05,2.52,0.05,5,5,1.35), data=dados) summary(mod4) xyplot(OD ~ Tempo | factor(Ensaio), data=dados, group=Tipo,type = "p", layout=c(3,1)) + as.layer(xyplot(fitted(mod4) ~ Tempo | factor(Ensaio),group=Tipo, data=dados, type = "a")) ## O problema é que este modelo esta impondo que a curva deve sair do zero vamos flexibilizar isso colocando mais um componente no modelo require(nlme) mod5 <- gnls(OD ~ B0 + A/(1 + (B/Tempo)^C), params = list(B0 ~ Tipo + Ensaio, A ~ Tipo + Ensaio, B ~ Tipo + Ensaio, C ~ 1), start = c(0.25, 0.05,-0.05,-0.05,0.82,0.04,0.08,0.05,2.52,0.05,5,5,1.35), data=dados) summary(mod5) xyplot(OD ~ Tempo | factor(Ensaio), data=dados, group=Tipo,type = "p", layout=c(3,1)) + as.layer(xyplot(fitted(mod5) ~ Tempo | factor(Ensaio),group=Tipo, data=dados, type = "a")) ## Mas o que eu realmente queria testar ??? summary(mod5) ## Parece que o efeito do tipo é apenas no B0 vamos reajustar o modelo deixando o efeito de ensaio apenas em B0 e comparar com o anterior mod6 <- gnls(OD ~ B0 + A/(1 + (B/Tempo)^C), params = list(B0 ~ Tipo + Ensaio, A ~ Ensaio, B ~ Ensaio, C ~ 1), start = c(0.25, 0.05,-0.05,-0.05,0.82,0.08,0.05,2.52,5,5,1.35), data=dados) summary(mod6) xyplot(OD ~ Tempo | factor(Ensaio), data=dados, group=Tipo,type = "p", layout=c(3,1)) + as.layer(xyplot(fitted(mod6) ~ Tempo | factor(Ensaio),group=Tipo, data=dados, type = "a")) anova(mod5,mod6)