#Ilustrando o Teorema Central do Limite através de um exemplo: #(Soares & Siqueira, pag 37) Doenças Cardiovasculares em Honolulu honolulu=read.csv('exe33.csv',header=T,sep=',') honolulu[1,] summary(honolulu$WEIGHT) hist(honolulu$WEIGHT) summary(honolulu$HEIGHT) hist(honolulu$HEIGHT) summary(honolulu$AGE) hist(honolulu$AGE) table(honolulu$SMOKING) summary(honolulu$GLUCOSE) hist(honolulu$GLUCOSE) summary(honolulu$SERUMC) hist(honolulu$SERUMC) summary(honolulu$SYSTOLIC) hist(honolulu$SYSTOLIC) boxplot(honolulu$WEIGHT) medias5=NULL for (i in 1:1000){ dat=honolulu[sample(1:nrow(honolulu),size=5,replace=TRUE),] medias5=rbind(medias5,apply(dat,2,mean)) } medias10=NULL for (i in 1:1000){ dat=honolulu[sample(1:nrow(honolulu),size=10,replace=TRUE),] medias10=rbind(medias10,apply(dat,2,mean)) } medias20=NULL for (i in 1:1000){ dat=honolulu[sample(1:nrow(honolulu),size=20,replace=TRUE),] medias20=rbind(medias20,apply(dat,2,mean)) } par(mfrow=c(2,2)) hist(honolulu$WEIGHT,main='Histograma dos pesos',xlab='Pesos',xlim=c(min(honolulu$WEIGHT),max(honolulu$WEIGHT))) hist(medias5[,3],main='1000 amostras de tamanho 5',xlab='Pesos médios',xlim=c(min(honolulu$WEIGHT),max(honolulu$WEIGHT))) hist(medias10[,3],main='1000 amostras de tamanho 10',xlab='Pesos médios',xlim=c(min(honolulu$WEIGHT),max(honolulu$WEIGHT))) hist(medias20[,3],main='1000 amostras de tamanho 20',xlab='Pesos médios',xlim=c(min(honolulu$WEIGHT),max(honolulu$WEIGHT))) par(mfrow=c(2,2)) hist(honolulu$AGE,main='Histograma das idades',xlab='Idades',xlim=c(min(honolulu$AGE),max(honolulu$AGE))) hist(medias5[,5],main='1000 amostras de tamanho 5',xlab='Idades médias',xlim=c(min(honolulu$AGE),max(honolulu$AGE))) hist(medias10[,5],main='1000 amostras de tamanho 10',xlab='Idades médias',xlim=c(min(honolulu$AGE),max(honolulu$AGE))) hist(medias20[,5],main='1000 amostras de tamanho 20',xlab='Idades médias',xlim=c(min(honolulu$AGE),max(honolulu$AGE))) par(mfrow=c(2,2)) hist(honolulu$SYSTOLIC,main='Histograma das Pressões Sistólicas',xlab='Pressões Sistólicas',xlim=c(min(honolulu$SYSTOLIC),max(honolulu$SYSTOLIC))) hist(medias5[,10],main='1000 amostras de tamanho 5',xlab='Pressões sistólicas médias',xlim=c(min(honolulu$SYSTOLIC),max(honolulu$SYSTOLIC))) hist(medias10[,10],main='1000 amostras de tamanho 10',xlab='Pressões sistólicas médias',xlim=c(min(honolulu$SYSTOLIC),max(honolulu$SYSTOLIC))) hist(medias20[,10],main='1000 amostras de tamanho 20',xlab='Pressões sistólicas médias',xlim=c(min(honolulu$SYSTOLIC),max(honolulu$SYSTOLIC))) par(mfrow=c(2,2)) hist(honolulu$SMOKING,main='Histograma do Tabagismo',xlab='Estatus de fumo',xlim=c(min(honolulu$SMOKING),max(honolulu$SMOKING))) hist(medias5[,6],main='1000 amostras de tamanho 5',xlab='Proporções de fumantes',xlim=c(min(honolulu$SMOKING),max(honolulu$SMOKING))) hist(medias10[,6],main='1000 amostras de tamanho 10',xlab='Proporções de fumantes',xlim=c(min(honolulu$SMOKING),max(honolulu$SMOKING))) hist(medias20[,6],main='1000 amostras de tamanho 20',xlab='Proporções de fumantes',xlim=c(min(honolulu$SMOKING),max(honolulu$SMOKING))) #Pelo TCL: pbar~N(p,sqrt(p(1-p)/n)) par(mfrow=c(2,2)) n=20 p=mean(honolulu$SMOKING) dp=sqrt(p*(1-p)/n) d=dnorm(seq(0,1,l=200),p,dp) hist(medias20[,6],freq=FALSE,xlab='Proporções de fumantes',main='n=20',ylim=c(0,max(d)),xlim=c(0,1)) lines(seq(0,1,l=200),d,col=2) n=40 medias40=NULL for (i in 1:2000){ dat=honolulu[sample(1:nrow(honolulu),size=n,replace=TRUE),] medias40=rbind(medias40,apply(dat,2,mean)) } p=mean(honolulu$SMOKING) dp=sqrt(p*(1-p)/n) d=dnorm(seq(0,1,l=200),p,dp) hist(medias40[,6],freq=FALSE,xlab='Proporções de fumantes',main='n=40',ylim=c(0,max(d)),xlim=c(0,1)) lines(seq(0,1,l=200),d,col=2) n=60 medias60=NULL for (i in 1:2000){ dat=honolulu[sample(1:nrow(honolulu),size=n,replace=TRUE),] medias60=rbind(medias60,apply(dat,2,mean)) } p=mean(honolulu$SMOKING) dp=sqrt(p*(1-p)/n) d=dnorm(seq(0,1,l=200),p,dp) hist(medias60[,6],freq=FALSE,xlab='Proporções de fumantes',main='n=60',ylim=c(0,max(d)),xlim=c(0,1)) lines(seq(0,1,l=200),d,col=2) mean(medias60[,6]) sd(medias60[,6]) p dp n=40 dat=honolulu[sample(1:nrow(honolulu),size=n,replace=TRUE),] phat=mean(dat$SMOKING) dphat=sqrt(phat*(1-phat)/n) z=qnorm(0.975,0,1) phat-z*dphat #limite inferior do intervalo de confiança de 95% para p phat+z*dphat #limite superior do intervalo de confiança de 95% para p p phat # suponha agora que X=peso # Pelo TCL: Xbar~N(mu,sigma/sqrt(n)) n=40 mu=mean(honolulu$WEIGHT) sigma=sqrt(sum((honolulu$WEIGHT-mean(honolulu$WEIGHT))^2)/nrow(honolulu)) dp=sigma/sqrt(n) d=dnorm(seq(min(honolulu$WEIGHT),max(honolulu$WEIGHT),l=200),mu,dp) hist(medias40[,3],freq=FALSE,xlab='Pesos médios',main='n=40') lines(seq(min(honolulu$WEIGHT),max(honolulu$WEIGHT),l=200),d,col=2) muhat=mean(dat$WEIGHT) sigmahat=sd(dat$WEIGHT) dphat=sigmahat/sqrt(n) z=qnorm(0.975,0,1) muhat-z*dphat #limite inferior do intervalo de confiança de 95% para a media muhat+z*dphat #limite superior do intervalo de confiança de 95% para a media mu muhat dphat=sigmahat/sqrt(n) d=dnorm(seq(min(honolulu$WEIGHT),max(honolulu$WEIGHT),l=200),mu,dphat) lines(seq(min(honolulu$WEIGHT),max(honolulu$WEIGHT),l=200),d,col=4) par(mfrow=c(1,1)) x=seq(-4,4,l=200) zz=dnorm(x,0,1) tt=dt(x,n-1) plot(x,zz,col=2,type='l') lines(x,tt,col=4) #Para amostras pequenas: (Xbar-mu)/dphat ~ t com n-1 g.l. n=10 dat=honolulu[sample(1:nrow(honolulu),size=n,replace=TRUE),] mu=mean(honolulu$WEIGHT) sigma=sqrt(sum((honolulu$WEIGHT-mean(honolulu$WEIGHT))^2)/nrow(honolulu)) dp=sigma/sqrt(n) d=dnorm(seq(min(honolulu$WEIGHT),max(honolulu$WEIGHT),l=200),mu,dp) hist(medias10[,3],freq=FALSE,xlab='Pesos médios',main='n=5') lines(seq(min(honolulu$WEIGHT),max(honolulu$WEIGHT),l=200),d,col=2) muhat=mean(dat$WEIGHT) sigmahat=sd(dat$WEIGHT) dphat=sigmahat/sqrt(n) z=qnorm(0.975,0,1) muhat-z*dphat #limite inferior do intervalo de confiança de 95% para a media muhat+z*dphat #limite superior do intervalo de confiança de 95% para a media mu muhat hist((medias10[,3]-mu)/dphat,freq=FALSE,xlab='Pesos médios padronizados',main='n=10') d=dnorm(seq(-4,4,l=200),0,1) lines(seq(-4,4,l=200),d,col=2) tt=dt(seq(-5,5,l=200),n-1) lines(seq(-5,5,l=200),tt,col=4) segments(-1.96,0,-1.96,dnorm(-1.96,0,1),lty=3,col=2) segments(1.96,0,1.96,dnorm(1.96,0,1),lty=3,col=2) a=qt(0.025,n-1) b=qt(0.975,n-1) e=dt(a,n-1) v=dt(b,n-1) segments(a,0,a,e,lty=3,col=4) segments(b,0,b,v,lty=3,col=4) muhat-b*dphat #limite inferior do intervalo de confiança de 95% para a media muhat+b*dphat #limite superior do intervalo de confiança de 95% para a media