====== Aquecendo: Exemplos / Motivação ====== 1. Modelo Linear (regressão)


require(MASS)
data(whiteside)		
find(whiteside)
#help(whiteside)
names(whiteside)
require(lattice)

trellis.par.set(col.whitebg()) 	
xyplot(Gas ~ Temp | Insul, whiteside,
  xlab = "temperatura externa média (graus C)",
  ylab = "Consumo de Gás (cu.ft/1000)",
  as.table = T, aspect = 1, layout = c(2,1),
  ylim = range(0, whiteside$Gas+0.5),
  xlim = range(0, whiteside$Temp+0.5),
  panel =
    function(x, y, ...) {
    panel.xyplot(x, y, ...)
    panel.lmline(x, y, ...)
  })

#####################
# Modelando: Consumo de gás
# ajustes separados para "before" e "after"
gasB <- lm(Gas ~ Temp, whiteside,subset = Insul == "Before")
gasA <- update(gasB, subset = Insul == "After")
summary(gasB)
summary(gasA)
# Ambas linhas em um único modelo ajustado
gasBA <- lm(Gas ~ Insul/Temp - 1, whiteside)
summary(gasBA)
# Curvatura
gasQ <- lm(Gas ~ Insul/(Temp + I(Temp^2)) - 1, 	whiteside)
summary(gasQ)$coef
# Paralelismo
gasPR <- lm(Gas ~ Insul + Temp, whiteside)
anova(gasPR, gasBA)
# Residuos
res <- resid(gasBA)
fit <- fitted(gasBA)
plot(fit, res, xlab="Valores ajustados",ylab="Residuos",main="Gráfico de Residuos")
abline(h = 0, col = "red", lty = 2)
qqnorm(res)
qqline(res)
title(main="Q-Q Plot Normal")

2. Árvores de classificação


## Alguns pacotes:
##   - rpart (parte de "VR bundle")
##   - tree 
## (rpart trata dados faltates, tree não)


## Carregando o pacote MASS para usar o conjunto de dados Cars93
require(MASS)
#?Cars93

## O objetivo aqui é ver se as variáveis identificam o "Tipo" do veículo

names(Cars93)
## Selecionando um subconjunto com as variáveis a serem utilizadas
## (excluindo as não usadas na classificação)
cars93 <- subset(Cars93, select = -c(Manufacturer,
                 Model, Rear.seat.room, Luggage.room, Make))
print(names(cars93), quote = FALSE)


### Carregando o pacote "tree"
require(tree)

## 1o ajuste
cars93.t1 <- tree(Type ~ ., cars93, minsize = 5)
plot(cars93.t1) 
text(cars93.t1, cex = 0.75)

## Verificando erros/acertos de classificação
with(cars93, table(Type, predict(cars93.t1, type = "class")))

## definindo a "poda" da árvore por validação cruzada
## (repetimos 4 vezes cada por envolver simulação)
par(mfrow = c(2,2))
for(j in 1:4)
	plot(cv.tree(cars93.t1, FUN=prune.tree))
# outro critério
for(j in 1:4)
	plot(cv.tree(cars93.t1, FUN=prune.misclass))

### Duas árvores "podadas"
par(mfrow = c(1,2))
cars93.t2 <- prune.misclass(cars93.t1, best = 6)
plot(cars93.t2, type = "u"); text(cars93.t2)

cars93.t3 <- prune.tree(cars93.t1, best = 6)
plot(cars93.t3, type = "u"); text(cars93.t3)

### Predição usando uma árvore selecionada
## (para grandes conjuntos pode-se dividir em 2 grupos)
pred <- predict(cars93.t3, newdata=Cars93, type = "class")
with(cars93, table(pred, Type))

## Comparação com modelo multinomial (ajustado via rede neural)
library(nnet)
m <- multinom(Type ~ Width + EngineSize +
  Passengers + Origin, cars93, maxit = 1000)
pfm <- predict(m, type = "class")
with(cars93, table(Type, pfm))


### Usando apenas 2 preditores
cars.2t <- tree(Type ~ Width + EngineSize, Cars93)
par(mfrow = c(1,1))
plot(cars.2t); text(cars.2t)

par(mfrow = c(2,2))
for(j in 1:4)
  plot(cv.tree(cars.2t, FUN=prune.misclass))

par(mfrow = c(1,1))
cars.2t1 <- prune.misclass(cars.2t, best = 6)
plot(cars.2t1); text(cars.2t1)

partition.tree(cars.2t1)
with(cars93, {
  points(Width, EngineSize, pch=8,
         col = as.numeric(Type), cex = 0.5)
  legend("topleft", levels(Type), pch = 8,
         col = 1:length(levels(Type)), bty = "n")
})

3. Visualizando dados (espaciais) de áreas (polígonos)


######################################
## instalando os pacotes (se necessário)
######################################
#install.packages(c("maptools", "sp", "spdep", "classInt", "RColorBrewer"), dep=T)
#####################################
## Carregando pacotes necessários
#####################################
require(maptools)     ## funções para importação/expostação e manipulação de mapas e dados geográficos    
gpclibPermit()        ## função para habilitar licença de uso
require(sp)           ## (classes) para representação de dados espaciais no R 
require(spdep)        ## funções análises de dados de áreas
require(classInt)     ## rotinas para faclitar a divisão de dados em classes por vários critérios
require(RColorBrewer) ## usada aqui para criar palhetas de cores nas visualizações em mapas

##
## Bairros de Curitiba
##
## lendo dados tipo shapefiles
ctba <- readShapePoly("dados/bairros.shp", IDvar="CODE")
plot(ctba)

head(ctba@data)
ctba@data$NOME <- as.character(ctba@data$NOME)

## mudança de codificação de caracteres (só use se necessário, 
## depende do sistema operacional e do enconding do sistema)
## Veja os nomes dos unicípios e se os acentos aparecem corretamente
Encoding(ctba@data$NOME)
Encoding(ctba@data$NOME) <- "latin1"
ctba@data$NOME <- enc2native(ctba@data$NOME)
head(ctba@data)

tb <- read.csv("dados/tabe.csv", enc="latin1")
head(tb) ## ops nao importou corretamente assim!

## vamos importar de outra forma:
tb <- read.table("dados/tabe.csv", sep=";", head=T, enc="latin1")
head(tb)
str(tb)

## e agora ordenando os dados corretamente, para compatibilizar a ordem
## dos minicípios no shape e na tabela de atributos
ind <- match(ctba@data$CODE, tb$CODE)
ind

tb <- tb[ind,]
row.names(tb) <- ctba$CODE

ctba <- spCbind(ctba, tb)

names(ctba)
head(ctba@data)

## Visualizando um mapa do atributo: Segurança
#INT <- classIntervals(ctba$Segurança, n=5, style="quantile")

#jpeg("seguranca.jpg", width=750,height=200)
par(mfrow=c(1,3), mar=c(2,2,0.5, 0.5))

INT <- classIntervals(ctba$Segurança, style="fixed", fixedBreaks=c(0, 20, 40, 60, 80, 100))
CORES.5 <- rev(c(rev(brewer.pal(3, "Blues")), brewer.pal(3, "Reds"))[-1])  ## tirando uma cor no final
COL <- findColours(INT, CORES.5)

plot(ctba, col=COL)
title("Segurança")

TB <- attr(COL, "table")
legtext <- paste(names(TB))  #, " (", TB, ")", sep="")
legend("bottomright", fill=attr(COL, "palette"), legend=legtext, bty="n")

## Visualizando um mapa do atributo: Segurança
INT <- classIntervals(ctba$Segurança, n=5, style="quantile")
CORES.5 <- rev(c(rev(brewer.pal(3, "Blues")), brewer.pal(3, "Reds"))[-1])  ## tirando uma cor no final
COL <- findColours(INT, CORES.5)

plot(ctba, col=COL)
title("Segurança")

TB <- attr(COL, "table")
legtext <- paste(names(TB))  #, " (", TB, ")", sep="")
legend("bottomright", fill=attr(COL, "palette"), legend=legtext, bty="n")

## Visualizando um mapa do atributo: Segurança
INT <- classIntervals(ctba$Segurança, n=5, style="sd")
CORES.5 <- rev(c(rev(brewer.pal(4, "Blues")), brewer.pal(3, "Reds")))  ## tirando uma cor no final
COL <- findColours(INT, CORES.5)

plot(ctba, col=COL)
title("Segurança")

TB <- attr(COL, "table")
legtext <- paste(names(TB))  #, " (", TB, ")", sep="")
legend("bottomright", fill=attr(COL, "palette"), legend=legtext, bty="n")

4.SKATER


#    \texttt{read.table()} and \texttt{readShapePoly()}
#  \item standardize data (\texttt{scale()})
names(ctba@data)
ctba@data <- transform(ctba@data, Hab = scale(Habitação), Edu = scale(Educação), Transp = scale(Transporte),
 						Seg = scale(Segurança), Sau = scale(Saúde)) 
names(ctba@data)
#  \item neighbourhood list (\texttt{poly2nb()})
ctba.nb1 <- poly2nb(ctba)
args(poly2nb)
#  \item costs for edges (dissimilarities) (\texttt{nbcosts()})
ctba.costs <- nbcosts(ctba.nb1, ctba@data[,16:20])
#  \item weighted neighbourhood structure  (\texttt{nb2listw()})
ctba.nbw <- nb2listw(ctba.nb1, ctba.costs, style="B")
#  \item minimum spanning tree  (\texttt{mstree()})  
ctba.mst <- mstree(ctba.nbw, 5)

### the mstree plot
par(mar=c(0,0,0,0))
plot(ctba.mst, coordinates(ctba), col=2,       
     cex.lab=.7, cex.circles=0.035, fg="blue")
plot(ctba, border=gray(.5), add=TRUE)

### three groups with no restriction
ctba.gr1 <- skater(ctba.mst[,1:2], ctba@data[,16:20], 2)

### thee groups with minimum population 
ctba.gr2 <- skater(ctba.mst[,1:2], ctba@data[,16:20], 2, 1000000, ctba@data$Pop)

### thee groups with minimun number of areas
ctba.gr3 <- skater(ctba.mst[,1:2], ctba@data[,16:20], 2, 3, rep(1,nrow(ctba@data[,16:20])))

### groups frequency
table(ctba.gr1$groups)
table(ctba.gr2$groups)
table(ctba.gr3$groups)

### the skater plot
par(mar=c(0,0,0,0))
plot(ctba.gr1, coordinates(ctba), cex.circles=0.035, cex.lab=.7)

### more one partition
ctba.gr1b <- skater(ctba.gr1, ctba@data[,16:20], 1)

### length groups frequency
table(ctba.gr1$groups)
table(ctba.gr1b$groups)

### the skater plot, using other colors
plot(ctba.gr1b, coordinates(ctba), cex.circles=0.035, cex.lab=.7,
     groups.colors=colors()[(1:length(ctba.gr1b$ed))*10])

### the Spatial Polygons plot
plot(ctba, col=heat.colors(4)[ctba.gr1$groups])
plot(ctba, col=heat.colors(4)[ctba.gr1b$groups])