Um pouco de ggplot2

Cristiano de Carvalho Santos (DEST-UFMG)

Pacote ggplot2

ggplot(data = mtcars, aes(x = disp, y = mpg)) + geom_point()

library(ggplot2)
# Print the first rows of the built-in mtcars data set used by the plots.
head(mtcars)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# Scatter plot of mpg against disp, with the aesthetics declared once in
# ggplot() so that every layer added afterwards can use them.
ggplot(mtcars, aes(disp, mpg)) +
  geom_point()

# Equivalent plot, with the aesthetics declared in the layer itself.
ggplot(mtcars) +
  geom_point(aes(disp, mpg))

Geoms

Os geoms definem qual forma geométrica será utilizada para a visualização dos dados no gráfico. Veja os geoms mais utilizados:

# Line plot of mpg against disp.
ggplot(mtcars, aes(disp, mpg)) +
  geom_line()

# Scatter plot plus a reference line with intercept 40 and slope -0.07.
ggplot(mtcars, aes(disp, mpg)) +
  geom_point() +
  geom_abline(intercept = 40, slope = -0.07)

# Scatter plot plus a horizontal reference line at mpg = 25.
ggplot(mtcars, aes(disp, mpg)) +
  geom_point() +
  geom_hline(yintercept = 25)

# Box plot of mpg for each value of cyl (converted to a factor).
ggplot(mtcars, aes(as.factor(cyl), mpg)) +
  geom_boxplot()

# Histogram of disp, using the default binning.
ggplot(mtcars, aes(disp)) +
  geom_histogram()

# Density curve of disp.
ggplot(mtcars, aes(disp)) +
  geom_density()

http://www.sthda.com/english/wiki/ggplot2-area-plot-quick-start-guide-r-software-and-data-visualization

# Simulated data: 200 weights drawn around 55 (sex "F") and 200 around 65
# (sex "M"), both with sd 5, rounded to whole numbers. The seed makes the
# draw reproducible.
set.seed(1234)
df <- data.frame(
  sex = factor(rep(c("F", "M"), each = 200)),
  weight = round(c(
    rnorm(200, mean = 55, sd = 5),
    rnorm(200, mean = 65, sd = 5)
  ))
)

# Density estimate of weight as a data frame with columns x and y.
dat <- with(density(df$weight), data.frame(x, y))

# Density curve with the region 65 < x < 70 shaded in red.
# The shaded layer receives only the rows inside the interval. The previous
# version remapped every other row to x = 0 and relied on xlim() to drop
# them, which raised "removed rows" warnings and hid the intent.
ggplot(data = dat, mapping = aes(x = x, y = y)) +
  geom_line() +
  geom_area(data = subset(dat, x > 65 & x < 70), fill = "red") +
  xlim(30, 80)

# Bar chart with the number of cars in each class of the mpg data set.
g <- ggplot(mpg, aes(class))
g + geom_bar()

# Base plot for diamonds: color on the x axis and bars filled by cut, with
# both axis titles and the legend removed.
dplot <- ggplot(diamonds, aes(color, fill = cut)) +
  xlab(NULL) +
  ylab(NULL) +
  theme(legend.position = "none")

# Default bar position.
dplot + geom_bar()

# position = "fill"
dplot + geom_bar(position = "fill")

# position = "dodge"
dplot + geom_bar(position = "dodge")

Personalizando os gráficos

Cores

# Split the points by a third variable (a factor) using colour.
ggplot(mtcars, aes(disp, mpg, colour = as.factor(am))) +
  geom_point()

# Split the points by shape instead; size and colour are fixed values set
# outside aes().
ggplot(mtcars, aes(disp, mpg, shape = as.factor(am))) +
  geom_point(colour = "red", size = 3)

# Colour mapped to a numeric variable.
ggplot(mtcars, aes(disp, mpg, colour = cyl)) +
  geom_point()

# Point size can also represent a variable.
ggplot(mtcars, aes(disp, mpg, colour = cyl, size = wt)) +
  geom_point()

# Box plots with the outline colour mapped to cyl.
ggplot(mtcars, aes(as.factor(cyl), mpg, colour = as.factor(cyl))) +
  geom_boxplot()

# Box plots with the fill colour mapped to cyl.
ggplot(mtcars, aes(as.factor(cyl), mpg, fill = as.factor(cyl))) +
  geom_boxplot()

# Every box in the same colour: fixed values set outside aes().
ggplot(mtcars, aes(as.factor(cyl), mpg)) +
  geom_boxplot(fill = "pink", color = "red")

# Density curve of mpg with a fixed fill (colour number 3).
ggplot(mtcars, aes(mpg)) +
  geom_density(fill = 3)

# color declared in ggplot(): the mapping is shared by the points and by
# the fitted lines.
ggplot(mtcars, aes(disp, mpg, color = as.factor(am))) +
  geom_point() +
  geom_smooth(method = "lm")

# color declared only inside geom_point(): the smoothing layer does not
# receive it.
ggplot(mtcars, aes(disp, mpg)) +
  geom_point(aes(color = as.factor(am))) +
  geom_smooth(method = "lm")

Eixos

# Histogram of mpg with custom axis titles and fixed axis limits.
ggplot(mtcars, aes(mpg)) +
  geom_histogram(color = "orange", fill = "blue") +
  xlab("Milhas por galão") +
  ylab("Frequência") +
  xlim(0, 40) +
  ylim(0, 8)

Legenda

# Bar chart of the number of rows per cyl value, with the default legend.
ggplot(mtcars, aes(as.factor(cyl), fill = as.factor(cyl))) +
  geom_bar()

# Same chart with a custom legend title and a plot title.
ggplot(mtcars, aes(as.factor(cyl), fill = as.factor(cyl))) +
  geom_bar() +
  labs(fill = "cyl", title = "Título")

# Same chart with the legend placed at the top.
ggplot(mtcars, aes(as.factor(cyl), fill = as.factor(cyl))) +
  geom_bar() +
  labs(fill = "cyl") +
  theme(legend.position = "top")

# Same bar chart with the fill legend hidden.
# guides(fill = FALSE) is deprecated in ggplot2 (>= 3.3.4); "none" is the
# supported way to suppress a guide.
ggplot(mtcars, aes(x = as.factor(cyl), fill = as.factor(cyl))) +
  geom_bar() +
  guides(fill = "none")

Facets

# Facets: one panel per value of am, laid out in rows.
# scales = "free" frees both axes; "free_y" or "free_x" leaves only one of
# the axes with different scales.
ggplot(mtcars, aes(mpg, disp, colour = as.factor(cyl))) +
  geom_point() +
  facet_grid(am ~ ., scales = "free")

# One panel per value of am, laid out in columns.
ggplot(mtcars, aes(mpg, disp, colour = as.factor(cyl))) +
  geom_point() +
  facet_grid(. ~ am)

# Another way: facet_wrap() allows the number of columns to be specified.
ggplot(mtcars, aes(mpg, disp, colour = as.factor(cyl))) +
  geom_point() +
  facet_wrap(~am, ncol = 2)

Mais configurações possíveis

# An example with many settings at once: box plot of disp by am with a
# black-and-white theme, custom box/outlier appearance, text sizes, plot
# margins and labels.
# NOTE(review): no fill aesthetic is mapped in this plot, so
# scale_fill_manual() looks like it has no visible effect -- confirm.
ggplot(mtcars, aes(as.factor(am), disp)) +
  theme_bw() +
  geom_boxplot(
    alpha = 0.2,
    size = 0.5,
    width = 0.8,
    outlier.colour = "grey40",
    outlier.size = 2
  ) +
  scale_fill_manual(values = gray.colors(3)) +
  theme(
    axis.text = element_text(size = 8),
    axis.title = element_text(size = 9),
    strip.text = element_text(size = 9),
    plot.margin = unit(c(0.05, 0.2, 0.05, 0.05), "cm")
  ) +
  labs(title = "", x = "am", y = "Disp")

Estatísticas descritivas no gráfico

# Scatter plot of cty by trans, with the mean of cty for each trans value
# overlaid as a large red point.
# `fun` replaces the `fun.y` argument, deprecated since ggplot2 3.3.0.
ggplot(mpg, aes(trans, cty)) +
  geom_point() +
  stat_summary(geom = "point", fun = "mean", colour = "red", size = 4)

# Small example data: one len value for each of three dose labels.
df <- data.frame(
  dose = c("D0.5", "D1", "D2"),
  len = c(4.2, 10, 29.5)
)
head(df)
##   dose  len
## 1 D0.5  4.2
## 2   D1 10.0
## 3   D2 29.5
# Bars whose heights are the len values themselves (stat = "identity").
ggplot(df, aes(dose, len)) +
  geom_bar(stat = "identity", fill = "white", color = "blue")

# The same chart written with geom_col().
ggplot(df, aes(dose, len)) +
  geom_col(fill = "white", color = "blue")

# Same three dose labels, now with two len values per dose.
df <- data.frame(
  dose = rep(c("D0.5", "D1", "D2"), times = 2),
  len = c(4.2, 10, 29.5, 5, 15, 36)
)

head(df)
##   dose  len
## 1 D0.5  4.2
## 2   D1 10.0
## 3   D2 29.5
## 4 D0.5  5.0
## 5   D1 15.0
## 6   D2 36.0
# Bars drawn from the raw len values; each dose now has two rows in df.
ggplot(data = df, aes(x = dose, y = len)) +
  geom_bar(stat = "identity", color = "blue", fill = "white")

# The stat_summary() calls below use `fun`, which replaces the `fun.y`
# argument deprecated since ggplot2 3.3.0.

# Bar at the mean of len for each dose.
ggplot(data = df, aes(x = dose, y = len)) +
  stat_summary(geom = "bar", fun = "mean", colour = "red", size = 2, fill = 5)

# Point at the mean of len for each dose.
ggplot(data = df, aes(x = dose, y = len)) +
  stat_summary(geom = "point", fun = "mean", colour = "red", size = 2, fill = 5)

# Bar at the standard deviation of len for each dose.
ggplot(data = df, aes(x = dose, y = len)) +
  stat_summary(geom = "bar", fun = "sd", colour = "red", size = 2, fill = 5)

# Bar with the number of rows for each dose.
ggplot(data = df, aes(x = dose)) +
  geom_bar(stat = "count", color = "blue", fill = 5)

Existem outros temas?

library(ggthemes)
library(gridExtra)

# Base plot reused by the theme examples: hwy against displ, coloured by
# factor(cyl), with linear fits (method = "lm") drawn without the
# confidence band. `se = FALSE` is spelled out because `F` is an ordinary
# variable that can be reassigned.
grafico <- ggplot(mpg, aes(displ, hwy, col = factor(cyl))) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  ggtitle("Cilindradas, cilindros e Milhas por Galão") +
  ylab("Milhas por galão") +
  xlab("Cilindradas")

# Original plot, with the default theme.
grafico

# "The Economist" theme with its matching colour scale.
grafico +
  theme_economist() +
  scale_color_economist()

# "The Wall Street Journal" theme with its matching colour scale.
grafico +
  theme_wsj() +
  scale_color_wsj()

# "Excel" theme with its matching colour scale.
grafico +
  theme_excel() +
  scale_color_excel()

# "fivethirtyeight" theme.
grafico +
  theme_fivethirtyeight()

# "highcharts" theme with its matching colour scale.
grafico +
  theme_hc() +
  scale_color_hc()

# "Tufte" theme.
grafico +
  theme_tufte()

# "Stata" theme with its matching colour scale.
grafico +
  theme_stata() +
  scale_color_stata()

Agrupando gráficos em uma única figura

# Put several plots together: build four themed versions of `grafico` and
# draw them in a single figure.
g1 <- grafico +
  theme_fivethirtyeight()
g2 <- grafico +
  theme_hc() +
  scale_color_hc()
g3 <- grafico +
  theme_tufte()
g4 <- grafico +
  theme_stata() +
  scale_color_stata()

grid.arrange(g1, g2, g3, g4)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'