With thanks to this file by Jenny Bryan.

First basic plot

# Attach the plotting package and the data package, hiding their messages.
suppressMessages(library(ggplot2))
suppressMessages(library(gapminder))

# Bind the gapminder data to the short name used throughout this script.
gDat <- gapminder

# Base plot object: data plus x/y mappings only. No layer has been added
# yet, so this just initializes the plot.
p <- ggplot(
  data = gDat,
  mapping = aes(x = gdpPercap, y = lifeExp)
)

# Adding a point layer turns it into a scatterplot.
p + geom_point()

# Lower-level spelling of the same layer (current ggplot2 also requires
# 'stat' and 'position' to be given explicitly):
#   p + layer(geom = "point", stat = "identity", position = "identity")

In logs

#' Quick and dirty: log-transform the variable inside the aesthetic mapping.
ggplot(gDat, aes(x = log10(gdpPercap), y = lifeExp)) +
  geom_point()

#' A better way: keep the mapping as it is and put the x axis on a log10 scale.
p +
  geom_point() +
  scale_x_log10()

#' Let's make that stick by storing the log scale in 'p'.
#' Common workflow: gradually build up the plot you want,
#' re-defining the object 'p' as you develop "keeper" commands.
p <- p + scale_x_log10()

By Continent

#' Convey continent by color: MAP the continent variable to the color aesthetic.
p + geom_point(mapping = aes(color = continent))

#' Everything up to now, spelled out in full detail in a single call.
ggplot(
  gDat,
  aes(x = gdpPercap, y = lifeExp, color = continent)
) +
  geom_point() +
  scale_x_log10()

#' Address overplotting: SET alpha transparency and size to fixed values
#' (set outside aes(), as opposed to mapped from a variable).
p + geom_point(alpha = 1 / 3, size = 3)

#' Add a fitted curve or line. No 'method' is given here, so geom_smooth()
#' picks one itself and reports the choice in a message. The message recorded
#' for this plot was:
#'   geom_smooth: method="auto" and size of largest group is >=1000, so using
#'   gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the
#'   smoothing method.
p +
  geom_point() +
  geom_smooth()

#' Disabled variants: thicker curve without the confidence band, first with
#' the automatically chosen smoother, then with a linear fit.
# p + geom_point() + geom_smooth(lwd = 3, se = FALSE)
# p + geom_point() + geom_smooth(lwd = 3, se = FALSE, method = "lm")

#' Revive our interest in continents! Adding the color mapping at the plot
#' level makes it apply to both the points and the smoother.
# p + aes(color = continent) + geom_point() + geom_smooth(lwd = 3, se = FALSE)
p +
  aes(color = continent) +
  geom_point() +
  geom_smooth(method = "loess", se = FALSE, lwd = 1)

Nicer: Faceting

#' Faceting: another way to exploit a factor. Here the plot is split into
#' one panel per continent, with a loess smoother added on top of the points.
# p + geom_point(alpha = (1/3), size = 3) + facet_wrap(~ continent)
p +
  geom_point(alpha = 1 / 3, size = 3) +
  facet_wrap(~ continent) +
  geom_smooth(method = "loess", se = FALSE, lwd = 2)

Exercises:

  • plot lifeExp against year
  • make mini-plots, split out by continent
  • add a fitted smooth and/or linear regression, w/ or w/o facetting
  • other ideas?

Some Stripplots Below

#' Plot lifeExp against year; keep the plot as 'y' and display it.
y <- ggplot(gDat, aes(x = year, y = lifeExp)) +
  geom_point()
y

#' Make mini-plots, split out by continent.
y + facet_wrap(~ continent)

#' Add a fitted smooth and/or linear regression, w/ or w/o faceting.
#' Disabled variant: default smoother plus an orange linear fit, unfaceted.
# y + geom_smooth(se = FALSE, lwd = 2) +
#   geom_smooth(se = FALSE, method = "lm", color = "orange", lwd = 2)

#' Loess smoother, one panel per continent.
y +
  geom_smooth(method = "loess", se = FALSE, lwd = 2) +
  facet_wrap(~ continent)

Scatter-line plots

#' Last bit on scatterplots:
#' how can we "connect the dots" for one country,
#' i.e. make a spaghetti plot?
#' A plain geom_line() is not it; mapping 'group' to country is.
# y + facet_wrap(~ continent) + geom_line() # uh, no
# y + facet_wrap(~ continent) + geom_line(aes(group = country)) # yes!
y +
  facet_wrap(~ continent) +
  geom_line(mapping = aes(group = country)) +
  geom_smooth(method = "loess", se = FALSE, lwd = 2)

Note about subsetting data

#' Sadly, ggplot() does not have a 'subset =' argument,
#' so filter the rows 'on the fly' with subset(..., subset = ...)
#' before the data reaches ggplot().
ggplot(
  subset(gDat, subset = country == "Zimbabwe"),
  aes(x = year, y = lifeExp)
) +
  geom_line() +
  geom_point()

Let's just look at four countries

#' Countries to compare; each one gets its own color.
jCountries <- c("Canada", "Rwanda", "Cambodia", "Mexico")

#' Keep only the rows for those countries, then draw life expectancy over
#' time as a line with points.
ggplot(
  subset(gDat, subset = country %in% jCountries),
  aes(x = year, y = lifeExp, color = country)
) +
  geom_line() +
  geom_point()

## when you really care, make your legend easy to navigate  
## this means visual order = data order = factor level order
## ggplot(subset(gDat, country %in% jCountries),
##       aes(x = year, y = lifeExp, color = reorder(country, -1 * lifeExp, max))) +
##    geom_line() + geom_point()

### Another approach to overplotting
## ggplot(gDat, aes(x = gdpPercap, y = lifeExp)) +
##   scale_x_log10() + geom_bin2d()