ggplot2

2 minute read

Published:

This lesson is from r-statistics.co

Setup

  • make plots by adding layers
    • tell ggplot what dataset (dataframe) to use, ggplot(df)
      • ggplot doesn’t take vectors as arguments
    • map dataframe columns to aesthetics (x, y, color, …) with aes()
library(ggplot2)

# Inspect the diamonds dataset that is available once ggplot2 is loaded
dim(diamonds)   # number of rows and columns
head(diamonds)  # first six rows
# View() opens a spreadsheet-style viewer and needs an interactive session,
# so skip it when the script is run non-interactively (e.g. with Rscript).
if (interactive()) {
  View(diamonds)
}
names(diamonds)
# "carat"   "cut"     "color"   "clarity" "depth"   "table"   "price"  "x"       "y"       "z"  


# Only the dataset is known: no axes are mapped yet
ggplot(diamonds)

# Only the x-axis is known; y can be supplied later by the individual geoms
ggplot(diamonds, aes(x = carat))

# Both x and y are fixed for every layer
ggplot(diamonds, aes(x = carat, y = price))

# Map colour to 'cut': each category gets a distinct colour once a geom is added
ggplot(diamonds, aes(x = carat, color = cut))

# Fixed colour that does not vary with any dataframe variable.
# A constant passed to ggplot() itself is silently ignored, so it has to be
# set on the layer, outside aes(). bins = 30 is the default, stated explicitly
# to avoid the "pick better value" message.
ggplot(diamonds, aes(x = carat)) +
  geom_histogram(color = "steelblue", bins = 30)

Layers

# Layer 1 is a scatterplot, layer 2 a smoother; both inherit the aesthetics
# declared in ggplot()
ggplot(diamonds, aes(carat, price, colour = cut)) +
  geom_point() +
  geom_smooth()

# Equivalent plot with the aesthetics declared on each geom instead
ggplot(diamonds) +
  geom_point(aes(carat, price, colour = cut)) +
  geom_smooth(aes(carat, price, colour = cut))


# Leave colour out of geom_smooth() to get a single smoothing line
ggplot(diamonds) +
  geom_point(aes(carat, price, colour = cut)) +
  geom_smooth(aes(carat, price))

# Same result, with the shared x/y stated once in ggplot()
ggplot(diamonds, aes(carat, price)) +
  geom_point(aes(colour = cut)) +
  geom_smooth()

# Vary the point shape with the 'color' variable.
# 'color' has 7 levels but the default shape palette only supplies 6, so the
# seventh level would be dropped with a warning; supply one shape per level.
ggplot(diamonds, aes(x = carat, y = price, color = cut, shape = color)) +
  geom_point() +
  scale_shape_manual(values = seq_len(nlevels(diamonds$color)))

Labels

# Attach a plot title and axis labels to the scatterplot
gg <- ggplot(diamonds, aes(carat, price, colour = cut)) +
  geom_point() +
  labs(x = "Carat", y = "Price", title = "Scatterplot")

print(gg)

Theme

# Resize the title and axis text, and rename the colour legend
gg1 <- gg +
  theme(
    plot.title = element_text(face = "bold", size = 30),
    axis.title.x = element_text(size = 25),
    axis.title.y = element_text(size = 25),
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 15)
  ) +
  scale_color_discrete(name = "Cut of diamonds")
print(gg1)  # draw the plot
  • If the legend shows a shape attribute based on a factor variable
    • scale_shape_discrete(name="legend title")
  • Had the legend been for a color attribute based on a continuous variable,
    • scale_color_continuous(name="legend title") (shape cannot be mapped to a continuous variable)

Facets

# The formula form of faceting reads row ~ column; vars() names the
# faceting variables explicitly instead

# One panel per 'cut', wrapped into 3 columns
gg1 + facet_wrap(vars(cut), ncol = 3)

# One panel per color/cut combination, wrapped into a ribbon of panels
# (facet_wrap has no fixed row/column roles)
gg1 + facet_wrap(vars(color, cut))

# Strict grid: rows are 'color', columns are 'cut'
gg1 + facet_grid(rows = vars(color), cols = vars(cut))

Other Charts

# Bar Charts

# Frequency bar chart: bar heights are the counts of each x value
plot1 <- ggplot(data = mtcars, mapping = aes(cyl)) +
  geom_bar() +
  labs(title = "Frequency bar chart")
print(plot1)

# Bar heights are taken directly from a dataframe column via stat = "identity"
df <- data.frame(var = c("a", "b", "c"), nums = 1:3)
# View() needs an interactive session; skip it when run non-interactively
if (interactive()) {
  View(df)
}
plot2 <- ggplot(df, aes(x = var, y = nums)) +
  geom_bar(stat = "identity")
print(plot2)

# Horizontal bars: swap the x and y axes with coord_flip()
df <- data.frame(var = letters[1:3], nums = seq_len(3))
ggplot(df, aes(var, nums)) +
  geom_bar(stat = "identity") +
  labs(title = "Coordinates are flipped") +
  coord_flip()
# Ways to adjust the X and Y axis limits:
#   coord_cartesian(xlim = c(x1, x2))
#   xlim(c(x1, x2))                         # deletes datapoints
#   scale_x_continuous(limits = c(x1, x2))  # deletes datapoints

# Zoom in on the y-axis with coord_cartesian()
ggplot(diamonds, aes(carat, price)) +
  geom_point(aes(colour = cut)) +
  geom_smooth() +
  labs(title = "Coord_cartesian zoomed in!") +
  coord_cartesian(ylim = c(0, 10000))

# Limiting the axis with ylim() deletes the datapoints outside the range,
# which changes the smoothing line
ggplot(diamonds, aes(carat, price)) +
  geom_point(aes(colour = cut)) +
  geom_smooth() +
  labs(title = "Datapoints deleted: Note the change in smoothing lines!") +
  ylim(0, 10000)