ggplot2
Published:
This lesson is from r-statistics.co
Setup
- make plots by adding layers
- tell ggplot what dataset (dataframe) to use,
ggplot(df)
- ggplot doesn’t take vectors as arguments
- add whatever aesthetics,
aes()
- load ggplot2 and take a first look at the example dataset (diamonds),
# Load ggplot2; `diamonds` is the example dataset used throughout this lesson.
library(ggplot2)

# First look at the data: dimensions, leading rows and column names.
dim(diamonds)
head(diamonds)
# View() opens a spreadsheet-style data viewer, which needs an interactive
# session; guard it so running or rendering the script in batch mode still works.
if (interactive()) {
  View(diamonds)
}
names(diamonds)
# "carat" "cut" "color" "clarity" "depth" "table" "price" "x" "y" "z"
# Only the dataset is known: aesthetics are supplied later, per geom.
ggplot(diamonds)
# Only the x-axis is fixed; each geom can specify its own y.
ggplot(diamonds, aes(x = carat))
# Both x and y are fixed for every layer.
ggplot(diamonds, aes(x = carat, y = price))
# Colour is mapped to `cut`: each category gets a distinct colour once a geom
# is added.
ggplot(diamonds, aes(x = carat, color = cut))
# A fixed colour (one that does not vary with a dataframe variable) has to be
# set on the geom, outside aes(). Passing `color` to ggplot() itself has no
# effect, because ggplot() does not use extra arguments beyond data/mapping.
ggplot(diamonds, aes(x = carat)) +
  geom_density(color = "steelblue")
Layers
# Two layers (points, then a smoother) that share the aesthetics declared
# once in ggplot().
ggplot(data = diamonds, mapping = aes(x = carat, y = price, color = cut)) +
  geom_point() +
  geom_smooth()

# Equivalent plot, with every geom carrying its own aesthetic mapping.
ggplot(data = diamonds) +
  geom_point(mapping = aes(x = carat, y = price, color = cut)) +
  geom_smooth(mapping = aes(x = carat, y = price, color = cut))

# Leave `color` out of geom_smooth() to get a single smoothing line instead
# of one line per cut.
ggplot(data = diamonds) +
  geom_point(mapping = aes(x = carat, y = price, color = cut)) +
  geom_smooth(mapping = aes(x = carat, y = price))

# Same result, shorter: shared x/y go in ggplot(), colour only on the points.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(mapping = aes(color = cut)) +
  geom_smooth()
# Point shape varies with the `color` column while point colour varies with
# `cut`. `color` has seven levels, but the default discrete shape palette only
# offers six, so the shapes are supplied explicitly; without this ggplot2 warns
# and the points of the seventh level are not drawn.
ggplot(diamonds, aes(x = carat, y = price, color = cut, shape = color)) +
  geom_point() +
  scale_shape_manual(values = seq_len(nlevels(diamonds$color)))
Labels
# Build the scatterplot, attach a plot title and axis labels with labs(),
# and keep the result in `gg` so later sections can extend it.
gg <- ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price, color = cut)
) +
  geom_point() +
  labs(
    title = "Scatterplot",
    x = "Carat",
    y = "Price"
  )
# Render the stored plot.
print(gg)
Theme
# Set the text size of the plot title, axis tick labels and axis titles
# (title also in bold), and change the title of the colour legend.
# The themed plot is stored in `gg1`, built on top of `gg`.
gg1 <- gg +
  theme(
    plot.title = element_text(size = 30, face = "bold"),
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 15),
    axis.title.x = element_text(size = 25),
    axis.title.y = element_text(size = 25)
  ) +
  scale_color_discrete(name = "Cut of diamonds")
print(gg1) # print the plot
- If the legend shows a shape attribute based on a factor variable
scale_shape_discrete(name="legend title")
- A continuous variable cannot be mapped to shape (scale_shape_continuous() only raises an error); bin the variable instead,
scale_shape_binned(name="legend title")
Facets
# facet_wrap(): one panel per level of `cut`, wrapped into three columns.
gg1 + facet_wrap(vars(cut), ncol = 3)
# facet_wrap() on two variables: one panel per color/cut combination, wrapped
# into a 2-D layout.
gg1 + facet_wrap(vars(color, cut))
# facet_grid(): a true grid with `color` on the rows and `cut` on the columns
# (formula form: row ~ column).
gg1 + facet_grid(rows = vars(color), cols = vars(cut))
Other Charts
# Bar Charts
# Frequency bar chart: only x is mapped, so geom_bar() derives the bar heights
# by counting the rows for each value of `cyl`.
plot1 <- ggplot(mtcars, aes(x = cyl)) +
  geom_bar() +
  labs(title = "Frequency bar chart")
print(plot1)
# Bar heights given explicitly in the dataframe: stat = "identity" makes
# geom_bar() use the `nums` column as-is instead of counting rows.
df <- data.frame(var = c("a", "b", "c"), nums = 1:3)
# View() opens a data viewer and needs an interactive session, so it is
# skipped when the script is run or rendered in batch mode.
if (interactive()) {
  View(df)
}
plot2 <- ggplot(df, aes(x = var, y = nums)) +
  geom_bar(stat = "identity")
print(plot2)
# Horizontal bars: the same identity bar chart with the x and y axes swapped
# by coord_flip().
df <- data.frame(
  var = c("a", "b", "c"),
  nums = 1:3
)
ggplot(data = df, mapping = aes(x = var, y = nums)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(title = "Coordinates are flipped")
# Adjusting the X and Y axis limits; the options are:
#   coord_cartesian(xlim = c(x1, x2))
#   xlim(c(x1, x2))                          # deletes datapoints
#   scale_x_continuous(limits = c(x1, x2))   # deletes datapoints
#
# Zooming in with coord_cartesian(): only the visible y range changes.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(mapping = aes(color = cut)) +
  geom_smooth() +
  coord_cartesian(ylim = c(0, 10000)) +
  labs(title = "Coord_cartesian zoomed in!")
# Limiting the axis with ylim() instead: datapoints outside the range are
# deleted, which changes the smoothing line.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(mapping = aes(color = cut)) +
  geom_smooth() +
  ylim(0, 10000) +
  labs(title = "Datapoints deleted: Note the change in smoothing lines!")