# ggplot2

Published:

This lesson is from r-statistics.co

# Setup

• make plots by adding layers
• tell ggplot what dataset (dataframe) to use, ggplot(df)
• ggplot doesn't take vectors as arguments
• add whatever aesthetics, aes()
# Load ggplot2; the `diamonds` dataset used below ships with the package.
library(ggplot2)

# Inspect the dataset: dimensions, interactive data viewer, column names.
dim(diamonds)
View(diamonds)
names(diamonds)
# "carat"   "cut"     "color"   "clarity" "depth"   "table"   "price"  "x"       "y"       "z"

# If only the dataset is known
ggplot(diamonds)

# If only the X-axis is known; the Y-axis can be specified in the respective geoms
ggplot(diamonds, aes(x = carat))

# If both X and Y axes are fixed for all layers
ggplot(diamonds, aes(x = carat, y = price))

# Each category of the 'cut' variable gets a distinct color once a geom is added
ggplot(diamonds, aes(x = carat, color = cut))

# Fix the color so that it does not vary with a dataframe variable.
# A constant color must be set on a geom, outside aes(); passing
# color = "steelblue" to ggplot() itself has no effect on the plot.
ggplot(diamonds, aes(x = carat)) +
  geom_density(color = "steelblue")


# Layers

# Layer 1 is a scatterplot, layer 2 a smoother; both inherit the plot-level aesthetics
ggplot(data = diamonds, mapping = aes(x = carat, y = price, color = cut)) +
  geom_point() +
  geom_smooth()

# Same plot as above, with the aesthetics declared inside each geom instead
ggplot(data = diamonds) +
  geom_point(mapping = aes(x = carat, y = price, color = cut)) +
  geom_smooth(mapping = aes(x = carat, y = price, color = cut))

# Dropping color from geom_smooth() yields a single smoothing line
ggplot(data = diamonds) +
  geom_point(mapping = aes(x = carat, y = price, color = cut)) +
  geom_smooth(mapping = aes(x = carat, y = price))

# Same result, written more simply: x/y shared at plot level, color only on the points
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(mapping = aes(color = cut)) +
  geom_smooth()

# Point shape varies with the 'color' variable (point color still follows 'cut').
# diamonds$color has 7 levels, but the default discrete shape palette supplies
# only 6 shapes, so the last level would be dropped with a warning. Supplying
# one shape per level explicitly keeps every point on the plot.
ggplot(diamonds, aes(x = carat, y = price, color = cut, shape = color)) +
  geom_point() +
  scale_shape_manual(values = seq_len(nlevels(diamonds$color)))


# Labels

# Add axis labels and a plot title; the plot is stored in `gg` for reuse.
gg <- ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price, color = cut)
) +
  geom_point() +
  labs(title = "Scatterplot", x = "Carat", y = "Price")

print(gg)


# Theme

# Set the text size of the plot title, axis tick labels and axis titles
# (title also bold), and change the title of the color legend.
gg1 <- gg +
  theme(
    plot.title = element_text(size = 30, face = "bold"),
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 15),
    axis.title.x = element_text(size = 25),
    axis.title.y = element_text(size = 25)
  ) +
  scale_color_discrete(name = "Cut of diamonds")
print(gg1) # print the plot

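• If the legend shows a shape attribute based on a factor variable, change its title with
• scale_shape_discrete(name="legend title")
• Had it been a continuous variable, the analogous call would be
• scale_shape_continuous(name="legend title") (note: ggplot2 cannot map a continuous variable to shape and raises an error here; bin the variable and use scale_shape_binned() instead)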

# Facets

# Facet formulas are written as row ~ column

# facet_wrap(): one panel per level of 'cut', wrapped into 3 columns
gg1 + facet_wrap(facets = ~cut, ncol = 3)

# facet_wrap() with a two-sided formula: one panel per color/cut combination,
# wrapped into a ribbon of panels rather than a strict row/column layout
gg1 + facet_wrap(facets = color ~ cut)

# facet_grid(): a true grid, rows are 'color' and columns are 'cut'
gg1 + facet_grid(color ~ cut)



# Other Charts

# Bar Charts

# Frequency bar chart: only x is mapped, so the Y axis is the count of each 'cyl' value
plot1 <- ggplot(data = mtcars, mapping = aes(x = cyl)) +
  geom_bar() +
  labs(title = "Frequency bar chart")
print(plot1)

# Bar heights are given explicitly by a dataframe column, hence stat = "identity"
df <- data.frame(var = c("a", "b", "c"), nums = 1:3)
View(df)
plot2 <- ggplot(data = df, mapping = aes(x = var, y = nums)) +
  geom_bar(stat = "identity")
print(plot2)

# The same bars with the coordinates flipped
df <- data.frame(var = c("a", "b", "c"), nums = 1:3)
ggplot(data = df, mapping = aes(x = var, y = nums)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(title = "Coordinates are flipped")

# Adjust X and Y axis limits
# coord_cartesian(xlim=c(x1,x2))
# xlim(c(x1,x2)) # delete datapoints
# scale_x_continuous(limits=c(x1,x2)) # delete datapoints

# Zooming in with coord_cartesian(): only the visible Y range is restricted
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(mapping = aes(color = cut)) +
  geom_smooth() +
  coord_cartesian(ylim = c(0, 10000)) +
  labs(title = "Coord_cartesian zoomed in!")

# Limiting with ylim(): datapoints outside the range are deleted,
# which changes the smoothing line
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(mapping = aes(color = cut)) +
  geom_smooth() +
  ylim(c(0, 10000)) +
  labs(title = "Datapoints deleted: Note the change in smoothing lines!")



Tags: