Plotly
Published:
This lesson is based on the DataCamp course "Introduction to Plotly".
Introduction
Converting a ggplot2 scatterplot
- Datasets
# ---- Setup: packages and data ----
# Attach packages before any of their functions are used: glimpse() below
# comes from dplyr, so it fails if called before library(dplyr).
library(dplyr)
library(ggplot2)
library(plotly)

# The data file is read relative to the lesson's working directory.
setwd("~/Downloads/teaching/R")
vgsales <- read.csv("dc/vgsales.csv")

# Inspect the data: spreadsheet-style viewer, column names, compact structure.
View(vgsales)
names(vgsales)
# [1] "Name" "Platform" "Year" "Genre"
# [5] "Publisher" "NA_Sales" "EU_Sales" "JP_Sales"
# [9] "Other_Sales" "Global_Sales" "Critic_Score" "Critic_Count"
# [13] "User_Score" "User_Count" "Developer" "Rating"
glimpse(vgsales)
# Critic_Score against NA_Sales for rows where Year is 2016, built with
# ggplot2 and stored so it can be shown both statically and interactively.
scatter <- ggplot(
  data = filter(vgsales, Year == 2016),
  mapping = aes(x = NA_Sales, y = Critic_Score)
) +
  geom_point(alpha = 0.3) # semi-transparent points reduce overplotting

scatter # static ggplot2 graphic
ggplotly(scatter) # same plot converted to an interactive plotly graphic
# Critic_Score against NA_Sales, with point color mapped to Rating.
# Rows with a missing Platform, NA_Sales or Critic_Score are dropped, and
# only rows with NA_Sales below 10 are kept.
scatter <- vgsales %>%
  filter(
    !is.na(Platform),
    !is.na(NA_Sales),
    !is.na(Critic_Score),
    NA_Sales < 10
  ) %>%
  ggplot(aes(x = NA_Sales, y = Critic_Score, color = Rating)) +
  geom_point(alpha = 0.3)

scatter # static ggplot2 graphic
ggplotly(scatter) # same plot converted to an interactive plotly graphic
Univariate Graphics
# ---- Setup for the univariate examples ----
library(dplyr)
library(ggplot2)
library(plotly)
library(forcats) # fct_reorder() for reordering factor levels

# The data file is read relative to the lesson's working directory.
setwd("~/Downloads/teaching/R")
vgsales <- read.csv(file = "dc/vgsales.csv")
# Bar chart of Platform frequencies.
# count() returns one row per Platform with its frequency in column n;
# those two columns are mapped to x and y, and add_bars() adds the bar trace.
plot_ly(count(vgsales, Platform), x = ~Platform, y = ~n) %>%
  add_bars()

# Same bar chart with the bars in descending order of frequency.
# Platform is turned into a factor whose levels are ordered by n.
library(forcats)
vgsales %>%
  count(Platform) %>%
  mutate(
    Platform = fct_reorder(as.factor(Platform), n, .desc = TRUE)
  ) %>%
  plot_ly(x = ~Platform, y = ~n) %>%
  add_bars()
# Histogram of Critic_Score (rows with a missing score removed first).
vgsales %>%
  filter(!is.na(Critic_Score)) %>%
  plot_ly(x = ~Critic_Score) %>% # aesthetics
  add_histogram()

# Histogram of Critic_Score with at most 25 bins.
vgsales %>%
  filter(!is.na(Critic_Score)) %>%
  plot_ly(x = ~Critic_Score) %>%
  add_histogram(nbinsx = 25)

# Histogram with bins of width 10 between 0 and 100.
vgsales %>%
  filter(!is.na(Critic_Score)) %>%
  plot_ly(x = ~Critic_Score) %>%
  add_histogram(
    xbins = list(start = 0, end = 100, size = 10)
  )

# Histogram with bins of width 20 between 0 and 100
# (this one is drawn from the unfiltered data).
plot_ly(vgsales, x = ~Critic_Score) %>%
  add_histogram(
    xbins = list(start = 0, end = 100, size = 20)
  )
# Frequency table for Genre: one row per Genre, frequency in column n.
genre_table <- count(vgsales, Genre)

# Bar chart of the Genre frequencies.
plot_ly(genre_table, x = ~Genre, y = ~n) %>%
  add_bars()
# Frequency table for Genre: one row per Genre, frequency in column n.
genre_table <- count(vgsales, Genre)

# Bar chart of Genre with the bars sorted from most to least frequent.
genre_table %>%
  mutate(
    Genre = fct_reorder(Genre, n, .desc = TRUE)
  ) %>%
  plot_ly(x = ~Genre, y = ~n) %>%
  add_bars()
Bivariate Graphics
# ---- Setup for the bivariate examples ----
library(dplyr)
library(ggplot2)
library(plotly)
library(forcats) # fct_reorder() for reordering factor levels

# The data file is read relative to the lesson's working directory.
setwd("~/Downloads/teaching/R")
vgsales <- read.csv(file = "dc/vgsales.csv")
# Scatter plot: both variables numerical.
# In the rendered widget, drag a box to zoom and double-click to reset.
plot_ly(vgsales, x = ~NA_Sales, y = ~Critic_Score) %>%
  add_markers()
# Bar chart: both variables categorical.
# Counts for every Rating/Platform combination, one bar trace per Rating.
# add_bars() alone places the bars side by side; the layout() call switches
# them to stacked bars.
plot_ly(
  count(vgsales, Rating, Platform),
  x = ~Platform,
  y = ~n,
  color = ~Rating
) %>%
  add_bars() %>%
  layout(barmode = "stack")
# Stacked bar chart by proportion.
# Each bar is one Platform split by Rating, so the proportions are computed
# within Platform (the x variable); that way every stacked bar sums to 1.
# The grouping is removed again before the data is handed to plot_ly().
vgsales %>%
  count(Rating, Platform) %>%
  group_by(Platform) %>%
  mutate(prop = n / sum(n)) %>% # share of each Rating within its Platform
  ungroup() %>%
  plot_ly(x = ~Platform, y = ~prop, color = ~Rating) %>% # y is the proportion
  add_bars() %>% # bars side by side by default
  layout(barmode = "stack") # switch to stacked bars
# Boxplots: how a numeric variable changes across levels of a categorical one.
# User_Score contains the text value "tbd", so after dropping those rows the
# column is converted to numeric; a boxplot needs a numeric y variable.
vgsales %>%
  filter(Developer == "Nintendo" & User_Score != "tbd") %>%
  mutate(User_Score = as.numeric(User_Score)) %>%
  plot_ly(x = ~Rating, y = ~User_Score) %>%
  add_boxplot()
# Hovering over a box shows its five-number summary.
# Scatter plot of User_Score (y-axis) against Critic_Score (x-axis).
# NOTE(review): other examples in this lesson filter User_Score on "tbd" and
# wrap it in as.numeric(), which suggests it is read as text - confirm the
# column is numeric before relying on this axis.
plot_ly(vgsales, x = ~Critic_Score, y = ~User_Score) %>%
  add_markers()
# Keep only the video games whose Year is 2016.
vg2016 <- filter(vgsales, Year == 2016)

# Stacked bar chart of Rating by Genre for the 2016 games.
plot_ly(
  count(vg2016, Genre, Rating),
  x = ~Genre,
  y = ~n,
  color = ~Rating
) %>%
  add_bars() %>%
  layout(barmode = "stack")
# Keep only the video games whose Year is 2016.
vg2016 <- filter(vgsales, Year == 2016)

# Boxplots of Global_Sales by Genre for the 2016 games.
# Global_Sales is on the x-axis and Genre on the y-axis.
plot_ly(vg2016, x = ~Global_Sales, y = ~Genre) %>%
  add_boxplot()
Customizing graphics
# ---- Setup for the customization examples ----
library(dplyr)
library(ggplot2)
library(plotly)
library(forcats) # fct_reorder() for reordering factor levels

# The data file is read relative to the lesson's working directory.
setwd("~/Downloads/teaching/R")
vgsales <- read.csv(file = "dc/vgsales.csv")
# ---- Color, opacity and symbols ----
# These examples appeared twice in a row, word for word; they are kept once.

# Histogram with a fixed bar color.
vgsales %>%
  plot_ly(x = ~Critic_Score) %>%
  add_histogram(color = I("red")) # I() marks a literal color, not a variable

# Scatter plot using opacity to overcome overplotting.
vgsales %>%
  plot_ly(x = ~Critic_Score, y = ~User_Score) %>%
  add_markers(marker = list(opacity = 0.2)) # 80% transparent

# Scatter plot using open symbols to overcome overplotting.
vgsales %>%
  plot_ly(x = ~Critic_Score, y = ~User_Score) %>%
  add_markers(marker = list(symbol = "circle-open"))

# Subset of games whose Year is 2016, used by the examples that follow.
vgsales2016 <- vgsales %>%
  filter(Year == 2016)

# Histogram of Critic_Score with navy bars that are 50% transparent.
vgsales2016 %>%
  plot_ly(x = ~Critic_Score) %>%
  add_histogram(color = I("navy"), opacity = 0.5)

# Change the color of the histogram using a hex code.
vgsales2016 %>%
  plot_ly(x = ~Critic_Score) %>%
  add_histogram(color = I("#111e6c"))

# Change the color of the histogram using an rgb() string.
vgsales2016 %>%
  plot_ly(x = ~Critic_Score) %>%
  add_histogram(marker = list(color = "rgb(17, 30, 108)"))

# Set the plotting symbol to diamond and the size to 4.
# Uses vgsales2016, the 2016 subset defined in this section (the name vg2016
# is never created after this section reloads its data).
vgsales2016 %>%
  plot_ly(x = ~User_Score, y = ~Critic_Score) %>%
  add_markers(marker = list(size = 4, symbol = "diamond"))
# Third variable in a scatter plot, shown through color.

# Categorical third variable: color mapped to Rating.
plot_ly(vgsales2016, x = ~NA_Sales, y = ~Critic_Score, color = ~Rating) %>%
  add_markers()

# Quantitative third variable: color mapped to User_Score coerced to numeric.
plot_ly(
  vgsales2016,
  x = ~NA_Sales,
  y = ~Critic_Score,
  color = ~as.numeric(User_Score)
) %>%
  add_markers()

# Third variable as a factor: color mapped to User_Score coerced to a factor.
plot_ly(
  vgsales2016,
  x = ~NA_Sales,
  y = ~Critic_Score,
  color = ~as.factor(User_Score)
) %>%
  add_markers()
# Choosing the colors used for a categorical mapping.

# Named RColorBrewer palette.
plot_ly(vgsales2016, x = ~NA_Sales, y = ~Critic_Score, color = ~Rating) %>%
  add_markers(colors = "Dark2")

# Hand-picked vector of color names.
plot_ly(vgsales2016, x = ~NA_Sales, y = ~Critic_Score, color = ~Rating) %>%
  add_markers(
    colors = c("orange", "black", "skyblue", "red")
  )
# Scatter plot of User_Score against Critic_Score,
# using color to add Genre as a third variable.
plot_ly(vgsales2016, x = ~Critic_Score, y = ~User_Score, color = ~Genre) %>%
  add_markers(colors = "Dark2")

# Scatter plot of User_Score against Critic_Score,
# with the plotting symbol coded by Rating.
plot_ly(vgsales2016, x = ~Critic_Score, y = ~User_Score, symbol = ~Rating) %>%
  add_markers()

# Scatter plot of User_Score vs. Critic_Score colored by the log of User_Count.
plot_ly(
  vgsales2016,
  x = ~Critic_Score,
  y = ~User_Score,
  color = ~log(User_Count)
) %>%
  add_markers()
# ---- Hover info ----

# Bar chart of Platform; the hover label shows only the bar height (y).
plot_ly(
  count(vgsales, Platform),
  x = ~Platform,
  y = ~n,
  hoverinfo = "y"
) %>%
  add_bars()

# Scatter plot whose hover label is custom text built from both variables,
# with an HTML line break between them.
plot_ly(
  vgsales,
  x = ~NA_Sales,
  y = ~Critic_Score,
  hoverinfo = "text",
  text = ~paste(
    "NA Sales:", NA_Sales,
    "<br>",
    "Critic Score:", Critic_Score
  )
) %>%
  add_markers()

# Bar chart of Rating; the hover label shows only the bar height (y).
plot_ly(
  count(vgsales, Rating),
  x = ~Rating,
  y = ~n,
  hoverinfo = "y"
) %>%
  add_bars()
# Third variable shown through the hover text.

# Scatter plot of User_Score vs. Critic_Score with the video game Name
# added to the hover info.
plot_ly(vgsales2016, x = ~Critic_Score, y = ~User_Score, text = ~Name) %>%
  add_markers()

# Fully formatted hover text showing NA_Sales, EU_Sales and Name on
# separate lines; hoverinfo = "text" displays only this text.
plot_ly(
  vgsales2016,
  x = ~NA_Sales,
  y = ~EU_Sales,
  hoverinfo = "text",
  text = ~paste(
    "NA_Sales: ", NA_Sales,
    "<br>",
    "EU_Sales: ", EU_Sales,
    "<br>",
    "Name: ", Name
  )
) %>%
  add_markers()
# Axis labels and a plot title are set through layout().
plot_ly(
  count(vgsales, Platform),
  x = ~Platform,
  y = ~n,
  hoverinfo = "y"
) %>%
  add_bars() %>%
  layout(
    title = "Popularity of Video Game Platform",
    xaxis = list(title = "Video Game Platform"),
    yaxis = list(title = "Frequency")
  )

# Log scale: type = "log" on the x-axis.
plot_ly(vgsales2016, x = ~User_Count, y = ~User_Score) %>%
  add_markers() %>%
  layout(
    title = "User Score vs No of Users",
    xaxis = list(title = "No of Users (log scale)", type = "log"),
    yaxis = list(title = "User Score")
  )
# Grid lines, zero lines and background colors:
# showgrid, zeroline, plot_bgcolor, paper_bgcolor.
plot_ly(vgsales2016, x = ~User_Count, y = ~User_Score) %>%
  add_markers() %>%
  layout(
    xaxis = list(
      title = "No of Users",
      showgrid = FALSE,
      zeroline = FALSE
    ),
    yaxis = list(
      title = "User Score",
      showgrid = FALSE,
      zeroline = FALSE
    ),
    plot_bgcolor = toRGB("gray90"),
    paper_bgcolor = toRGB("skyblue")
  )
# Are best-selling video games generally well received by critics?
# The scatter plot is polished by putting the x-axis on a log scale and
# labeling both axes; markers are half transparent.
plot_ly(vgsales2016, x = ~Global_Sales, y = ~Critic_Score) %>%
  add_markers(marker = list(opacity = 0.5)) %>%
  layout(
    xaxis = list(
      title = "Global sales (millions of units)",
      type = "log"
    ),
    yaxis = list(title = "Critic score")
  )
# Line chart of annual global sales.
# The rows of vgsales are individual games, so they are first summed to one
# Global_Sales total per Year; without this step add_lines() would connect
# every single game instead of drawing one point per year.
annual_vgsales <- vgsales %>%
  filter(Year >= 1980 & Year <= 2016) %>%
  group_by(Year) %>%
  summarise(Global_Sales = sum(Global_Sales, na.rm = TRUE))

# Set the paper background color to #ebebeb and remove the vertical grid.
annual_vgsales %>%
  plot_ly(x = ~Year, y = ~Global_Sales) %>%
  add_lines() %>%
  layout(
    xaxis = list(showgrid = FALSE),
    paper_bgcolor = "#ebebeb"
  )