Plotly

8 minute read

Published:

This lesson is based on DataCamp's Introduction to Plotly course.

Introduction

Converting a ggplot2 scatterplot

# Attach the packages first: glimpse() and %>% come from dplyr, so they must
# be available before the inspection calls below run.
library(dplyr)
library(ggplot2)
library(plotly)

# Read the video game sales data by its full path rather than calling setwd(),
# so the script does not change the session's working directory.
vgsales <- read.csv("~/Downloads/teaching/R/dc/vgsales.csv")
View(vgsales)   # open the data in the interactive viewer
names(vgsales)  # column names
# [1] "Name"         "Platform"     "Year"         "Genre"       
# [5] "Publisher"    "NA_Sales"     "EU_Sales"     "JP_Sales"    
# [9] "Other_Sales"  "Global_Sales" "Critic_Score" "Critic_Count"
# [13] "User_Score"   "User_Count"   "Developer"    "Rating" 

glimpse(vgsales)  # compact overview of every column

# Keep only the 2016 rows, then draw Critic_Score (y) against NA_Sales (x)
# with semi-transparent points and store the plot.
scatter <- ggplot(
  filter(vgsales, Year == 2016),
  aes(x = NA_Sales, y = Critic_Score)
) +
  geom_point(alpha = 0.3)

scatter            # static ggplot2 version
ggplotly(scatter)  # the same plot converted to an interactive plotly graphic



# Critic_Score (y) against NA_Sales (x), coloured by Rating.
# Rows missing Platform, NA_Sales or Critic_Score are dropped, and only rows
# with NA_Sales below 10 are kept.
scatter <- ggplot(
  filter(
    vgsales,
    !is.na(Platform),
    !is.na(NA_Sales),
    !is.na(Critic_Score),
    NA_Sales < 10
  ),
  aes(x = NA_Sales, y = Critic_Score, color = Rating)
) +
  geom_point(alpha = 0.3)

scatter            # static ggplot2 version
ggplotly(scatter)  # the same plot converted to an interactive plotly graphic

Univariate

library(dplyr)
library(ggplot2)
library(plotly)
library(forcats) # for reordering factor levels 

# Load the video game sales data by its full path; this avoids setwd(), which
# would change the working directory for the rest of the session.
vgsales <- read.csv("~/Downloads/teaching/R/dc/vgsales.csv")

# Bar chart of the number of rows per Platform
count(vgsales, Platform) %>%          # frequency table: Platform and n
  plot_ly(x = ~Platform, y = ~n) %>%  # map Platform to x and the count n to y
  add_bars()                          # add a bar trace


# Bar chart of Platform with the bars ordered from highest to lowest count
library(forcats)
count(vgsales, Platform) %>%          # frequency table: Platform and n
  mutate(
    # reorder the Platform levels by n, largest first
    Platform = fct_reorder(as.factor(Platform), n, .desc = TRUE)
  ) %>%
  plot_ly(x = ~Platform, y = ~n) %>%  # map Platform to x and the count n to y
  add_bars()                          # add a bar trace

# Histogram of Critic_Score, leaving out rows with a missing score
plot_ly(
  data = filter(vgsales, !is.na(Critic_Score)),
  x = ~Critic_Score  # aesthetic: the variable to bin
) %>%
  add_histogram()

# Histogram of Critic_Score limited to at most 25 bins
filter(vgsales, !is.na(Critic_Score)) %>%
  plot_ly(x = ~Critic_Score) %>%  # aesthetic: the variable to bin
  add_histogram(nbinsx = 25)      # at most 25 bins


# Histogram of Critic_Score with bins of width 10 running from 0 to 100
filter(vgsales, !is.na(Critic_Score)) %>%
  plot_ly(x = ~Critic_Score) %>%  # aesthetic: the variable to bin
  add_histogram(
    xbins = list(start = 0, end = 100, size = 10)
  )


# Histogram of Critic_Score with bins of width 20 running from 0 to 100
plot_ly(vgsales, x = ~Critic_Score) %>%
  add_histogram(
    xbins = list(start = 0, end = 100, size = 20)
  )


# Frequency table of Genre: one row per genre with its count in n
genre_table <- count(vgsales, Genre)

# Bar chart of the genre counts
plot_ly(genre_table, x = ~Genre, y = ~n) %>%
  add_bars()

# Frequency table of Genre: one row per genre with its count in n
genre_table <- count(vgsales, Genre)

# Bar chart of Genre with the bars ordered by n, largest first
mutate(genre_table, Genre = fct_reorder(Genre, n, .desc = TRUE)) %>%
  plot_ly(x = ~Genre, y = ~n) %>%
  add_bars()

Bivariate Graphics

library(dplyr)
library(ggplot2)
library(plotly)
library(forcats) # for reordering factor levels 

# Load the video game sales data by its full path; this avoids setwd(), which
# would change the working directory for the rest of the session.
vgsales <- read.csv("~/Downloads/teaching/R/dc/vgsales.csv")

# Scatter plot for two numeric variables: Critic_Score against NA_Sales
plot_ly(vgsales, x = ~NA_Sales, y = ~Critic_Score) %>%
  add_markers()  # drag to select a region to zoom; double-click to go back

# Bar chart for two categorical variables: Platform counts split by Rating
count(vgsales, Rating, Platform) %>%
  plot_ly(x = ~Platform, y = ~n, color = ~Rating) %>%
  add_bars() %>%             # bars are side by side at this point
  layout(barmode = "stack")  # switch to stacked bars

# Stacked bar chart of the Rating mix within each Platform, as proportions.
# The proportions are computed within each Platform (the x variable) so that
# every stacked bar sums to 1; grouping by Rating instead gives bars whose
# heights are not comparable.
vgsales %>%
  count(Rating, Platform) %>%
  group_by(Platform) %>%         # one group per bar
  mutate(prop = n / sum(n)) %>%  # share of each Rating within its Platform
  ungroup() %>%                  # drop the grouping before plotting
  plot_ly(x = ~Platform, y = ~prop, color = ~Rating) %>%  # y is the proportion
  add_bars() %>%                 # bars are side by side at this point
  layout(barmode = "stack")      # switch to stacked bars

# Boxplots - how a numeric variable changes across levels of a categorical one.
# The "tbd" filter implies User_Score is read in as text rather than numbers,
# so it is converted to numeric before plotting; as text it would be handled
# as a discrete variable instead of a numeric scale.
vgsales %>%
  filter(Developer == "Nintendo" & User_Score != "tbd") %>%
  # as.character() first so the conversion is also correct for a factor column
  mutate(User_Score = as.numeric(as.character(User_Score))) %>%
  plot_ly(x = ~Rating, y = ~User_Score) %>%
  add_boxplot()
# hovering over a box shows its five-number summary


# Scatter plot with Critic_Score on the x-axis and User_Score on the y-axis
plot_ly(vgsales, x = ~Critic_Score, y = ~User_Score) %>%
  add_markers()


# Keep only the video games from 2016
vg2016 <- filter(vgsales, Year == 2016)

# Stacked bar chart: count per Genre, with segments coloured by Rating
count(vg2016, Genre, Rating) %>%
  plot_ly(x = ~Genre, y = ~n, color = ~Rating) %>%
  add_bars() %>%
  layout(barmode = "stack")


# Keep only the video games from 2016
vg2016 <- filter(vgsales, Year == 2016)

# Boxplots of Global_Sales (on x) for each Genre (on y) in the 2016 data
plot_ly(vg2016, x = ~Global_Sales, y = ~Genre) %>%
  add_boxplot()

Customizing graphics

library(dplyr)
library(ggplot2)
library(plotly)
library(forcats) # for reordering factor levels 

# Load the video game sales data by its full path; this avoids setwd(), which
# would change the working directory for the rest of the session.
vgsales <- read.csv("~/Downloads/teaching/R/dc/vgsales.csv")

# Histogram drawn in a fixed colour
plot_ly(vgsales, x = ~Critic_Score) %>%
  add_histogram(
    color = I("red")  # I() marks "red" as a literal colour, not a variable
  )

# Scatter plot using low marker opacity to reduce overplotting
plot_ly(vgsales, x = ~Critic_Score, y = ~User_Score) %>%
  add_markers(
    marker = list(opacity = 0.2)  # opacity 0.2, i.e. 80% transparent
  )


# Scatter plot using open-circle symbols to reduce overplotting
plot_ly(vgsales, x = ~Critic_Score, y = ~User_Score) %>%
  add_markers(
    marker = list(symbol = "circle-open")
  )


# 2016 subset of the data
vgsales2016 <- filter(vgsales, Year == 2016)

# Histogram of Critic_Score with navy bars that are 50% transparent
plot_ly(vgsales2016, x = ~Critic_Score) %>%
  add_histogram(color = I("navy"), opacity = 0.5)

# Histogram colour given as a hex code
plot_ly(vgsales2016, x = ~Critic_Score) %>%
  add_histogram(color = I("#111e6c"))

# Histogram colour given as an "rgb(r, g, b)" string inside the marker list
plot_ly(vgsales2016, x = ~Critic_Score) %>%
  add_histogram(
    marker = list(color = "rgb(17, 30, 108)")
  )


# Set the plotting symbol to diamond and the size to 4.
# Uses vgsales2016, the 2016 subset created in this section, instead of vg2016,
# which this section never defines.
plot_ly(data = vgsales2016, x = ~User_Score, y = ~Critic_Score) %>%
  add_markers(marker = list(size = 4, symbol = "diamond"))


# Histogram drawn in a fixed colour
plot_ly(vgsales, x = ~Critic_Score) %>%
  add_histogram(
    color = I("red")  # I() marks "red" as a literal colour, not a variable
  )

# Scatter plot using low marker opacity to reduce overplotting
plot_ly(vgsales, x = ~Critic_Score, y = ~User_Score) %>%
  add_markers(
    marker = list(opacity = 0.2)  # opacity 0.2, i.e. 80% transparent
  )


# Scatter plot using open-circle symbols to reduce overplotting
plot_ly(vgsales, x = ~Critic_Score, y = ~User_Score) %>%
  add_markers(
    marker = list(symbol = "circle-open")
  )


# 2016 subset of the data
vgsales2016 <- filter(vgsales, Year == 2016)

# Histogram of Critic_Score with navy bars that are 50% transparent
plot_ly(vgsales2016, x = ~Critic_Score) %>%
  add_histogram(color = I("navy"), opacity = 0.5)

# Histogram colour given as a hex code
plot_ly(vgsales2016, x = ~Critic_Score) %>%
  add_histogram(color = I("#111e6c"))

# Histogram colour given as an "rgb(r, g, b)" string inside the marker list
plot_ly(vgsales2016, x = ~Critic_Score) %>%
  add_histogram(
    marker = list(color = "rgb(17, 30, 108)")
  )


# Set the plotting symbol to diamond and the size to 4.
# Uses vgsales2016, the 2016 subset created in this section, instead of vg2016,
# which this section never defines.
plot_ly(data = vgsales2016, x = ~User_Score, y = ~Critic_Score) %>%
  add_markers(marker = list(size = 4, symbol = "diamond"))


# Scatter plot with a categorical third variable (Rating) mapped to colour
plot_ly(vgsales2016, x = ~NA_Sales, y = ~Critic_Score, color = ~Rating) %>%
  add_markers()


# Scatter plot with a quantitative third variable:
# User_Score converted with as.numeric() and mapped to colour
plot_ly(
  vgsales2016,
  x = ~NA_Sales,
  y = ~Critic_Score,
  color = ~as.numeric(User_Score)
) %>%
  add_markers()


# Same scatter plot with User_Score converted to a factor for the colour
plot_ly(
  vgsales2016,
  x = ~NA_Sales,
  y = ~Critic_Score,
  color = ~as.factor(User_Score)
) %>%
  add_markers()

# Colour the Rating levels with the RColorBrewer palette "Dark2"
plot_ly(vgsales2016, x = ~NA_Sales, y = ~Critic_Score, color = ~Rating) %>%
  add_markers(colors = "Dark2")


# Colour the Rating levels with an explicit vector of colour names
plot_ly(vgsales2016, x = ~NA_Sales, y = ~Critic_Score, color = ~Rating) %>%
  add_markers(
    colors = c("orange", "black", "skyblue", "red")
  )


# Scatter plot of User_Score and Critic_Score,
# with Genre added as a third variable through colour
plot_ly(vgsales2016, x = ~Critic_Score, y = ~User_Score, color = ~Genre) %>%
  add_markers(colors = "Dark2")



# Scatter plot of User_Score against Critic_Score,
# with the marker symbol coded by Rating
plot_ly(vgsales2016, x = ~Critic_Score, y = ~User_Score, symbol = ~Rating) %>%
  add_markers()


# Scatter plot of User_Score vs. Critic_Score, coloured by log(User_Count)
plot_ly(
  vgsales2016,
  x = ~Critic_Score,
  y = ~User_Score,
  color = ~log(User_Count)
) %>%
  add_markers()


# Hover info
# Bar chart of Platform where the hover label shows only the bar height (y)
count(vgsales, Platform) %>%
  plot_ly(x = ~Platform, y = ~n, hoverinfo = "y") %>%
  add_bars()

# Scatter plot whose hover label is custom text built from NA_Sales and
# Critic_Score, with "<br>" placed between the two parts
plot_ly(
  vgsales,
  x = ~NA_Sales,
  y = ~Critic_Score,
  hoverinfo = "text",
  text = ~paste("NA Sales:", NA_Sales, "<br>", "Critic Score:", Critic_Score)
) %>%
  add_markers()


# Bar chart of Rating where the hover label shows only the bar height (y)
count(vgsales, Rating) %>%
  plot_ly(x = ~Rating, y = ~n, hoverinfo = "y") %>%
  add_bars()


# Third variable in the hover info via text:
# scatter plot of User_Score vs. Critic_Score with the game Name as text
plot_ly(vgsales2016, x = ~Critic_Score, y = ~User_Score, text = ~Name) %>%
  add_markers()


# Hover label formatted from NA_Sales, EU_Sales and Name, one per line
plot_ly(
  vgsales2016,
  x = ~NA_Sales,
  y = ~EU_Sales,
  hoverinfo = "text",  # show only the custom text
  text = ~paste(
    "NA_Sales: ", NA_Sales, "<br>",
    "EU_Sales: ", EU_Sales, "<br>",
    "Name: ", Name
  )
) %>%
  add_markers()

# Axis labels and plot title
count(vgsales, Platform) %>%
  plot_ly(x = ~Platform, y = ~n, hoverinfo = "y") %>%
  add_bars() %>%
  layout(
    title = "Popularity of Video Game Platform",
    xaxis = list(title = "Video Game Platform"),
    yaxis = list(title = "Frequency")
  )


# Log-scaled x-axis: User_Score against User_Count
plot_ly(vgsales2016, x = ~User_Count, y = ~User_Score) %>%
  add_markers() %>%
  layout(
    title = "User Score vs No of Users",
    xaxis = list(type = "log", title = "No of Users (log scale)"),
    yaxis = list(title = "User Score")
  )


# Layout options: showgrid, zeroline, plot_bgcolor and paper_bgcolor
plot_ly(vgsales2016, x = ~User_Count, y = ~User_Score) %>%
  add_markers() %>%
  layout(
    # grid lines and zero line switched off on both axes
    xaxis = list(title = "No of Users", showgrid = FALSE, zeroline = FALSE),
    yaxis = list(title = "User Score", showgrid = FALSE, zeroline = FALSE),
    # background colours for the plot and paper areas
    plot_bgcolor = toRGB("gray90"),
    paper_bgcolor = toRGB("skyblue")
  )

# Are best-selling video games generally well received by critics?
# Scatter plot with a log-scaled x-axis and labels on both axes
plot_ly(vgsales2016, x = ~Global_Sales, y = ~Critic_Score) %>%
  add_markers(
    marker = list(opacity = 0.5)
  ) %>%
  layout(
    xaxis = list(title = "Global sales (millions of units)", type = "log"),
    yaxis = list(title = "Critic score")
  )


# Total Global_Sales per Year for 1980-2016.
# The rows are summed per Year so the line chart below has one point per year;
# without the aggregation add_lines() would connect every individual game.
annual_vgsales <- vgsales %>%
  filter(Year >= 1980 & Year <= 2016) %>%
  group_by(Year) %>%
  summarise(Global_Sales = sum(Global_Sales, na.rm = TRUE))

# Line chart with the paper background set to #ebebeb
# and the vertical grid removed
annual_vgsales %>%
  plot_ly(x = ~Year, y = ~Global_Sales) %>%
  add_lines() %>%
  layout(xaxis = list(showgrid = FALSE),  # x-axis grid = the vertical lines
         paper_bgcolor = "#ebebeb")