Published:

Resources

# Basic Commands

• Functions are used to perform operations

• Concatenate to create a Vector

•   x = c(1,2,3,4,5) # 1 2 3 4 5
x <- c(1,2,3,4,5) # 1 2 3 4 5

• Help

•   ?c


•   x = c(1,2,3,4,5)
y = c(1,2,3,4,5)
z = x+y # 2  4  6  8 10
z = z+2 # 4  6  8 10 12

k = c(2,3)
z = x+k # Warning (not an error): lengths 5 and 2 do not divide evenly; k is recycled, giving 3 5 5 7 7

k = c(2)
z = x+k # 3 4 5 6 7

• Length of vector

•   length(x) # 5
length(y) # 5
length(k) # 1 (k was last assigned c(2))

• List of All Objects and Remove Object(s)

•   ls() # x, y, k, z
rm(k, z)

rm(list=ls())

• Creating Function

•   f = function(x, y) x^2 + y^2
f(10, 10) # 200

# Same function as the one-liner, written with a multi-line body.
# Returns x^2 + y^2 (the value of the last expression).
f = function(x, y) { # Shift+Enter
  sum_of_squares = x^2 + y^2
  sum_of_squares
}
f(10, 10) # 200

• Matrix

•   ?matrix
# All arguments named: build a 2x2 matrix from the values 1, 2, 3, 4
x = matrix(data=c(1, 2, 3, 4), nrow=2, ncol=2)

# Same call with positional arguments (data, nrow, ncol)
x = matrix(c(1, 2, 3, 4), 2, 2)

# Named arguments may be given in any order
x = matrix(nrow=2, ncol=2, data=c(1, 2, 3, 4))

dim(x) # 2 2 (rows, columns)

• Outer

•   x = 1:10
y = x
z = x %o% y # default multiplication operator
z = outer(x,y) # same as x %o% y: z[i, j] = x[i] * y[j]
z = outer(x, y, "+") # z[i, j] = x[i] + y[j]

f = function(x, y) x^2 + y^2
z = outer(x, y, f) # z[i, j] = f(x[i], y[j])

• Other Functions

•   sqrt(x)
x^2

# rnorm() generates vector of normal variables with mean 0 and std 1
x = rnorm(50)
y = rnorm(50, mean=50, sd=0.1)

# Correlation
cor(x, y)

# Seed
set.seed(5); x = rnorm(5); x
set.seed(5); x = rnorm(5); x

# Mean, Variance, Standard Deviation
mean(x); var(x); sd(x)


# Graphics

•   x = rnorm(100)
y = rnorm(100)

plot(x,y)
plot(x, y, xlim=c(0,2), ylim=c(0,2))

plot(x, y, type="p") # "l" for lines and "b" for both points and lines

plot(x,y, main="Scatter Plot", xlab="x-axis", ylab="y-axis") # xlabel and ylabel

# Saving plot in pdf
# A file device must be opened before plotting; without it dev.off() only
# closes the on-screen device and nothing is written to disk.
pdf("xy_plot.pdf") # file name is an example
plot(x,y, main="xy plot", xlab="x-axis", ylab="y-axis")
dev.off() # complete plotting

# Saving plot in jpg
jpeg("xy_plot.jpg") # file name is an example
plot(x,y, main="xy plot", xlab="x-axis", ylab="y-axis")
dev.off() # complete plotting

• Sequence of Numbers

•   # Sequence for vector of integers
x = seq(1, 10); x
x = 1:10; x

# Sequence of equally spaced numbers (not necessarily integers)
# length.out is the full argument name; length= only works through partial matching
x = seq(1, 10, length.out = 5); x # 1.00 3.25 5.50 7.75 10.00
x <- seq(-pi, pi, length.out = 5); x

• 3D Plots

• First dimension: a vector of the x values
• Second dimension: a vector of the y values
• Third dimension: a matrix of the z values whose elements correspond to each pair of (x, y) coordinates

•   x = 1:10
y = x

f = function(x, y) cos(y) / (1 + x^2)
z = outer(x, y, f)

# 3D Plot
# z is the matrix computed with outer() above; `fa` was never defined in this snippet
persp(x, y, z)
persp(x, y, z, theta=30) # theta: azimuthal viewing direction
persp(x, y, z, theta=30, phi=70) # phi: colatitude of the viewing direction
persp(x, y, z, theta=30, phi=40)

# Heatmap based on values of z
image(x, y, z)


# Indexing Data

•   A = matrix(1:16, 4, 4)

A[4,4] # 16
A[c(1,2,3), c(1,2,3)] # 3x3
A[1:3, 1:3] # same 3x3 submatrix, using ranges

A[4,] # 4th row: 4 8 12 16
A[,4] # 4th column: 13 14 15 16

A[1:3,] # first three rows
A[,1:3] # first three columns

A[-1,] # all rows except the first
A[-c(1,2),] # all rows except the first two
A[-c(1,4),] # all rows except the first and the fourth


# https://www.kaggle.com/c/titanic/data
getwd() # relative paths passed to read.csv() are resolved against this directory

# Load the downloaded file first; otherwise `data` refers to R's built-in
# data() function and data[0,] fails.
# NOTE(review): "data/train.csv" is an assumed location - adjust to where the Kaggle file was saved.
data = read.csv("data/train.csv")

data[0,] # column names

install.packages("ISLR2")
library(ISLR2)

Auto = ISLR2::Auto

View(Auto)

dim(Auto)

# TRUE/FALSE are reserved words; T/F are ordinary variables that can be reassigned
Auto = read.csv("data/auto.csv", header = TRUE, na.strings = c("?"), stringsAsFactors = TRUE)

Auto[1:4, ]

Auto <- na.omit(Auto)
dim(Auto)

names(Auto)

• na.strings
• character strings in the file that should be read as NA (missing values)
• stringsAsFactors = T
• any variable containing character strings is read as a qualitative variable (factor), with each distinct character string representing a distinct level of that variable.

# Additional Graphical and Numerical Summaries

• Scatterplot of quantitative variables

•   library(ISLR2)
Auto = ISLR2::Auto

names(Auto)

plot(Auto$cylinders , Auto$mpg)

attach(Auto)
plot(cylinders , mpg)
detach()

• Converting Quantitative to Qualitative

• The cylinders variable takes only a small number of possible values, so it may be converted to a qualitative variable

•   cylinders <- as.factor(Auto$cylinders) # columns are referenced via Auto$ because Auto was detached above

plot(cylinders , Auto$mpg) # Box Plot

plot(cylinders , Auto$mpg , col="red")

plot(cylinders , Auto$mpg , col="red", varwidth=TRUE) # box widths reflect group sizes

plot(cylinders , Auto$mpg , col="red", varwidth=TRUE, horizontal=TRUE)

plot(cylinders , Auto$mpg , col="red", varwidth=TRUE, xlab="cylinders", ylab="MPG")

• Histogram

• Numeric Variable

•   hist(Auto$mpg) # mpg is referenced via Auto$ because Auto was detached above

hist(Auto$mpg , col=2) # red

hist(Auto$mpg , col=2, breaks=15) # breaks is suggestion only

• Pairs - Scatterplot Matrix

•   pairs(Auto) # for every pair of variables

pairs(~ mpg + displacement + horsepower + weight + acceleration, data = Auto) # subset of variables

• Identify

•   plot(Auto$horsepower , Auto$mpg) # columns are referenced via Auto$ because Auto was detached above

# Label some points with a variable
names(Auto) # Field names

identify(Auto$horsepower , Auto$mpg , Auto$name) # Click points to label them with the car name; press Esc to finish

• Summary

•   summary(Auto) # Numeric summary of each variable

summary(Auto$mpg) # Summary of single variable (Auto$ is needed because Auto was detached above)


Tags: