R - Getting Started
Published:
Download:
Resources
Basic Commands
Functions are used to perform operations
Concatenate to create a Vector
# c() concatenates its arguments into a single vector.
# At the top level `=` and `<-` both assign; `<-` is the conventional form.
x = c(1, 2, 3, 4, 5)   # 1 2 3 4 5
x <- c(1, 2, 3, 4, 5)  # 1 2 3 4 5
Help
help("c")  # open the help page for c(); `?c` is the interactive shorthand
Adding two vectors
# Arithmetic on vectors is element-wise.
x <- c(1, 2, 3, 4, 5)
y <- c(1, 2, 3, 4, 5)
z <- x + y  # 2 4 6 8 10
z <- z + 2  # 4 6 8 10 12 (the scalar is recycled across z)

# Mismatched lengths: the shorter vector is recycled. Because 5 is not a
# multiple of 2, R still returns a result (3 5 5 7 7) but emits a warning --
# this is NOT an error.
k <- c(2, 3)
z <- x + k  # Warning: longer object length is not a multiple of shorter object length

# A length-1 vector recycles cleanly, with no warning.
k <- c(2)
z <- x + k  # 3 4 5 6 7
Length of vector
# length() returns the number of elements in a vector.
length(x)  # 5
length(y)  # 5
length(k)  # 1 -- k was last reassigned to c(2); it was 2 while k held c(2, 3)
List of All Objects and Remove Object(s)
ls()             # names of all objects in the workspace, e.g. "k" "x" "y" "z"
rm(k, z)         # remove the named objects
rm(list = ls())  # remove every object in the workspace
Creating Function
# One-line form: the body is a single expression, and its value is returned.
f <- function(x, y) x^2 + y^2
f(10, 10)  # 200

# Multi-line form: wrap the body in braces; the last evaluated expression is
# the return value. (Original note: use Shift+Enter to break the line while
# typing the definition.)
f <- function(x, y) {
  sum_sq <- x^2 + y^2
  sum_sq
}
f(10, 10)  # 200
Matrix
?matrix  # help page for matrix()

# Three equivalent ways to build the same 2x2 matrix.
# The data fill the matrix column by column.
x <- matrix(data = c(1, 2, 3, 4), nrow = 2, ncol = 2)  # named arguments
x <- matrix(c(1, 2, 3, 4), 2, 2)                       # positional arguments
x <- matrix(nrow = 2, ncol = 2, data = c(1, 2, 3, 4))  # named, in any order
dim(x)  # 2 2
Outer
x <- 1:10
y <- x

# outer(x, y, FUN) builds a matrix with z[i, j] = FUN(x[i], y[j]).
z <- x %o% y           # %o% is shorthand for outer(x, y); default FUN is "*"
z <- outer(x, y)       # same multiplication table
z <- outer(x, y, "+")  # addition table

# Any vectorized function of two arguments can be used as FUN.
f <- function(x, y) x^2 + y^2
z <- outer(x, y, f)
Other Functions
# Element-wise math on the existing vector x
sqrt(x)
x^2

# rnorm(n) draws n normal random values; by default mean = 0 and sd = 1
x <- rnorm(50)
y <- rnorm(50, mean = 50, sd = 0.1)

# Correlation
cor(x, y)

# Seed: resetting the same seed reproduces exactly the same draws
set.seed(5); x <- rnorm(5); x
set.seed(5); x <- rnorm(5); x

# Mean, Variance, Standard Deviation
mean(x); var(x); sd(x)
Graphics
x <- rnorm(100)
y <- rnorm(100)

plot(x, y)
plot(x, y, xlim = c(0, 2), ylim = c(0, 2))  # restrict both axes to [0, 2]
plot(x, y, type = "p")  # "l" for lines and "b" for both points and lines
plot(x, y, main = "Scatter Plot", xlab = "x-axis", ylab = "y-axis")  # title and axis labels

# Saving plot in pdf (adjust the path to a directory that exists on your machine)
pdf(file = "/Users/naneja/Downloads/xy.pdf")
plot(x, y, main = "xy plot", xlab = "x-axis", ylab = "y-axis")
dev.off()  # close the device so the file is completely written

# Saving plot in jpg
jpeg(file = "/Users/naneja/Downloads/xy.jpg")
plot(x, y, main = "xy plot", xlab = "x-axis", ylab = "y-axis")
dev.off()  # close the device so the file is completely written
Sequence of Numbers
# Sequence of consecutive integers
x <- seq(1, 10); x
x <- 1:10; x

# Sequence of equally spaced numbers (not necessarily integers).
# length.out is the full argument name; `length =` only works through
# partial argument matching.
x <- seq(1, 10, length.out = 5); x    # 1.00 3.25 5.50 7.75 10.00
x <- seq(-pi, pi, length.out = 5); x  # -pi, -pi/2, 0, pi/2, pi
3D Plots
- First dimension: a vector of the x values
- Second dimension: a vector of the y values
- Third dimension: a matrix of the z values whose elements correspond to each pair of (x, y) coordinates
x <- 1:10
y <- x
f <- function(x, y) cos(y) / (1 + x^2)
z <- outer(x, y, f)  # z[i, j] = f(x[i], y[j])

# 3D Plot: theta and phi set the viewing direction
# (theta = azimuthal angle, phi = colatitude)
persp(x, y, z)
persp(x, y, z, theta = 30)
persp(x, y, z, theta = 30, phi = 70)
persp(x, y, z, theta = 30, phi = 40)

# Heatmap based on values of z
image(x, y, z)
Indexing Data
A <- matrix(1:16, 4, 4)  # filled column by column

A[4, 4]                    # 16: single element (row 4, column 4)
A[c(1, 2, 3), c(1, 2, 3)]  # 3x3 sub-matrix: rows 1-3, columns 1-3
A[1:3, 1:3]                # same sub-matrix using ranges
A[4, ]                     # row 4, all columns
A[, 4]                     # column 4, all rows
A[1:3, ]                   # rows 1-3, all columns
A[, 1:3]                   # all rows, columns 1-3

# Negative indices exclude rows/columns
A[-1, ]                    # everything except row 1
A[-c(1, 2), ]              # everything except rows 1 and 2
A[-c(1, 4), ]              # everything except rows 1 and 4
Loading Data
# Titanic training data: https://www.kaggle.com/c/titanic/data
getwd()                             # show the current working directory
setwd("/Users/naneja/Downloads/R")  # change it; relative paths below resolve against it
data <- read.csv("titanic/train.csv")
data[0, ]                           # zero rows selected, so only the column names print
install.packages("ISLR2")  # one-time installation
library(ISLR2)

# Option 1: take the Auto data set directly from the package
Auto <- ISLR2::Auto

# Option 2: read it from a CSV file (this overwrites the copy above)
Auto <- read.csv("data/auto.csv")
View(Auto)  # spreadsheet-style viewer
head(Auto)  # first rows
dim(Auto)   # number of rows and columns

# Re-read the file, treating "?" as missing and strings as factors.
# Use TRUE/FALSE rather than T/F: T and F are ordinary variables that can be
# reassigned.
Auto <- read.csv(
  "data/auto.csv",
  header = TRUE,
  na.strings = c("?"),
  stringsAsFactors = TRUE
)
Auto[1:4, ]

# Drop every row that contains at least one missing value
Auto <- na.omit(Auto)
dim(Auto)
names(Auto)
na.strings
- character strings in the file that are read as NA (missing values)
stringsAsFactors = T
- tells R that any variable containing character strings should be interpreted as a qualitative variable, and that each distinct character string represents a distinct level of that variable.
Additional Graphical and Numerical Summaries
Scatterplot of quantitative variables
library(ISLR2)
Auto <- ISLR2::Auto
names(Auto)

# Refer to columns explicitly through the data frame
plot(Auto$cylinders, Auto$mpg)

# attach() puts the columns of Auto on the search path so they can be used
# by bare name; detach() removes them again.
attach(Auto)
plot(cylinders, mpg)
detach(Auto)  # from here on, columns must again be written as Auto$<column>
Converting Quantitative to Qualitative
The cylinders variable takes only a small number of distinct values, so it may be converted to a qualitative variable (a factor)
# Auto is no longer attached at this point, so its columns are referenced
# through the data frame.
cylinders <- as.factor(Auto$cylinders)

# When x is a factor, plot() draws box plots instead of a scatterplot
plot(cylinders, Auto$mpg)  # Box Plot
plot(cylinders, Auto$mpg, col = "red")
plot(cylinders, Auto$mpg, col = "red", varwidth = TRUE)  # box width reflects group size
plot(cylinders, Auto$mpg, col = "red", varwidth = TRUE, horizontal = TRUE)
plot(cylinders, Auto$mpg, col = "red", varwidth = TRUE,
     xlab = "cylinders", ylab = "MPG")
Histogram
Numeric Variable
# Auto is not attached here, so mpg is referenced through the data frame
hist(Auto$mpg)
hist(Auto$mpg, col = 2)               # col = 2 is red
hist(Auto$mpg, col = 2, breaks = 15)  # breaks is a suggestion only
Pairs - Scatterplot Matrix
# Scatterplot for every pair of variables in the data frame
pairs(Auto)

# Scatterplot matrix for a subset of variables, given as a one-sided formula
pairs(
  ~ mpg + displacement + horsepower + weight + acceleration,
  data = Auto
)
Identify
# Auto is not attached here, so columns are referenced through the data frame
plot(Auto$horsepower, Auto$mpg)

# Label some points with a variable
names(Auto)  # Field names

# Interactive: click points on the plot to label them with the car name;
# press Esc to finish selecting.
identify(Auto$horsepower, Auto$mpg, Auto$name)
Summary
summary(Auto)      # Numeric summary of each variable
summary(Auto$mpg)  # Summary of a single variable (Auto is not attached here)