R Vocabulary

The basics

# The first functions to learn
?
str

# Important operators and assignment
%in%, match
=, <-, <<-
$, [, [[, head, tail, subset
with
assign, get

# Comparison 
all.equal, identical
!=, ==, >, >=, <, <=
is.na, complete.cases
is.finite

# Basic math
*, +, -, /, ^, %%, %/%
abs, sign
acos, asin, atan, atan2
sin, cos, tan
ceiling, floor, round, trunc, signif
exp, log, log10, log2, sqrt

max, min, prod, sum
cummax, cummin, cumprod, cumsum, diff
pmax, pmin
range
mean, median, cor, sd, var
rle

# Functions to do with functions
function
missing
on.exit
return, invisible

# Logical & sets 
&, |, !, xor
all, any
intersect, union, setdiff, setequal
which

# Vectors and matrices
c, matrix
# automatic coercion rules character > numeric > logical
length, dim, ncol, nrow
cbind, rbind
names, colnames, rownames
t
diag
sweep
as.matrix, data.matrix

# Making vectors 
c
rep, rep_len
seq, seq_len, seq_along
rev
sample
choose, factorial, combn
(is/as).(character/numeric/logical/...)

# Lists & data.frames 
list, unlist
data.frame, as.data.frame
split
expand.grid

# Control flow 
if, &&, || (short circuiting)
for, while
next, break
switch
ifelse

# Apply & friends
lapply, sapply, vapply
apply
tapply
replicate

Common data structures

# Date time
ISOdate, ISOdatetime, strftime, strptime, date
difftime
julian, months, quarters, weekdays
library(lubridate)

# Character manipulation 
grep, agrep
gsub
strsplit
chartr
nchar
tolower, toupper
substr
paste
trimws
library(stringr)

# Factors 
factor, levels, nlevels
reorder, relevel
cut, findInterval
interaction
options(stringsAsFactors = FALSE)

# Array manipulation
array
dim
dimnames
aperm
library(abind)

Statistics

# Ordering and tabulating 
duplicated, unique
merge
order, rank, quantile
sort
table, ftable

# Linear models 
fitted, predict, resid, rstandard
lm, glm
hat, influence.measures
logLik, df.residual, deviance
formula, ~, I
anova, coef, confint, vcov
contrasts

# Miscellaneous tests
apropos("\\.test$")

# Random variables 
(q, p, d, r) * (beta, binom, cauchy, chisq, exp, f, gamma, geom, 
  hyper, lnorm, logis, multinom, nbinom, norm, pois, signrank, t, 
  unif, weibull, wilcox, birthday, tukey)

# Matrix algebra 
crossprod, tcrossprod
eigen, qr, svd
%*%, %o%, outer
rcond
solve

Working with R

# Workspace 
ls, exists, rm
getwd, setwd
q
source
install.packages, library, require

# Help
help, ?
help.search
apropos
RSiteSearch
citation
demo
example
vignette

# Debugging
traceback
browser
recover
options(error = )
stop, warning, message
tryCatch, try

I/O

# Output
print, cat
message, warning
dput
format
sink, capture.output
sprintf

# Reading and writing data
data
count.fields
read.csv, write.csv
read.delim, write.table
read.fwf
readLines, writeLines
readRDS, saveRDS
load, save
library(foreign)

# Files and directories 
dir
basename, dirname, tools::file_ext
file.path
path.expand, normalizePath
file.choose
file.copy, file.create, file.remove, file.rename, dir.create
file.exists, file.info
tempdir, tempfile
download.file, library(downloader)

apply 계열 함수

function	input	output
apply()	배열 또는 행렬	배열 또는 리스트
lapply()	벡터, 리스트, 표현식	리스트 반환
sapply()	lapply와 유사	벡터, 행렬, 배열
tapply()	데이터를 그룹으로 묶은 뒤 함수 적용
mapply()	여러 벡터, 리스트를 함수의 인자들로 받아 처리

d <- matrix(1:9, ncol=3)

d

##      [,1] [,2] [,3]
## [1,]    1    4    7
## [2,]    2    5    8
## [3,]    3    6    9

apply(d, 1, sum)

## [1] 12 15 18

apply(d, 2, sum)

## [1]  6 15 24

result <- lapply(1:3, function(x){x*2})

problems!!!

데이터 프레임을 처리한 결과를 리스트로 얻은 뒤, 해당 리스트를 다시 데이터 프레임으로 변환해야 할 때 단계가 복잡하다.

d <- as.data.frame(matrix(unlist(lapply(iris[,1:4], mean)),
                                                    ncol=4, byrow=TRUE))

names(d) <-  names(iris[,1:4])
d

##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1     5.843333    3.057333        3.758    1.199333

data.frame(do.call(cbind, lapply(iris[,1:4], mean)))

##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1     5.843333    3.057333        3.758    1.199333

데이터별 활용기술 권고

처리속도	메모리 이내(50%, 1~4G)	10~100 Giga	tera 급
배치, 분석	dplyr	DB(RSQLite)	Sparklyr with Hadoop Cluster
near 실시간	Parallel, DT, feather	MonetDB	Impala 등 …