# The first functions to learn
?
str
# Important operators and assignment
%in%, match
=, <-, <<-
$, [, [[, head, tail, subset
with
assign, get
# Comparison
all.equal, identical
!=, ==, >, >=, <, <=
is.na, complete.cases
is.finite
# Basic math
*, +, -, /, ^, %%, %/%
abs, sign
acos, asin, atan, atan2
sin, cos, tan
ceiling, floor, round, trunc, signif
exp, log, log10, log2, sqrt
max, min, prod, sum
cummax, cummin, cumprod, cumsum, diff
pmax, pmin
range
mean, median, cor, sd, var
rle
# Functions to do with functions
function
missing
on.exit
return, invisible
# Logical & sets
&, |, !, xor
all, any
intersect, union, setdiff, setequal
which
# Vectors and matrices
c, matrix
# automatic coercion rules character > numeric > logical
length, dim, ncol, nrow
cbind, rbind
names, colnames, rownames
t
diag
sweep
as.matrix, data.matrix
# Making vectors
c
rep, rep_len
seq, seq_len, seq_along
rev
sample
choose, factorial, combn
(is/as).(character/numeric/logical/...)
# Lists & data.frames
list, unlist
data.frame, as.data.frame
split
expand.grid
# Control flow
if, &&, || (short circuiting)
for, while
next, break
switch
ifelse
# Apply & friends
lapply, sapply, vapply
apply
tapply
replicate
# Common data structures
# Date time
ISOdate, ISOdatetime, strftime, strptime, date
difftime
julian, months, quarters, weekdays
library(lubridate)
# Character manipulation
grep, agrep
gsub
strsplit
chartr
nchar
tolower, toupper
substr
paste
trimws
library(stringr)
# Factors
factor, levels, nlevels
reorder, relevel
cut, findInterval
interaction
options(stringsAsFactors = FALSE)
# Array manipulation
array
dim
dimnames
aperm
library(abind)
# Ordering and tabulating
duplicated, unique
merge
order, rank, quantile
sort
table, ftable
# Linear models
fitted, predict, resid, rstandard
lm, glm
hat, influence.measures
logLik, df, deviance
formula, ~, I
anova, coef, confint, vcov
contrasts
# Miscellaneous tests
apropos("\\.test$")
# Random variables
(q, p, d, r) * (beta, binom, cauchy, chisq, exp, f, gamma, geom,
hyper, lnorm, logis, multinom, nbinom, norm, pois, signrank, t,
unif, weibull, wilcox, birthday, tukey)
# Matrix algebra
crossprod, tcrossprod
eigen, qr, svd
%*%, %o%, outer
rcond
solve
# Workspace
ls, exists, rm
getwd, setwd
q
source
install.packages, library, require
# Help
help, ?
help.search
apropos
RSiteSearch
citation
demo
example
vignette
# Debugging
traceback
browser
recover
options(error = )
stop, warning, message
tryCatch, try
# Output
print, cat
message, warning
dput
format
sink, capture.output
sprintf
# Reading and writing data
data
count.fields
read.csv, write.csv
read.delim, write.table
read.fwf
readLines, writeLines
readRDS, saveRDS
load, save
library(foreign)
# Files and directories
dir
basename, dirname, tools::file_ext
file.path
path.expand, normalizePath
file.choose
file.copy, file.create, file.remove, file.rename, dir.create
file.exists, file.info
tempdir, tempfile
download.file, library(downloader)
function | input | output |
---|---|---|
apply() | 배열 또는 행렬 | 배열 또는 리스트 |
lapply() | 벡터, 리스트, 표현식 | 리스트 반환 |
sapply() | lapply와 유사 | 벡터, 행렬, 배열 |
tapply() | 데이터를 그룹으로 묶은 뒤 함수 적용 | |
mapply() | 여러 벡터, 리스트를 함수의 인자들로 받아 처리 | |
# Demo: a 3x3 integer matrix, filled column by column with 1..9.
d <- matrix(data = seq_len(9), ncol = 3)
d
##      [,1] [,2] [,3]
## [1,]    1    4    7
## [2,]    2    5    8
## [3,]    3    6    9

# MARGIN = 1 applies the function across each row.
apply(X = d, MARGIN = 1, FUN = sum)
## [1] 12 15 18

# MARGIN = 2 applies the function down each column.
apply(X = d, MARGIN = 2, FUN = sum)
## [1]  6 15 24

# lapply always returns a list, one element per input element.
result <- lapply(X = seq_len(3), FUN = function(value) {
  value * 2
})
데이터 프레임을 처리한 결과를 리스트로 얻은 뒤 해당 리스트를 다시 데이터 프레임으로 변환해야 할 때는 단계가 복잡하다.
# Long way round: list of column means -> atomic vector -> 1x4 matrix ->
# data frame, then restore the column names that unlist/matrix dropped.
d <- as.data.frame(
  matrix(
    data = unlist(lapply(iris[1:4], mean)),
    ncol = 4,
    byrow = TRUE
  )
)
names(d) <- names(iris)[1:4]
d
##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1     5.843333    3.057333        3.758    1.199333

# Shorter: cbind the list elements into a one-row matrix (the names are
# kept as column names) and wrap the result in a data frame.
data.frame(
  do.call(what = cbind, args = lapply(iris[1:4], mean))
)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1     5.843333    3.057333        3.758    1.199333
처리속도 | 메모리 이내(50%, 1~4G) | 10~100 Giga | tera 급 |
---|---|---|---|
배치, 분석 | dplyr | DB(RSQLite) | Sparklyr with Hadoop Cluster |
near 실시간 | Parallel, DT, feather | MonetDB | Impala 등 … |