Chapter 21 data.table
21.1 Split data.table into chunks in a list
data.table provides its own `split()` method, which is faster and more flexible. Be aware that processing a list of data.tables is generally much slower than manipulating a single data.table by group with the `by` argument; see the data.table documentation for more details.
library(data.table)
# Reproducible toy data: three character grouping columns (x1, x2, x3) and one
# numeric value column (y), 12 rows in total.
set.seed(123)
dt <- data.table(
  x1 = rep(letters[1:2], times = 6),
  x2 = rep(letters[3:5], times = 4),
  x3 = rep(letters[5:8], times = 3),
  y = rnorm(n = 12)
)
# Shuffle the rows so that members of a group are no longer contiguous.
dt <- dt[sample(nrow(dt))]
# Plain data.frame copy of the shuffled table.
df <- as.data.frame(dt)
df
## x1 x2 x3 y
## 1 a e g 1.55870831
## 2 b d h -1.26506123
## 3 b c f -0.44566197
## 4 a c g 0.46091621
## 5 b d f -0.23017749
## 6 a c e -0.56047565
## 7 b e f 1.71506499
## 8 b e h 0.35981383
## 9 b c h 0.07050839
## 10 a d g 1.22408180
## 11 a e e -0.68685285
## 12 a d e 0.12928774
21.1.1 Nested list using the `flatten` argument
# Flat split (flatten = TRUE is the default): one list element per observed
# x1/x2 combination, named "<x1>.<x2>".
new_list <- split(dt, by = c("x1", "x2"), flatten = TRUE)
new_list
## $a.e
## x1 x2 x3 y
## 1: a e g 1.5587083
## 2: a e e -0.6868529
##
## $b.d
## x1 x2 x3 y
## 1: b d h -1.2650612
## 2: b d f -0.2301775
##
## $b.c
## x1 x2 x3 y
## 1: b c f -0.44566197
## 2: b c h 0.07050839
##
## $a.c
## x1 x2 x3 y
## 1: a c g 0.4609162
## 2: a c e -0.5604756
##
## $b.e
## x1 x2 x3 y
## 1: b e f 1.7150650
## 2: b e h 0.3598138
##
## $a.d
## x1 x2 x3 y
## 1: a d g 1.2240818
## 2: a d e 0.1292877
# Nested split: with flatten = FALSE the result is a list keyed by x1 whose
# elements are themselves lists keyed by x2.
new_list <- split(dt, by = c("x1", "x2"), flatten = FALSE)
new_list
## $a
## $a$e
## x1 x2 x3 y
## 1: a e g 1.5587083
## 2: a e e -0.6868529
##
## $a$c
## x1 x2 x3 y
## 1: a c g 0.4609162
## 2: a c e -0.5604756
##
## $a$d
## x1 x2 x3 y
## 1: a d g 1.2240818
## 2: a d e 0.1292877
##
##
## $b
## $b$d
## x1 x2 x3 y
## 1: b d h -1.2650612
## 2: b d f -0.2301775
##
## $b$c
## x1 x2 x3 y
## 1: b c f -0.44566197
## 2: b c h 0.07050839
##
## $b$e
## x1 x2 x3 y
## 1: b e f 1.7150650
## 2: b e h 0.3598138
21.1.2 Example
# Example table: eight gene/group assignments across two groups. Gene "c"
# occurs twice in group1 and gene "d" occurs twice in group2.
dt_example <- data.table(
  group = rep(c("group1", "group2"), times = 4),
  gene = c(letters[1:4], letters[3:6])
)
# Show the example table that the following steps operate on.
dt_example
## group gene
## 1: group1 a
## 2: group2 b
## 3: group1 c
## 4: group2 d
## 5: group1 c
## 6: group2 d
## 7: group1 e
## 8: group2 f
21.1.2.1 Create a matrix from data.table
library(UpSetR)
# Split the example table into one data.table per group.
# A column named "z" is excluded only if it exists. Columns are selected by
# name rather than with `-which(...)`: when nothing matches, `-integer(0)` is
# an empty index, and a single-index subset of a data.table selects rows, so
# the table would be emptied and the split would return `named list()`.
list_group <- split(
  dt_example[, setdiff(names(dt_example), "z"), with = FALSE],
  by = "group",
  drop = TRUE
)
list_group
## $group1
## group gene
## 1: group1 a
## 2: group1 c
## 3: group1 c
## 4: group1 e
##
## $group2
## group gene
## 1: group2 b
## 2: group2 d
## 3: group2 d
## 4: group2 f