Chapter 21 data.table
21.1 Split data.table into chunks in a list
data.table provides its own `split` method, which is faster and more flexible. Be aware that processing a list of data.tables will generally be much slower than manipulating a single data.table by group using the `by` argument; read more in the data.table documentation.
library(data.table)
# Fix the RNG seed so the simulated values and the row shuffle are reproducible.
set.seed(123)

# Example table: three character columns (x1, x2, x3) built by recycling
# letters, plus one numeric column (y) of 12 standard-normal draws.
dt <- data.table(
  x1 = rep(letters[1:2], 6),
  x2 = rep(letters[3:5], 4),
  x3 = rep(letters[5:8], 3),
  y = rnorm(12)
)

# Shuffle the rows; inside `[`, .N is the number of rows of dt.
dt <- dt[sample(.N)]

# Plain data.frame copy of the same data, printed below.
df <- as.data.frame(dt)
df
## x1 x2 x3 y
## 1 a e g 1.55870831
## 2 b d h -1.26506123
## 3 b c f -0.44566197
## 4 a c g 0.46091621
## 5 b d f -0.23017749
## 6 a c e -0.56047565
## 7 b e f 1.71506499
## 8 b e h 0.35981383
## 9 b c h 0.07050839
## 10 a d g 1.22408180
## 11 a e e -0.68685285
## 12 a d e 0.12928774
21.1.1 Nested list using the flatten argument
# Split on the combinations of x1 and x2. Without flatten = FALSE the result
# is a flat list with one data.table per combination, named "<x1>.<x2>".
new_list <- split(dt, by = c("x1", "x2"))
new_list
## $a.e
## x1 x2 x3 y
## 1: a e g 1.5587083
## 2: a e e -0.6868529
##
## $b.d
## x1 x2 x3 y
## 1: b d h -1.2650612
## 2: b d f -0.2301775
##
## $b.c
## x1 x2 x3 y
## 1: b c f -0.44566197
## 2: b c h 0.07050839
##
## $a.c
## x1 x2 x3 y
## 1: a c g 0.4609162
## 2: a c e -0.5604756
##
## $b.e
## x1 x2 x3 y
## 1: b e f 1.7150650
## 2: b e h 0.3598138
##
## $a.d
## x1 x2 x3 y
## 1: a d g 1.2240818
## 2: a d e 0.1292877
# flatten = FALSE nests the result instead: one list per x1 value, each
# holding one data.table per x2 value.
new_list <- split(dt, by = c("x1", "x2"), flatten = FALSE)
new_list
## $a
## $a$e
## x1 x2 x3 y
## 1: a e g 1.5587083
## 2: a e e -0.6868529
##
## $a$c
## x1 x2 x3 y
## 1: a c g 0.4609162
## 2: a c e -0.5604756
##
## $a$d
## x1 x2 x3 y
## 1: a d g 1.2240818
## 2: a d e 0.1292877
##
##
## $b
## $b$d
## x1 x2 x3 y
## 1: b d h -1.2650612
## 2: b d f -0.2301775
##
## $b$c
## x1 x2 x3 y
## 1: b c f -0.44566197
## 2: b c h 0.07050839
##
## $b$e
## x1 x2 x3 y
## 1: b e f 1.7150650
## 2: b e h 0.3598138
21.1.2 Example
# Example table: eight genes assigned alternately to two groups. Genes "c" and
# "d" appear twice within their group.
dt_example <- data.table(
  group = rep(c("group1", "group2"), 4),
  gene = c(letters[1:4], letters[3:6])
)

# Print the table just created (not the earlier `dt`).
dt_example
## group gene
## 1: group1 a
## 2: group2 b
## 3: group1 c
## 4: group2 d
## 5: group1 c
## 6: group2 d
## 7: group1 e
## 8: group2 f
21.1.2.1 Create a matrix from data.table
library(UpSetR)
# Drop column "z" (if present) by name before splitting. setdiff() is safe
# when "z" does not exist, whereas `-which(names(x) == "z")` yields
# integer(0) on no match, which selects nothing; on a data.table a single
# index inside `[` also refers to rows, not columns.
keep_cols <- setdiff(names(dt_example), "z")

# One data.table per value of `group`.
list_group <- split(dt_example[, keep_cols, with = FALSE],
                    by = "group", drop = TRUE)
list_group
## $group1
## group gene
## 1: group1 a
## 2: group1 c
## 3: group1 c
## 4: group1 e
##
## $group2
## group gene
## 1: group2 b
## 2: group2 d
## 3: group2 d
## 4: group2 f