Quantcast
Channel: best way to transpose data.table - Stack Overflow
Viewing all articles
Browse latest Browse all 7

Answer by PeterAU for best way to transpose data.table

$
0
0

The tdt function provided below should be faster than the dcast/melt and t(.SD) approaches it is benchmarked against further down:

# Transpose a data.table while keeping its row and column labels.
#
# transpose() is efficient but does not keep row and column names. This
# wrapper uses the values of one column as the column names of the result and
# stores the names of the remaining columns in the first column of the result.
#
# Args:
#   DT:            a data.table.
#   transpose.col: a single column name of DT; its values become the column
#                  names of the result, and its name becomes the name of the
#                  first (label) column of the result.
#   ...:           further arguments forwarded to transpose().
#
# Returns:
#   A data.table with one row per column of DT other than transpose.col.
tdt <- function(DT, transpose.col, ...) {
  stopifnot(
    is.character(transpose.col),
    length(transpose.col) == 1L,
    transpose.col %in% colnames(DT)
  )

  # Names of the columns being transposed; they become the row labels.
  new.row.names <- colnames(DT)
  new.row.names <- new.row.names[!new.row.names %in% transpose.col]

  # as.character() so that factor or numeric label columns can be used as
  # column names as well.
  new.col.names <- as.character(DT[[transpose.col]])

  out <- transpose(DT[, new.row.names, with = FALSE], ...)

  # Attach the labels and move them to the front *before* applying the new
  # column names, so a value such as "var" in transpose.col cannot collide
  # with the temporary label column.
  out$var <- new.row.names
  setcolorder(out, c("var", setdiff(names(out), "var")))

  # Assign all final names in one step: label column first, then the headers.
  colnames(out) <- c(transpose.col, new.col.names)
  out
}

library(microbenchmark); library(microbenchmarkCore)
# Example input: 1000 rows with an integer id, a unique name and a test label.
DT <- data.table(
  x = seq_len(1000L),
  y = paste0("name_", seq_len(1000L)),
  z = paste0("test.", seq_len(1000L))
)

# Time the three transposition approaches in one microbenchmark() call, so
# that all expressions are measured within the same run instead of in three
# separate, sequential runs that are bound together afterwards.
microbenchmark(
  tdt(DT, "y"),
  dcast(melt(DT, id.vars = "y"), variable ~ y),
  DT[, data.table(t(.SD), keep.rownames = TRUE), .SDcols = -"y"]
)

Unit: milliseconds
expr       min        lq      mean    median        uq        max neval cld

tdt(DT, "y")  3.463842  3.719341  4.308158  3.911599  4.576477  20.406940   100  a 

dcast(melt(DT, id.vars = "y"), variable ~ y)  5.146119  5.496761  5.826647  5.580796  5.870584   9.536541   100  a 

DT[, data.table(t(.SD), keep.rownames = TRUE), .SDcols = -"y"] 29.975567 34.554989 40.807036 36.724430 39.102396 104.242218   100   b

# Transpose DT, using the values of column "y" as the new column names.
d <- tdt(DT, "y") 

# Show the first two rows and the first eleven columns of the result.
d[1:2, 1:11]
y name_1 name_2 name_3 name_4 name_5 name_6 name_7 name_8 name_9 name_10
1: x      1      2      3      4      5      6      7      8      9      10
2: z test.1 test.2 test.3 test.4 test.5 test.6 test.7 test.8 test.9 test.10

# Show the first ten rows of the original table for comparison.
DT[1:10, 1:3]
x       y       z
1:  1  name_1  test.1
2:  2  name_2  test.2
3:  3  name_3  test.3
4:  4  name_4  test.4
5:  5  name_5  test.5
6:  6  name_6  test.6
7:  7  name_7  test.7
8:  8  name_8  test.8
9:  9  name_9  test.9
10: 10 name_10 test.10

# Check the class of the transposed result.
class(d)
[1] "data.table" "data.frame"

Viewing all articles
Browse latest Browse all 7

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>