data.table - Random resampling from repeat measures in R
I'm new to re-sampling techniques and bootstrapping in R, so any help is appreciated. I have a dataframe that consists of different individuals that have been measured more than once. I would like to randomly sample a single measurement for each individual in the population, and calculate the population mean and standard deviation. I would like to repeat this procedure many times (500 or so) and obtain 2 new dataframes: 1 for the population means and 1 for the population standard deviations of each variable (dim.1 through dim.4). From this, I can extract a global mean and standard deviation for downstream analysis. Here is what my dataframe looks like ("id" is each individual's unique number, and as you can see there are variable amounts of repeat measures for each individual).
id      dim.1       dim.2        dim.3        dim.4 41  0.4001945  1.15899378  0.269197195  0.184791153 14  2.1615710  1.15712356 -0.096055808  0.450943821 63  0.4325496  0.75521068  0.085588532 -0.233144806 53  1.2459718  0.97450610 -0.069171367 -0.613423267 63  1.3380629  0.22606572 -0.061178395 -0.304960508 42  1.6048214  0.94184036  0.232863647 -0.201738198 57  1.3306709  0.80440736 -0.955949551 -0.734022636 53  0.7019118  0.87285991 -0.042557052 -0.146748989 51  0.7235493  0.29946448  0.474477629  0.305810371 53  1.2431220  1.20252749 -0.073627812  0.237740020 41  1.1788653  0.55536570 -0.017354302  0.119014260 14  2.5769809  0.18551630  0.634304132  0.617288243 67  1.0445458  1.47107481  0.024383348  0.111808376 31  0.9759513  1.31091796 -0.008660192  0.189962355 63  1.8621687  0.97137412  0.317014897 -0.390871248 76  0.5905190  1.49817641 -0.374503265  0.142478388 90  2.4323563  0.87696545  0.467220123  0.513197279 67  2.2378032  0.35682721  0.400233674 -0.926848226 41  1.7098808  0.40470067  0.050950910 -0.153059068 97  1.5351169  1.11597681  0.011878347 -0.092047152 63  1.2647155  0.80006707  0.730022680 -0.089726522 57  1.7200676  0.01358165  0.450075592  0.038352174 76  0.6949196  1.36741272 -1.286488394  0.477345585 123  2.4235534  1.69165605  0.528863655  0.447856674 76 -2.4022432 -0.27531557 -1.850999153  2.194893741 117  1.6955740 -1.86088122  1.502655438  0.856026945 117  0.7130716  1.44198379 -1.495098987 -1.021981479 131  0.8425548  1.22970621 -0.160634720  0.005202717 117  1.0913048  1.19834030 -0.240309947  0.279379075 90  2.5787954  0.21638781  0.973339314  0.853752379 105  1.4989440  1.31525062  0.233114414  0.082557111 45  0.4749492  0.36264159  0.016554066  0.434416650 14  1.9841503 -0.18133091 -0.517021686  0.131796394 here dput version....
structure(list(anid = structure(c(3L, 1L, 9L, 7L, 9L, 4L, 8L,  7L, 6L, 7L, 3L, 1L, 10L, 2L, 9L, 11L, 12L, 10L, 3L, 13L, 9L,  8L, 11L, 16L, 11L, 15L, 15L, 17L, 15L, 12L, 14L, 5L, 1L), .Label = c("14",  "31", "41", "42", "45", "51", "53", "57", "63", "67", "76", "90",  "97", "105", "117", "123", "131"), class = "factor"), dim.1 = c(0.400194544195721,  2.16157096683054, 0.432549610256816, 1.24597182598991, 1.33806287869605,  1.60482137307563, 1.33067093524332, 0.701911835019105, 0.723549265733465,  1.24312199041168, 1.17886527411877, 2.57698094739979, 1.04454579781695,  0.975951278566957, 1.86216869726173, 0.590519015534528, 2.43235630542313,  2.23780317751189, 1.70988079418724, 1.53511692947232, 1.26471553939687,  1.72006761902848, 0.694919562457936, 2.42355344632234, -2.40224317003857,  1.69557401848893, 0.713071563313831, 0.84255475961074, 1.09130484807346,  2.57879543707134, 1.49894397171646, 0.474949215360165, 1.9841503256016 ), dim.2 = c(1.15899377720071, 1.15712355628702, 0.755210676050028,  0.974506103663373, 0.226065715930444, 0.941840360304357, 0.804407356238532,  0.872859912826886, 0.299464475124326, 1.2025274866889, 0.55536570304097,  0.185516296049789, 1.47107481283135, 1.31091795925695, 0.971374119614307,  1.49817640676682, 0.876965451353274, 0.356827207847936, 0.404700668672103,  1.11597680662439, 0.800067070614603, 0.0135816493815426, 1.36741271705742,  1.69165605426992, -0.275315573666507, -1.86088122056554, 1.44198379044125,  1.229706212058, 1.19834030462339, 0.216387812905091, 1.31525061699366,  0.362641590025834, -0.181330912913297), dim.3 = c(0.269197195180612,  -0.0960558078596061, 0.0855885321454752, -0.0691713671666404,  -0.0611783947257435, 0.232863646917399, -0.955949551451659, -0.0425570523689114,  0.474477629049467, -0.0736278121798866, -0.0173543018324465,  0.634304131880689, 0.0243833483864922, -0.00866019164798527,  0.317014896588811, -0.374503264871839, 0.467220123029729, 0.400233673552903,  0.0509509097106227, 0.0118783465387495,
0.730022679967163, 0.450075591988245,  -1.28648839432794, 0.528863655457902, -1.85099915345691, 1.50265543792412,  -1.49509898726221, -0.160634720376254, -0.24030994662375, 0.973339313851613,  0.233114414466102, 0.0165540663395682, -0.517021685999838), dim.4 = c(0.184791153018369,  0.45094382124022, -0.233144806193005, -0.613423266807646, -0.304960507895512,  -0.201738198311526, -0.734022636110577, -0.146748988783387, 0.305810371055691,  0.237740020179384, 0.11901425952943, 0.61728824337695, 0.111808376374363,  0.189962354663836, -0.390871248426407, 0.14247838773032, 0.513197279323348,  -0.926848226311571, -0.153059067639092, -0.0920471522899872,  -0.0897265219239891, 0.0383521738356584, 0.477345585143069, 0.447856673901548,  2.19489374105159, 0.856026944966164, -1.02198147948597, 0.00520271670521917,  0.279379074573862, 0.853752378937349, 0.0825571109781094, 0.434416649778733,  0.131796393683415)), .Names = c("anid", "dim.1", "dim.2", "dim.3",  "dim.4"), class = "data.frame", row.names = c("20", "26", "36",  "46", "49", "52", "75", "93", "94", "110", "118", "124", "132",  "143", "157", "168", "185", "199", "210", "211", "215", "225",  "240", "245", "248", "250", "254", "270", "272", "281", "297",  "322", "337"))
Here you go:
# Resampling for repeated measures: draw one random measurement per
# individual, summarise across individuals, and repeat many times.
library(data.table)

#' Draw one random row from a table.
#'
#' @param df A data.frame or data.table with at least one row. Inside
#'   `boot_dat()` this is the `.SD` subset belonging to one individual.
#' @return A one-row subset of `df`.
boot_id <- function(df) {
  # sample.int() picks a row index directly and avoids the 1:nrow(df) idiom.
  df[sample.int(nrow(df), size = 1L), ]
}

#' Repeatedly sample one measurement per individual and summarise.
#'
#' On each of `n` iterations a single row is drawn at random for every
#' distinct value of `id_col`, and the chosen statistic is computed for each
#' remaining column across the sampled rows.
#'
#' @param df A data.table holding the identifier column and the numeric
#'   measurement columns.
#' @param n Number of resampling iterations.
#' @param f Statistic computed per column on each iteration: `"mean"` or
#'   `"sd"`.
#' @param id_col Name of the column identifying individuals.
#' @return A numeric matrix with `n` rows and one named column per
#'   measurement column of `df`.
boot_dat <- function(df, n = 500, f = c("mean", "sd"), id_col = "id") {
  f <- match.arg(f)
  stopifnot(
    is.data.table(df),
    is.character(id_col),
    length(id_col) == 1L,
    id_col %in% names(df)
  )

  value_cols <- setdiff(names(df), id_col)
  res <- matrix(
    NA_real_,
    nrow = n,
    ncol = length(value_cols),
    dimnames = list(NULL, value_cols)
  )

  # seq_len() keeps the loop empty when n is 0 (1:n would iterate over 1, 0).
  for (i in seq_len(n)) {
    # .SD is the per-individual subset, so boot_id() returns one row each.
    sampled <- df[, boot_id(.SD), by = id_col]
    values <- sampled[, value_cols, with = FALSE]
    res[i, ] <- switch(f,
      mean = colMeans(values),
      sd = vapply(values, sd, numeric(1))
    )
  }
  res
}

# dt <- <your structure>
names(dt) <- c("id", "d1", "d2", "d3", "d4")
dt <- data.table(dt)
setkey(dt, id)

dat_means <- boot_dat(dt, f = "mean")
dat_sds <- boot_dat(dt, f = "sd")
Comments
Post a Comment