Actuellement, le bloc de données ressemble à ceci:
Scenario Month A B C 0 1 1 -0.593186301 1.045550808 -0.593816304 1 2 0.178626141 2.043084432 0.111370583 1 3 1.205779717 -0.324083723 -1.397716949 0 2 1 0.933615199 0.052647056 -0.656486153 2 2 1.647291688 -1.065793671 0.799040546 2 3 1.613663101 -1.955567231 -1.817457972 0 3 1 -0.621991775 1.634069402 -1.404981646 3 2 -1.899326887 -0.836322394 -1.826351541 3 3 0.164235141 -1.160701812 1.238246459
Je voudrais ajouter une ligne au-dessus de chaque ligne où Month = 1, comme ci-dessous. Je sais que dplyr propose une fonction add_row, mais j'aimerais ajouter des lignes en fonction d'une condition. Toute aide est grandement appréciée.
Scenario Month A B C 1 1 -0.593186301 1.045550808 -0.593816304 1 2 0.178626141 2.043084432 0.111370583 1 3 1.205779717 -0.324083723 -1.397716949 2 1 0.933615199 0.052647056 -0.656486153 2 2 1.647291688 -1.065793671 0.799040546 2 3 1.613663101 -1.955567231 -1.817457972 3 1 -0.621991775 1.634069402 -1.404981646 3 2 -1.899326887 -0.836322394 -1.826351541 3 3 0.164235141 -1.160701812 1.238246459
4 Réponses :
Voici une méthode simple (sans boucles) utilisant la base R -
# Example data: three scenarios, each observed over months 1 to 3, with three
# numeric measurement columns (A, B, C).
df <- data.frame(
  Scenario = rep(1:3, each = 3L),
  Month = rep(1:3, times = 3L),
  A = c(
    -0.593186301, 0.178626141, 1.205779717,
    0.933615199, 1.647291688, 1.613663101,
    -0.621991775, -1.899326887, 0.164235141
  ),
  B = c(
    1.045550808, 2.043084432, -0.324083723,
    0.052647056, -1.065793671, -1.955567231,
    1.634069402, -0.836322394, -1.160701812
  ),
  C = c(
    -0.593816304, 0.111370583, -1.397716949,
    -0.656486153, 0.799040546, -1.817457972,
    -1.404981646, -1.826351541, 1.238246459
  )
)
Data -
# Insert a placeholder row (Scenario = 0, every other column NA) directly
# above each row where Month == 1, using base R only (no loops).

# Repeat the index of every Month == 1 row twice and all other indices once.
row_idx <- rep(seq_len(nrow(df)), (df$Month == 1) + 1L)
df1 <- df[row_idx, ]

# The first copy of each repeated index is the inserted row. Flagging it by
# index rather than by row content keeps genuinely duplicated data rows and
# pre-existing NA scenarios untouched.
is_inserted <- duplicated(row_idx, fromLast = TRUE)
df1[is_inserted, ] <- NA
df1$Scenario[is_inserted] <- 0
df1
#     Scenario Month          A           B          C
# 1          0    NA         NA          NA         NA
# 1.1        1     1 -0.5931863  1.04555081 -0.5938163
# 2          1     2  0.1786261  2.04308443  0.1113706
# 3          1     3  1.2057797 -0.32408372 -1.3977169
# 4          0    NA         NA          NA         NA
# 4.1        2     1  0.9336152  0.05264706 -0.6564862
# 5          2     2  1.6472917 -1.06579367  0.7990405
# 6          2     3  1.6136631 -1.95556723 -1.8174580
# 7          0    NA         NA          NA         NA
# 7.1        3     1 -0.6219918  1.63406940 -1.4049816
# 8          3     2 -1.8993269 -0.83632239 -1.8263515
# 9          3     3  0.1642351 -1.16070181  1.2382465
Une solution utilisant tidyverse.
# Rebuild the example data from an inline, whitespace-delimited table.
# Every record sits on its own line: read.table() separates rows on newlines,
# so a single-line literal would be read as one long header with no data rows.
dat <- read.table(
  text = "Scenario Month A B C
1 1 -0.593186301 1.045550808 -0.593816304
1 2 0.178626141 2.043084432 0.111370583
1 3 1.205779717 -0.324083723 -1.397716949
2 1 0.933615199 0.052647056 -0.656486153
2 2 1.647291688 -1.065793671 0.799040546
2 3 1.613663101 -1.955567231 -1.817457972
3 1 -0.621991775 1.634069402 -1.404981646
3 2 -1.899326887 -0.836322394 -1.826351541
3 3 0.164235141 -1.160701812 1.238246459",
  header = TRUE
)
library(tidyverse)

# Split the data by Scenario, put a one-row tibble holding only Scenario = 0
# on top of each piece (the columns it lacks are filled with NA), then stack
# the pieces back into a single tibble.
dat2 <- dat %>%
  split(f = .$Scenario) %>%
  map(function(piece) bind_rows(tibble(Scenario = 0), piece)) %>%
  bind_rows()

dat2
# # A tibble: 12 x 5
#    Scenario Month      A       B      C
#       <dbl> <int>  <dbl>   <dbl>  <dbl>
#  1        0    NA NA     NA      NA
#  2        1     1 -0.593  1.05   -0.594
#  3        1     2  0.179  2.04    0.111
#  4        1     3  1.21  -0.324  -1.40
#  5        0    NA NA     NA      NA
#  6        2     1  0.934  0.0526 -0.656
#  7        2     2  1.65  -1.07    0.799
#  8        2     3  1.61  -1.96   -1.82
#  9        0    NA NA     NA      NA
# 10        3     1 -0.622  1.63   -1.40
# 11        3     2 -1.90  -0.836  -1.83
# 12        3     3  0.164 -1.16    1.24
Pour une raison quelconque, add_row n'accepte pas plusieurs valeurs pour son paramètre .before.
Une façon de faire consiste à scinder le dataframe à chaque ligne où Month = 1, puis, pour chaque sous-dataframe, à ajouter une ligne au-dessus de Month = 1 à l'aide de add_row.
library(tidyverse)

# Start a new chunk at every row where Month == 1 (the running count of such
# rows is the chunk id), then insert a Scenario = 0 row just before the
# Month == 1 row of each chunk and stack the chunks back together.
df %>%
  split(cumsum(.$Month == 1)) %>%
  map_df(function(chunk) {
    add_row(chunk, Scenario = 0, .before = which(chunk$Month == 1))
  })
#    Scenario Month          A           B          C
# 1         0    NA         NA          NA         NA
# 2         1     1 -0.5931863  1.04555081 -0.5938163
# 3         1     2  0.1786261  2.04308443  0.1113706
# 4         1     3  1.2057797 -0.32408372 -1.3977169
# 5         0    NA         NA          NA         NA
# 6         2     1  0.9336152  0.05264706 -0.6564862
# 7         2     2  1.6472917 -1.06579367  0.7990405
# 8         2     3  1.6136631 -1.95556723 -1.8174580
# 9         0    NA         NA          NA         NA
# 10        3     1 -0.6219918  1.63406940 -1.4049816
# 11        3     2 -1.8993269 -0.83632239 -1.8263515
# 12        3     3  0.1642351 -1.16070181  1.2382465
Voici une option avec data.table
# Example data for the data.table approach: three scenarios, each observed
# over months 1 to 3, with three numeric measurement columns (A, B, C).
df1 <- data.frame(
  Scenario = rep(1:3, each = 3L),
  Month = rep(1:3, times = 3L),
  A = c(
    -0.593186301, 0.178626141, 1.205779717,
    0.933615199, 1.647291688, 1.613663101,
    -0.621991775, -1.899326887, 0.164235141
  ),
  B = c(
    1.045550808, 2.043084432, -0.324083723,
    0.052647056, -1.065793671, -1.955567231,
    1.634069402, -0.836322394, -1.160701812
  ),
  C = c(
    -0.593816304, 0.111370583, -1.397716949,
    -0.656486153, 0.799040546, -1.817457972,
    -1.404981646, -1.826351541, 1.238246459
  )
)
Ou, comme @chinsoon12 l'a mentionné dans les commentaires :
# data.table variant (setDT() and rbindlist() come from data.table).
# setDT() converts df1 to a data.table by reference. Within each Scenario
# group, rbindlist() stacks a one-row list holding only Scenario = 0L on top
# of the group's own rows; fill = TRUE pads the columns that row lacks with NA.
# The trailing [, -1L] drops the first column of the grouped result.
setDT(df1)[
  ,
  rbindlist(
    list(
      list(Scenario = 0L),
      c(list(Scenario = rep(Scenario, .N)), .SD)
    ),
    use.names = TRUE,
    fill = TRUE
  ),
  by = list(Scenario)
][, -1L]
Une autre possibilité : df1[, rbindlist(.(.(Scenario = 0L), c(.(Scenario = rep(Scenario, .N)), .SD)), use.names = TRUE, fill = TRUE), by = .(Scenario)][, -1L]