3
votes

Ajouter des lignes à un bloc de données en fonction des valeurs de l'une des colonnes

Actuellement, le bloc de données ressemble à ceci:

    Scenario Month       A            B            C
      0                                             
      1     1 -0.593186301  1.045550808 -0.593816304
      1     2  0.178626141  2.043084432  0.111370583
      1     3  1.205779717 -0.324083723 -1.397716949
      0                                             
      2     1  0.933615199  0.052647056 -0.656486153
      2     2  1.647291688 -1.065793671  0.799040546
      2     3  1.613663101 -1.955567231 -1.817457972
      0                                             
      3     1 -0.621991775  1.634069402 -1.404981646
      3     2 -1.899326887 -0.836322394 -1.826351541
      3     3  0.164235141 -1.160701812  1.238246459

Je voudrais ajouter une ligne au-dessus de chaque ligne où Month = 1, comme ci-dessous. Je sais que dplyr a une fonction add_row, mais j'aimerais ajouter des lignes en fonction d'une condition. Toute aide est vivement appréciée.

   Scenario Month            A            B            C
         1     1 -0.593186301  1.045550808 -0.593816304
         1     2  0.178626141  2.043084432  0.111370583
         1     3  1.205779717 -0.324083723 -1.397716949
         2     1  0.933615199  0.052647056 -0.656486153
         2     2  1.647291688 -1.065793671  0.799040546
         2     3  1.613663101 -1.955567231 -1.817457972
         3     1 -0.621991775  1.634069402 -1.404981646
         3     2 -1.899326887 -0.836322394 -1.826351541
         3     3  0.164235141 -1.160701812  1.238246459

r dplyr tidyverse

0 commentaires

4 Réponses :

0
votes

Voici une méthode simple (sans boucles) utilisant la base R -

# Sample data: three scenarios, each observed over months 1 to 3, with three
# numeric measurement columns (A, B, C). Scenario and Month are integers.
df <- data.frame(
  Scenario = rep(1:3, each = 3L),
  Month = rep(1:3, times = 3L),
  A = c(
    -0.593186301, 0.178626141, 1.205779717,
    0.933615199, 1.647291688, 1.613663101,
    -0.621991775, -1.899326887, 0.164235141
  ),
  B = c(
    1.045550808, 2.043084432, -0.324083723,
    0.052647056, -1.065793671, -1.955567231,
    1.634069402, -0.836322394, -1.160701812
  ),
  C = c(
    -0.593816304, 0.111370583, -1.397716949,
    -0.656486153, 0.799040546, -1.817457972,
    -1.404981646, -1.826351541, 1.238246459
  )
)

Avec les données ci-dessus, la méthode est la suivante :

# Insert a separator row (Scenario = 0, all other columns NA) directly above
# every row of `df` where Month == 1.

# Rows that open a new block. `%in%` never returns NA, so a missing Month is
# simply treated as "not a block start" instead of breaking rep().
starts_block <- df$Month %in% 1

# Repeat block-opening rows twice and all other rows once. seq_len() keeps
# this safe for a zero-row data frame.
row_idx <- rep(seq_len(nrow(df)), times = starts_block + 1L)
df1 <- df[row_idx, ]

# The first copy of each doubled row becomes the separator. It is located from
# the row index rather than from the row contents, so rows of `df` that merely
# happen to be identical are left untouched.
sep_rows <- which(duplicated(row_idx, fromLast = TRUE))
df1[sep_rows, ] <- NA

# Flag only the separator rows; a pre-existing NA in Scenario is not altered.
df1$Scenario[sep_rows] <- 0

df1
    Scenario Month          A           B          C
1          0    NA         NA          NA         NA
1.1        1     1 -0.5931863  1.04555081 -0.5938163
2          1     2  0.1786261  2.04308443  0.1113706
3          1     3  1.2057797 -0.32408372 -1.3977169
4          0    NA         NA          NA         NA
4.1        2     1  0.9336152  0.05264706 -0.6564862
5          2     2  1.6472917 -1.06579367  0.7990405
6          2     3  1.6136631 -1.95556723 -1.8174580
7          0    NA         NA          NA         NA
7.1        3     1 -0.6219918  1.63406940 -1.4049816
8          3     2 -1.8993269 -0.83632239 -1.8263515
9          3     3  0.1642351 -1.16070181  1.2382465

0 commentaires

4
votes

Une solution utilisant tidyverse.

# Parse the sample data from an inline, whitespace-delimited text block.
# The first line of the text supplies the column names.
dat <- read.table(
  header = TRUE,
  text = "Scenario Month            A            B            C
         1     1 -0.593186301  1.045550808 -0.593816304
         1     2  0.178626141  2.043084432  0.111370583
         1     3  1.205779717 -0.324083723 -1.397716949
         2     1  0.933615199  0.052647056 -0.656486153
         2     2  1.647291688 -1.065793671  0.799040546
         2     3  1.613663101 -1.955567231 -1.817457972
         3     1 -0.621991775  1.634069402 -1.404981646
         3     2 -1.899326887 -0.836322394 -1.826351541
         3     3  0.164235141 -1.160701812  1.238246459 "
)

library(tidyverse)

# Split the data into one piece per Scenario, put a one-column row
# (Scenario = 0) in front of each piece, then stack the pieces back together.
# Columns missing from the inserted row are filled with NA by bind_rows().
dat2 <- bind_rows(
  map(
    split(dat, dat$Scenario),
    function(piece) bind_rows(tibble(Scenario = 0), piece)
  )
)
dat2
# # A tibble: 12 x 5
#    Scenario Month       A        B       C
#       <dbl> <int>   <dbl>    <dbl>   <dbl>
#  1        0    NA  NA      NA       NA    
#  2        1     1  -0.593   1.05    -0.594
#  3        1     2   0.179   2.04     0.111
#  4        1     3   1.21   -0.324   -1.40 
#  5        0    NA  NA      NA       NA    
#  6        2     1   0.934   0.0526  -0.656
#  7        2     2   1.65   -1.07     0.799
#  8        2     3   1.61   -1.96    -1.82 
#  9        0    NA  NA      NA       NA    
# 10        3     1  -0.622   1.63    -1.40 
# 11        3     2  -1.90   -0.836   -1.83 
# 12        3     3   0.164  -1.16     1.24

0 commentaires

3
votes

Pour une raison quelconque, add_row n'accepte pas plusieurs valeurs pour son argument .before.

Une solution consiste à découper le data frame à chaque ligne où Month = 1, puis, pour chaque morceau, à ajouter une ligne avec add_row au-dessus de celle où Month = 1.

library(tidyverse)
# cumsum(Month == 1) increases by one at every Month == 1 row, so it labels
# consecutive runs of rows; split on that label, insert a Scenario = 0 row
# before the Month == 1 row of each run, and row-bind the results.
df %>%
  split(cumsum(.$Month == 1)) %>%
  map_df(function(block) {
    add_row(block, Scenario = 0, .before = which(block$Month == 1))
  })

#   Scenario Month          A           B          C
#1         0    NA         NA          NA         NA
#2         1     1 -0.5931863  1.04555081 -0.5938163
#3         1     2  0.1786261  2.04308443  0.1113706
#4         1     3  1.2057797 -0.32408372 -1.3977169
#5         0    NA         NA          NA         NA
#6         2     1  0.9336152  0.05264706 -0.6564862
#7         2     2  1.6472917 -1.06579367  0.7990405
#8         2     3  1.6136631 -1.95556723 -1.8174580
#9         0    NA         NA          NA         NA
#10        3     1 -0.6219918  1.63406940 -1.4049816
#11        3     2 -1.8993269 -0.83632239 -1.8263515
#12        3     3  0.1642351 -1.16070181  1.2382465

0 commentaires

2
votes

Voici une option avec data.table

# Sample data: three scenarios, each observed over months 1 to 3, with three
# numeric measurement columns (A, B, C). Scenario and Month are integers.
df1 <- data.frame(
  Scenario = rep(1:3, each = 3L),
  Month = rep(1:3, times = 3L),
  A = c(
    -0.593186301, 0.178626141, 1.205779717,
    0.933615199, 1.647291688, 1.613663101,
    -0.621991775, -1.899326887, 0.164235141
  ),
  B = c(
    1.045550808, 2.043084432, -0.324083723,
    0.052647056, -1.065793671, -1.955567231,
    1.634069402, -0.836322394, -1.160701812
  ),
  C = c(
    -0.593816304, 0.111370583, -1.397716949,
    -0.656486153, 0.799040546, -1.817457972,
    -1.404981646, -1.826351541, 1.238246459
  )
)

Ou, comme @chinsoon12 l'a mentionné dans les commentaires :

# NOTE(review): assumes the data.table package is already attached; no
# library() call for it is visible in this snippet.
# Convert df1 to a data.table by reference, then, per Scenario group, stack a
# one-column row (Scenario = 0L) on top of the group's rows. fill = TRUE pads
# the columns missing from the inserted row with NA. The trailing [, -1L]
# drops the grouping column that `by` prepends, leaving the Scenario column
# built inside the group.
setDT(df1)[
  ,
  rbindlist(
    list(
      list(Scenario = 0L),
      c(list(Scenario = rep(Scenario, .N)), .SD)
    ),
    use.names = TRUE,
    fill = TRUE
  ),
  by = .(Scenario)
][, -1L]

1 commentaires

une autre possibilité : df1[, rbindlist(.(.(Scenario = 0L), c(.(Scenario = rep(Scenario, .N)), .SD)), use.names = TRUE, fill = TRUE), by = .(Scenario)][, -1L]