1
votes

Renommer conditionnellement les éléments au sein des groupes en fonction des observations précédentes dans tidyverse

Mon dataframe contient les éléments suivants:

1) Chaque user_id peut avoir plusieurs order_id 2) Chaque order_id peut être de 2 types: les commandes mensuelles ont un cycle de 1 mois (30 jours) ou 3 mois (90 jours). Les utilisateurs - au cours de sa vie - peuvent passer d'un plan_type à l'autre plusieurs fois.

df_final <- tibble::tribble(
              ~order_id, ~behavior_type,   ~order_type,
                    123,         "none", "acquisition",
                    124,         "none",      "repeat",
                    125,      "upgrade",      "repeat",
                    126,         "none", "acquisition",
                    127,    "downgrade",      "repeat",
                    128,         "none",      "repeat",
                    129,         "none", "acquisition",
                    130,         "none",      "repeat",
                    131,         "none",      "repeat",
                    132,         "none", "acquisition",
                    133,         "none",      "repeat"
              )

> df_final
# A tibble: 11 x 3
   order_id behavior_type order_type 
      <dbl> <chr>         <chr>      
 1      123 none          acquisition
 2      124 none          repeat     
 3      125 upgrade       repeat     
 4      126 none          acquisition
 5      127 downgrade     repeat     
 6      128 none          repeat     
 7      129 none          acquisition
 8      130 none          repeat     
 9      131 none          repeat     
10      132 none          acquisition
11      133 none          repeat     

Je souhaite créer 2 colonnes supplémentaires comme suit:

1) order_type : Je souhaite classer le order_ids regroupement par user_id et date ; après avoir classé la commande par date , je veux les nommer acquisition si rank = 1 (premier ordre), répéter sinon.

J'ai réussi à le faire comme suit:

df_input %>%
  group_by(user_id) %>%
  mutate(rank = row_number(date)) %>%
  mutate(order_type = ifelse(rank == '1','acquisition','repeats'))

2) Je souhaite créer une colonne behavior_type . Je souhaite marquer chaque order_id comme upgrade , downgrade ou none . Si l'utilisateur passe de mensuel à 3 mois , la commande sera mise à jour , si à partir de 3 mois code > à mensuellement un downgrade , none sinon.

Ma base de données finale devra ressembler à:

library(tidyverse)
df_input <- tibble::tribble(
              ~user_id, ~order_id,        ~date, ~plan_type,
                     1,       123, "01-01-2020",  "monthly",
                     1,       124, "01-31-2020",  "monthly",
                     1,       125, "03-01-2020", "3-months",
                     2,       126, "01-11-2019", "3-months",
                     2,       127, "10-13-2018",  "monthly",
                     2,       128, "11-12-2018",  "monthly",
                     3,       129, "01-10-2019", "3-months",
                     3,       130, "04-10-2019", "3-months",
                     3,       131, "07-09-2019", "3-months",
                     4,       132, "01-02-2020",  "monthly",
                     4,       133, "02-01-2020",  "monthly"
              )

> df_input
# A tibble: 11 x 4
   user_id order_id date       plan_type
     <dbl>    <dbl> <chr>      <chr>    
 1       1      123 01-01-2020 monthly  
 2       1      124 01-31-2020 monthly  
 3       1      125 03-01-2020 3-months 
 4       2      126 01-11-2019 3-months 
 5       2      127 10-13-2018 monthly  
 6       2      128 11-12-2018 monthly  
 7       3      129 01-10-2019 3-months 
 8       3      130 04-10-2019 3-months 
 9       3      131 07-09-2019 3-months 
10       4      132 01-02-2020 monthly  
11       4      133 02-01-2020 monthly  

Toute aide pour cette deuxième étape


0 commentaires

3 Réponses :


1
votes

Comme ça?

library(dplyr)
#just repeating your first step
df_interim <- df_input %>%
  group_by(user_id) %>%
  mutate(rank = row_number(date)) %>%
  mutate(order_type = ifelse(rank == '1','acquisition','repeats'))

df_interim %>% 
  mutate(behavior_type = case_when(plan_type == "monthly" & lag(plan_type) == "3-months" ~ "downgrade",
                                   plan_type == "3-months" & lag(plan_type) == "monthly" ~ "upgrade",
                                   TRUE ~ "none")) %>% 
  ungroup() %>% 
  select(order_id, behavior_type, order_type)

# A tibble: 11 x 3
   order_id behavior_type order_type 
      <dbl> <chr>         <chr>      
 1      123 none          acquisition
 2      124 none          repeats    
 3      125 upgrade       repeats    
 4      126 none          acquisition
 5      127 downgrade     repeats    
 6      128 none          repeats    
 7      129 none          acquisition
 8      130 none          repeats    
 9      131 none          repeats    
10      132 none          acquisition
11      133 none          repeats


0 commentaires

0
votes

La partie type de commande pourrait être plus simple comme ceci:

  group_by(user_id) %>%
  mutate(order_type = if_else(as.Date(date, "%m-%d-%Y") == min(as.Date(date, "%m-%d-%Y")), "acquisition", "repeat"))


0 commentaires

0
votes
library(dplyr)

df_input %>%
  group_by(user_id) %>%
  mutate(rank = row_number(date)) %>%
  mutate(order_type = if_else(rank == "1","acquisition","repeats"),
         behavior_type = if_else(rank == "1", "none",
                                 if_else(plan_type != lag(plan_type) & plan_type == "3-months", "upgrade",
                                         if_else(plan_type != lag(plan_type) & plan_type == "monthly", "downgrade", 
                                                 "none"))))

# A tibble: 11 x 7
# Groups:   user_id [4]
   user_id order_id date       plan_type  rank order_type  behavior_type
     <dbl>    <dbl> <chr>      <chr>     <int> <chr>       <chr>        
 1       1      123 01-01-2020 monthly       1 acquisition none         
 2       1      124 01-31-2020 monthly       2 repeats     none         
 3       1      125 03-01-2020 3-months      3 repeats     upgrade      
 4       2      126 01-11-2019 3-months      1 acquisition none         
 5       2      127 10-13-2018 monthly       2 repeats     downgrade    
 6       2      128 11-12-2018 monthly       3 repeats     none         
 7       3      129 01-10-2019 3-months      1 acquisition none         
 8       3      130 04-10-2019 3-months      2 repeats     none         
 9       3      131 07-09-2019 3-months      3 repeats     none         
10       4      132 01-02-2020 monthly       1 acquisition none         
11       4      133 02-01-2020 monthly       2 repeats     none  

0 commentaires