Skip to contents

Split a data frame into multiple new data frames based on one or two grouping variables. The surv_group_by() function takes an existing data frame and converts it into a grouped data frame where survival analyses are performed "by group".

Usage

surv_group_by(data, grouping.vars)

Arguments

data

a data frame

grouping.vars

a character vector containing the names of the grouping variables. Should be of length <= 2.

Value

Returns an object of class surv_group_by which is a tibble data frame with the following components:

  • one column for each grouping variable, containing its levels.

  • a column named "data", which is a named list of data subsets created by the grouping variables. The list names are created by concatenating the levels of the grouping variables.

Examples

library("survival")
library("magrittr")

# Grouping by one variable: treatment "rx"
#::::::::::::::::::::::::::::::::::::::::::
grouped.d <- colon %>%
  surv_group_by("rx")

grouped.d # print
#> # A tibble: 3 × 2
#> # Groups:   rx [3]
#>   rx      data               
#> * <fct>   <named list>       
#> 1 Obs     <tibble [630 × 15]>
#> 2 Lev     <tibble [620 × 15]>
#> 3 Lev+5FU <tibble [608 × 15]>

grouped.d$data # Access to the data
#> $rx.Obs
#> # A tibble: 630 × 15
#>       id study   sex   age obstruct perfor adhere nodes status differ extent
#>    <dbl> <dbl> <dbl> <dbl>    <dbl>  <dbl>  <dbl> <dbl>  <dbl>  <dbl>  <dbl>
#>  1     3     1     0    71        0      0      1     7      1      2      2
#>  2     3     1     0    71        0      0      1     7      1      2      2
#>  3     5     1     1    69        0      0      0    22      1      2      3
#>  4     5     1     1    69        0      0      0    22      1      2      3
#>  5     8     1     1    54        0      0      0     1      0      2      3
#>  6     8     1     1    54        0      0      0     1      0      2      3
#>  7    13     1     1    64        0      0      0     1      1      2      3
#>  8    13     1     1    64        0      0      0     1      1      2      3
#>  9    15     1     1    46        1      0      0     4      0      2      3
#> 10    15     1     1    46        1      0      0     4      0      2      3
#> # ℹ 620 more rows
#> # ℹ 4 more variables: surg <dbl>, node4 <dbl>, time <dbl>, etype <dbl>
#> 
#> $rx.Lev
#> # A tibble: 620 × 15
#>       id study   sex   age obstruct perfor adhere nodes status differ extent
#>    <dbl> <dbl> <dbl> <dbl>    <dbl>  <dbl>  <dbl> <dbl>  <dbl>  <dbl>  <dbl>
#>  1     7     1     1    77        0      0      0     5      1      2      3
#>  2     7     1     1    77        0      0      0     5      1      2      3
#>  3     9     1     1    46        0      0      1     2      0      2      3
#>  4     9     1     1    46        0      0      1     2      0      2      3
#>  5    11     1     0    47        0      0      1     1      0      2      3
#>  6    11     1     0    47        0      0      1     1      0      2      3
#>  7    14     1     1    68        1      0      0     3      1      2      3
#>  8    14     1     1    68        1      0      0     3      1      2      3
#>  9    17     1     1    62        1      0      1     6      1      2      3
#> 10    17     1     1    62        1      0      1     6      1      2      3
#> # ℹ 610 more rows
#> # ℹ 4 more variables: surg <dbl>, node4 <dbl>, time <dbl>, etype <dbl>
#> 
#> $`rx.Lev+5FU`
#> # A tibble: 608 × 15
#>       id study   sex   age obstruct perfor adhere nodes status differ extent
#>    <dbl> <dbl> <dbl> <dbl>    <dbl>  <dbl>  <dbl> <dbl>  <dbl>  <dbl>  <dbl>
#>  1     1     1     1    43        0      0      0     5      1      2      3
#>  2     1     1     1    43        0      0      0     5      1      2      3
#>  3     2     1     1    63        0      0      0     1      0      2      3
#>  4     2     1     1    63        0      0      0     1      0      2      3
#>  5     4     1     0    66        1      0      0     6      1      2      3
#>  6     4     1     0    66        1      0      0     6      1      2      3
#>  7     6     1     0    57        0      0      0     9      1      2      3
#>  8     6     1     0    57        0      0      0     9      1      2      3
#>  9    10     1     0    68        0      0      0     1      0      2      3
#> 10    10     1     0    68        0      0      0     1      0      2      3
#> # ℹ 598 more rows
#> # ℹ 4 more variables: surg <dbl>, node4 <dbl>, time <dbl>, etype <dbl>
#> 

# Grouping by two variables
#::::::::::::::::::::::::::::::::::::::::::
grouped.d <- colon %>%
   surv_group_by(grouping.vars = c("rx", "adhere"))
   grouped.d
#> # A tibble: 6 × 3
#> # Groups:   rx, adhere [6]
#>   rx      adhere data               
#> * <fct>    <dbl> <named list>       
#> 1 Obs          0 <tibble [536 × 14]>
#> 2 Obs          1 <tibble [94 × 14]> 
#> 3 Lev          0 <tibble [522 × 14]>
#> 4 Lev          1 <tibble [98 × 14]> 
#> 5 Lev+5FU      0 <tibble [530 × 14]>
#> 6 Lev+5FU      1 <tibble [78 × 14]>