Compute summary statistics for one or multiple numeric variables.

get_summary_stats(
  data,
  ...,
  type = c("full", "common", "robust", "five_number", "mean_sd", "mean_se", "mean_ci",
    "median_iqr", "median_mad", "quantile", "mean", "median", "min", "max"),
  show = NULL,
  probs = seq(0, 1, 0.25)
)

Arguments

data

a data frame

...

(optional) One or more unquoted expressions (or variable names) separated by commas. Used to select the variables of interest. If no variable is specified, then the summary statistics of all numeric variables in the data frame are computed.

type

type of summary statistics. Possible values include: "full", "common", "robust", "five_number", "mean_sd", "mean_se", "mean_ci", "median_iqr", "median_mad", "quantile", "mean", "median", "min", "max"

show

a character vector specifying the summary statistics you want to show. Example: show = c("n", "mean", "sd"). This is used to filter the output after computation.

probs

numeric vector of probabilities with values in [0,1]. Used only when type = "quantile".

Value

A data frame containing descriptive statistics, such as:

  • n: the number of individuals

  • min: minimum

  • max: maximum

  • median: median

  • mean: mean

  • q1, q3: the first and the third quartile, respectively.

  • iqr: interquartile range

  • mad: median absolute deviation (see ?mad)

  • sd: standard deviation

  • se: standard error of the mean

  • ci: margin of error (half-width) of the 95 percent confidence interval of the mean, i.e. the interval is mean ± ci

Examples

# Full summary statistics
data("ToothGrowth")
ToothGrowth %>% get_summary_stats(len)
#> # A tibble: 1 × 13
#>   variable     n   min   max median    q1    q3   iqr   mad  mean    sd    se
#>   <fct>    <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 len         60   4.2  33.9   19.2  13.1  25.3  12.2  9.04  18.8  7.65 0.988
#> # … with 1 more variable: ci <dbl>

# Summary statistics of grouped data
# Show only common summary
ToothGrowth %>%
  group_by(dose, supp) %>%
  get_summary_stats(len, type = "common")
#> # A tibble: 6 × 12
#>   supp   dose variable     n   min   max median   iqr  mean    sd    se    ci
#>   <fct> <dbl> <fct>    <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 OJ      0.5 len         10   8.2  21.5  12.2   6.48 13.2   4.46 1.41   3.19
#> 2 VC      0.5 len         10   4.2  11.5   7.15  4.95  7.98  2.75 0.869  1.96
#> 3 OJ      1   len         10  14.5  27.3  23.4   5.35 22.7   3.91 1.24   2.80
#> 4 VC      1   len         10  13.6  22.5  16.5   2.02 16.8   2.52 0.795  1.80
#> 5 OJ      2   len         10  22.4  30.9  26.0   2.5  26.1   2.66 0.84   1.90
#> 6 VC      2   len         10  18.5  33.9  26.0   5.42 26.1   4.80 1.52   3.43

# Robust summary statistics
ToothGrowth %>% get_summary_stats(len, type = "robust")
#> # A tibble: 1 × 4
#>   variable     n median   iqr
#>   <fct>    <dbl>  <dbl> <dbl>
#> 1 len         60   19.2  12.2

# Five number summary statistics
ToothGrowth %>% get_summary_stats(len, type = "five_number")
#> # A tibble: 1 × 7
#>   variable     n   min   max    q1 median    q3
#>   <fct>    <dbl> <dbl> <dbl> <dbl>  <dbl> <dbl>
#> 1 len         60   4.2  33.9  13.1   19.2  25.3

# Compute only mean and sd
ToothGrowth %>% get_summary_stats(len, type = "mean_sd")
#> # A tibble: 1 × 4
#>   variable     n  mean    sd
#>   <fct>    <dbl> <dbl> <dbl>
#> 1 len         60  18.8  7.65

# Compute full summary statistics but show only mean, sd, median, iqr
ToothGrowth %>%
    get_summary_stats(len, show = c("mean", "sd", "median", "iqr"))
#> # A tibble: 1 × 6
#>   variable     n  mean    sd median   iqr
#>   <fct>    <dbl> <dbl> <dbl>  <dbl> <dbl>
#> 1 len         60  18.8  7.65   19.2  12.2