Calculates confidence intervals (CI) using bootstrap methods.
This enhanced version of DescTools::BootCI() returns a data frame.
ci_boot(.data, x, y = NULL, conf.level = 0.95, ...)
.data – Data frame.
x, y – Column names (unquoted).
conf.level – Confidence level. Default: 0.95.
... – Additional parameters for DescTools::BootCI(), including:
FUN – function for which CI is calculated;
bci.method – interval method:
"perc" – percentile method,
"bca" – bias-corrected and accelerated (BCa) method (see note below),
others;
R – number of replications, typically 1,000 to 10,000.
A data frame with confidence intervals. Columns depend on arguments and grouping:
(if grouped) grouping variable names;
Column matching the statistic name (from FUN) containing the estimate;
lwr.ci, upr.ci – lower and upper CI bounds.
Similar to DescTools::BootCI(), but:
First argument is a data frame;
Arguments x and y are unquoted column names;
Responds to dplyr::group_by() for subgroup calculations;
Returns a data frame for convenient plotting with ggplot2.
Notes:
Each group should have at least 20 observations for bootstrap methods.
Use set.seed() for reproducible results.
If using bci.method = "bca" produces the warning
"extreme order statistics used as endpoints",
the BCa method is unsuitable; use "perc" instead
(https://rcompanion.org/handbook/E_04.html).
# Bootstrap is useful when:
# - Data is skewed (not normal)
# - You want CI for statistics other than the mean (e.g., median, SD)
# - You don't want to assume a specific distribution
data(iris, package = "datasets")
head(iris)
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#> 1 5.1 3.5 1.4 0.2 setosa
#> 2 4.9 3.0 1.4 0.2 setosa
#> 3 4.7 3.2 1.3 0.2 setosa
#> 4 4.6 3.1 1.5 0.2 setosa
#> 5 5.0 3.6 1.4 0.2 setosa
#> 6 5.4 3.9 1.7 0.4 setosa
set.seed(123) # For reproducible results
# Example 1: CI for the median (resistant to outliers)
iris |>
ci_boot(Petal.Length, FUN = median, R = 1000, bci.method = "perc")
#> # A tibble: 1 × 3
#> median lwr.ci upr.ci
#> <dbl> <dbl> <dbl>
#> 1 4.35 4 4.55
# Compare to mean CI - median is often more robust
# Example 2: CI for the median by group
iris |>
dplyr::group_by(Species) |>
ci_boot(Petal.Length, FUN = median, R = 1000, bci.method = "perc")
#> # A tibble: 3 × 4
#> Species median lwr.ci upr.ci
#> <fct> <dbl> <dbl> <dbl>
#> 1 setosa 1.5 1.4 1.5
#> 2 versicolor 4.35 4.1 4.5
#> 3 virginica 5.55 5.25 5.7
# Useful when groups have different distributions
# Example 3: CI for standard deviation
# How variable is petal length?
set.seed(456)
iris |>
ci_boot(Petal.Length, FUN = sd, R = 1000, bci.method = "perc")
#> # A tibble: 1 × 3
#> sd lwr.ci upr.ci
#> <dbl> <dbl> <dbl>
#> 1 1.77 1.64 1.86
# Example 4: CI for interquartile range (IQR)
# IQR = 75th percentile - 25th percentile
set.seed(789)
iris |>
ci_boot(Petal.Length, FUN = IQR, R = 1000, bci.method = "perc")
#> # A tibble: 1 × 3
#> IQR lwr.ci upr.ci
#> <dbl> <dbl> <dbl>
#> 1 3.5 3.03 3.87
# Example 5: CI for correlation coefficient (Pearson's r)
# How related are petal length and width?
set.seed(101)
iris |>
dplyr::group_by(Species) |>
ci_boot(
Petal.Length, Petal.Width,
FUN = cor, method = "pearson",
R = 1000, bci.method = "perc"
)
#> # A tibble: 3 × 4
#> Species cor lwr.ci upr.ci
#> <fct> <dbl> <dbl> <dbl>
#> 1 setosa 0.332 0.0743 0.522
#> 2 versicolor 0.787 0.670 0.874
#> 3 virginica 0.322 0.113 0.517
# Look for CIs that don't include 0 (they suggest a non-zero correlation)
# Example 6: Comparing BCa and percentile methods
set.seed(111)
# BCa method (often more accurate but requires more assumptions)
iris |> ci_boot(Petal.Length, FUN = median, R = 1000, bci.method = "bca")
#> # A tibble: 1 × 3
#> median lwr.ci upr.ci
#> <dbl> <dbl> <dbl>
#> 1 4.35 4 4.5
# Percentile method (simpler, more robust)
iris |> ci_boot(Petal.Length, FUN = median, R = 1000, bci.method = "perc")
#> # A tibble: 1 × 3
#> median lwr.ci upr.ci
#> <dbl> <dbl> <dbl>
#> 1 4.35 4 4.6
# Example 7: Effect of number of bootstrap replications
set.seed(222)
# Fewer replications (faster but less stable)
iris |> ci_boot(Petal.Length, FUN = median, R = 500, bci.method = "perc")
#> # A tibble: 1 × 3
#> median lwr.ci upr.ci
#> <dbl> <dbl> <dbl>
#> 1 4.35 4 4.57
# More replications (slower but more stable)
iris |> ci_boot(Petal.Length, FUN = median, R = 5000, bci.method = "perc")
#> # A tibble: 1 × 3
#> median lwr.ci upr.ci
#> <dbl> <dbl> <dbl>
#> 1 4.35 4 4.55
# For teaching: 1000 is usually enough; for research: 5000-10000
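# Example 8: Handling missing values
# Extra arguments such as na.rm = TRUE are passed on to FUN (here, median).
# Note: iris contains no NAs, so this example only demonstrates the syntax.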
set.seed(333)
iris |>
ci_boot(
Petal.Length,
FUN = median, na.rm = TRUE,
R = 1000, bci.method = "bca"
)
#> # A tibble: 1 × 3
#> median lwr.ci upr.ci
#> <dbl> <dbl> <dbl>
#> 1 4.35 4 4.55
# Example 9: With mtcars dataset
set.seed(444)
data(mtcars, package = "datasets")
mtcars |>
dplyr::group_by(cyl) |>
ci_boot(mpg, FUN = median, R = 1000, bci.method = "perc")
#> # A tibble: 3 × 4
#> cyl median lwr.ci upr.ci
#> <dbl> <dbl> <dbl> <dbl>
#> 1 6 19.7 18.1 21
#> 2 4 26 22.8 30.4
#> 3 8 15.2 14.5 16.4
# Compare median MPG for different cylinder counts
# Example 10: Spearman correlation (rank-based, robust to outliers)
set.seed(555)
iris |>
dplyr::group_by(Species) |>
ci_boot(
Petal.Length, Petal.Width,
FUN = cor, method = "spearman",
R = 1000, bci.method = "perc"
)
#> # A tibble: 3 × 4
#> Species cor lwr.ci upr.ci
#> <fct> <dbl> <dbl> <dbl>
#> 1 setosa 0.271 0.00686 0.501
#> 2 versicolor 0.787 0.638 0.885
#> 3 virginica 0.363 0.119 0.579