Load packages

Function cvo_create_folds()

# Build a small example data set with 40 rows:
#   ID   - 20 distinct identifiers, each repeated on 2 consecutive rows
#   gr   - factor with 4 levels (A-D), 10 consecutive rows per level
#   .row - running row number
dataset_1 <- data.frame(
  ID   = rep(seq_len(20L), each = 2L),
  gr   = gl(n = 4, k = 10, labels = c("A", "B", "C", "D")),
  .row = seq_len(40L)
)

# Set parameters
# Number of cross-validation folds; passed as `k` to cvo_create_folds().
n_folds <- 5

# Inspect the structure of the example data
str(dataset_1)
#> 'data.frame':    40 obs. of  3 variables:
#>  $ ID  : int  1 1 2 2 3 3 4 4 5 5 ...
#>  $ gr  : Factor w/ 4 levels "A","B","C","D": 1 1 1 1 1 1 1 1 1 1 ...
#>  $ .row: int  1 2 3 4 5 6 7 8 9 10 ...
# Optional further checks (uncomment to run):
# table(dataset_1[,c("gr","ID")])
# summary(dataset_1)

# Seed the random number generator, then create folds blocked by the ID
# vector; the resulting cvo object is printed.
set.seed(1)
cvo_create_folds(
  k = n_folds,
  block_by = dataset_1$ID
)
#> --- A cvo object: ----------------------------------------------------
#>  indices stratified blocked cv_type k repetitions sample_size
#>    Train      FALSE    TRUE  k-fold 5           1          40
#> ----------------------------------------------------------------------

When variable names from a data frame are provided:

# Create folds that are stratified by column "gr" and blocked by column "ID".
# Both variables are given as column names looked up in `data`.
# The number of folds is passed by name (`k`), as in the other examples,
# rather than relying on positional matching after named arguments.
folds_1_a <- cvo_create_folds(
  data = dataset_1,
  stratify_by = "gr",
  block_by = "ID",
  k = n_folds,
  returnTrain = FALSE
)
# str(folds_1_a)

# Print the stratification and blocking tables for the created folds.
cvo_test_bs(
  folds_1_a,
  stratify_by = "gr",
  block_by = "ID",
  data = dataset_1
)
#> ************************************************************\n____________________________________________________________\n                Test for STRATIFICATION 
#> 
#>            A B C D      <<<     >>>              A    B    C    D
#> Rep1_Fold1 2 2 2 2  <-Counts | Proportions->  0.25 0.25 0.25 0.25
#> Rep1_Fold2 2 2 2 2  <-Counts | Proportions->  0.25 0.25 0.25 0.25
#> Rep1_Fold3 2 2 2 2  <-Counts | Proportions->  0.25 0.25 0.25 0.25
#> Rep1_Fold4 2 2 2 2  <-Counts | Proportions->  0.25 0.25 0.25 0.25
#> Rep1_Fold5 2 2 2 2  <-Counts | Proportions->  0.25 0.25 0.25 0.25
#> 
#> If stratified, the proportions of each group in each fold
#> (row) should be (approximately) equal and with no zero values.
#> Test is not valid if data is blocked and number of cases in 
#> each block differs significantly.
#> ____________________________________________________________\n                Test for BLOCKING: BLOCKED
#> 
#>       ID
#>            1 2 3 4 5 6 7 8 9 10 ..
#> Rep1_Fold1 0 2 0 0 0 0 0 0 0  2 ..
#> Rep1_Fold2 0 0 0 2 0 0 2 0 0  0 ..
#> Rep1_Fold3 0 0 0 0 2 0 0 0 2  0 ..
#> Rep1_Fold4 0 0 2 0 0 0 0 2 0  0 ..
#> Rep1_Fold5 2 0 0 0 0 2 0 0 0  0 ..
#> 
#> Table shows number of observations in each fold.
#> If blocked, the same ID appears just in one fold.
#> 10 (of 20) first columns are displayed.
#> ************************************************************\n

When vectors from the workspace are provided:

# Stratified and blocked folds again, but this time the stratification and
# blocking variables are passed directly as vectors (no `data` argument).
folds_1_b <- cvo_create_folds(
  k = n_folds,
  block_by = dataset_1$ID,
  stratify_by = dataset_1$gr,
  returnTrain = FALSE
)
# str(folds_1_b)

# Print the stratification and blocking tables, using the columns of
# `dataset_1` as the reference.
cvo_test_bs(
  folds_1_b,
  data = dataset_1,
  block_by = "ID",
  stratify_by = "gr"
)
#> ************************************************************\n____________________________________________________________\n                Test for STRATIFICATION 
#> 
#>            A B C D      <<<     >>>              A    B    C    D
#> Rep1_Fold1 2 2 2 2  <-Counts | Proportions->  0.25 0.25 0.25 0.25
#> Rep1_Fold2 2 2 2 2  <-Counts | Proportions->  0.25 0.25 0.25 0.25
#> Rep1_Fold3 2 2 2 2  <-Counts | Proportions->  0.25 0.25 0.25 0.25
#> Rep1_Fold4 2 2 2 2  <-Counts | Proportions->  0.25 0.25 0.25 0.25
#> Rep1_Fold5 2 2 2 2  <-Counts | Proportions->  0.25 0.25 0.25 0.25
#> 
#> If stratified, the proportions of each group in each fold
#> (row) should be (approximately) equal and with no zero values.
#> Test is not valid if data is blocked and number of cases in 
#> each block differs significantly.
#> ____________________________________________________________\n                Test for BLOCKING: BLOCKED
#> 
#>       ID
#>            1 2 3 4 5 6 7 8 9 10 ..
#> Rep1_Fold1 0 0 0 0 2 2 0 0 0  0 ..
#> Rep1_Fold2 0 0 2 0 0 0 0 0 0  2 ..
#> Rep1_Fold3 0 0 0 2 0 0 0 0 2  0 ..
#> Rep1_Fold4 2 0 0 0 0 0 2 0 0  0 ..
#> Rep1_Fold5 0 2 0 0 0 0 0 2 0  0 ..
#> 
#> Table shows number of observations in each fold.
#> If blocked, the same ID appears just in one fold.
#> 10 (of 20) first columns are displayed.
#> ************************************************************\n

Not blocked but stratified:

# Only `stratify_by` is supplied here (no `block_by`), so rows that share an
# ID are free to end up in different folds.
folds_1_c <- cvo_create_folds(
  k = n_folds,
  stratify_by = dataset_1$gr,
  returnTrain = FALSE
)
# str(folds_1_c)

# The blocking table is still requested for "ID" to show how its rows are
# spread across folds.
cvo_test_bs(
  folds_1_c,
  data = dataset_1,
  block_by = "ID",
  stratify_by = "gr"
)
#> ************************************************************\n____________________________________________________________\n                Test for STRATIFICATION 
#> 
#>            A B C D      <<<     >>>              A    B    C    D
#> Rep1_Fold1 2 2 2 2  <-Counts | Proportions->  0.25 0.25 0.25 0.25
#> Rep1_Fold2 2 2 2 2  <-Counts | Proportions->  0.25 0.25 0.25 0.25
#> Rep1_Fold3 2 2 2 2  <-Counts | Proportions->  0.25 0.25 0.25 0.25
#> Rep1_Fold4 2 2 2 2  <-Counts | Proportions->  0.25 0.25 0.25 0.25
#> Rep1_Fold5 2 2 2 2  <-Counts | Proportions->  0.25 0.25 0.25 0.25
#> 
#> If stratified, the proportions of each group in each fold
#> (row) should be (approximately) equal and with no zero values.
#> Test is not valid if data is blocked and number of cases in 
#> each block differs significantly.
#> ____________________________________________________________\n                Test for BLOCKING: NOT BLOCKED
#> 
#>       ID
#>            1 2 3 4 5 6 7 8 9 10 ..
#> Rep1_Fold1 2 0 0 0 0 0 1 1 0  0 ..
#> Rep1_Fold2 0 0 0 2 0 0 0 0 1  1 ..
#> Rep1_Fold3 0 2 0 0 0 1 0 0 1  0 ..
#> Rep1_Fold4 0 0 1 0 1 1 1 0 0  0 ..
#> Rep1_Fold5 0 0 1 0 1 0 0 1 0  1 ..
#> 
#> Table shows number of observations in each fold.
#> If blocked, the same ID appears just in one fold.
#> 10 (of 20) first columns are displayed.
#> ************************************************************\n

Blocked but not stratified:

# Only `block_by` is supplied here (no `stratify_by`), so group proportions
# are not controlled across folds.
folds_1_d <- cvo_create_folds(
  block_by = dataset_1$ID,
  k = n_folds,
  returnTrain = FALSE
)
# str(folds_1_d)

# Print the stratification and blocking tables. The data frame is passed by
# name (`data`), as in the other examples, instead of positionally after
# named arguments.
cvo_test_bs(
  folds_1_d,
  stratify_by = "gr",
  block_by = "ID",
  data = dataset_1
)
#> ************************************************************\n____________________________________________________________\n                Test for STRATIFICATION 
#> 
#>            A B C D      <<<     >>>              A    B    C    D
#> Rep1_Fold1 4 2 0 2  <-Counts | Proportions->  0.50 0.25 0.00 0.25
#> Rep1_Fold2 2 0 4 2  <-Counts | Proportions->  0.25 0.00 0.50 0.25
#> Rep1_Fold3 0 6 0 2  <-Counts | Proportions->  0.00 0.75 0.00 0.25
#> Rep1_Fold4 2 2 2 2  <-Counts | Proportions->  0.25 0.25 0.25 0.25
#> Rep1_Fold5 2 0 4 2  <-Counts | Proportions->  0.25 0.00 0.50 0.25
#> 
#> If stratified, the proportions of each group in each fold
#> (row) should be (approximately) equal and with no zero values.
#> Test is not valid if data is blocked and number of cases in 
#> each block differs significantly.
#> ____________________________________________________________\n                Test for BLOCKING: BLOCKED
#> 
#>       ID
#>            1 2 3 4 5 6 7 8 9 10 ..
#> Rep1_Fold1 0 0 0 2 2 2 0 0 0  0 ..
#> Rep1_Fold2 0 2 0 0 0 0 0 0 0  0 ..
#> Rep1_Fold3 0 0 0 0 0 0 2 0 2  2 ..
#> Rep1_Fold4 2 0 0 0 0 0 0 2 0  0 ..
#> Rep1_Fold5 0 0 2 0 0 0 0 0 0  0 ..
#> 
#> Table shows number of observations in each fold.
#> If blocked, the same ID appears just in one fold.
#> 10 (of 20) first columns are displayed.
#> ************************************************************\n