vignettes/manyROC_3_cvo.Rmd
manyROC_3_cvo.Rmd
cvo_create_folds()
# Make some data: 20 IDs with two rows each, and a 4-level grouping factor
# with 10 consecutive rows per level.
dataset_1 <- data.frame(
  ID   = rep(1:20, each = 2),
  gr   = gl(4, 10, labels = LETTERS[1:4]),
  .row = 1:40
)

# Set parameters
n_folds <- 5

# Explore data
str(dataset_1)
#> 'data.frame': 40 obs. of 3 variables:
#> $ ID : int 1 1 2 2 3 3 4 4 5 5 ...
#> $ gr : Factor w/ 4 levels "A","B","C","D": 1 1 1 1 1 1 1 1 1 1 ...
#> $ .row: int 1 2 3 4 5 6 7 8 9 10 ...
# table(dataset_1[,c("gr","ID")]) # summary(dataset_1)
# Fix the RNG seed before creating the folds.
set.seed(1)

# Create folds blocked by ID only (no `stratify_by`); the returned object is
# auto-printed because it is not assigned.
cvo_create_folds(block_by = dataset_1$ID, k = n_folds)
#> --- A cvo object: ----------------------------------------------------
#> indices stratified blocked cv_type k repetitions sample_size
#> Train FALSE TRUE k-fold 5 1 40
#> ----------------------------------------------------------------------
When variable names from a data frame are provided:
# Create folds by passing the data frame plus the names of the columns to
# stratify and block on.
folds_1_a <- cvo_create_folds(
  data = dataset_1,
  stratify_by = "gr",
  block_by = "ID",
  # NOTE(review): `n_folds` is passed positionally, so it binds to the first
  # formal not matched by name -- presumably the number of folds; confirm
  # against the signature of cvo_create_folds().
  n_folds,
  returnTrain = FALSE
)
# str(folds_1_a)

# Print the stratification and blocking check tables for these folds.
cvo_test_bs(
  folds_1_a,
  stratify_by = "gr",
  block_by = "ID",
  data = dataset_1
)
#> ************************************************************\n____________________________________________________________\n Test for STRATIFICATION
#>
#> A B C D <<< >>> A B C D
#> Rep1_Fold1 2 2 2 2 <-Counts | Proportions-> 0.25 0.25 0.25 0.25
#> Rep1_Fold2 2 2 2 2 <-Counts | Proportions-> 0.25 0.25 0.25 0.25
#> Rep1_Fold3 2 2 2 2 <-Counts | Proportions-> 0.25 0.25 0.25 0.25
#> Rep1_Fold4 2 2 2 2 <-Counts | Proportions-> 0.25 0.25 0.25 0.25
#> Rep1_Fold5 2 2 2 2 <-Counts | Proportions-> 0.25 0.25 0.25 0.25
#>
#> If stratified, the proportions of each group in each fold
#> (row) should be (approximately) equal and with no zero values.
#> Test is not valid if data is blocked and number of cases in
#> each block differs significantly.
#> ____________________________________________________________\n Test for BLOCKING: BLOCKED
#>
#> ID
#> 1 2 3 4 5 6 7 8 9 10 ..
#> Rep1_Fold1 0 2 0 0 0 0 0 0 0 2 ..
#> Rep1_Fold2 0 0 0 2 0 0 2 0 0 0 ..
#> Rep1_Fold3 0 0 0 0 2 0 0 0 2 0 ..
#> Rep1_Fold4 0 0 2 0 0 0 0 2 0 0 ..
#> Rep1_Fold5 2 0 0 0 0 2 0 0 0 0 ..
#>
#> Table shows number of observations in each fold.
#> If blocked, the same ID appears just in one fold.
#> 10 (of 20) first columns are displayed.
#> ************************************************************\n
When vectors from the workspace are provided:
# Create folds by passing the stratification and blocking vectors directly
# (no `data` argument).
folds_1_b <- cvo_create_folds(
  stratify_by = dataset_1$gr,
  block_by = dataset_1$ID,
  k = n_folds,
  returnTrain = FALSE
)
# str(folds_1_b)

# Print the stratification and blocking check tables for these folds.
cvo_test_bs(
  folds_1_b,
  stratify_by = "gr",
  block_by = "ID",
  data = dataset_1
)
#> ************************************************************\n____________________________________________________________\n Test for STRATIFICATION
#>
#> A B C D <<< >>> A B C D
#> Rep1_Fold1 2 2 2 2 <-Counts | Proportions-> 0.25 0.25 0.25 0.25
#> Rep1_Fold2 2 2 2 2 <-Counts | Proportions-> 0.25 0.25 0.25 0.25
#> Rep1_Fold3 2 2 2 2 <-Counts | Proportions-> 0.25 0.25 0.25 0.25
#> Rep1_Fold4 2 2 2 2 <-Counts | Proportions-> 0.25 0.25 0.25 0.25
#> Rep1_Fold5 2 2 2 2 <-Counts | Proportions-> 0.25 0.25 0.25 0.25
#>
#> If stratified, the proportions of each group in each fold
#> (row) should be (approximately) equal and with no zero values.
#> Test is not valid if data is blocked and number of cases in
#> each block differs significantly.
#> ____________________________________________________________\n Test for BLOCKING: BLOCKED
#>
#> ID
#> 1 2 3 4 5 6 7 8 9 10 ..
#> Rep1_Fold1 0 0 0 0 2 2 0 0 0 0 ..
#> Rep1_Fold2 0 0 2 0 0 0 0 0 0 2 ..
#> Rep1_Fold3 0 0 0 2 0 0 0 0 2 0 ..
#> Rep1_Fold4 2 0 0 0 0 0 2 0 0 0 ..
#> Rep1_Fold5 0 2 0 0 0 0 0 2 0 0 ..
#>
#> Table shows number of observations in each fold.
#> If blocked, the same ID appears just in one fold.
#> 10 (of 20) first columns are displayed.
#> ************************************************************\n
Not blocked but stratified:
# Create folds stratified by `gr` only; `block_by` is not supplied.
folds_1_c <- cvo_create_folds(
  stratify_by = dataset_1$gr,
  k = n_folds,
  returnTrain = FALSE
)
# str(folds_1_c)

# Print the stratification and blocking check tables for these folds.
cvo_test_bs(
  folds_1_c,
  stratify_by = "gr",
  block_by = "ID",
  data = dataset_1
)
#> ************************************************************\n____________________________________________________________\n Test for STRATIFICATION
#>
#> A B C D <<< >>> A B C D
#> Rep1_Fold1 2 2 2 2 <-Counts | Proportions-> 0.25 0.25 0.25 0.25
#> Rep1_Fold2 2 2 2 2 <-Counts | Proportions-> 0.25 0.25 0.25 0.25
#> Rep1_Fold3 2 2 2 2 <-Counts | Proportions-> 0.25 0.25 0.25 0.25
#> Rep1_Fold4 2 2 2 2 <-Counts | Proportions-> 0.25 0.25 0.25 0.25
#> Rep1_Fold5 2 2 2 2 <-Counts | Proportions-> 0.25 0.25 0.25 0.25
#>
#> If stratified, the proportions of each group in each fold
#> (row) should be (approximately) equal and with no zero values.
#> Test is not valid if data is blocked and number of cases in
#> each block differs significantly.
#> ____________________________________________________________\n Test for BLOCKING: NOT BLOCKED
#>
#> ID
#> 1 2 3 4 5 6 7 8 9 10 ..
#> Rep1_Fold1 2 0 0 0 0 0 1 1 0 0 ..
#> Rep1_Fold2 0 0 0 2 0 0 0 0 1 1 ..
#> Rep1_Fold3 0 2 0 0 0 1 0 0 1 0 ..
#> Rep1_Fold4 0 0 1 0 1 1 1 0 0 0 ..
#> Rep1_Fold5 0 0 1 0 1 0 0 1 0 1 ..
#>
#> Table shows number of observations in each fold.
#> If blocked, the same ID appears just in one fold.
#> 10 (of 20) first columns are displayed.
#> ************************************************************\n
Blocked but not stratified:
# Create folds blocked by `ID` only; `stratify_by` is not supplied.
folds_1_d <- cvo_create_folds(
  block_by = dataset_1$ID,
  k = n_folds,
  returnTrain = FALSE
)
# str(folds_1_d)

# Print the stratification and blocking check tables for these folds.
# The data frame is passed by name (`data =`) to match the other
# cvo_test_bs() calls, rather than positionally after named arguments.
cvo_test_bs(
  folds_1_d,
  stratify_by = "gr",
  block_by = "ID",
  data = dataset_1
)
#> ************************************************************\n____________________________________________________________\n Test for STRATIFICATION
#>
#> A B C D <<< >>> A B C D
#> Rep1_Fold1 4 2 0 2 <-Counts | Proportions-> 0.50 0.25 0.00 0.25
#> Rep1_Fold2 2 0 4 2 <-Counts | Proportions-> 0.25 0.00 0.50 0.25
#> Rep1_Fold3 0 6 0 2 <-Counts | Proportions-> 0.00 0.75 0.00 0.25
#> Rep1_Fold4 2 2 2 2 <-Counts | Proportions-> 0.25 0.25 0.25 0.25
#> Rep1_Fold5 2 0 4 2 <-Counts | Proportions-> 0.25 0.00 0.50 0.25
#>
#> If stratified, the proportions of each group in each fold
#> (row) should be (approximately) equal and with no zero values.
#> Test is not valid if data is blocked and number of cases in
#> each block differs significantly.
#> ____________________________________________________________\n Test for BLOCKING: BLOCKED
#>
#> ID
#> 1 2 3 4 5 6 7 8 9 10 ..
#> Rep1_Fold1 0 0 0 2 2 2 0 0 0 0 ..
#> Rep1_Fold2 0 2 0 0 0 0 0 0 0 0 ..
#> Rep1_Fold3 0 0 0 0 0 0 2 0 2 2 ..
#> Rep1_Fold4 2 0 0 0 0 0 0 2 0 0 ..
#> Rep1_Fold5 0 0 2 0 0 0 0 0 0 0 ..
#>
#> Table shows number of observations in each fold.
#> If blocked, the same ID appears just in one fold.
#> 10 (of 20) first columns are displayed.
#> ************************************************************\n