我正在尝试使用 tidymodels 拟合 XGBoost 模型,但不断收到以下错误:

→ A | error:   `terms` must be a <terms>, not a double vector.
There were issues with some computations   A: x5
Warning message:
All models failed. Run `show_notes(.Last.tune.result)` for more information. 

我从 Kaggle 下载了一个数据集. 下面的代码块加载数据,将因变量重新编码为二元因子,并将预测变量全部转换为数值型或因子型.

# Read the loan file, derive the outcome, tidy the education labels and keep
# only the columns used for modelling.
dat <- read_csv(file = "loanPayments.csv") %>%
  mutate(
    # outcome as a factor: "1" when past_due_days is missing, "0" otherwise
    paid = as.factor(as.numeric(is.na(past_due_days))),
    # education: strip the qualifiers and blanks, fix the misspelling,
    # lower-case the label and store it as a factor
    education = education %>%
      gsub(pattern = "Above|Below| or ", replacement = "") %>%
      gsub(pattern = " ", replacement = "") %>%
      gsub(pattern = "Bechalor", replacement = "bachelor") %>%
      tolower() %>%
      as.factor(),
    # remaining character column to factor
    Gender = as.factor(Gender)
  ) %>%
  dplyr::select(paid, Principal, terms, age, education, Gender)
# lower-case every column name
names(dat) <- tolower(names(dat))

然后,在执行完 initial_split 之后,我设置了 recipe 和 k 折交叉验证 (k-fold cross validation).

# Split the data into a training and a test portion.
# NOTE(review): the result is stored under the name `initial_split`, the same
# name as the function being called; a distinct name would be easier to read.
initial_split <- initial_split(
  data = dat, # data set to split
  prop = 0.8, # proportion of rows passed as `prop` (train share)
  strata = paid) # stratify the split on the outcome (DV)
# Preprocessing recipe, defined and prepped on the training portion.
preprocessing_recipe <- recipe(
    paid ~ ., # paid is the outcome, all remaining columns are predictors
    data = training(initial_split)) %>%
  # create dummy variables for the nominal columns
  # NOTE(review): all_nominal() is not limited to predictors, so it presumably
  # also selects the factor outcome `paid` -- confirm; the answer further down
  # in this page uses all_nominal_predictors() instead.
  step_dummy(all_nominal()) %>%
  # center and scale the numeric predictors
  step_normalize(all_numeric_predictors()) %>%
  # drop numeric predictors with (near-)zero variance
  step_nzv(all_numeric_predictors()) %>%
  # train the recipe so that it can be baked below
  prep()
# Build the cross-validation folds from the baked (already preprocessed)
# training data rather than from the raw training data.
cv_folds <- bake(
    preprocessing_recipe, # the prepped recipe from above
    new_data = training(initial_split) # training rows to preprocess
  ) %>%
  vfold_cv(v = 5) # 5 folds

在这里,我准备了模型规范和 tidymodels 的 workflow 来调优一些超参数.

# Model specification: boosted trees with a fixed 50 trees; min_n, tree_depth,
# learn_rate and loss_reduction are marked with tune() for tuning.
xgbSpec <- parsnip::boost_tree(
  trees = 50, min_n = tune(), 
  tree_depth = tune(), learn_rate = tune(), 
  loss_reduction = tune()) %>%
  # objective and num_class are handed to the engine through set_engine()
  # NOTE(review): the answer further down in this page omits both engine
  # arguments -- confirm whether they are needed for a two-level outcome.
  set_engine(engine = "xgboost",
             objective = "multi:softprob",
             num_class = 2) %>%
  set_mode(mode = "classification")
# Latin hypercube grid over the four tuned parameters.
xgboostGrid <- grid_latin_hypercube(
    min_n(), tree_depth(),
    learn_rate(), loss_reduction(),
    size = 10) # number of hyperparameter combinations
# Workflow that pairs the formula paid ~ . with the model specification;
# no recipe is attached here.
xgboost_wf <- workflows::workflow() %>%
  add_formula(paid ~ .) %>%
  add_model(xgbSpec) # model specification defined above
# Tune the workflow over the grid on the resamples in cv_folds.
# NOTE(review): cv_folds was built from baked data, so its columns may no
# longer match what the formula paid ~ . expects -- confirm.
xgboostFitted <- tune::tune_grid(
  object = xgboost_wf, # workflow to tune
  resamples = cv_folds, # cross-validation folds
  grid = xgboostGrid, # candidate parameter combinations
  metrics = yardstick::metric_set(accuracy),
  control = control_grid(save_pred = TRUE))

运行代码后,优化会产生以下错误:

→ A | error:   `terms` must be a <terms>, not a double vector.
There were issues with some computations   A: x5
Warning message:
All models failed. Run `show_notes(.Last.tune.result)` for more information. 

我做错了什么?

这是我的数据的dput()的输出.

structure(list(paid = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), levels = c("0", 
"1"), class = "factor"), principal = c(1000, 1000, 1000, 1000, 
1000, 300, 1000, 1000, 1000, 800, 300, 1000, 1000, 900, 1000, 
800, 1000, 1000, 1000, 800, 1000, 1000, 1000, 1000, 1000, 1000, 
800, 1000, 1000, 1000, 800, 1000, 1000, 1000, 800, 800, 1000, 
700, 1000, 1000, 1000, 800, 1000, 1000, 1000, 800, 1000, 1000, 
1000, 800, 800, 1000, 800, 1000, 1000, 1000, 1000, 1000, 800, 
800, 1000, 1000, 1000, 1000, 800, 900, 1000, 1000, 300, 1000, 
800, 800, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 800, 1000, 1000, 1000, 1000, 800, 800, 800, 1000, 
800, 800, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 800, 1000, 
1000, 1000, 1000, 1000, 1000, 1000, 1000, 800, 1000, 1000, 1000, 
1000, 1000, 1000, 1000, 1000, 800, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 800, 1000, 1000, 1000, 800, 1000, 1000, 800, 1000, 
800, 1000, 1000, 1000, 1000, 800, 1000, 800, 1000, 1000, 1000, 
300, 1000, 800, 1000, 800, 500, 1000, 1000, 1000, 1000, 800, 
1000, 1000, 1000, 800, 1000, 1000, 1000, 1000, 1000, 800, 1000, 
1000, 1000, 1000, 800, 1000, 800, 800, 1000, 1000, 1000, 1000, 
1000, 300, 1000, 800, 1000, 1000, 1000, 1000, 800, 800, 1000, 
1000, 1000, 1000, 800, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 800, 1000, 1000, 1000, 1000, 300, 1000, 1000, 1000, 1000, 
1000, 1000, 1000, 1000, 800, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 1000, 800, 800, 800, 1000, 1000, 1000, 1000, 800, 
1000, 1000, 800, 1000, 1000, 1000, 1000, 800, 800, 1000, 1000, 
800, 800, 1000, 1000, 800, 1000, 1000, 500, 1000, 1000, 800, 
1000, 1000, 1000, 800, 1000, 800, 1000, 800, 1000, 800, 1000, 
1000, 800, 1000, 1000, 1000, 1000, 1000, 800, 800, 1000, 800, 
800, 800, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 800, 
1000, 1000, 1000, 1000, 1000, 1000, 800, 1000, 1000, 1000, 1000, 
1000, 1000, 800, 800, 1000, 800, 1000, 1000, 800, 1000, 800, 
1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 
800, 1000, 1000, 800, 1000, 1000, 1000, 1000, 800, 1000, 1000, 
1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 800, 
1000, 1000, 800, 1000, 800, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 1000, 1000, 1000, 800, 1000, 1000, 800, 1000, 800, 
800, 1000, 1000, 800, 1000, 800, 1000, 1000, 800, 1000, 1000, 
1000, 1000, 1000, 800, 1000, 1000, 1000, 1000, 800, 1000, 1000, 
1000, 1000, 800, 1000, 1000, 800, 1000, 1000, 800, 1000, 1000, 
1000, 1000, 1000, 1000, 800, 1000, 800, 1000, 1000, 1000, 800, 
1000, 800, 1000, 1000, 800, 1000, 800, 800, 1000, 1000, 1000, 
800, 1000, 1000, 1000, 1000, 1000, 1000, 500, 800, 1000, 800, 
1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 1000, 1000, 1000, 1000, 1000, 800, 1000, 1000, 1000, 
1000, 1000, 1000, 1000, 800, 800, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 1000, 1000, 1000, 1000, 800, 1000, 800, 1000, 800, 
1000, 1000, 1000, 1000, 1000, 800, 1000, 1000, 1000, 800, 1000, 
1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 800, 1000, 
1000), terms = c(30, 30, 30, 15, 30, 7, 30, 30, 30, 15, 7, 15, 
30, 7, 7, 15, 30, 15, 30, 30, 30, 30, 30, 15, 30, 30, 15, 15, 
30, 30, 15, 30, 30, 30, 15, 15, 30, 15, 15, 30, 15, 15, 7, 15, 
15, 7, 30, 30, 30, 15, 15, 30, 15, 30, 30, 15, 30, 30, 15, 15, 
30, 15, 30, 30, 15, 15, 30, 30, 7, 30, 15, 15, 30, 15, 15, 30, 
30, 30, 30, 30, 30, 7, 15, 15, 30, 30, 30, 30, 15, 15, 15, 7, 
15, 15, 30, 30, 15, 30, 30, 30, 7, 15, 30, 15, 15, 30, 30, 30, 
30, 7, 15, 30, 30, 30, 30, 15, 30, 30, 30, 15, 30, 30, 15, 15, 
30, 30, 15, 15, 15, 30, 15, 15, 30, 30, 15, 30, 15, 15, 15, 30, 
30, 15, 7, 15, 30, 15, 30, 7, 15, 15, 15, 15, 15, 15, 30, 30, 
15, 15, 15, 30, 15, 15, 30, 15, 15, 30, 30, 30, 15, 30, 15, 30, 
15, 30, 15, 15, 30, 30, 30, 30, 15, 7, 30, 15, 30, 30, 30, 15, 
15, 15, 30, 30, 15, 30, 15, 30, 30, 15, 30, 30, 15, 30, 15, 15, 
30, 7, 30, 30, 7, 30, 15, 30, 30, 30, 30, 30, 15, 15, 15, 30, 
15, 30, 30, 15, 30, 30, 15, 15, 15, 15, 15, 7, 30, 30, 15, 30, 
30, 15, 15, 15, 30, 15, 15, 15, 7, 30, 15, 15, 30, 15, 15, 30, 
30, 7, 30, 30, 15, 15, 7, 30, 15, 15, 30, 15, 15, 15, 15, 15, 
30, 30, 30, 30, 30, 30, 30, 15, 15, 30, 15, 30, 15, 15, 30, 30, 
15, 30, 30, 30, 30, 15, 15, 30, 30, 30, 30, 30, 15, 30, 30, 30, 
15, 30, 30, 15, 15, 15, 15, 30, 30, 15, 30, 15, 30, 30, 30, 15, 
30, 15, 30, 15, 30, 30, 30, 15, 15, 30, 15, 30, 30, 30, 15, 15, 
30, 30, 30, 30, 30, 30, 15, 15, 30, 30, 15, 30, 15, 30, 30, 15, 
30, 15, 30, 15, 30, 30, 30, 15, 30, 30, 30, 30, 30, 15, 30, 15, 
15, 30, 30, 15, 30, 30, 15, 30, 15, 15, 30, 15, 15, 30, 30, 30, 
30, 15, 15, 30, 30, 30, 15, 30, 15, 15, 15, 15, 30, 30, 15, 30, 
30, 15, 30, 30, 30, 15, 30, 15, 15, 30, 15, 30, 30, 15, 15, 30, 
15, 15, 30, 15, 30, 15, 15, 30, 15, 30, 15, 15, 30, 30, 30, 30, 
30, 15, 15, 15, 7, 30, 30, 30, 30, 30, 30, 30, 30, 15, 30, 30, 
30, 15, 30, 30, 30, 30, 30, 15, 15, 30, 30, 15, 30, 30, 30, 15, 
15, 30, 30, 15, 30, 30, 30, 15, 30, 30, 15, 30, 15, 30, 15, 30, 
15, 30, 15, 30, 15, 15, 15, 30, 30, 30, 15, 30, 15, 30, 30, 30, 
30, 15, 30, 30, 15, 15, 30, 30), age = c(45, 50, 33, 27, 28, 
35, 29, 36, 28, 26, 29, 39, 26, 26, 27, 26, 40, 32, 32, 26, 26, 
43, 25, 26, 26, 29, 39, 34, 31, 33, 33, 37, 27, 37, 33, 29, 27, 
33, 24, 21, 32, 30, 31, 30, 24, 35, 22, 32, 32, 50, 27, 35, 35, 
34, 21, 25, 27, 26, 44, 39, 34, 37, 34, 45, 24, 28, 28, 37, 35, 
43, 29, 29, 33, 34, 25, 30, 31, 35, 37, 44, 28, 25, 29, 33, 37, 
33, 24, 27, 43, 46, 34, 32, 38, 27, 33, 36, 26, 34, 22, 31, 29, 
38, 30, 45, 35, 30, 31, 31, 28, 29, 29, 27, 27, 33, 28, 25, 40, 
23, 35, 24, 34, 22, 20, 23, 33, 26, 28, 43, 34, 38, 26, 43, 26, 
33, 24, 30, 32, 22, 47, 20, 28, 35, 27, 33, 30, 31, 26, 37, 26, 
35, 29, 23, 23, 30, 34, 36, 26, 29, 28, 27, 24, 31, 28, 27, 25, 
24, 28, 28, 35, 38, 38, 29, 35, 24, 39, 25, 38, 30, 21, 46, 31, 
29, 35, 30, 27, 31, 33, 34, 28, 42, 32, 30, 25, 27, 21, 24, 29, 
40, 29, 29, 30, 26, 36, 27, 20, 26, 26, 27, 23, 39, 27, 30, 33, 
27, 35, 29, 50, 31, 31, 29, 35, 39, 29, 30, 33, 26, 25, 37, 26, 
26, 27, 34, 37, 36, 33, 30, 30, 36, 29, 36, 32, 29, 36, 30, 31, 
19, 26, 34, 35, 35, 38, 29, 28, 22, 32, 31, 28, 37, 25, 19, 51, 
29, 23, 30, 23, 34, 31, 24, 42, 40, 29, 32, 28, 35, 30, 44, 37, 
31, 36, 31, 42, 28, 30, 30, 24, 34, 29, 38, 34, 28, 30, 41, 29, 
37, 36, 30, 27, 29, 40, 28, 29, 37, 33, 27, 24, 31, 28, 40, 33, 
41, 30, 26, 27, 20, 24, 26, 30, 29, 22, 24, 25, 28, 37, 32, 34, 
28, 35, 27, 24, 44, 31, 27, 21, 30, 38, 34, 31, 23, 27, 39, 30, 
25, 50, 23, 38, 27, 31, 40, 32, 29, 26, 25, 35, 41, 37, 34, 45, 
26, 32, 28, 34, 29, 26, 26, 22, 27, 33, 28, 24, 37, 36, 18, 25, 
40, 29, 26, 30, 33, 30, 32, 25, 35, 30, 26, 29, 26, 46, 36, 38, 
32, 30, 35, 29, 26, 32, 25, 33, 39, 28, 26, 26, 28, 39, 29, 33, 
27, 34, 26, 28, 32, 27, 21, 39, 38, 36, 33, 21, 25, 29, 33, 47, 
33, 23, 24, 27, 32, 33, 27, 35, 37, 28, 33, 34, 29, 34, 29, 24, 
34, 25, 24, 30, 28, 24, 26, 24, 29, 31, 26, 25, 29, 38, 41, 26, 
26, 35, 37, 25, 24, 34, 33, 38, 38, 26, 37, 42, 49, 26, 41, 38, 
26, 32, 27, 33, 30, 26, 35, 46, 27, 22, 27, 30, 27, 47, 30, 26, 
38, 46, 35, 45, 36, 38, 27, 27, 29, 30, 28, 26, 30, 38, 28), 
    education = structure(c(3L, 1L, 1L, 2L, 2L, 4L, 2L, 2L, 2L, 
    2L, 2L, 3L, 2L, 2L, 3L, 2L, 3L, 3L, 3L, 2L, 2L, 3L, 3L, 2L, 
    2L, 3L, 1L, 1L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 1L, 3L, 3L, 2L, 
    1L, 2L, 2L, 1L, 2L, 1L, 3L, 3L, 2L, 1L, 3L, 2L, 1L, 1L, 3L, 
    3L, 2L, 3L, 1L, 3L, 4L, 1L, 2L, 3L, 2L, 3L, 2L, 1L, 3L, 2L, 
    1L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 
    3L, 2L, 3L, 2L, 1L, 3L, 2L, 1L, 3L, 3L, 3L, 2L, 3L, 2L, 3L, 
    1L, 3L, 2L, 2L, 3L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 3L, 2L, 3L, 1L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 3L, 3L, 1L, 
    1L, 3L, 3L, 3L, 2L, 2L, 3L, 3L, 2L, 3L, 3L, 3L, 2L, 3L, 2L, 
    3L, 2L, 2L, 4L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 1L, 3L, 2L, 
    3L, 3L, 1L, 3L, 2L, 3L, 3L, 2L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 
    3L, 2L, 3L, 2L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 2L, 
    2L, 2L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 3L, 3L, 2L, 
    2L, 1L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 
    3L, 2L, 2L, 2L, 3L, 1L, 3L, 3L, 1L, 3L, 2L, 2L, 2L, 2L, 3L, 
    3L, 3L, 2L, 3L, 2L, 3L, 3L, 3L, 1L, 3L, 2L, 3L, 2L, 3L, 3L, 
    1L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 3L, 
    2L, 1L, 1L, 3L, 3L, 2L, 3L, 2L, 2L, 3L, 1L, 2L, 3L, 2L, 2L, 
    2L, 3L, 1L, 2L, 3L, 1L, 1L, 2L, 3L, 1L, 3L, 2L, 3L, 2L, 3L, 
    1L, 2L, 2L, 3L, 3L, 2L, 2L, 3L, 3L, 2L, 1L, 3L, 2L, 3L, 2L, 
    2L, 2L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 1L, 1L, 2L, 3L, 2L, 2L, 
    2L, 1L, 1L, 2L, 3L, 1L, 2L, 2L, 3L, 3L, 2L, 3L, 2L, 3L, 2L, 
    3L, 3L, 2L, 4L, 3L, 1L, 3L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 3L, 
    3L, 2L, 3L, 1L, 2L, 3L, 2L, 3L, 3L, 2L, 2L, 3L, 3L, 1L, 2L, 
    3L, 3L, 2L, 3L, 3L, 2L, 3L, 2L, 2L, 2L, 2L, 3L, 3L, 1L, 3L, 
    3L, 3L, 3L, 3L, 1L, 3L, 2L, 3L, 2L, 2L, 3L, 3L, 3L, 2L, 2L, 
    2L, 2L, 2L, 2L, 1L, 3L, 2L, 2L, 3L, 3L, 1L, 2L, 2L, 2L, 2L, 
    3L, 2L, 2L, 3L, 2L, 3L, 3L, 2L, 3L, 2L, 3L, 1L, 2L, 2L, 3L, 
    1L, 1L, 2L, 1L, 2L, 1L, 1L, 3L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 
    3L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 
    2L, 1L, 3L, 2L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 2L, 3L, 
    3L, 2L, 2L, 2L, 3L, 1L, 1L, 3L, 2L, 2L, 2L, 3L, 3L, 1L, 2L, 
    2L, 3L, 2L, 1L, 2L, 3L, 3L, 3L, 2L, 2L, 3L), levels = c("bachelor", 
    "college", "highschool", "master"), class = "factor"), gender = structure(c(2L, 
    1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 
    2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 
    2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 
    1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 
    1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 
    2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 
    2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 
    2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 
    1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 
    2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 
    2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 
    2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 1L, 2L), levels = c("female", "male"), class = "factor")), row.names = c(NA, 
-500L), class = c("tbl_df", "tbl", "data.frame"))

我尝试了不同的教程,但它们都不适用于我的代码/数据. https://www.r-bloggers.com/2020/05/using-xgboost-with-tidymodels/ https://www.r-bloggers.com/2020/05/tidymodels-and-xgbooost-a-few-learnings/ https://juliasilge.com/blog/xgboost-tune-volleyball/ https://juliasilge.com/blog/austin-housing/

推荐答案

这有帮助吗?

step_dummy() 已调整为仅作用于预测变量. prep() 和 bake() 可以帮助您了解预处理对数据的影响,但工作流 (workflow) 会自动为您完成这些步骤.

library(tidymodels)

# Fix the RNG so the split, the folds and the grid are reproducible.
set.seed(123)

# `dat` is the cleaned data frame built in the question: factor outcome `paid`
# plus the predictors principal, terms, age, education and gender.
initial_split <- initial_split(dat, prop = 0.8, strata = paid)
train <- training(initial_split)

# Leave the recipe unprepped: the workflow takes care of prep()/bake().
preprocessing_recipe <-
  recipe(paid ~ ., data = train) %>%
  # dummy-encode the predictors only, so the outcome `paid` stays a factor
  step_dummy(all_nominal_predictors()) %>%
  step_normalize(all_numeric_predictors()) %>%
  step_nzv(all_numeric_predictors())

# Resample the raw training data; preprocessing is applied by the workflow.
cv_folds <- vfold_cv(train, v = 5)

# Boosted-tree specification: 50 trees, four hyperparameters left to tune.
# No objective/num_class engine arguments are set for this two-level outcome.
xgbSpec <- boost_tree(
  trees = 50,
  min_n = tune(),
  tree_depth = tune(),
  learn_rate = tune(),
  loss_reduction = tune()
) %>%
  set_engine(engine = "xgboost") %>%
  set_mode(mode = "classification")

# Ten candidate combinations of the tuned parameters.
# NOTE(review): newer dials releases deprecate grid_latin_hypercube() in
# favour of grid_space_filling(type = "latin_hypercube") -- confirm against
# the installed version before switching.
xgboostGrid <- grid_latin_hypercube(
  min_n(),
  tree_depth(),
  learn_rate(),
  loss_reduction(),
  size = 10
)

# Bundle the recipe (not a formula) with the model.
xgboost_wf <- workflow() %>%
  add_recipe(preprocessing_recipe) %>%
  add_model(xgbSpec)

# Evaluate every grid candidate on every fold, keeping the predictions.
xgboostFitted <-
  tune_grid(
    object = xgboost_wf,
    resamples = cv_folds,
    grid = xgboostGrid,
    metrics = metric_set(accuracy),
    control = control_grid(save_pred = TRUE)
  )

# Plot accuracy against each tuned parameter.
autoplot(xgboostFitted)

创建于 2024-03-10,使用 reprex v2.1.0

R相关问答推荐

在ggplot Likert条中添加水平线

如何将在HW上运行的R中的消息(错误、警告等)作为批处理任务输出

使用tidyverse / Mutate的存款账户余额

根据R中两个变量的两个条件删除带有dplyr的行

如何求解arg必须为NULL或deSolve包的ode函数中的字符向量错误

随机森林回归:下拉列重要性

在发布到PowerBI Service时,是否可以使用R脚本作为PowerBI的数据源?

如何动态更新selectizeInput?

如何在R中合并两个基准点?

如何调整曲线图中的y轴标签?

如果某些列全部为NA,则更改列

使用整齐的计算(curl -curl )和杂音

如何在观测缺失的地方添加零

使用R中的正则表达式将一列分割为多列

从非重叠(非滚动)周期中的最新数据向后开窗并在周期内计数

有没有可能用shiny 的书签恢复手风琴面板?

我如何去掉盒子图底部的数字?

如何使用字符串从重复的模式中提取多个数字?

在不对R中的变量分组的情况下取两行的平均值

如何构建一个for循环来循环处理动物ID?