"""Bayesian hyperparameter search for a tree-based regressor using hyperopt.

Runs a TPE search over `search_space`, refits `model` with the best
hyperparameters found, saves it to disk and plots the score of every trial
against each hyperparameter.
"""
from hyperopt import fmin, tpe, hp, Trials
from sklearn.metrics import make_scorer
from sklearn.model_selection import cross_val_score
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

try:
    # `sklearn.externals.joblib` was removed from scikit-learn; the
    # standalone package is the supported import.
    import joblib
except ImportError:  # very old environments without standalone joblib
    from sklearn.externals import joblib
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold

# Assuming `model`, `X_train`, `y_train`, and `kf` are defined earlier in your code.
# NOTE(review): `mae_scorer` is used below but is not defined in this file
# section; it must exist before this code runs. The objective assumes it
# returns an error where LOWER is better (e.g. `make_scorer(mean_absolute_error)`).
# If it was built with `greater_is_better=False`, the scores are negated and
# the sign in `objective` must be flipped -- confirm against its definition.

# Define the search space for hyperopt
search_space = {
    'max_depth': hp.quniform('max_depth', 3, 6, 1),
    'min_samples_leaf': hp.uniform('min_samples_leaf', 0.04, 0.08),
    'max_features': hp.uniform('max_features', 0.2, 0.8),
}

# Hyperparameters that the estimator requires as integers. `hp.quniform`
# samples floats (3.0, 4.0, ...), so they are cast before use.
_INTEGER_PARAMS = ('max_depth',)


def _as_estimator_params(params):
    """Return a copy of `params` with integer-valued hyperparameters cast to int."""
    converted = dict(params)
    for name in _INTEGER_PARAMS:
        if name in converted:
            converted[name] = int(converted[name])
    return converted


def objective(params):
    """Return the mean cross-validated score of `model` for `params`.

    Args:
        params: Mapping of hyperparameter name to the value sampled by
            hyperopt from `search_space`.

    Returns:
        The mean of the per-fold scores produced by `mae_scorer`; hyperopt
        minimizes this value.
    """
    model.set_params(**_as_estimator_params(params))
    scores = cross_val_score(model, X_train, y_train, cv=kf, scoring=mae_scorer)
    return np.mean(scores)


# Initialize Trials to store optimization results
trials = Trials()

# Use fmin from hyperopt to perform Bayesian optimization
best_hyperparams = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=50,  # Adjust the number of evaluations as needed
    trials=trials,
    verbose=1,
    # NOTE(review): newer hyperopt releases appear to expect a numpy
    # Generator here (`np.random.default_rng(42)`) -- confirm against the
    # installed hyperopt version.
    rstate=np.random.RandomState(42),
)

# Get the best hyperparameters (integer-valued ones cast back from float)
best_bayes_hyperparams = _as_estimator_params(
    {key: best_hyperparams[key] for key in search_space}
)

# Set the best hyperparameters to the model
model.set_params(**best_bayes_hyperparams)

# Fit the model with the best hyperparameters
model.fit(X_train, y_train)

# Save the best model
joblib.dump(model, 'my_best_bayes_model.pkl')

# Access the results from hyperopt. `trials.results` already holds the result
# dicts (with a 'loss' key); the sampled values live in `trials.trials`.
hyperopt_results = pd.DataFrame({
    'Accuracy': [-result['loss'] for result in trials.results],
    'Parameter': [trial['misc']['vals'] for trial in trials.trials],
})

# hyperopt stores each sampled value as a one-element list; flatten them into
# one numeric column per hyperparameter so they can be plotted.
for name in search_space:
    hyperopt_results[name] = [
        trial['misc']['vals'][name][0] for trial in trials.trials
    ]

# Visualize contribution of each parameter to the score (one scatterplot each)
fig, axes = plt.subplots(
    1,
    len(search_space),
    figsize=(5 * len(search_space), 4),
    sharey=True,
    squeeze=False,
)
for ax, name in zip(axes[0], search_space):
    ax.scatter(hyperopt_results[name], hyperopt_results['Accuracy'], s=100, alpha=0.5)
    ax.set_xlabel(name)
axes[0][0].set_ylabel('Accuracy')
plt.tight_layout()
plt.show()