Bayesian optimization in Python

from hyperopt import fmin, tpe, hp, Trials
from sklearn.metrics import make_scorer
from sklearn.model_selection import cross_val_score
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.externals import joblib
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold

# Assuming `model`, `X_train`, `y_train`, `kf`, and `mae_scorer` are defined earlier in your code

# Hyperparameter search space handed to hyperopt's fmin. Each key is used
# both as the hyperopt label and as the keyword passed to model.set_params.
search_space = dict(
    # Drawn on a step-1 grid between 3 and 6.
    max_depth=hp.quniform('max_depth', 3, 6, 1),
    # Drawn uniformly from [0.04, 0.08].
    min_samples_leaf=hp.uniform('min_samples_leaf', 0.04, 0.08),
    # Drawn uniformly from [0.2, 0.8].
    max_features=hp.uniform('max_features', 0.2, 0.8),
)

# Define the objective function to minimize
def objective(params):
    """Return the mean cross-validated score for one hyperparameter sample.

    Args:
        params: Mapping of estimator parameter names to sampled values, as
            produced by hyperopt from the search space.

    Returns:
        The mean of the per-fold scores from ``cross_val_score``; hyperopt
        treats this value as the loss to minimize.
    """
    # hp.quniform samples floats (e.g. 4.0) while tree depth must be an
    # integer. Work on a copy so hyperopt's own sample is left untouched.
    candidate = dict(params)
    if candidate.get('max_depth') is not None:
        candidate['max_depth'] = int(candidate['max_depth'])

    model.set_params(**candidate)
    scores = cross_val_score(model, X_train, y_train, cv=kf, scoring=mae_scorer)

    # NOTE(review): `mae_scorer` is defined outside this snippet. If it was
    # built with greater_is_better=False the scores are negated MAE and this
    # return value must be negated too, otherwise fmin would favor the
    # worst model -- confirm against the scorer definition.
    return np.mean(scores)

# Initialize Trials to store optimization results
trials = Trials()

# Use fmin from hyperopt to perform Bayesian optimization
# NOTE(review): newer hyperopt releases (0.2.7+, to be confirmed against the
# installed version) expect `np.random.default_rng(42)` for `rstate` instead
# of a RandomState instance.
best_hyperparams = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=50,  # Adjust the number of evaluations as needed
    trials=trials,
    verbose=1,
    rstate=np.random.RandomState(42),
)

# Get the best hyperparameters
best_bayes_hyperparams = {key: best_hyperparams[key] for key in search_space}
# fmin reports the quniform-sampled depth as a float (e.g. 4.0); the estimator
# needs an integer depth.
best_bayes_hyperparams['max_depth'] = int(best_bayes_hyperparams['max_depth'])

# Set the best hyperparameters to the model
model.set_params(**best_bayes_hyperparams)

# Fit the model with the best hyperparameters
model.fit(X_train, y_train)

# Save the best model
joblib.dump(model, 'my_best_bayes_model.pkl')

# Access the results from hyperopt.
# `trials.results` already holds the per-trial result dicts, so the loss is
# read directly from each entry. 'Accuracy' is the negated loss.
hyperopt_results = pd.DataFrame({
    'Accuracy': [-result['loss'] for result in trials.results],
    'Parameter': [trial['misc']['vals'] for trial in trials.trials],
})

# `misc.vals` maps each label to a one-element list; flatten it into one
# numeric column per hyperparameter so the values can be plotted.
param_names = list(search_space)
for name in param_names:
    hyperopt_results[name] = [
        trial['misc']['vals'][name][0] for trial in trials.trials
    ]

# Visualize contribution of each parameter to the score: one scatter panel
# per hyperparameter, sharing the score axis.
fig, axes = plt.subplots(
    1,
    len(param_names),
    figsize=(5 * len(param_names), 4),
    sharey=True,
    squeeze=False,
)
for ax, name in zip(axes[0], param_names):
    ax.scatter(hyperopt_results[name], hyperopt_results['Accuracy'], s=100, alpha=0.5)
    ax.set_xlabel(name)
axes[0][0].set_ylabel('Accuracy')
fig.tight_layout()
plt.show()
Python相关代码片段