# Hyperparameter Tuning
#
# Reference snippet: build grid / randomized / Bayesian hyperparameter
# searches around an existing estimator, fit one of them, inspect the result
# and persist the best model.
#
# NOTE(review): `model`, `X_train`, `y_train`, `X_test`, `pd` and `plt` are
# not defined in this snippet -- they are assumed to be created/imported
# earlier in the file or session. TODO confirm.
import joblib  # `sklearn.externals.joblib` was removed; import joblib directly
from sklearn.metrics import accuracy_score, mean_absolute_error, make_scorer
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, KFold
from skopt import BayesSearchCV

# See what parameters can be tuned
model.get_params()

kf = KFold(n_splits=5, shuffle=True, random_state=42)

custom_params = {
    'max_depth': [3, 4, 5, 6],
    'min_samples_leaf': [0.04, 0.06, 0.08],
    'max_features': [0.2, 0.4, 0.6, 0.8],
}

# MAE is an error (lower is better). The search always *maximises* the score,
# so the scorer must negate it; otherwise the worst model would be selected.
mae_scorer = make_scorer(mean_absolute_error, greater_is_better=False)

# Grid Search (exhaustive, therefore deterministic: it takes no random_state)
model_cv1 = GridSearchCV(
    estimator=model,
    param_grid=custom_params,
    cv=kf,
    scoring=mae_scorer,  # or a built-in name such as 'neg_mean_squared_error'
    verbose=1,
    n_jobs=-1,
    refit=True,
)

# Randomized Search
model_cv2 = RandomizedSearchCV(
    estimator=model,
    param_distributions=custom_params,
    n_iter=10,
    cv=kf,
    scoring=mae_scorer,  # Use custom scorer here
    verbose=1,
    n_jobs=-1,
    refit=True,
    random_state=42,
)

# Bayes Search
search_spaces = {
    'max_depth': (3, 6),
    'min_samples_leaf': (0.04, 0.08, 'uniform'),
    'max_features': (0.2, 0.8, 'uniform'),
}
model_bayes_cv = BayesSearchCV(
    estimator=model,
    search_spaces=search_spaces,
    cv=kf,
    n_iter=50,  # Adjust the number of iterations as needed
    scoring=mae_scorer,
    verbose=1,
    n_jobs=-1,
    refit=True,
    random_state=42,
)

# use TPOT for GENETIC SEARCH CV

# Pick the search to run; swap in model_cv2 or model_bayes_cv as needed.
model_cv = model_cv1

model_cv.fit(X_train, y_train)
model_cv.cv_results_  # See all information from dictionary
best_hyperparams = model_cv.best_params_  # Get the parameters that produce best result
best_model = model_cv.best_estimator_  # Get the best model
best_model.get_params()  # Get the parameters of the best model
y_pred = best_model.predict(X_test)  # predict with best model
# best_score_ lives on the search object, not on the fitted estimator.
# With the scorer above it is the negated mean MAE across the CV folds.
best_score = model_cv.best_score_  # Best result

# Visualize contribution of parameter to get the optimal accuracy (Scatterplot or kdeplot)
# cv_results_ stores one 'param_<name>' column per searched hyperparameter.
results_df = pd.DataFrame({
    'Accuracy': model_cv.cv_results_['mean_test_score'],
    'Parameter': model_cv.cv_results_['param_max_depth'],  # Adjust the parameter name as needed
})
plt.scatter(results_df['Parameter'], results_df['Accuracy'], s=100, alpha=0.5)

joblib.dump(best_model, 'my_best_model.pkl')  # Save the model in pkl file

# Notes
# - Finding the optimal combination of parameters for a model
# - parameters:
#     - the ones that are set by the model after learning from the dataset
#     - eg: coefficients of linear regression, node decisions made by decision trees
#     - accessible by attribute (in the attribute section in the documentation)
# - hyperparameters:
#     - the ones that we have the option to set before creating the model
#     - print the estimator to see what it contains
#     - accessible by parameter (in the parameter section in the documentation)
# - Silly things to do (some examples):
#     - Creating a random forest with just 2 or 3 trees
#     - 1 or 2 neighbors in the knn algorithm
#     - increasing a hyperparameter by a small amount
# - Be aware of conflicting hyperparameter choices (The 'newton-cg', 'sag' and
#   'lbfgs' solvers support only l2 penalties.)
# - Visualize if the hyperparameter has any effect:
#     - Graph of learning curve: hyperparameter on X-axis and accuracy on Y-axis
# - Problem: So many models can be built. Among these, find the model that
#   yields the optimal result.
# - Solution: Train with a set of adjustable parameters and compare the
#   results to find the optimal model
# - Rule of thumb: Cross validation is used to estimate the generalization
#   performance.
# - Curse of dimensionality: an exhaustive search grows exponentially, because
#   the number of combinations multiplies with every hyperparameter
#   (dimension) added to the grid.
# - Best practice: Do this when you really need the optimal solution, since it
#   does not turn a bad model into a good model.
# - optimal hyperparameters = set of hyperparameters corresponding to the best
#   CV score.
# - Some algorithms:
#     - Uninformed Search:
#         - Grid Search:
#             - Evaluate every possible combination of the given parameter values
#             - Guaranteed to find the best combination within the grid
#             - time consuming process, resource intensive
#         - Random Search:
#             - Randomly choose a number of combinations of the given parameters
#             - A good result, but may not be the absolute best
#             - fast
#             - Idea: You are unlikely to keep completely missing the 'good
#               area' for a long time when randomly picking new spots
#     - Informed Search:
#         - Coarse to Fine: (Hybrid of grid search and randomized search)
#             1. Random search
#             2. Find promising areas (Narrow Down)
#             3. Grid search in the smaller area, or skip to step 4
#             4. Continue from step 1 until the optimal score is obtained
#             - Idea: Narrow down the optimal area. The best result will be in
#               that area.
#         - Bayesian Optimization:
#             - Infer where the best result is likely to be from the outcomes
#               of past trials
#             - eg: Given that a certain event occurs due to another event, we
#               get a clearer idea of the probability of a certain outcome
#             - Idea: Getting better as we get more evidence.
#         - Genetic Algorithms:
#             1. We can create some models (that have hyperparameter settings)
#             2. We can pick the best (by our scoring function). These are the
#                ones that 'survive'
#             3. We can create new models that are similar to the best ones
#             4. We add in some randomness so we don't get stuck in a local optimum
#             5. Repeat until we are happy!
#             - Idea: As in evolution, combining the best genes in each new
#               generation lets us approach the optimal individual