Commit 10b5cb4f authored by Leodegario Lorenzo II

Add permutation importance

parent e5cd9af7
@@ -38,9 +38,11 @@ from sklearn.metrics import precision_score, recall_score, zero_one_loss
from sklearn.metrics import explained_variance_score, max_error
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.metrics import mean_squared_log_error, median_absolute_error
from sklearn.metrics import make_scorer
# For inspection
from sklearn.inspection import partial_dependence
from sklearn.inspection import permutation_importance
# For ML Models
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
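
For context, sklearn's permutation_importance shuffles one feature at a time and reports the resulting drop in score. A minimal sketch of the API as this commit uses it; the toy data and estimator below are illustrative, not part of this repo:

# Illustrative sketch of the permutation_importance API;
# the toy data and estimator are not part of this repo.
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance

X, y = make_classification(n_samples=200, n_features=5, random_state=0)
est = RandomForestClassifier(random_state=0).fit(X, y)

# Each feature is permuted n_repeats times; the mean score drop
# across repeats is returned as importances_mean.
result = permutation_importance(est, X, y, n_repeats=10, random_state=0)
print(result['importances_mean'])  # one value per feature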
@@ -971,7 +973,78 @@ class MLModels:
            + '</center>'))
        return result, axes

    def plot_permutation_importance(
            est, X, y, feature_names=None, num_feat=10, scorer=None,
            n_repeats=10,
            random_state=1337):
        """
        Plot the feature importance via the permutation importance method.

        Generates the permutation importance plot given the trained
        model, input data, and labels.

        Parameters
        ----------
        est : trained sklearn estimator
            Trained model to be inspected
        X : array-like
            Input data
        y : array-like
            Label data
        feature_names : list of str or array-like, default=None
            List of feature names
        num_feat : int, default=10
            Number of features to include in the plot
        scorer : str, default=None
            Scorer to be used. See MLModels.scorers() for the full list.
        n_repeats : int, default=10
            Number of times to permute each feature
        random_state : int, default=1337
            Random state of the permutation randomizer
        """
        # Wrap the requested metric as a scorer; fall back to the
        # estimator's default score method when none is given
        if scorer is not None:
            scoring = make_scorer(MLModels.scorers()[scorer])
        else:
            scoring = None

        # Compute permutation importances
        importances = permutation_importance(
            est, X, y, n_repeats=n_repeats,
            random_state=random_state, scoring=scoring)
        importances_mean = importances['importances_mean']

        # Cap num_feat at the number of available features
        num_feat = min(len(importances_mean), num_feat)

        # Normalize importances so they sum to one
        importances_mean = importances_mean / importances_mean.sum()

        # Get indices of the top features, sorted by importance
        indices = importances_mean.argsort()[::-1][:num_feat]

        # Initialize figure
        fig = plt.figure(figsize=(8, 6))
        ax = fig.add_subplot(111)

        # Generate horizontal bar graph of importances
        if feature_names is not None:
            ax.barh(np.array(feature_names)[indices][::-1],
                    importances_mean[indices][::-1])
        else:
            ax.barh(range(num_feat), importances_mean[indices][::-1])

        # Set axis labels
        ax.set_xlabel('Importances', fontsize=14)
        ax.set_ylabel('Feature', fontsize=14)

        # Remove top and right spines
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        return ax
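
A hypothetical usage sketch of the new helper, assuming it is called on the class like the module's other plotting utilities; the dataset, split, and estimator below are illustrative:

# Hypothetical usage; dataset, split, and estimator are illustrative.
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, random_state=1337)
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# scorer=None falls back to the estimator's own score method
ax = MLModels.plot_permutation_importance(
    knn, X_test, y_test, feature_names=data.feature_names,
    num_feat=10, n_repeats=10, random_state=1337)
plt.show()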

    def plot_partial_dependence(est, X, features, feature_names):
        """
        Plot the partial dependence of features wrt the estimator
@@ -986,7 +1059,7 @@ class MLModels:
        X : array-like
            The dataset in which partial dependence is to be inspected,
            usually this is the train set
-       y : list of str or tuple of str
+       features : list of str or tuple of str
            Name of features (or pair of features) to be inspected
        feature_names : list of str
            Name of features of the dataset
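
Continuing the sketch above, the renamed features parameter would be passed by name; the feature choices here are illustrative and assume the wrapper resolves names via feature_names:

# Illustrative call with the corrected `features` parameter name;
# continues the hypothetical knn/X_train setup sketched earlier.
MLModels.plot_partial_dependence(
    knn, X_train, features=['mean radius', 'mean texture'],
    feature_names=list(data.feature_names))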