Commit 82050193 authored by Leodegario Lorenzo II's avatar Leodegario Lorenzo II
Browse files

Set num features for feature importance as min of actual and set num_feat

parent a0dee5fe
......@@ -91,30 +91,30 @@ class MLModels:
self.coef = None
self.classes = None
self._setting = None
def list_all_methods():
    """
    Print a numbered list of all available classification and
    regression methods.

    The names are taken from the keys of ``MLModels.all_methods()``;
    classification methods are printed first, then regression methods.

    Returns
    -------
    None
        Output is written to stdout only.
    """
    # Print classification header
    print("Classification\n" + "-"*14)
    # Use enumerate for the counter instead of a manual i += 1,
    # consistent with the regression loop below
    for i, m in enumerate(MLModels.all_methods()['Classification'].keys(), 1):
        print(f"{i}. {m}")
    # Print regression header
    print("\nRegression\n" + "-"*10)
    for i, m in enumerate(MLModels.all_methods()['Regression'].keys(), 1):
        print(f"{i}. {m}")
def all_methods(
n_nb=list(range(1, 51)),
n_nb=list(range(1, 51)),
C=[1e-5, 1e-4, 1e-3, .01, 0.1, 0.2, 0.4, 0.75, 1, 1.5, 3, 5, 10,
15, 20, 100, 1000, 5000, 10000, 50000, 100000],
max_depth=list(range(1, 51)),
15, 20, 100, 1000, 5000, 10000, 50000, 100000],
max_depth=list(range(1, 51)),
tree_rs=None):
"""
Return a dict of list of classification and regression methods
......@@ -142,7 +142,7 @@ class MLModels:
"""
# Initialize methods container
methods = {}
# Set classification methods
methods['Classification'] = {
'kNN': KNNClassifier(n_nb),
......@@ -167,7 +167,7 @@ class MLModels:
'XGB Classifier': XGBC(max_depth, tree_rs),
'LightGBM Classifier': LGBMC(max_depth, tree_rs),
'CatBoost Classifier': CBClassifier(max_depth, tree_rs)}
# Set Regression methods
methods['Regression'] = {
'kNN': KNNRegressor(n_nb),
......@@ -189,9 +189,9 @@ class MLModels:
'XGB Regressor': XGBR(max_depth, tree_rs),
'LightGBM Regressor': LGBMR(max_depth, tree_rs),
'CatBoost Regressor': CBRegressor(max_depth, tree_rs)}
return methods
def scalers():
"""Return a dict of all scaler methods"""
return {'standard': StandardScaler(),
......@@ -225,12 +225,12 @@ class MLModels:
def plot_results(methods):
    """
    Display the accuracy plot of every trained model in *methods*.

    Parameters
    ----------
    methods : dict
        Mapping of model name to trained MLModels instance.

    Returns
    -------
    list of matplotlib axes
        One axes object per plotted model, in iteration order.
    """
    plotted = []
    # Iterate name/model pairs directly rather than indexing the dict
    for name, model in methods.items():
        # Each trained model knows how to draw its own accuracy curves
        axis = model.plot_accuracy()
        axis.set_title(name, fontsize=16)
        # Render this figure before moving on to the next model
        plt.show()
        plotted.append(axis)
    return plotted
def plot_accuracy(self):
"""
Plots the train and test accuracy of the model for various settings
Returns
-------
ax : matplotlib.Axes
......@@ -270,13 +270,13 @@ class MLModels:
# Initialize figure
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111)
# Plot train and test accuracy points
ax.plot(self._setting, self.training_accuracy,
ax.plot(self._setting, self.training_accuracy,
label="training accuracy", lw=3.0, color='tab:blue')
ax.plot(self._setting, self.test_accuracy,
ax.plot(self._setting, self.test_accuracy,
label="test accuracy", lw=3.0, color='tab:orange')
# Visualize the standard deviation as a filled area
ax.fill_between(self._setting,
self.training_accuracy - self.training_std,
......@@ -285,10 +285,10 @@ class MLModels:
ax.fill_between(self._setting, self.test_accuracy - self.test_std,
self.test_accuracy + self.test_std, alpha=0.2,
color='tab:orange')
# Set axis labels
ax.set_ylabel("Accuracy", fontsize=14)
# Check if n_neighbors
if self._setting_name == 'n_neighbors':
ax.set_xlabel("$n_{neighbors}$", fontsize=14)
......@@ -296,7 +296,7 @@ class MLModels:
ax.set_xlabel("Max Depth", fontsize=14)
else:
ax.set_xlabel("${}$".format(self._setting_label), fontsize=14)
# Remove spines
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
......@@ -304,30 +304,30 @@ class MLModels:
# Set axis scale depending on method
if self.log_plot:
ax.set_xscale('log')
# Set y axis tickers as percentages
ax.yaxis.set_major_formatter(ticker.FuncFormatter(percent))
# Get xlim and ylim
xlim = ax.get_xlim()
ylim = ax.get_ylim()
xrange = xlim[1] - xlim[0]
yrange = ylim[1] - ylim[0]
# Draw legends
ax.text(s="Train", fontsize=14, weight='bold', color='tab:blue',
x=np.array(self._setting).max() + xrange*0.01,
x=np.array(self._setting).max() + xrange*0.01,
y=self.training_accuracy[-1] + yrange*0.015)
ax.text(s="Test", fontsize=14, weight='bold', color='tab:orange',
x=np.array(self._setting).max() + xrange*0.01,
x=np.array(self._setting).max() + xrange*0.01,
y=self.test_accuracy[-1] - yrange*0.015)
return ax
def plot_feature_importance(self, feature_names=None, num_feat=10):
"""
Plots the importance of each feature in descending order
Parameters
----------
self : MLModels object
......@@ -336,7 +336,7 @@ class MLModels:
String of list to be used as feature names
num_feat : int
Top `n_feat` to be shown in the plot
Returns
-------
ax : matplotlib axes
......@@ -347,6 +347,9 @@ class MLModels:
>>> m = MLModels.run_classifier(X, label)
>>> ax = m['Decision Tree'].plot_feature_importance(feature_names)
"""
# Set num feat depending on the number of features
num_feat = min([len(self.feature_importance), num_feat])
# Get weights and indices
weights = self.feature_importance
indices = weights.argsort()[::-1][:num_feat]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment