September 26, 2019

Python imblearn module

Python imbalanced-learn (imblearn) package

from imblearn.pipeline import make_pipeline as imb_make_pipeline

from collections import Counter

from imblearn.over_sampling import SMOTE  # SMOTE - Synthetic Minority Over-sampling TEchnique

# Two sampler configurations. `sampling_strategy` is the current name of the
# deprecated `ratio` argument; the value 1.0 is passed through unchanged.
smt = SMOTE(random_state=2)
sm = SMOTE(random_state=12, sampling_strategy=1.0)

# `fit_resample` replaces the deprecated `fit_sample` alias and matches the
# call already used further down in this snippet.
train_X_sm, train_y_sm = smt.fit_resample(train_X, train_y)
x_train_res, y_train_res = smt.fit_resample(x_train, y_train.ravel())

# Bind the result to the names the report line reads, instead of overwriting
# the inputs X, y and then printing an unrelated (or undefined) y_res.
X_res, y_res = SMOTE().fit_resample(X, y)
print('Resampled dataset shape %s' % Counter(y_res))

# SVMSMOTE over-sampling: resample X, y with a fixed seed, then print class counts.
from imblearn.over_sampling import SVMSMOTE
sm = SVMSMOTE(random_state=42)
X_res, y_res = sm.fit_resample(X, y)
# NOTE(review): `training_target` and `np` are not defined in this snippet --
# presumably a pandas Series holding the original labels and numpy; confirm.
print(training_target.value_counts(), np.bincount(y_res))

from imblearn.over_sampling import BorderlineSMOTE

# Keep the configured sampler and resample with it; previously the configured
# instance was discarded and a second, default-configured one did the work.
borderline_smote = BorderlineSMOTE(random_state=0, kind='borderline-1')
X_resampled, y_resampled = borderline_smote.fit_resample(X, y)

# KMeansSMOTE over-sampler, constructed with a fixed seed.
from imblearn.over_sampling import KMeansSMOTE
# NOTE(review): the instance is not bound to a name, so as written this line
# only shows the constructor call; nothing is resampled here.
KMeansSMOTE(random_state=0)

# SMOTENC over-sampling with columns 0 and 2 declared as categorical features.
from imblearn.over_sampling import SMOTENC
smote_nc = SMOTENC(categorical_features=[0, 2], random_state=0)
# X and y come from outside this snippet.
X_resampled, y_resampled = smote_nc.fit_resample(X, y)

from imblearn.over_sampling import ADASYN  # ADASYN - Adaptive Synthetic sampling approach

sm = ADASYN()

# `fit_resample` replaces the deprecated `fit_sample` alias, consistent with
# the other samplers on this page.
X, y = sm.fit_resample(X1, y)
train2, target = sm.fit_resample(train2, target.ravel())

# Random over-sampling with a fixed seed, followed by a class-count report.
from imblearn.over_sampling import RandomOverSampler
ros = RandomOverSampler(random_state=42)
X_res, y_res = ros.fit_resample(X, y)
# NOTE(review): `Counter` is not imported in this snippet -- presumably
# collections.Counter; confirm it is imported elsewhere.
print('Resampled dataset shape %s' % Counter(y_res))

from imblearn.under_sampling import NearMiss

nr = NearMiss()

# `fit_resample` replaces the deprecated `fit_sample` alias. The training
# data is overwritten in place with the under-sampled version.
X_train, y_train = nr.fit_resample(X_train, y_train)

# Bare expression: displays per-class counts when run interactively.
# NOTE(review): `np` is not imported in this snippet -- presumably numpy.
np.bincount(y_train)

# Under-sampling with TomekLinks, applied to the already-resampled X_res, y_res.
# RandomUnderSampler is imported here but not used in this snippet.
from imblearn.under_sampling import RandomUnderSampler
from imblearn.under_sampling import TomekLinks
tl = TomekLinks()
X_res, y_res = tl.fit_resample(X_res, y_res)

# Under-sampling with AllKNN (default settings); X_res, y_res are overwritten.
from imblearn.under_sampling import AllKNN
allknn = AllKNN()
X_res, y_res = allknn.fit_resample(X_res, y_res)

# Under-sampling with NeighbourhoodCleaningRule (default settings);
# X_res, y_res are overwritten with the result.
from imblearn.under_sampling import NeighbourhoodCleaningRule
ncr = NeighbourhoodCleaningRule()
X_res, y_res = ncr.fit_resample(X_res, y_res)

from imblearn.combine import SMOTETomek

# Combined over- and under-sampling (SMOTE followed by Tomek-link removal).
# `sampling_strategy` is the current name of the deprecated `ratio` argument.
smt = SMOTETomek(sampling_strategy='auto')

# `fit_resample` replaces the deprecated `fit_sample` alias.
x_ohe1, y1 = smt.fit_resample(x_ohe, y)

from imblearn.combine import SMOTEENN

# Combined over- and under-sampling (SMOTE followed by Edited Nearest Neighbours).
smt = SMOTEENN(random_state=0)

# Call the sampler that was just created: the original line carried a stray
# interpreter prompt (`>>>`) and referred to an undefined name `sme`.
X_res, y_res = smt.fit_resample(X, y)

# `EasyEnsemble` was replaced by `EasyEnsembleClassifier` and later removed
# from imblearn.ensemble, so importing the old name fails on current releases.
from imblearn.ensemble import BalancedBaggingClassifier, EasyEnsembleClassifier

# NOTE(review): `RandomForestClassifier` is not imported in this snippet --
# presumably sklearn.ensemble.RandomForestClassifier; confirm.
rf_clf = RandomForestClassifier(
    n_estimators=500,
    max_features=0.25,
    criterion="entropy",
    class_weight="balanced",
)

# Bagging ensemble that wraps the forest above.
# NOTE(review): newer imbalanced-learn releases rename `base_estimator` to
# `estimator`; switch the keyword when the minimum supported version allows.
resampled_rf = BalancedBaggingClassifier(
    base_estimator=rf_clf,
    n_estimators=10,
    random_state=123,
)

# NOTE(review): `rf`, `x_train` and `y_train` are not defined in this snippet;
# the name `resampled_lr` suggests `rf` may be meant to be a different
# estimator -- confirm against the original source.
resampled_lr = BalancedBaggingClassifier(
    base_estimator=rf,
    n_estimators=200,
    random_state=2019,
).fit(x_train, y_train)


Related Python Articles: tqdm package to show progress bar

No comments:

Post a Comment