
Optimize a classification problem by grid-searching over several models and their hyperparameters.

Prerequisites

from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
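
The snippets below assume the data has already been split into train_X, train_y, test_X, and test_y. A minimal sketch of that setup, using the breast cancer dataset as a stand-in (the dataset choice and the split parameters are assumptions, not part of the original post):

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# Stand-in binary classification dataset; any (X, y) pair works
X, y = load_breast_cancer(return_X_y=True)

# Hold out a test set for the final evaluation in section 7
train_X, test_X, train_y, test_y = train_test_split(
    X, y, test_size=0.2, random_state=1, stratify=y)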

1. LogisticRegression

from sklearn.linear_model import LogisticRegression

# Define machine learning model
lr = LogisticRegression()

# Fit the model
lr.fit(train_X, train_y)

# Define Hyperparameters
parameters = {'C' : [0.01, 0.1, 1, 10],
             'random_state' : [1]}

# Fit the model using optimized parameters
model_lr = GridSearchCV(estimator = lr, param_grid = parameters,
                       scoring = 'f1', cv = 10, refit = True)
model_lr.fit(train_X, train_y)
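
After fitting, the search object exposes the best hyperparameter combination it found, and because refit = True it also holds a final model retrained on the full training set:

# Best hyperparameter combination and its mean cross-validated f1
print(model_lr.best_params_)
print(model_lr.best_score_)

# The refit estimator, usable for prediction directly
model_lr.best_estimator_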

2. SVC (Support Vector Machine)

from sklearn.svm import SVC

svm = SVC()
svm.fit(train_X, train_y)

parameters = {'kernel' : ['rbf', 'linear', 'poly'],
             'C' : [0.5, 1.5, 10],
             'random_state' : [1]}

model_svm = GridSearchCV(estimator = svm, param_grid = parameters,
                        scoring = 'recall', cv = 10, refit = True)
model_svm.fit(train_X, train_y)
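
Note that this search optimizes recall while the other searches use f1; scoring accepts any built-in scorer name. On scikit-learn 1.0+ the full list can be queried like this:

from sklearn.metrics import get_scorer_names

# All metric names accepted by GridSearchCV's scoring parameter
print(get_scorer_names())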

3. DecisionTreeClassifier

from sklearn.tree import DecisionTreeClassifier

clf_tree = DecisionTreeClassifier()

clf_tree.fit(train_X, train_y)

parameters = {'criterion' : ['gini', 'entropy'],
             'max_depth' : [5, 10, 15, 20, None]}

model_dt = GridSearchCV(estimator = clf_tree, param_grid = parameters,
                       scoring = 'f1', cv = 10, refit = True)

model_dt.fit(train_X, train_y)

4. RandomForestClassifier

from sklearn.ensemble import RandomForestClassifier

randomforest = RandomForestClassifier()

randomforest.fit(train_X, train_y)

parameters = {'max_depth' : [3, 5, 10],
             'n_estimators' : [100, 200, 300],
             'random_state' : [1]}

model_rfc = GridSearchCV(estimator = randomforest, param_grid = parameters,
                        scoring = 'f1', cv = 10, refit = True)

model_rfc.fit(train_X, train_y)

5. XGBClassifier

from xgboost import XGBClassifier

xgboost = XGBClassifier()

xgboost.fit(train_X, train_y)

parameters = {'max_depth' : [3, 5, 10],
             'n_estimators' : [100, 200, 300],
             'learning_rate' : [0.001, 0.01, 0.1, 1],
             'gamma' : [0.5, 1, 2],
             'random_state' : [1]}

model_xgb = GridSearchCV(estimator = xgboost, param_grid = parameters,
                        scoring = 'f1', cv = 10, refit = True)

model_xgb.fit(train_X, train_y)
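
This grid has 3 × 3 × 4 × 3 = 108 candidate combinations, so with cv = 10 the search runs 1,080 fits. One way to speed it up is GridSearchCV's n_jobs parameter, which parallelizes the fits across CPU cores; a sketch:

model_xgb = GridSearchCV(estimator = xgboost, param_grid = parameters,
                        scoring = 'f1', cv = 10, refit = True,
                        n_jobs = -1)  # use all available cores

model_xgb.fit(train_X, train_y)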

6. LGBMClassifier

from lightgbm import LGBMClassifier

# Instantiate the model
lightgbm = LGBMClassifier()

lightgbm.fit(train_X, train_y)

parameters = {'max_depth' : [3, 5, 10],
             'n_estimators' : [100, 200, 300],
             'learning_rate' : [0.001, 0.01, 0.1, 1],
             'random_state' : [1]}

model_lgb = GridSearchCV(estimator = lightgbm, param_grid = parameters,
                        scoring = 'f1', cv = 10, refit = True)

model_lgb.fit(train_X, train_y)

7. Model Evaluation

f1-score

Note that these scores are computed on the training set, so they measure how well each model fits the training data rather than how well it generalizes.

print("DecisionTree model : {:.3f}".format(f1_score(train_y, model_dt.predict(train_X))))
print("RandomForest model : {:.3f}".format(f1_score(train_y, model_rfc.predict(train_X))))
print("XGBoost model : {:.3f}".format(f1_score(train_y, model_xgb.predict(train_X))))
print("LightGBM model : {:.3f}".format(f1_score(train_y, model_lgb.predict(train_X))))

best cross-validation score

best_score_ is the mean cross-validated score of the best parameter combination, measured with the metric passed to scoring ('f1' for these four searches), not accuracy.

print("DecisionTree model : {:.3f}".format(model_dt.best_score_))
print("RandomForest model : {:.3f}".format(model_rfc.best_score_))
print("XGBoost model : {:.3f}".format(model_xgb.best_score_))
print("LightGBM model : {:.3f}".format(model_lgb.best_score_))

precision score (test set)

print("DecisionTree model : {:.3f}".format(precision_score(test_y, model_dt.predict(test_X))))
print("RandomForest model : {:.3f}".format(precision_score(test_y, model_rfc.predict(test_X))))
print("XGBoost model : {:.3f}".format(precision_score(test_y, model_xgb.predict(test_X))))
print("LightGBM model : {:.3f}".format(precision_score(test_y, model_lgb.predict(test_X))))
