Credit scoring using scorecardpy with XGBoost



I used XGBoost for scoring creditworthiness. At first I thought I could use predict_proba for scoring, but then I saw that there is a module, scorecardpy, based on WOE (weight of evidence) for calculating credit scores. I tried to use it with my XGBoost model as in an example, but my ROC AUC fell to 0.5 and I don't see what I am doing wrong. Thanks for your help.

# End-to-end credit-scoring script: out-of-time train/test split, WOE binning
# with scorecardpy, an XGBoost classifier trained on the WOE features,
# ROC / cross-validation evaluation, and scorecard generation.
# NOTE: `pd`, `sc` (scorecardpy) and `plt` (matplotlib.pyplot) are not imported
# in this snippet and are assumed to be in scope already.
from sklearn import model_selection
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from sklearn.model_selection import cross_val_score
from xgboost import XGBClassifier

data = pd.read_csv('data.csv')

# Rows dated before 2018-04-01 form the training set, the rest the test set.
train_index = data['date'] < '2018-04-01'
test_index = data['date'] >= '2018-04-01'

data_final = data.drop('date', axis=1)

df_train = data_final[train_index]
df_test = data_final[test_index]

# Name of the target column (previously an undefined `y` -> NameError).
y = "label"
data_final_vars = data_final.columns.values.tolist()
X = [i for i in data_final_vars if i != y]

# woe binning ------
# Bins are learned on the training rows only, so no label information from
# the test period leaks into the WOE encoding.
bins = sc.woebin(df_train, y=y)

# binning adjustment
# # adjust breaks interactively
# breaks_adj = sc.woebin_adj(dt_s, "creditability", bins)
# # or specify breaks manually
breaks_adj = {
    'age': [26, 35, 40, 50, 60],
}  # the closing brace was missing, which made the whole script a SyntaxError
bins_adj = sc.woebin(df_train, y=y, breaks_list=breaks_adj)

# converting train and test into woe values
train_woe = sc.woebin_ply(df_train, bins_adj)
test_woe = sc.woebin_ply(df_test, bins_adj)

ytrain = train_woe.loc[:, y]
xtrain = train_woe.loc[:, train_woe.columns != y]
ytest = test_woe.loc[:, y]
xtest = test_woe.loc[:, test_woe.columns != y]

print("shape of xtrain: {}".format(xtrain.shape))
print("shape of xtest: {}".format(xtest.shape))

XGB = XGBClassifier(n_estimators=100, n_jobs=6, verbose=1)

# Train and evaluate. The fit call had been reduced to a comment, so the
# classifier was being scored without ever having been trained. The
# eval_metric is left at the library default ('rmse' is a regression metric).
XGB.fit(xtrain, ytrain, eval_set=[(xtrain, ytrain), (xtest, ytest)])

# Out-of-time ROC AUC, using the predicted probability of the positive class.
probs = XGB.predict_proba(xtest)
roc = roc_auc_score(y_true=ytest, y_score=probs[:, 1])
print("XGBoost roc score: {}".format(roc))

fpr, tpr, thresholds = roc_curve(ytest, probs[:, 1])
plt.plot(fpr, tpr, label='XGBoost Classifier (area = %0.2f)' % roc)
plt.plot([0, 1], [0, 1], 'r--')  # chance-level diagonal
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic')
plt.legend(loc="lower right")

# random_state only has an effect when shuffling is enabled; recent
# scikit-learn versions reject random_state combined with shuffle=False.
kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=7)
modelCV = XGB
scoring = 'accuracy'
results = cross_val_score(modelCV, xtrain, ytrain, cv=kfold, scoring=scoring)
print("10-fold cross validation average accuracy: {}".format(results.mean()))

# score ------
# NOTE(review): scorecardpy documents sc.scorecard() with a fitted
# LogisticRegression; confirm that it accepts an XGBClassifier here.
card = sc.scorecard(bins_adj, XGB, xtrain.columns)
# credit score
train_score = sc.scorecard_ply(df_train, card, print_step=0)
test_score = sc.scorecard_ply(df_test, card, print_step=0)

# psi
# A stray trailing comma used to turn `score` into a 1-tuple, and `label`
# referenced the undefined names y_train / y_test.
score = {'train': train_score, 'test': test_score}
label = {'train': ytrain, 'test': ytest}

share|improve this question




    I used XGBoost for scoring creditworthiness. At first I thought I could use predict_proba for scoring, but then I saw that there is a module, scorecardpy, based on WOE (weight of evidence) for calculating credit scores. I tried to use it with my XGBoost model as in an example, but my ROC AUC fell to 0.5 and I don't see what I am doing wrong. Thanks for your help.

    # End-to-end credit-scoring script: out-of-time train/test split, WOE binning
    # with scorecardpy, an XGBoost classifier trained on the WOE features,
    # ROC / cross-validation evaluation, and scorecard generation.
    # NOTE: `pd`, `sc` (scorecardpy) and `plt` (matplotlib.pyplot) are not imported
    # in this snippet and are assumed to be in scope already.
    from sklearn import model_selection
    from sklearn.metrics import roc_auc_score
    from sklearn.metrics import roc_curve
    from sklearn.model_selection import cross_val_score
    from xgboost import XGBClassifier

    data = pd.read_csv('data.csv')

    # Rows dated before 2018-04-01 form the training set, the rest the test set.
    train_index = data['date'] < '2018-04-01'
    test_index = data['date'] >= '2018-04-01'

    data_final = data.drop('date', axis=1)

    df_train = data_final[train_index]
    df_test = data_final[test_index]

    # Name of the target column (previously an undefined `y` -> NameError).
    y = "label"
    data_final_vars = data_final.columns.values.tolist()
    X = [i for i in data_final_vars if i != y]

    # woe binning ------
    # Bins are learned on the training rows only, so no label information from
    # the test period leaks into the WOE encoding.
    bins = sc.woebin(df_train, y=y)

    # binning adjustment
    # # adjust breaks interactively
    # breaks_adj = sc.woebin_adj(dt_s, "creditability", bins)
    # # or specify breaks manually
    breaks_adj = {
        'age': [26, 35, 40, 50, 60],
    }  # the closing brace was missing, which made the whole script a SyntaxError
    bins_adj = sc.woebin(df_train, y=y, breaks_list=breaks_adj)

    # converting train and test into woe values
    train_woe = sc.woebin_ply(df_train, bins_adj)
    test_woe = sc.woebin_ply(df_test, bins_adj)

    ytrain = train_woe.loc[:, y]
    xtrain = train_woe.loc[:, train_woe.columns != y]
    ytest = test_woe.loc[:, y]
    xtest = test_woe.loc[:, test_woe.columns != y]

    print("shape of xtrain: {}".format(xtrain.shape))
    print("shape of xtest: {}".format(xtest.shape))

    XGB = XGBClassifier(n_estimators=100, n_jobs=6, verbose=1)

    # Train and evaluate. The fit call had been reduced to a comment, so the
    # classifier was being scored without ever having been trained. The
    # eval_metric is left at the library default ('rmse' is a regression metric).
    XGB.fit(xtrain, ytrain, eval_set=[(xtrain, ytrain), (xtest, ytest)])

    # Out-of-time ROC AUC, using the predicted probability of the positive class.
    probs = XGB.predict_proba(xtest)
    roc = roc_auc_score(y_true=ytest, y_score=probs[:, 1])
    print("XGBoost roc score: {}".format(roc))

    fpr, tpr, thresholds = roc_curve(ytest, probs[:, 1])
    plt.plot(fpr, tpr, label='XGBoost Classifier (area = %0.2f)' % roc)
    plt.plot([0, 1], [0, 1], 'r--')  # chance-level diagonal
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic')
    plt.legend(loc="lower right")

    # random_state only has an effect when shuffling is enabled; recent
    # scikit-learn versions reject random_state combined with shuffle=False.
    kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=7)
    modelCV = XGB
    scoring = 'accuracy'
    results = cross_val_score(modelCV, xtrain, ytrain, cv=kfold, scoring=scoring)
    print("10-fold cross validation average accuracy: {}".format(results.mean()))

    # score ------
    # NOTE(review): scorecardpy documents sc.scorecard() with a fitted
    # LogisticRegression; confirm that it accepts an XGBClassifier here.
    card = sc.scorecard(bins_adj, XGB, xtrain.columns)
    # credit score
    train_score = sc.scorecard_ply(df_train, card, print_step=0)
    test_score = sc.scorecard_ply(df_test, card, print_step=0)

    # psi
    # A stray trailing comma used to turn `score` into a 1-tuple, and `label`
    # referenced the undefined names y_train / y_test.
    score = {'train': train_score, 'test': test_score}
    label = {'train': ytrain, 'test': ytest}

    share|improve this question






      I used XGBoost for scoring creditworthiness. At first I thought I could use predict_proba for scoring, but then I saw that there is a module, scorecardpy, based on WOE (weight of evidence) for calculating credit scores. I tried to use it with my XGBoost model as in an example, but my ROC AUC fell to 0.5 and I don't see what I am doing wrong. Thanks for your help.

      # End-to-end credit-scoring script: out-of-time train/test split, WOE binning
      # with scorecardpy, an XGBoost classifier trained on the WOE features,
      # ROC / cross-validation evaluation, and scorecard generation.
      # NOTE: `pd`, `sc` (scorecardpy) and `plt` (matplotlib.pyplot) are not imported
      # in this snippet and are assumed to be in scope already.
      from sklearn import model_selection
      from sklearn.metrics import roc_auc_score
      from sklearn.metrics import roc_curve
      from sklearn.model_selection import cross_val_score
      from xgboost import XGBClassifier

      data = pd.read_csv('data.csv')

      # Rows dated before 2018-04-01 form the training set, the rest the test set.
      train_index = data['date'] < '2018-04-01'
      test_index = data['date'] >= '2018-04-01'

      data_final = data.drop('date', axis=1)

      df_train = data_final[train_index]
      df_test = data_final[test_index]

      # Name of the target column (previously an undefined `y` -> NameError).
      y = "label"
      data_final_vars = data_final.columns.values.tolist()
      X = [i for i in data_final_vars if i != y]

      # woe binning ------
      # Bins are learned on the training rows only, so no label information from
      # the test period leaks into the WOE encoding.
      bins = sc.woebin(df_train, y=y)

      # binning adjustment
      # # adjust breaks interactively
      # breaks_adj = sc.woebin_adj(dt_s, "creditability", bins)
      # # or specify breaks manually
      breaks_adj = {
          'age': [26, 35, 40, 50, 60],
      }  # the closing brace was missing, which made the whole script a SyntaxError
      bins_adj = sc.woebin(df_train, y=y, breaks_list=breaks_adj)

      # converting train and test into woe values
      train_woe = sc.woebin_ply(df_train, bins_adj)
      test_woe = sc.woebin_ply(df_test, bins_adj)

      ytrain = train_woe.loc[:, y]
      xtrain = train_woe.loc[:, train_woe.columns != y]
      ytest = test_woe.loc[:, y]
      xtest = test_woe.loc[:, test_woe.columns != y]

      print("shape of xtrain: {}".format(xtrain.shape))
      print("shape of xtest: {}".format(xtest.shape))

      XGB = XGBClassifier(n_estimators=100, n_jobs=6, verbose=1)

      # Train and evaluate. The fit call had been reduced to a comment, so the
      # classifier was being scored without ever having been trained. The
      # eval_metric is left at the library default ('rmse' is a regression metric).
      XGB.fit(xtrain, ytrain, eval_set=[(xtrain, ytrain), (xtest, ytest)])

      # Out-of-time ROC AUC, using the predicted probability of the positive class.
      probs = XGB.predict_proba(xtest)
      roc = roc_auc_score(y_true=ytest, y_score=probs[:, 1])
      print("XGBoost roc score: {}".format(roc))

      fpr, tpr, thresholds = roc_curve(ytest, probs[:, 1])
      plt.plot(fpr, tpr, label='XGBoost Classifier (area = %0.2f)' % roc)
      plt.plot([0, 1], [0, 1], 'r--')  # chance-level diagonal
      plt.xlim([0.0, 1.0])
      plt.ylim([0.0, 1.05])
      plt.xlabel('False Positive Rate')
      plt.ylabel('True Positive Rate')
      plt.title('Receiver operating characteristic')
      plt.legend(loc="lower right")

      # random_state only has an effect when shuffling is enabled; recent
      # scikit-learn versions reject random_state combined with shuffle=False.
      kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=7)
      modelCV = XGB
      scoring = 'accuracy'
      results = cross_val_score(modelCV, xtrain, ytrain, cv=kfold, scoring=scoring)
      print("10-fold cross validation average accuracy: {}".format(results.mean()))

      # score ------
      # NOTE(review): scorecardpy documents sc.scorecard() with a fitted
      # LogisticRegression; confirm that it accepts an XGBClassifier here.
      card = sc.scorecard(bins_adj, XGB, xtrain.columns)
      # credit score
      train_score = sc.scorecard_ply(df_train, card, print_step=0)
      test_score = sc.scorecard_ply(df_test, card, print_step=0)

      # psi
      # A stray trailing comma used to turn `score` into a 1-tuple, and `label`
      # referenced the undefined names y_train / y_test.
      score = {'train': train_score, 'test': test_score}
      label = {'train': ytrain, 'test': ytest}

      share|improve this question


      I used XGBoost for scoring creditworthiness. At first I thought I could use predict_proba for scoring, but then I saw that there is a module, scorecardpy, based on WOE (weight of evidence) for calculating credit scores. I tried to use it with my XGBoost model as in an example, but my ROC AUC fell to 0.5 and I don't see what I am doing wrong. Thanks for your help.

      # End-to-end credit-scoring script: out-of-time train/test split, WOE binning
      # with scorecardpy, an XGBoost classifier trained on the WOE features,
      # ROC / cross-validation evaluation, and scorecard generation.
      # NOTE: `pd`, `sc` (scorecardpy) and `plt` (matplotlib.pyplot) are not imported
      # in this snippet and are assumed to be in scope already.
      from sklearn import model_selection
      from sklearn.metrics import roc_auc_score
      from sklearn.metrics import roc_curve
      from sklearn.model_selection import cross_val_score
      from xgboost import XGBClassifier

      data = pd.read_csv('data.csv')

      # Rows dated before 2018-04-01 form the training set, the rest the test set.
      train_index = data['date'] < '2018-04-01'
      test_index = data['date'] >= '2018-04-01'

      data_final = data.drop('date', axis=1)

      df_train = data_final[train_index]
      df_test = data_final[test_index]

      # Name of the target column (previously an undefined `y` -> NameError).
      y = "label"
      data_final_vars = data_final.columns.values.tolist()
      X = [i for i in data_final_vars if i != y]

      # woe binning ------
      # Bins are learned on the training rows only, so no label information from
      # the test period leaks into the WOE encoding.
      bins = sc.woebin(df_train, y=y)

      # binning adjustment
      # # adjust breaks interactively
      # breaks_adj = sc.woebin_adj(dt_s, "creditability", bins)
      # # or specify breaks manually
      breaks_adj = {
          'age': [26, 35, 40, 50, 60],
      }  # the closing brace was missing, which made the whole script a SyntaxError
      bins_adj = sc.woebin(df_train, y=y, breaks_list=breaks_adj)

      # converting train and test into woe values
      train_woe = sc.woebin_ply(df_train, bins_adj)
      test_woe = sc.woebin_ply(df_test, bins_adj)

      ytrain = train_woe.loc[:, y]
      xtrain = train_woe.loc[:, train_woe.columns != y]
      ytest = test_woe.loc[:, y]
      xtest = test_woe.loc[:, test_woe.columns != y]

      print("shape of xtrain: {}".format(xtrain.shape))
      print("shape of xtest: {}".format(xtest.shape))

      XGB = XGBClassifier(n_estimators=100, n_jobs=6, verbose=1)

      # Train and evaluate. The fit call had been reduced to a comment, so the
      # classifier was being scored without ever having been trained. The
      # eval_metric is left at the library default ('rmse' is a regression metric).
      XGB.fit(xtrain, ytrain, eval_set=[(xtrain, ytrain), (xtest, ytest)])

      # Out-of-time ROC AUC, using the predicted probability of the positive class.
      probs = XGB.predict_proba(xtest)
      roc = roc_auc_score(y_true=ytest, y_score=probs[:, 1])
      print("XGBoost roc score: {}".format(roc))

      fpr, tpr, thresholds = roc_curve(ytest, probs[:, 1])
      plt.plot(fpr, tpr, label='XGBoost Classifier (area = %0.2f)' % roc)
      plt.plot([0, 1], [0, 1], 'r--')  # chance-level diagonal
      plt.xlim([0.0, 1.0])
      plt.ylim([0.0, 1.05])
      plt.xlabel('False Positive Rate')
      plt.ylabel('True Positive Rate')
      plt.title('Receiver operating characteristic')
      plt.legend(loc="lower right")

      # random_state only has an effect when shuffling is enabled; recent
      # scikit-learn versions reject random_state combined with shuffle=False.
      kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=7)
      modelCV = XGB
      scoring = 'accuracy'
      results = cross_val_score(modelCV, xtrain, ytrain, cv=kfold, scoring=scoring)
      print("10-fold cross validation average accuracy: {}".format(results.mean()))

      # score ------
      # NOTE(review): scorecardpy documents sc.scorecard() with a fitted
      # LogisticRegression; confirm that it accepts an XGBClassifier here.
      card = sc.scorecard(bins_adj, XGB, xtrain.columns)
      # credit score
      train_score = sc.scorecard_ply(df_train, card, print_step=0)
      test_score = sc.scorecard_ply(df_test, card, print_step=0)

      # psi
      # A stray trailing comma used to turn `score` into a 1-tuple, and `label`
      # referenced the undefined names y_train / y_test.
      score = {'train': train_score, 'test': test_score}
      label = {'train': ytrain, 'test': ytest}

      machine-learning python decision-trees xgboost scoring

      share|improve this question

      share|improve this question

      share|improve this question

      share|improve this question

      edited Sep 26 '18 at 14:55

      Minila S

      asked Sep 26 '18 at 14:06

      Minila SMinila S



          1 Answer






          It happened to me as well, although I used a logistic regression model not XGBoost.

          The problem is not about which model to choose; rather, there is something wrong with the "woebin_ply" function. I didn't read the source code, but the WOE value I'm getting doesn't match the value for the corresponding bin/input value. (You can double-check your results as well.)

          After manually matching each input value to its bin and the corresponding WOE value, my scorecard model performs at a similar level to my benchmark models.

          Hope this helps!

          share|improve this answer

          New contributor

          lsbillups is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
          Check out our Code of Conduct.


            Your Answer

            StackExchange.ifUsing("editor", function () {
            return StackExchange.using("mathjaxEditing", function () {
            StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
            StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["$", "$"], ["\\(","\\)"]]);
            }, "mathjax-editing");

            StackExchange.ready(function() {
            var channelOptions = {
            tags: "".split(" "),
            id: "557"
            initTagRenderer("".split(" "), "".split(" "), channelOptions);

            StackExchange.using("externalEditor", function() {
            // Have to fire editor after snippets, if snippets enabled
            if (StackExchange.settings.snippets.snippetsEnabled) {
            StackExchange.using("snippets", function() {
            else {

            function createEditor() {
            heartbeatType: 'answer',
            autoActivateHeartbeat: false,
            convertImagesToLinks: false,
            noModals: true,
            showLowRepImageUploadWarning: true,
            reputationToPostImages: null,
            bindNavPrevention: true,
            postfix: "",
            imageUploader: {
            brandingHtml: "Powered by u003ca class="icon-imgur-white" href=""u003eu003c/au003e",
            contentPolicyHtml: "User contributions licensed under u003ca href=""u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href=""u003e(content policy)u003c/au003e",
            allowUrls: true
            onDemand: true,
            discardSelector: ".discard-answer"


            draft saved

            draft discarded

            function () {
            StackExchange.openid.initPostLogin('.new-post-login', '', 'question_page');

            Post as a guest

            Required, but never shown

            1 Answer




            1 Answer












            It happened to me as well, although I used a logistic regression model not XGBoost.

            The problem is not about which model to choose; rather, there is something wrong with the "woebin_ply" function. I didn't read the source code, but the WOE value I'm getting doesn't match the value for the corresponding bin/input value. (You can double-check your results as well.)

            After manually matching each input value to its bin and the corresponding WOE value, my scorecard model performs at a similar level to my benchmark models.

            Hope this helps!

            share|improve this answer

            New contributor

            lsbillups is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
            Check out our Code of Conduct.




              It happened to me as well, although I used a logistic regression model not XGBoost.

              The problem is not about which model to choose; rather, there is something wrong with the "woebin_ply" function. I didn't read the source code, but the WOE value I'm getting doesn't match the value for the corresponding bin/input value. (You can double-check your results as well.)

              After manually matching each input value to its bin and the corresponding WOE value, my scorecard model performs at a similar level to my benchmark models.

              Hope this helps!

              share|improve this answer

              New contributor

              lsbillups is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
              Check out our Code of Conduct.






                It happened to me as well, although I used a logistic regression model not XGBoost.

                The problem is not about which model to choose; rather, there is something wrong with the "woebin_ply" function. I didn't read the source code, but the WOE value I'm getting doesn't match the value for the corresponding bin/input value. (You can double-check your results as well.)

                After manually matching each input value to its bin and the corresponding WOE value, my scorecard model performs at a similar level to my benchmark models.

                Hope this helps!

                share|improve this answer

                New contributor

                lsbillups is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
                Check out our Code of Conduct.


                It happened to me as well, although I used a logistic regression model not XGBoost.

                The problem is not about which model to choose; rather, there is something wrong with the "woebin_ply" function. I didn't read the source code, but the WOE value I'm getting doesn't match the value for the corresponding bin/input value. (You can double-check your results as well.)

                After manually matching each input value to its bin and the corresponding WOE value, my scorecard model performs at a similar level to my benchmark models.

                Hope this helps!

                share|improve this answer

                New contributor

                lsbillups is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
                Check out our Code of Conduct.

                share|improve this answer

                share|improve this answer

                New contributor

                lsbillups is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
                Check out our Code of Conduct.

                answered 4 hours ago




                New contributor

                lsbillups is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
                Check out our Code of Conduct.

                New contributor

                lsbillups is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
                Check out our Code of Conduct.

                lsbillups is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
                Check out our Code of Conduct.

                    draft saved

                    draft discarded

                    Thanks for contributing an answer to Data Science Stack Exchange!

                    • Please be sure to answer the question. Provide details and share your research!

                    But avoid

                    • Asking for help, clarification, or responding to other answers.

                    • Making statements based on opinion; back them up with references or personal experience.

                    Use MathJax to format equations. MathJax reference.

                    To learn more, see our tips on writing great answers.

                    draft saved

                    draft discarded

                    function () {
                    StackExchange.openid.initPostLogin('.new-post-login', '', 'question_page');

                    Post as a guest

                    Required, but never shown

                    Required, but never shown

                    Required, but never shown

                    Required, but never shown

                    Required, but never shown

                    Required, but never shown

                    Required, but never shown

                    Required, but never shown

                    Required, but never shown

                    Popular posts from this blog

                    Ponta tanko

                    Tantalo (mitologio)

                    Erzsébet Schaár