{ "cells": [ { "cell_type": "code", "execution_count": 6, "id": "a40fae48", "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [
 "%matplotlib agg\n",
 "# Setup cell: imports, project path, and the demo DataFrame `dat` used by later cells.\n",
 "# Imports are grouped: standard library, third-party, then the local project package.\n",
 "import random\n",
 "import sys\n",
 "\n",
 "import matplotlib.pyplot as plt\n",
 "import scorecardpy as sc\n",
 "\n",
 "# NOTE(review): absolute path to the project checkout; adjust if the repo lives elsewhere.\n",
 "# It must be appended before easy_ml is imported.\n",
 "sys.path.append(\"/root/project\")\n",
 "from easy_ml import DataSplitEntity, Pipeline\n",
 "\n",
 "# Fixed seed so the synthetic noise column below is identical on every run.\n",
 "SEED = 42\n",
 "\n",
 "# Load the demo data\n",
 "dat = sc.germancredit()\n",
 "# Replace every dot in the column names with an underscore.\n",
 "dat_columns = [c.replace(\".\", \"_\") for c in dat.columns]\n",
 "dat.columns = dat_columns\n",
 "# Encode the target as 1 for bad and 0 otherwise.\n",
 "dat[\"creditability\"] = dat[\"creditability\"].apply(lambda x: 1 if x == \"bad\" else 0)\n",
 "\n",
 "# Disabled toggle: scaled copies of credit_amount.\n",
 "# dat[\"credit_amount_corr1\"] = dat[\"credit_amount\"]*2\n",
 "# dat[\"credit_amount_corr2\"] = dat[\"credit_amount\"]*3\n",
 "\n",
 "# Pure-noise feature. A dedicated Random instance keeps the column reproducible\n",
 "# without altering the global random state that other code may depend on.\n",
 "rng = random.Random(SEED)\n",
 "dat[\"random\"] = [rng.random() for _ in range(len(dat))]\n",
 "\n",
 "# Disabled toggle: put a missing-marker string into the first duration_in_month value.\n",
 "# duration_in_month = list(dat[\"duration_in_month\"])\n",
 "# duration_in_month[0] = \"missing\"\n",
 "# dat[\"duration_in_month\"] = duration_in_month\n",
 "\n",
 "# Disabled toggle: put a missing-marker string and a None into the first two purpose values.\n",
 "# purpose = list(dat[\"purpose\"])\n",
 "# purpose[0] = \"missing\"\n",
 "# purpose[1] = None\n",
 "# dat[\"purpose\"] = purpose\n"
] }, { "cell_type": "code", "execution_count": 7, "id": "261f1f07", "metadata": { "code_folding": [], "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "项目路径:【./cache/train/demo】\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 8/8 [00:23<00:00, 2.89s/it]\n" ] }, { "data": { "text/html": [ "

样本分布

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
样本样本数样本占比坏样本数坏样本比例
0训练集70970.90%21129.76%
1测试集29129.10%8930.58%
2合计1000100%30030.00%
\n", "
" ], "text/plain": [ " 样本 样本数 样本占比 坏样本数 坏样本比例\n", "0 训练集 709 70.90% 211 29.76%\n", "1 测试集 291 29.10% 89 30.58%\n", "2 合计 1000 100% 300 30.00%" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

变量iv

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
变量ivpsivif释义
0status_of_existing_checking_account0.6290.0061.114-
6credit_history0.2720.0161.057借贷历史
5duration_in_month0.2700.0391.034-
4purpose0.1520.0131.029-
1savings_account_and_bonds0.1440.0151.064-
3age_in_years0.1050.0141.065年龄
2present_employment_since0.1040.0071.057-
\n", "
" ], "text/plain": [ " 变量 iv psi vif 释义\n", "0 status_of_existing_checking_account 0.629 0.006 1.114 -\n", "6 credit_history 0.272 0.016 1.057 借贷历史\n", "5 duration_in_month 0.270 0.039 1.034 -\n", "4 purpose 0.152 0.013 1.029 -\n", "1 savings_account_and_bonds 0.144 0.015 1.064 -\n", "3 age_in_years 0.105 0.014 1.065 年龄\n", "2 present_employment_since 0.104 0.007 1.057 -" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

变量趋势

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
训练集
测试集
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "变量切分点:\n", "{\n", " \"status_of_existing_checking_account\": [\n", " \"... < 0 DM%,%0 <= ... < 200 DM\",\n", " \"... >= 200 DM / salary assignments for at least 1 year\",\n", " \"no checking account\"\n", " ],\n", " \"credit_history\": [\n", " \"no credits taken/ all credits paid back duly%,%all credits at this bank paid back duly\",\n", " \"existing credits paid back duly till now\",\n", " \"delay in paying off in the past\",\n", " \"critical account/ other credits existing (not at this bank)\"\n", " ],\n", " \"savings_account_and_bonds\": [\n", " \"... < 100 DM%,%100 <= ... < 500 DM\",\n", " \"500 <= ... < 1000 DM%,%... >= 1000 DM\",\n", " \"unknown/ no savings account\"\n", " ],\n", " \"duration_in_month\": [\n", " 8,\n", " 15,\n", " 25,\n", " 35\n", " ],\n", " \"purpose\": [\n", " \"retraining%,%car (used)\",\n", " \"radio/television\",\n", " \"furniture/equipment%,%business%,%repairs\",\n", " \"domestic appliances%,%education%,%car (new)%,%others\"\n", " ],\n", " \"age_in_years\": [\n", " 25,\n", " 30,\n", " 35\n", " ],\n", " \"present_employment_since\": [\n", " \"unemployed%,%... < 1 year\",\n", " \"1 <= ... < 4 years\",\n", " \"4 <= ... < 7 years\",\n", " \"... >= 7 years\"\n", " ]\n", "}\n", "选中变量不同分箱数下变量的推荐切分点:\n", "-----【duration_in_month】不同分箱数下变量的推荐切分点-----\n", "[35]\n", "[8, 35]\n", "[8, 15, 35]\n", "[8, 15, 25, 35]\n", "[8, 10, 15, 35]\n" ] }, { "data": { "text/html": [ "
训练集
测试集
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "-----【age_in_years】不同分箱数下变量的推荐切分点-----\n", "[30]\n", "[30, 35]\n", "[25, 30, 35]\n", "[20, 30, 60]\n", "[20, 25, 30, 35]\n", "[20, 30, 35, 60]\n" ] }, { "data": { "text/html": [ "
训练集
测试集
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

快速筛选过程

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "剔除train_iv小于阈值\n", "筛选前变量数量:21\n", "['status_of_existing_checking_account', 'duration_in_month', 'credit_history', 'purpose', 'credit_amount', 'savings_account_and_bonds', 'present_employment_since', 'installment_rate_in_percentage_of_disposable_income', 'personal_status_and_sex', 'other_debtors_or_guarantors', 'present_residence_since', 'property', 'age_in_years', 'other_installment_plans', 'housing', 'number_of_existing_credits_at_this_bank', 'job', 'number_of_people_being_liable_to_provide_maintenance_for', 'telephone', 'foreign_worker', 'random']\n", "快速筛选剔除变量数量:13\n", "housing 因为train_iv【0.042】小于阈值被剔除\n", "other_debtors_or_guarantors 因为train_iv【0.017】小于阈值被剔除\n", "number_of_people_being_liable_to_provide_maintenance_for 因为train_iv【0.0】小于阈值被剔除\n", "present_residence_since 因为train_iv【0.001】小于阈值被剔除\n", "personal_status_and_sex 因为train_iv【0.01】小于阈值被剔除\n", "property 因为train_iv【0.08】小于阈值被剔除\n", "job 因为train_iv【0.021】小于阈值被剔除\n", "other_installment_plans 因为train_iv【0.064】小于阈值被剔除\n", "foreign_worker 因为train_iv【0.0】小于阈值被剔除\n", "telephone 因为train_iv【0.001】小于阈值被剔除\n", "random 因为train_iv【0.042】小于阈值被剔除\n", "number_of_existing_credits_at_this_bank 因为train_iv【0.003】小于阈值被剔除\n", "installment_rate_in_percentage_of_disposable_income 因为train_iv【0.033】小于阈值被剔除\n", "\n" ] }, { "data": { "text/html": [ "

数值变量筛选过程

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "\n" ] }, { "data": { "text/html": [ "

相关性筛选过程

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "duration_in_month: 【credit_amount_iv0.299_corr0.495】 \n", "\n", "-----相关性筛选保留的【duration_in_month】-----\n", "-----【duration_in_month】不同分箱数下变量的推荐切分点-----\n", "[35]\n", "[8, 35]\n", "[8, 15, 35]\n", "[8, 15, 25, 35]\n", "[8, 10, 15, 35]\n" ] }, { "data": { "text/html": [ "
训练集
测试集
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "-----【credit_amount】不同分箱数下变量的推荐切分点-----\n", "[4000.0]\n", "[4000.0, 9000.0]\n", "[3000.0, 6000.0, 9000.0]\n", "[3000.0, 4000.0, 9000.0]\n", "[2000.0, 3000.0, 4000.0, 9000.0]\n", "[3000.0, 4000.0, 5000.0, 9000.0]\n" ] }, { "data": { "text/html": [ "
训练集
测试集
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

vif筛选过程

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "\n" ] }, { "data": { "text/html": [ "

ivtop筛选过程

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "iv = train_iv + test_iv\n", "\n" ] }, { "data": { "text/html": [ "

模型结果

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
样本集AUCKS
0训练集0.80360.4718
1测试集0.79480.4691
\n", "
" ], "text/plain": [ " 样本集 AUC KS\n", "0 训练集 0.8036 0.4718\n", "1 测试集 0.7948 0.4691" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "加入规则后:\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
样本集AUCKS
0训练集0.80650.4711
1测试集0.80230.4918
\n", "
" ], "text/plain": [ " 样本集 AUC KS\n", "0 训练集 0.8065 0.4711\n", "1 测试集 0.8023 0.4918" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

模型变量系数

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ " Generalized Linear Model Regression Results \n", "==============================================================================\n", "Dep. Variable: creditability No. Observations: 709\n", "Model: GLM Df Residuals: 702\n", "Model Family: Binomial Df Model: 6\n", "Link Function: logit Scale: 1.0000\n", "Method: IRLS Log-Likelihood: -384.55\n", "Date: Wed, 26 Feb 2025 Deviance: 769.09\n", "Time: 11:28:17 Pearson chi2: 689.\n", "No. Iterations: 5 \n", "Covariance Type: nonrobust \n", "==============================================================================\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
varcoefstd errzP>|z|[0.0250.975]
0present_employment_since_woe0.84610.2822.9970.0030.2931.399
1savings_account_and_bonds_woe0.56190.2302.4430.0150.1111.013
2status_of_existing_checking_account_woe0.76640.1077.1750.0000.5570.976
3age_in_years_woe0.61600.2722.2640.0240.0831.149
4purpose_woe1.03630.2264.5880.0000.5941.479
5duration_in_month_woe1.02010.1805.6670.0000.6671.373
6credit_history_woe0.82730.1874.4270.0000.4611.194
\n", "
" ], "text/plain": [ " var coef std err z \\\n", "0 present_employment_since_woe 0.8461 0.282 2.997 \n", "1 savings_account_and_bonds_woe 0.5619 0.230 2.443 \n", "2 status_of_existing_checking_account_woe 0.7664 0.107 7.175 \n", "3 age_in_years_woe 0.6160 0.272 2.264 \n", "4 purpose_woe 1.0363 0.226 4.588 \n", "5 duration_in_month_woe 1.0201 0.180 5.667 \n", "6 credit_history_woe 0.8273 0.187 4.427 \n", "\n", " P>|z| [0.025 0.975] \n", "0 0.003 0.293 1.399 \n", "1 0.015 0.111 1.013 \n", "2 0.000 0.557 0.976 \n", "3 0.024 0.083 1.149 \n", "4 0.000 0.594 1.479 \n", "5 0.000 0.667 1.373 \n", "6 0.000 0.461 1.194 " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

模型psi

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MODEL_SCORE_BINpsi训练样本数测试样本数训练样本数比例测试样本数比例
0(-inf, 436.0]0.00473360.1030.124
1(436.0, 476.0]0.00372250.1020.086
2(476.0, 504.4]0.00068270.0960.093
3(504.4, 531.0]0.00074290.1040.100
4(531.0, 559.0]0.00269330.0970.113
5(559.0, 591.0]0.00070300.0990.103
6(591.0, 618.0]0.00472240.1020.082
7(618.0, 646.8]0.00069270.0970.093
8(646.8, 681.0]0.01072210.1020.072
9(681.0, inf]0.01170390.0990.134
\n", "
" ], "text/plain": [ " MODEL_SCORE_BIN psi 训练样本数 测试样本数 训练样本数比例 测试样本数比例\n", "0 (-inf, 436.0] 0.004 73 36 0.103 0.124\n", "1 (436.0, 476.0] 0.003 72 25 0.102 0.086\n", "2 (476.0, 504.4] 0.000 68 27 0.096 0.093\n", "3 (504.4, 531.0] 0.000 74 29 0.104 0.100\n", "4 (531.0, 559.0] 0.002 69 33 0.097 0.113\n", "5 (559.0, 591.0] 0.000 70 30 0.099 0.103\n", "6 (591.0, 618.0] 0.004 72 24 0.102 0.082\n", "7 (618.0, 646.8] 0.000 69 27 0.097 0.093\n", "8 (646.8, 681.0] 0.010 72 21 0.102 0.072\n", "9 (681.0, inf] 0.011 70 39 0.099 0.134" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "模型psi: 0.034\n", "加入规则后:\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MODEL_SCORE_BINpsi训练样本数测试样本数训练样本数比例测试样本数比例
0(-inf, 433.0]0.00172330.1020.113
1(433.0, 474.0]0.00071280.1000.096
2(474.0, 502.4]0.00070300.0990.103
3(502.4, 524.2]0.00471240.1000.082
4(524.2, 554.0]0.00172320.1020.110
5(554.0, 585.0]0.00070290.0990.100
6(585.0, 615.0]0.00171270.1000.093
7(615.0, 646.0]0.00071300.1000.103
8(646.0, 681.0]0.00971210.1000.072
9(681.0, inf]0.00770370.0990.127
\n", "
" ], "text/plain": [ " MODEL_SCORE_BIN psi 训练样本数 测试样本数 训练样本数比例 测试样本数比例\n", "0 (-inf, 433.0] 0.001 72 33 0.102 0.113\n", "1 (433.0, 474.0] 0.000 71 28 0.100 0.096\n", "2 (474.0, 502.4] 0.000 70 30 0.099 0.103\n", "3 (502.4, 524.2] 0.004 71 24 0.100 0.082\n", "4 (524.2, 554.0] 0.001 72 32 0.102 0.110\n", "5 (554.0, 585.0] 0.000 70 29 0.099 0.100\n", "6 (585.0, 615.0] 0.001 71 27 0.100 0.093\n", "7 (615.0, 646.0] 0.000 71 30 0.100 0.103\n", "8 (646.0, 681.0] 0.009 71 21 0.100 0.072\n", "9 (681.0, inf] 0.007 70 37 0.099 0.127" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "模型psi: 0.023\n" ] }, { "data": { "text/html": [ "

分数分箱

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "训练集-分数分箱\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MODEL_SCORE_BIN样本数坏样本数好样本数坏样本比例样本数比例总坏样本数总好样本数平均坏样本率累计坏样本数累计好样本数累计样本数累计坏样本比例累计好样本比例KSLIFT
0(-inf, 436.0]7350230.6850.1032114980.2985023730.2370.0460.1912.298
1(436.0, 476.0]7244280.6110.1022114980.29894511450.4450.1020.3432.175
2(476.0, 504.4]6831370.4560.0962114980.298125882130.5920.1770.4151.969
3(504.4, 531.0]7424500.3240.1042114980.2981491382870.7060.2770.4291.742
4(531.0, 559.0]6926430.3770.0972114980.2981751813560.8290.3630.4661.650
5(559.0, 591.0]7014560.2000.0992114980.2981892374260.8960.4760.4201.489
6(591.0, 618.0]729630.1250.1022114980.2981983004980.9380.6020.3361.334
7(618.0, 646.8]6910590.1450.0972114980.2982083595670.9860.7210.2651.231
8(646.8, 681.0]722700.0280.1022114980.2982104296390.9950.8610.1341.103
9(681.0, inf]701690.0140.0992114980.2982114987091.0001.0000.0000.999
\n", "
" ], "text/plain": [ " MODEL_SCORE_BIN 样本数 坏样本数 好样本数 坏样本比例 样本数比例 总坏样本数 总好样本数 平均坏样本率 \\\n", "0 (-inf, 436.0] 73 50 23 0.685 0.103 211 498 0.298 \n", "1 (436.0, 476.0] 72 44 28 0.611 0.102 211 498 0.298 \n", "2 (476.0, 504.4] 68 31 37 0.456 0.096 211 498 0.298 \n", "3 (504.4, 531.0] 74 24 50 0.324 0.104 211 498 0.298 \n", "4 (531.0, 559.0] 69 26 43 0.377 0.097 211 498 0.298 \n", "5 (559.0, 591.0] 70 14 56 0.200 0.099 211 498 0.298 \n", "6 (591.0, 618.0] 72 9 63 0.125 0.102 211 498 0.298 \n", "7 (618.0, 646.8] 69 10 59 0.145 0.097 211 498 0.298 \n", "8 (646.8, 681.0] 72 2 70 0.028 0.102 211 498 0.298 \n", "9 (681.0, inf] 70 1 69 0.014 0.099 211 498 0.298 \n", "\n", " 累计坏样本数 累计好样本数 累计样本数 累计坏样本比例 累计好样本比例 KS LIFT \n", "0 50 23 73 0.237 0.046 0.191 2.298 \n", "1 94 51 145 0.445 0.102 0.343 2.175 \n", "2 125 88 213 0.592 0.177 0.415 1.969 \n", "3 149 138 287 0.706 0.277 0.429 1.742 \n", "4 175 181 356 0.829 0.363 0.466 1.650 \n", "5 189 237 426 0.896 0.476 0.420 1.489 \n", "6 198 300 498 0.938 0.602 0.336 1.334 \n", "7 208 359 567 0.986 0.721 0.265 1.231 \n", "8 210 429 639 0.995 0.861 0.134 1.103 \n", "9 211 498 709 1.000 1.000 0.000 0.999 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "加入规则后:\n", "训练集-分数分箱\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MODEL_SCORE_BIN样本数坏样本数好样本数坏样本比例样本数比例总坏样本数总好样本数平均坏样本率累计坏样本数累计好样本数累计样本数累计坏样本比例累计好样本比例KSLIFT
0(-inf, 433.0]7252200.7220.1022114980.2985220720.2460.0400.2062.424
1(433.0, 474.0]7142290.5920.1002114980.29894491430.4450.0980.3472.206
2(474.0, 502.4]7031390.4430.0992114980.298125882130.5920.1770.4151.969
3(502.4, 524.2]7126450.3660.1002114980.2981511332840.7160.2670.4491.784
4(524.2, 554.0]7222500.3060.1022114980.2981731833560.8200.3670.4531.631
5(554.0, 585.0]7018520.2570.0992114980.2981912354260.9050.4720.4331.505
6(585.0, 615.0]718630.1130.1002114980.2981992984970.9430.5980.3451.344
7(615.0, 646.0]719620.1270.1002114980.2982083605680.9860.7230.2631.229
8(646.0, 681.0]712690.0280.1002114980.2982104296390.9950.8610.1341.103
9(681.0, inf]701690.0140.0992114980.2982114987091.0001.0000.0000.999
\n", "
" ], "text/plain": [ " MODEL_SCORE_BIN 样本数 坏样本数 好样本数 坏样本比例 样本数比例 总坏样本数 总好样本数 平均坏样本率 \\\n", "0 (-inf, 433.0] 72 52 20 0.722 0.102 211 498 0.298 \n", "1 (433.0, 474.0] 71 42 29 0.592 0.100 211 498 0.298 \n", "2 (474.0, 502.4] 70 31 39 0.443 0.099 211 498 0.298 \n", "3 (502.4, 524.2] 71 26 45 0.366 0.100 211 498 0.298 \n", "4 (524.2, 554.0] 72 22 50 0.306 0.102 211 498 0.298 \n", "5 (554.0, 585.0] 70 18 52 0.257 0.099 211 498 0.298 \n", "6 (585.0, 615.0] 71 8 63 0.113 0.100 211 498 0.298 \n", "7 (615.0, 646.0] 71 9 62 0.127 0.100 211 498 0.298 \n", "8 (646.0, 681.0] 71 2 69 0.028 0.100 211 498 0.298 \n", "9 (681.0, inf] 70 1 69 0.014 0.099 211 498 0.298 \n", "\n", " 累计坏样本数 累计好样本数 累计样本数 累计坏样本比例 累计好样本比例 KS LIFT \n", "0 52 20 72 0.246 0.040 0.206 2.424 \n", "1 94 49 143 0.445 0.098 0.347 2.206 \n", "2 125 88 213 0.592 0.177 0.415 1.969 \n", "3 151 133 284 0.716 0.267 0.449 1.784 \n", "4 173 183 356 0.820 0.367 0.453 1.631 \n", "5 191 235 426 0.905 0.472 0.433 1.505 \n", "6 199 298 497 0.943 0.598 0.345 1.344 \n", "7 208 360 568 0.986 0.723 0.263 1.229 \n", "8 210 429 639 0.995 0.861 0.134 1.103 \n", "9 211 498 709 1.000 1.000 0.000 0.999 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "测试集-分数分箱\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MODEL_SCORE_BIN样本数坏样本数好样本数坏样本比例样本数比例总坏样本数总好样本数平均坏样本率累计坏样本数累计好样本数累计样本数累计坏样本比例累计好样本比例KSLIFT
0(-inf, 436.0]3624120.6670.124892020.3062412360.2700.0590.2112.179
1(436.0, 476.0]2514110.5600.086892020.3063823610.4270.1140.3132.036
2(476.0, 504.4]2714130.5190.093892020.3065236880.5840.1780.4061.931
3(504.4, 531.0]2910190.3450.100892020.30662551170.6970.2720.4251.732
4(531.0, 559.0]3311220.3330.113892020.30673771500.8200.3810.4391.590
5(559.0, 591.0]306240.2000.103892020.306791011800.8880.5000.3881.434
6(591.0, 618.0]245190.2080.082892020.306841202040.9440.5940.3501.346
7(618.0, 646.8]272250.0740.093892020.306861452310.9660.7180.2481.217
8(646.8, 681.0]212190.0950.072892020.306881642520.9890.8120.1771.141
9(681.0, inf]391380.0260.134892020.306892022911.0001.0000.0000.999
\n", "
" ], "text/plain": [ " MODEL_SCORE_BIN 样本数 坏样本数 好样本数 坏样本比例 样本数比例 总坏样本数 总好样本数 平均坏样本率 \\\n", "0 (-inf, 436.0] 36 24 12 0.667 0.124 89 202 0.306 \n", "1 (436.0, 476.0] 25 14 11 0.560 0.086 89 202 0.306 \n", "2 (476.0, 504.4] 27 14 13 0.519 0.093 89 202 0.306 \n", "3 (504.4, 531.0] 29 10 19 0.345 0.100 89 202 0.306 \n", "4 (531.0, 559.0] 33 11 22 0.333 0.113 89 202 0.306 \n", "5 (559.0, 591.0] 30 6 24 0.200 0.103 89 202 0.306 \n", "6 (591.0, 618.0] 24 5 19 0.208 0.082 89 202 0.306 \n", "7 (618.0, 646.8] 27 2 25 0.074 0.093 89 202 0.306 \n", "8 (646.8, 681.0] 21 2 19 0.095 0.072 89 202 0.306 \n", "9 (681.0, inf] 39 1 38 0.026 0.134 89 202 0.306 \n", "\n", " 累计坏样本数 累计好样本数 累计样本数 累计坏样本比例 累计好样本比例 KS LIFT \n", "0 24 12 36 0.270 0.059 0.211 2.179 \n", "1 38 23 61 0.427 0.114 0.313 2.036 \n", "2 52 36 88 0.584 0.178 0.406 1.931 \n", "3 62 55 117 0.697 0.272 0.425 1.732 \n", "4 73 77 150 0.820 0.381 0.439 1.590 \n", "5 79 101 180 0.888 0.500 0.388 1.434 \n", "6 84 120 204 0.944 0.594 0.350 1.346 \n", "7 86 145 231 0.966 0.718 0.248 1.217 \n", "8 88 164 252 0.989 0.812 0.177 1.141 \n", "9 89 202 291 1.000 1.000 0.000 0.999 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "加入规则后:\n", "测试集-分数分箱\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MODEL_SCORE_BIN样本数坏样本数好样本数坏样本比例样本数比例总坏样本数总好样本数平均坏样本率累计坏样本数累计好样本数累计样本数累计坏样本比例累计好样本比例KSLIFT
0(-inf, 433.0]3323100.6970.113892020.3062310330.2580.0500.2082.278
1(433.0, 474.0]2816120.5710.096892020.3063922610.4380.1090.3292.089
2(474.0, 502.4]3015150.5000.103892020.3065437910.6070.1830.4241.939
3(502.4, 524.2]2410140.4170.082892020.30664511150.7190.2520.4671.819
4(524.2, 554.0]3210220.3120.110892020.30674731470.8310.3610.4701.645
5(554.0, 585.0]296230.2070.100892020.30680961760.8990.4750.4241.485
6(585.0, 615.0]275220.1850.093892020.306851182030.9550.5840.3711.368
7(615.0, 646.0]301290.0330.103892020.306861472330.9660.7280.2381.206
8(646.0, 681.0]212190.0950.072892020.306881662540.9890.8220.1671.132
9(681.0, inf]371360.0270.127892020.306892022911.0001.0000.0000.999
\n", "
" ], "text/plain": [ " MODEL_SCORE_BIN 样本数 坏样本数 好样本数 坏样本比例 样本数比例 总坏样本数 总好样本数 平均坏样本率 \\\n", "0 (-inf, 433.0] 33 23 10 0.697 0.113 89 202 0.306 \n", "1 (433.0, 474.0] 28 16 12 0.571 0.096 89 202 0.306 \n", "2 (474.0, 502.4] 30 15 15 0.500 0.103 89 202 0.306 \n", "3 (502.4, 524.2] 24 10 14 0.417 0.082 89 202 0.306 \n", "4 (524.2, 554.0] 32 10 22 0.312 0.110 89 202 0.306 \n", "5 (554.0, 585.0] 29 6 23 0.207 0.100 89 202 0.306 \n", "6 (585.0, 615.0] 27 5 22 0.185 0.093 89 202 0.306 \n", "7 (615.0, 646.0] 30 1 29 0.033 0.103 89 202 0.306 \n", "8 (646.0, 681.0] 21 2 19 0.095 0.072 89 202 0.306 \n", "9 (681.0, inf] 37 1 36 0.027 0.127 89 202 0.306 \n", "\n", " 累计坏样本数 累计好样本数 累计样本数 累计坏样本比例 累计好样本比例 KS LIFT \n", "0 23 10 33 0.258 0.050 0.208 2.278 \n", "1 39 22 61 0.438 0.109 0.329 2.089 \n", "2 54 37 91 0.607 0.183 0.424 1.939 \n", "3 64 51 115 0.719 0.252 0.467 1.819 \n", "4 74 73 147 0.831 0.361 0.470 1.645 \n", "5 80 96 176 0.899 0.475 0.424 1.485 \n", "6 85 118 203 0.955 0.584 0.371 1.368 \n", "7 86 147 233 0.966 0.728 0.238 1.206 \n", "8 88 166 254 0.989 0.822 0.167 1.132 \n", "9 89 202 291 1.000 1.000 0.000 0.999 " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

评分卡

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "评分卡不包含规则\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
variablebinpoints
0basepointsNaN538.0
6present_employment_sinceunemployed%,%... < 1 year-30.0
7present_employment_since1 <= ... < 4 years-0.0
8present_employment_since4 <= ... < 7 years28.0
9present_employment_since... >= 7 years13.0
3savings_account_and_bonds... < 100 DM%,%100 <= ... < 500 DM-8.0
4savings_account_and_bonds500 <= ... < 1000 DM%,%... >= 1000 DM38.0
5savings_account_and_bondsunknown/ no savings account19.0
0status_of_existing_checking_account... < 0 DM%,%0 <= ... < 200 DM-34.0
1status_of_existing_checking_account... >= 200 DM / salary assignments for at leas...26.0
2status_of_existing_checking_accountno checking account65.0
10age_in_years3647.0
11age_in_years[-inf,25.0)-20.0
12age_in_years[25.0,30.0)-7.0
13age_in_years[30.0,35.0)-6.0
14age_in_years[35.0,inf)11.0
15purposeretraining%,%car (used)65.0
16purposeradio/television28.0
17purposefurniture/equipment%,%business%,%repairs-10.0
18purposedomestic appliances%,%education%,%car (new)%,%...-28.0
19duration_in_month[-inf,8.0)107.0
20duration_in_month[8.0,15.0)17.0
21duration_in_month[15.0,25.0)-0.0
22duration_in_month[25.0,35.0)-12.0
23duration_in_month[35.0,inf)-57.0
24credit_historyno credits taken/ all credits paid back duly%,...-78.0
25credit_historyexisting credits paid back duly till now-2.0
26credit_historydelay in paying off in the past-9.0
27credit_historycritical account/ other credits existing (not ...40.0
\n", "
" ], "text/plain": [ " variable \\\n", "0 basepoints \n", "6 present_employment_since \n", "7 present_employment_since \n", "8 present_employment_since \n", "9 present_employment_since \n", "3 savings_account_and_bonds \n", "4 savings_account_and_bonds \n", "5 savings_account_and_bonds \n", "0 status_of_existing_checking_account \n", "1 status_of_existing_checking_account \n", "2 status_of_existing_checking_account \n", "10 age_in_years \n", "11 age_in_years \n", "12 age_in_years \n", "13 age_in_years \n", "14 age_in_years \n", "15 purpose \n", "16 purpose \n", "17 purpose \n", "18 purpose \n", "19 duration_in_month \n", "20 duration_in_month \n", "21 duration_in_month \n", "22 duration_in_month \n", "23 duration_in_month \n", "24 credit_history \n", "25 credit_history \n", "26 credit_history \n", "27 credit_history \n", "\n", " bin points \n", "0 NaN 538.0 \n", "6 unemployed%,%... < 1 year -30.0 \n", "7 1 <= ... < 4 years -0.0 \n", "8 4 <= ... < 7 years 28.0 \n", "9 ... >= 7 years 13.0 \n", "3 ... < 100 DM%,%100 <= ... < 500 DM -8.0 \n", "4 500 <= ... < 1000 DM%,%... >= 1000 DM 38.0 \n", "5 unknown/ no savings account 19.0 \n", "0 ... < 0 DM%,%0 <= ... < 200 DM -34.0 \n", "1 ... >= 200 DM / salary assignments for at leas... 26.0 \n", "2 no checking account 65.0 \n", "10 36 47.0 \n", "11 [-inf,25.0) -20.0 \n", "12 [25.0,30.0) -7.0 \n", "13 [30.0,35.0) -6.0 \n", "14 [35.0,inf) 11.0 \n", "15 retraining%,%car (used) 65.0 \n", "16 radio/television 28.0 \n", "17 furniture/equipment%,%business%,%repairs -10.0 \n", "18 domestic appliances%,%education%,%car (new)%,%... -28.0 \n", "19 [-inf,8.0) 107.0 \n", "20 [8.0,15.0) 17.0 \n", "21 [15.0,25.0) -0.0 \n", "22 [25.0,35.0) -12.0 \n", "23 [35.0,inf) -57.0 \n", "24 no credits taken/ all credits paid back duly%,... -78.0 \n", "25 existing credits paid back duly till now -2.0 \n", "26 delay in paying off in the past -9.0 \n", "27 critical account/ other credits existing (not ... 
40.0 " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

压力测试

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
违约率抽样次数样本数好样本数坏样本数平均AUC最大AUC最小AUCAUC标准差95%置信区间AUC平均KS最大KS最小KSKS标准差95%置信区间KS
00.0101089008811890.8022180.8024860.8019570.0001820.8019 - 0.80260.46900.4700.4680.0004470.4681 - 0.4699
10.0711012531164890.8016990.8033850.7989930.0013800.7990 - 0.80440.46200.4630.4590.0013420.4594 - 0.4646
20.13210674585890.8026590.8060410.7988760.0019800.7988 - 0.80650.45350.4700.4490.0057660.4422 - 0.4648
30.19410458369890.8008660.8075270.7948600.0036490.7937 - 0.80800.47360.4880.4640.0078890.4581 - 0.4891
40.25510349260890.8058730.8165510.7942740.0066070.7929 - 0.81880.48630.5030.4790.0061000.4743 - 0.4983
50.31610281192890.8023140.8116220.7930710.0049080.7927 - 0.81190.46730.4820.4480.0113140.4451 - 0.4895
60.37710236147890.7994000.8144920.7806310.0113340.7772 - 0.82160.46500.4900.4450.0121570.4412 - 0.4888
70.43810203114890.8014140.8301790.7771040.0158770.7703 - 0.83250.47130.5220.4050.0278930.4166 - 0.5260
80.4991017889890.8065210.8286830.7741450.0173690.7725 - 0.84060.48220.5280.4050.0367420.4102 - 0.5542
90.5611015869890.8082800.8524670.7546000.0306420.7482 - 0.86830.48180.6050.4220.0536300.3767 - 0.5869
\n", "
" ], "text/plain": [ " 违约率 抽样次数 样本数 好样本数 坏样本数 平均AUC 最大AUC 最小AUC AUC标准差 \\\n", "0 0.010 10 8900 8811 89 0.802218 0.802486 0.801957 0.000182 \n", "1 0.071 10 1253 1164 89 0.801699 0.803385 0.798993 0.001380 \n", "2 0.132 10 674 585 89 0.802659 0.806041 0.798876 0.001980 \n", "3 0.194 10 458 369 89 0.800866 0.807527 0.794860 0.003649 \n", "4 0.255 10 349 260 89 0.805873 0.816551 0.794274 0.006607 \n", "5 0.316 10 281 192 89 0.802314 0.811622 0.793071 0.004908 \n", "6 0.377 10 236 147 89 0.799400 0.814492 0.780631 0.011334 \n", "7 0.438 10 203 114 89 0.801414 0.830179 0.777104 0.015877 \n", "8 0.499 10 178 89 89 0.806521 0.828683 0.774145 0.017369 \n", "9 0.561 10 158 69 89 0.808280 0.852467 0.754600 0.030642 \n", "\n", " 95%置信区间AUC 平均KS 最大KS 最小KS KS标准差 95%置信区间KS \n", "0 0.8019 - 0.8026 0.4690 0.470 0.468 0.000447 0.4681 - 0.4699 \n", "1 0.7990 - 0.8044 0.4620 0.463 0.459 0.001342 0.4594 - 0.4646 \n", "2 0.7988 - 0.8065 0.4535 0.470 0.449 0.005766 0.4422 - 0.4648 \n", "3 0.7937 - 0.8080 0.4736 0.488 0.464 0.007889 0.4581 - 0.4891 \n", "4 0.7929 - 0.8188 0.4863 0.503 0.479 0.006100 0.4743 - 0.4983 \n", "5 0.7927 - 0.8119 0.4673 0.482 0.448 0.011314 0.4451 - 0.4895 \n", "6 0.7772 - 0.8216 0.4650 0.490 0.445 0.012157 0.4412 - 0.4888 \n", "7 0.7703 - 0.8325 0.4713 0.522 0.405 0.027893 0.4166 - 0.5260 \n", "8 0.7725 - 0.8406 0.4822 0.528 0.405 0.036742 0.4102 - 0.5542 \n", "9 0.7482 - 0.8683 0.4818 0.605 0.422 0.053630 0.3767 - 0.5869 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "模型报告文件储存路径:./cache/train/demo/模型报告.docx\n", "mlcfg save to【./cache/train/demo/mlcfg.json】success. \n", "feature save to【./cache/train/demo/feature.csv】success. \n", "model save to【./cache/train/demo/model.pkl】success. \n", "model save to【./cache/train/demo/card.csv】success. 
\n" ] } ], "source": [
    "# Positional split: the first 709 rows are the training set, the remaining rows the test set\n",
    "train_data = dat.iloc[:709]\n",
    "test_data = dat.iloc[709:]\n",
    "data = DataSplitEntity(train_data=train_data, test_data=test_data)\n",
    "\n",
    "# Feature-processing configuration\n",
    "cfg = {\n",
    "    # Project name; affects where the data is stored\n",
    "    \"project_name\": \"demo\",\n",
    "    # Print output when running under Jupyter\n",
    "    \"jupyter_print\": True,\n",
    "    # Print the details of the recommended binning\n",
    "    \"bin_detail_print\": True,\n",
    "    # Whether to enable coarse binning\n",
    "    \"format_bin\": True,\n",
    "    # Sampling rate for the split-point search; without coarse binning keep it at 0.2 or below\n",
    "    \"bin_sample_rate\": 0.1,\n",
    "    # Keep the N variables with the largest IV\n",
    "    \"max_feature_num\": 10,\n",
    "    # Allowed number of monotonicity changes\n",
    "    \"monto_shift_threshold\": 1,\n",
    "    # IV threshold (the IV measured on the training set)\n",
    "    \"iv_threshold\": 0.1,\n",
    "    # Correlation threshold after WOE encoding\n",
    "    \"corr_threshold\": 0.4,\n",
    "    # PSI threshold after binning\n",
    "    \"psi_threshold\": 0.2,\n",
    "    # VIF threshold after WOE encoding\n",
    "    \"vif_threshold\": 10,\n",
    "    # Stress test\n",
    "    \"stress_test\": True,\n",
    "    # Number of sampling rounds for the stress test\n",
    "    \"stress_sample_times\": 10,\n",
    "    # Special values, each placed in its own bin\n",
    "    \"special_values\": {\"age_in_years\": [36]},\n",
    "    # Manually defined split points; for string variables, merge enumerated values with '%,%'\n",
    "    \"breaks_list\": {\n",
    "        # 'duration_in_month': [12, 18, 48],\n",
    "        # 'credit_amount': [2000, 3500, 4000, 7000],\n",
    "        # 'purpose': ['retraining%,%car (used)', 'radio/television', 'furniture/equipment%,%business%,%repairs', 'domestic appliances%,%education%,%car (new)%,%others'],\n",
    "        # 'age_in_years': [27, 34, 58]\n",
    "    },\n",
    "    # Target (y) column\n",
    "    \"y_column\": \"creditability\",\n",
    "    # Candidate variables for modelling\n",
    "    # \"x_columns\": [\n",
    "    #     \"duration_in_month\",\n",
    "    #     \"credit_amount\",\n",
    "    #     \"age_in_years\",\n",
    "    #     \"purpose\",\n",
    "    #     \"credit_history\",\n",
    "    #     \"random\",\n",
    "    #\n",
    "    #     \"credit_amount_corr1\",\n",
    "    #     \"credit_amount_corr2\",\n",
    "    # ],\n",
    "    # Variable annotations\n",
    "    \"columns_anns\": {\n",
    "        \"age_in_years\": \"年龄\",\n",
    "        \"credit_history\": \"借贷历史\",\n",
    "    },\n",
    "    # Excluded variables\n",
    "    \"columns_exclude\": [],\n",
    "    # Variables forced into the model\n",
    "    # \"columns_include\": [\"credit_amount\"],\n",
    "    # Score add/subtract rules\n",
    "    # Follow the format of the example; the names SCORE and df must not be changed\n",
    "    \"rules\": [\"df.loc[df['credit_amount']>=9000,'SCORE'] += -50\"],\n",
    "}\n",
    "\n",
    "# Train and generate the report\n",
    "pipeline = Pipeline(data=data, **cfg)\n",
    "pipeline.train()\n",
    "pipeline.report()\n",
    "pipeline.save()" ] }, { "cell_type": "code", "execution_count": 6, "id": "3405fc86", "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "-----【age_in_years】不同分箱数下变量的推荐切分点-----\n", "[30]\n", "[30, 35]\n", "[25, 30, 35]\n", "[20, 30, 60]\n", "[20, 25, 30, 35]\n", "[20, 30, 35, 60]\n" ] }, { "data": { "text/html": [ "
训练集
测试集
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "pipeline.variable_analyse(\"age_in_years\", format_bin=True)" ] }, { "cell_type": "code", "execution_count": 5, "id": "7c1f971d", "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "mlcfg load to【./cache/train/demo/mlcfg.json】success. \n", "项目路径:【./cache/train/demo】\n", "feature load from【./cache/train/demo/feature.csv】success.\n", "model load from【./cache/train/demo/model.pkl】success.\n", "model load from【./cache/train/demo/card.csv】success.\n" ] }, { "data": { "text/plain": [ "{'KS': 0.3836,\n", " 'AUC': 0.7481,\n", " 'Gini': 0.4962,\n", " 'pic':
}" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [
    "# Reload the pipeline saved under ./cache/train/demo and score the test split with it\n",
    "pipeline2 = Pipeline.load(\"./cache/train/demo\")\n",
    "score = pipeline2.score(test_data)\n",
    "score_rule = pipeline2.score_rule(test_data)\n",
    "# Both evaluations run on test_data, so the plots are titled as test results\n",
    "# (they were previously both titled \"train\") and get distinct titles to tell them apart\n",
    "sc.perf_eva(test_data[\"creditability\"], score, title=\"test\", show_plot=True)\n",
    "sc.perf_eva(test_data[\"creditability\"], score_rule, title=\"test (score_rule)\", show_plot=True)" ] } ], "metadata": { "celltoolbar": "编辑元数据", "kernelspec": { "display_name": "Python [conda env:analysis]", "language": "python", "name": "conda-env-analysis-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.13" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": { "height": "calc(100% - 180px)", "left": "10px", "top": "150px", "width": "372.364px" }, "toc_section_display": true, "toc_window_display": true }, "toc-autonumbering": false, "toc-showcode": false, "toc-showmarkdowntxt": false, "toc-showtags": false }, "nbformat": 4, "nbformat_minor": 5 }