{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "72fcd4bd-020f-4642-8286-3c96a133f1b0",
"metadata": {
"pycharm": {
"name": "#%%\n",
"is_executing": true
}
},
"outputs": [],
"source": [
"%matplotlib agg\n",
"import os\n",
"import sys\n",
"\n",
"# Directory that contains the easy_ml package. Defaults to the original\n",
"# hard-coded location; set the EASY_ML_ROOT environment variable to override it.\n",
"PROJECT_ROOT = os.environ.get(\"EASY_ML_ROOT\", \"/root/project\")\n",
"# Guard the append so re-running this cell does not pile up duplicate entries.\n",
"if PROJECT_ROOT not in sys.path:\n",
"    sys.path.append(PROJECT_ROOT)\n",
"\n",
"from easy_ml import DataSplitEntity, FilterStrategyFactory, ModelFactory, Pipeline\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "263a6e1a-2ac7-4ec5-a100-e0539576e04c",
"metadata": {
"pycharm": {
"name": "#%%\n",
"is_executing": true
}
},
"outputs": [],
"source": [
"# Load the demo data (scorecardpy's germancredit dataset)\n",
"import scorecardpy as sc\n",
"\n",
"\n",
"def _bad_flag(label):\n",
"    # Encode the label 'bad' as 1 and every other value as 0.\n",
"    if label == \"bad\":\n",
"        return 1\n",
"    return 0\n",
"\n",
"\n",
"dat = sc.germancredit()\n",
"# Replace every '.' in the column names with '_'.\n",
"dat_columns = [name.replace(\".\", \"_\") for name in dat.columns.tolist()]\n",
"dat.columns = dat_columns\n",
"dat[\"creditability\"] = dat[\"creditability\"].apply(_bad_flag)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "2ff2a864-438e-4524-b1ed-20003bba498a",
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 9.35it/s]\n"
]
},
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 样本 | \n",
" 样本数 | \n",
" 样本占比 | \n",
" 坏样本数 | \n",
" 坏样本比例 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 训练集 | \n",
" 709 | \n",
" 70.90% | \n",
" 211 | \n",
" 29.76% | \n",
"
\n",
" \n",
" 1 | \n",
" 测试集 | \n",
" 291 | \n",
" 29.10% | \n",
" 89 | \n",
" 30.58% | \n",
"
\n",
" \n",
" 2 | \n",
" 合计 | \n",
" 1000 | \n",
" 100% | \n",
" 300 | \n",
" 30.00% | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 样本 样本数 样本占比 坏样本数 坏样本比例\n",
"0 训练集 709 70.90% 211 29.76%\n",
"1 测试集 291 29.10% 89 30.58%\n",
"2 合计 1000 100% 300 30.00%"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 变量 | \n",
" IV | \n",
" psi | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" credit_history | \n",
" 0.2722 | \n",
" 0.0156 | \n",
"
\n",
" \n",
" 1 | \n",
" duration_in_month | \n",
" 0.2509 | \n",
" 0.0271 | \n",
"
\n",
" \n",
" 2 | \n",
" purpose | \n",
" 0.1519 | \n",
" 0.0126 | \n",
"
\n",
" \n",
" 3 | \n",
" credit_amount | \n",
" 0.1447 | \n",
" 0.0204 | \n",
"
\n",
" \n",
" 4 | \n",
" savings_account_and_bonds | \n",
" 0.1445 | \n",
" 0.0154 | \n",
"
\n",
" \n",
" 5 | \n",
" age_in_years | \n",
" 0.0716 | \n",
" 0.0039 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 变量 IV psi\n",
"0 credit_history 0.2722 0.0156\n",
"1 duration_in_month 0.2509 0.0271\n",
"2 purpose 0.1519 0.0126\n",
"3 credit_amount 0.1447 0.0204\n",
"4 savings_account_and_bonds 0.1445 0.0154\n",
"5 age_in_years 0.0716 0.0039"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 变量 | \n",
" vif | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" age_in_years_woe | \n",
" 1.028369 | \n",
"
\n",
" \n",
" 1 | \n",
" purpose_woe | \n",
" 1.015325 | \n",
"
\n",
" \n",
" 2 | \n",
" credit_history_woe | \n",
" 1.030331 | \n",
"
\n",
" \n",
" 3 | \n",
" credit_amount_woe | \n",
" 1.071660 | \n",
"
\n",
" \n",
" 4 | \n",
" duration_in_month_woe | \n",
" 1.106351 | \n",
"
\n",
" \n",
" 5 | \n",
" savings_account_and_bonds_woe | \n",
" 1.018661 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 变量 vif\n",
"0 age_in_years_woe 1.028369\n",
"1 purpose_woe 1.015325\n",
"2 credit_history_woe 1.030331\n",
"3 credit_amount_woe 1.071660\n",
"4 duration_in_month_woe 1.106351\n",
"5 savings_account_and_bonds_woe 1.018661"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"变量切分点:\n",
"{\n",
" \"credit_history\": [\n",
" \"no credits taken/ all credits paid back duly%,%all credits at this bank paid back duly\",\n",
" \"existing credits paid back duly till now\",\n",
" \"delay in paying off in the past\",\n",
" \"critical account/ other credits existing (not at this bank)\"\n",
" ],\n",
" \"savings_account_and_bonds\": [\n",
" \"... < 100 DM%,%100 <= ... < 500 DM\",\n",
" \"500 <= ... < 1000 DM%,%... >= 1000 DM\",\n",
" \"unknown/ no savings account\"\n",
" ],\n",
" \"duration_in_month\": [\n",
" 12,\n",
" 18,\n",
" 48\n",
" ],\n",
" \"purpose\": [\n",
" \"retraining%,%car (used)\",\n",
" \"radio/television\",\n",
" \"furniture/equipment%,%business%,%repairs\",\n",
" \"domestic appliances%,%education%,%car (new)%,%others\"\n",
" ],\n",
" \"credit_amount\": [\n",
" 2000,\n",
" 3500,\n",
" 4000,\n",
" 7000\n",
" ],\n",
" \"age_in_years\": [\n",
" 27,\n",
" 34,\n",
" 58\n",
" ]\n",
"}\n",
"-----模型结果-----\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 样本集 | \n",
" AUC | \n",
" KS | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 训练集 | \n",
" 0.7617 | \n",
" 0.4182 | \n",
"
\n",
" \n",
" 1 | \n",
" 测试集 | \n",
" 0.7850 | \n",
" 0.4977 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 样本集 AUC KS\n",
"0 训练集 0.7617 0.4182\n",
"1 测试集 0.7850 0.4977"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"

"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MODEL_SCORE_BIN | \n",
" psi | \n",
" 训练样本数 | \n",
" 测试样本数 | \n",
" 训练样本数比例 | \n",
" 测试样本数比例 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" (-inf, 521.0] | \n",
" 0.0000 | \n",
" 72 | \n",
" 29 | \n",
" 0.1016 | \n",
" 0.0997 | \n",
"
\n",
" \n",
" 1 | \n",
" (521.0, 552.0] | \n",
" 0.0000 | \n",
" 71 | \n",
" 29 | \n",
" 0.1001 | \n",
" 0.0997 | \n",
"
\n",
" \n",
" 2 | \n",
" (552.0, 578.0] | \n",
" 0.0000 | \n",
" 72 | \n",
" 30 | \n",
" 0.1016 | \n",
" 0.1031 | \n",
"
\n",
" \n",
" 3 | \n",
" (578.0, 596.0] | \n",
" 0.0028 | \n",
" 83 | \n",
" 29 | \n",
" 0.1171 | \n",
" 0.0997 | \n",
"
\n",
" \n",
" 4 | \n",
" (596.0, 611.0] | \n",
" 0.0048 | \n",
" 57 | \n",
" 18 | \n",
" 0.0804 | \n",
" 0.0619 | \n",
"
\n",
" \n",
" 5 | \n",
" (611.0, 630.0] | \n",
" 0.0049 | \n",
" 79 | \n",
" 26 | \n",
" 0.1114 | \n",
" 0.0893 | \n",
"
\n",
" \n",
" 6 | \n",
" (630.0, 649.0] | \n",
" 0.0000 | \n",
" 66 | \n",
" 27 | \n",
" 0.0931 | \n",
" 0.0928 | \n",
"
\n",
" \n",
" 7 | \n",
" (649.0, 675.4] | \n",
" 0.0161 | \n",
" 67 | \n",
" 40 | \n",
" 0.0945 | \n",
" 0.1375 | \n",
"
\n",
" \n",
" 8 | \n",
" (675.4, 706.4] | \n",
" 0.0000 | \n",
" 71 | \n",
" 29 | \n",
" 0.1001 | \n",
" 0.0997 | \n",
"
\n",
" \n",
" 9 | \n",
" (706.4, inf] | \n",
" 0.0026 | \n",
" 71 | \n",
" 34 | \n",
" 0.1001 | \n",
" 0.1168 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" MODEL_SCORE_BIN psi 训练样本数 测试样本数 训练样本数比例 测试样本数比例\n",
"0 (-inf, 521.0] 0.0000 72 29 0.1016 0.0997\n",
"1 (521.0, 552.0] 0.0000 71 29 0.1001 0.0997\n",
"2 (552.0, 578.0] 0.0000 72 30 0.1016 0.1031\n",
"3 (578.0, 596.0] 0.0028 83 29 0.1171 0.0997\n",
"4 (596.0, 611.0] 0.0048 57 18 0.0804 0.0619\n",
"5 (611.0, 630.0] 0.0049 79 26 0.1114 0.0893\n",
"6 (630.0, 649.0] 0.0000 66 27 0.0931 0.0928\n",
"7 (649.0, 675.4] 0.0161 67 40 0.0945 0.1375\n",
"8 (675.4, 706.4] 0.0000 71 29 0.1001 0.0997\n",
"9 (706.4, inf] 0.0026 71 34 0.1001 0.1168"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"模型psi: 0.0312\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 变量 | \n",
" 变量系数 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" age_in_years_woe | \n",
" 0.739076 | \n",
"
\n",
" \n",
" 1 | \n",
" purpose_woe | \n",
" 1.161895 | \n",
"
\n",
" \n",
" 2 | \n",
" credit_history_woe | \n",
" 0.896308 | \n",
"
\n",
" \n",
" 3 | \n",
" credit_amount_woe | \n",
" 0.742090 | \n",
"
\n",
" \n",
" 4 | \n",
" duration_in_month_woe | \n",
" 0.811411 | \n",
"
\n",
" \n",
" 5 | \n",
" savings_account_and_bonds_woe | \n",
" 0.910304 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 变量 变量系数\n",
"0 age_in_years_woe 0.739076\n",
"1 purpose_woe 1.161895\n",
"2 credit_history_woe 0.896308\n",
"3 credit_amount_woe 0.742090\n",
"4 duration_in_month_woe 0.811411\n",
"5 savings_account_and_bonds_woe 0.910304"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----训练集-分数分箱-----\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MODEL_SCORE_BIN | \n",
" 样本数 | \n",
" 坏样本数 | \n",
" 好样本数 | \n",
" 坏样本比例 | \n",
" 样本数比例 | \n",
" 总坏样本数 | \n",
" 总好样本数 | \n",
" 平均坏样本率 | \n",
" 累计坏样本数 | \n",
" 累计好样本数 | \n",
" 累计样本数 | \n",
" 累计坏样本比例 | \n",
" 累计好样本比例 | \n",
" KS | \n",
" LIFT | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" (-inf, 521.0] | \n",
" 72 | \n",
" 45 | \n",
" 27 | \n",
" 0.6250 | \n",
" 0.1016 | \n",
" 211 | \n",
" 498 | \n",
" 0.2976 | \n",
" 45 | \n",
" 27 | \n",
" 72 | \n",
" 0.2133 | \n",
" 0.0542 | \n",
" 0.1591 | \n",
" 2.1001 | \n",
"
\n",
" \n",
" 1 | \n",
" (521.0, 552.0] | \n",
" 71 | \n",
" 34 | \n",
" 37 | \n",
" 0.4789 | \n",
" 0.1001 | \n",
" 211 | \n",
" 498 | \n",
" 0.2976 | \n",
" 79 | \n",
" 64 | \n",
" 143 | \n",
" 0.3744 | \n",
" 0.1285 | \n",
" 0.2459 | \n",
" 1.8563 | \n",
"
\n",
" \n",
" 2 | \n",
" (552.0, 578.0] | \n",
" 72 | \n",
" 35 | \n",
" 37 | \n",
" 0.4861 | \n",
" 0.1016 | \n",
" 211 | \n",
" 498 | \n",
" 0.2976 | \n",
" 114 | \n",
" 101 | \n",
" 215 | \n",
" 0.5403 | \n",
" 0.2028 | \n",
" 0.3375 | \n",
" 1.7817 | \n",
"
\n",
" \n",
" 3 | \n",
" (578.0, 596.0] | \n",
" 83 | \n",
" 32 | \n",
" 51 | \n",
" 0.3855 | \n",
" 0.1171 | \n",
" 211 | \n",
" 498 | \n",
" 0.2976 | \n",
" 146 | \n",
" 152 | \n",
" 298 | \n",
" 0.6919 | \n",
" 0.3052 | \n",
" 0.3867 | \n",
" 1.6463 | \n",
"
\n",
" \n",
" 4 | \n",
" (596.0, 611.0] | \n",
" 57 | \n",
" 17 | \n",
" 40 | \n",
" 0.2982 | \n",
" 0.0804 | \n",
" 211 | \n",
" 498 | \n",
" 0.2976 | \n",
" 163 | \n",
" 192 | \n",
" 355 | \n",
" 0.7725 | \n",
" 0.3855 | \n",
" 0.3870 | \n",
" 1.5429 | \n",
"
\n",
" \n",
" 5 | \n",
" (611.0, 630.0] | \n",
" 79 | \n",
" 23 | \n",
" 56 | \n",
" 0.2911 | \n",
" 0.1114 | \n",
" 211 | \n",
" 498 | \n",
" 0.2976 | \n",
" 186 | \n",
" 248 | \n",
" 434 | \n",
" 0.8815 | \n",
" 0.4980 | \n",
" 0.3835 | \n",
" 1.4401 | \n",
"
\n",
" \n",
" 6 | \n",
" (630.0, 649.0] | \n",
" 66 | \n",
" 8 | \n",
" 58 | \n",
" 0.1212 | \n",
" 0.0931 | \n",
" 211 | \n",
" 498 | \n",
" 0.2976 | \n",
" 194 | \n",
" 306 | \n",
" 500 | \n",
" 0.9194 | \n",
" 0.6145 | \n",
" 0.3049 | \n",
" 1.3038 | \n",
"
\n",
" \n",
" 7 | \n",
" (649.0, 675.4] | \n",
" 67 | \n",
" 10 | \n",
" 57 | \n",
" 0.1493 | \n",
" 0.0945 | \n",
" 211 | \n",
" 498 | \n",
" 0.2976 | \n",
" 204 | \n",
" 363 | \n",
" 567 | \n",
" 0.9668 | \n",
" 0.7289 | \n",
" 0.2379 | \n",
" 1.2090 | \n",
"
\n",
" \n",
" 8 | \n",
" (675.4, 706.4] | \n",
" 71 | \n",
" 5 | \n",
" 66 | \n",
" 0.0704 | \n",
" 0.1001 | \n",
" 211 | \n",
" 498 | \n",
" 0.2976 | \n",
" 209 | \n",
" 429 | \n",
" 638 | \n",
" 0.9905 | \n",
" 0.8614 | \n",
" 0.1291 | \n",
" 1.1008 | \n",
"
\n",
" \n",
" 9 | \n",
" (706.4, inf] | \n",
" 71 | \n",
" 2 | \n",
" 69 | \n",
" 0.0282 | \n",
" 0.1001 | \n",
" 211 | \n",
" 498 | \n",
" 0.2976 | \n",
" 211 | \n",
" 498 | \n",
" 709 | \n",
" 1.0000 | \n",
" 1.0000 | \n",
" 0.0000 | \n",
" 1.0000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" MODEL_SCORE_BIN 样本数 坏样本数 好样本数 坏样本比例 样本数比例 总坏样本数 总好样本数 平均坏样本率 \\\n",
"0 (-inf, 521.0] 72 45 27 0.6250 0.1016 211 498 0.2976 \n",
"1 (521.0, 552.0] 71 34 37 0.4789 0.1001 211 498 0.2976 \n",
"2 (552.0, 578.0] 72 35 37 0.4861 0.1016 211 498 0.2976 \n",
"3 (578.0, 596.0] 83 32 51 0.3855 0.1171 211 498 0.2976 \n",
"4 (596.0, 611.0] 57 17 40 0.2982 0.0804 211 498 0.2976 \n",
"5 (611.0, 630.0] 79 23 56 0.2911 0.1114 211 498 0.2976 \n",
"6 (630.0, 649.0] 66 8 58 0.1212 0.0931 211 498 0.2976 \n",
"7 (649.0, 675.4] 67 10 57 0.1493 0.0945 211 498 0.2976 \n",
"8 (675.4, 706.4] 71 5 66 0.0704 0.1001 211 498 0.2976 \n",
"9 (706.4, inf] 71 2 69 0.0282 0.1001 211 498 0.2976 \n",
"\n",
" 累计坏样本数 累计好样本数 累计样本数 累计坏样本比例 累计好样本比例 KS LIFT \n",
"0 45 27 72 0.2133 0.0542 0.1591 2.1001 \n",
"1 79 64 143 0.3744 0.1285 0.2459 1.8563 \n",
"2 114 101 215 0.5403 0.2028 0.3375 1.7817 \n",
"3 146 152 298 0.6919 0.3052 0.3867 1.6463 \n",
"4 163 192 355 0.7725 0.3855 0.3870 1.5429 \n",
"5 186 248 434 0.8815 0.4980 0.3835 1.4401 \n",
"6 194 306 500 0.9194 0.6145 0.3049 1.3038 \n",
"7 204 363 567 0.9668 0.7289 0.2379 1.2090 \n",
"8 209 429 638 0.9905 0.8614 0.1291 1.1008 \n",
"9 211 498 709 1.0000 1.0000 0.0000 1.0000 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----测试集-分数分箱-----\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MODEL_SCORE_BIN | \n",
" 样本数 | \n",
" 坏样本数 | \n",
" 好样本数 | \n",
" 坏样本比例 | \n",
" 样本数比例 | \n",
" 总坏样本数 | \n",
" 总好样本数 | \n",
" 平均坏样本率 | \n",
" 累计坏样本数 | \n",
" 累计好样本数 | \n",
" 累计样本数 | \n",
" 累计坏样本比例 | \n",
" 累计好样本比例 | \n",
" KS | \n",
" LIFT | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" (-inf, 521.0] | \n",
" 29 | \n",
" 21 | \n",
" 8 | \n",
" 0.7241 | \n",
" 0.0997 | \n",
" 89 | \n",
" 202 | \n",
" 0.3058 | \n",
" 21 | \n",
" 8 | \n",
" 29 | \n",
" 0.2360 | \n",
" 0.0396 | \n",
" 0.1964 | \n",
" 2.3680 | \n",
"
\n",
" \n",
" 1 | \n",
" (521.0, 552.0] | \n",
" 29 | \n",
" 16 | \n",
" 13 | \n",
" 0.5517 | \n",
" 0.0997 | \n",
" 89 | \n",
" 202 | \n",
" 0.3058 | \n",
" 37 | \n",
" 21 | \n",
" 58 | \n",
" 0.4157 | \n",
" 0.1040 | \n",
" 0.3117 | \n",
" 2.0861 | \n",
"
\n",
" \n",
" 2 | \n",
" (552.0, 578.0] | \n",
" 30 | \n",
" 14 | \n",
" 16 | \n",
" 0.4667 | \n",
" 0.1031 | \n",
" 89 | \n",
" 202 | \n",
" 0.3058 | \n",
" 51 | \n",
" 37 | \n",
" 88 | \n",
" 0.5730 | \n",
" 0.1832 | \n",
" 0.3898 | \n",
" 1.8952 | \n",
"
\n",
" \n",
" 3 | \n",
" (578.0, 596.0] | \n",
" 29 | \n",
" 14 | \n",
" 15 | \n",
" 0.4828 | \n",
" 0.0997 | \n",
" 89 | \n",
" 202 | \n",
" 0.3058 | \n",
" 65 | \n",
" 52 | \n",
" 117 | \n",
" 0.7303 | \n",
" 0.2574 | \n",
" 0.4729 | \n",
" 1.8167 | \n",
"
\n",
" \n",
" 4 | \n",
" (596.0, 611.0] | \n",
" 18 | \n",
" 4 | \n",
" 14 | \n",
" 0.2222 | \n",
" 0.0619 | \n",
" 89 | \n",
" 202 | \n",
" 0.3058 | \n",
" 69 | \n",
" 66 | \n",
" 135 | \n",
" 0.7753 | \n",
" 0.3267 | \n",
" 0.4486 | \n",
" 1.6714 | \n",
"
\n",
" \n",
" 5 | \n",
" (611.0, 630.0] | \n",
" 26 | \n",
" 7 | \n",
" 19 | \n",
" 0.2692 | \n",
" 0.0893 | \n",
" 89 | \n",
" 202 | \n",
" 0.3058 | \n",
" 76 | \n",
" 85 | \n",
" 161 | \n",
" 0.8539 | \n",
" 0.4208 | \n",
" 0.4331 | \n",
" 1.5437 | \n",
"
\n",
" \n",
" 6 | \n",
" (630.0, 649.0] | \n",
" 27 | \n",
" 3 | \n",
" 24 | \n",
" 0.1111 | \n",
" 0.0928 | \n",
" 89 | \n",
" 202 | \n",
" 0.3058 | \n",
" 79 | \n",
" 109 | \n",
" 188 | \n",
" 0.8876 | \n",
" 0.5396 | \n",
" 0.3480 | \n",
" 1.3741 | \n",
"
\n",
" \n",
" 7 | \n",
" (649.0, 675.4] | \n",
" 40 | \n",
" 6 | \n",
" 34 | \n",
" 0.1500 | \n",
" 0.1375 | \n",
" 89 | \n",
" 202 | \n",
" 0.3058 | \n",
" 85 | \n",
" 143 | \n",
" 228 | \n",
" 0.9551 | \n",
" 0.7079 | \n",
" 0.2472 | \n",
" 1.2191 | \n",
"
\n",
" \n",
" 8 | \n",
" (675.4, 706.4] | \n",
" 29 | \n",
" 2 | \n",
" 27 | \n",
" 0.0690 | \n",
" 0.0997 | \n",
" 89 | \n",
" 202 | \n",
" 0.3058 | \n",
" 87 | \n",
" 170 | \n",
" 257 | \n",
" 0.9775 | \n",
" 0.8416 | \n",
" 0.1359 | \n",
" 1.1070 | \n",
"
\n",
" \n",
" 9 | \n",
" (706.4, inf] | \n",
" 34 | \n",
" 2 | \n",
" 32 | \n",
" 0.0588 | \n",
" 0.1168 | \n",
" 89 | \n",
" 202 | \n",
" 0.3058 | \n",
" 89 | \n",
" 202 | \n",
" 291 | \n",
" 1.0000 | \n",
" 1.0000 | \n",
" 0.0000 | \n",
" 1.0001 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" MODEL_SCORE_BIN 样本数 坏样本数 好样本数 坏样本比例 样本数比例 总坏样本数 总好样本数 平均坏样本率 \\\n",
"0 (-inf, 521.0] 29 21 8 0.7241 0.0997 89 202 0.3058 \n",
"1 (521.0, 552.0] 29 16 13 0.5517 0.0997 89 202 0.3058 \n",
"2 (552.0, 578.0] 30 14 16 0.4667 0.1031 89 202 0.3058 \n",
"3 (578.0, 596.0] 29 14 15 0.4828 0.0997 89 202 0.3058 \n",
"4 (596.0, 611.0] 18 4 14 0.2222 0.0619 89 202 0.3058 \n",
"5 (611.0, 630.0] 26 7 19 0.2692 0.0893 89 202 0.3058 \n",
"6 (630.0, 649.0] 27 3 24 0.1111 0.0928 89 202 0.3058 \n",
"7 (649.0, 675.4] 40 6 34 0.1500 0.1375 89 202 0.3058 \n",
"8 (675.4, 706.4] 29 2 27 0.0690 0.0997 89 202 0.3058 \n",
"9 (706.4, inf] 34 2 32 0.0588 0.1168 89 202 0.3058 \n",
"\n",
" 累计坏样本数 累计好样本数 累计样本数 累计坏样本比例 累计好样本比例 KS LIFT \n",
"0 21 8 29 0.2360 0.0396 0.1964 2.3680 \n",
"1 37 21 58 0.4157 0.1040 0.3117 2.0861 \n",
"2 51 37 88 0.5730 0.1832 0.3898 1.8952 \n",
"3 65 52 117 0.7303 0.2574 0.4729 1.8167 \n",
"4 69 66 135 0.7753 0.3267 0.4486 1.6714 \n",
"5 76 85 161 0.8539 0.4208 0.4331 1.5437 \n",
"6 79 109 188 0.8876 0.5396 0.3480 1.3741 \n",
"7 85 143 228 0.9551 0.7079 0.2472 1.2191 \n",
"8 87 170 257 0.9775 0.8416 0.1359 1.1070 \n",
"9 89 202 291 1.0000 1.0000 0.0000 1.0001 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" variable | \n",
" bin | \n",
" points | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" basepoints | \n",
" NaN | \n",
" 599.0 | \n",
"
\n",
" \n",
" 0 | \n",
" age_in_years | \n",
" [-inf,27.0) | \n",
" -16.0 | \n",
"
\n",
" \n",
" 1 | \n",
" age_in_years | \n",
" [27.0,34.0) | \n",
" -8.0 | \n",
"
\n",
" \n",
" 2 | \n",
" age_in_years | \n",
" [34.0,58.0) | \n",
" 17.0 | \n",
"
\n",
" \n",
" 3 | \n",
" age_in_years | \n",
" [58.0,inf) | \n",
" -9.0 | \n",
"
\n",
" \n",
" 4 | \n",
" purpose | \n",
" retraining%,%car (used) | \n",
" 73.0 | \n",
"
\n",
" \n",
" 5 | \n",
" purpose | \n",
" radio/television | \n",
" 31.0 | \n",
"
\n",
" \n",
" 6 | \n",
" purpose | \n",
" furniture/equipment%,%business%,%repairs | \n",
" -12.0 | \n",
"
\n",
" \n",
" 7 | \n",
" purpose | \n",
" domestic appliances%,%education%,%car (new)%,%... | \n",
" -32.0 | \n",
"
\n",
" \n",
" 8 | \n",
" credit_history | \n",
" no credits taken/ all credits paid back duly%,... | \n",
" -84.0 | \n",
"
\n",
" \n",
" 9 | \n",
" credit_history | \n",
" existing credits paid back duly till now | \n",
" -3.0 | \n",
"
\n",
" \n",
" 10 | \n",
" credit_history | \n",
" delay in paying off in the past | \n",
" -9.0 | \n",
"
\n",
" \n",
" 11 | \n",
" credit_history | \n",
" critical account/ other credits existing (not ... | \n",
" 43.0 | \n",
"
\n",
" \n",
" 12 | \n",
" credit_amount | \n",
" [-inf,2000.0) | \n",
" 5.0 | \n",
"
\n",
" \n",
" 13 | \n",
" credit_amount | \n",
" [2000.0,3500.0) | \n",
" 14.0 | \n",
"
\n",
" \n",
" 14 | \n",
" credit_amount | \n",
" [3500.0,4000.0) | \n",
" 50.0 | \n",
"
\n",
" \n",
" 15 | \n",
" credit_amount | \n",
" [4000.0,7000.0) | \n",
" -21.0 | \n",
"
\n",
" \n",
" 16 | \n",
" credit_amount | \n",
" [7000.0,inf) | \n",
" -40.0 | \n",
"
\n",
" \n",
" 17 | \n",
" duration_in_month | \n",
" [-inf,12.0) | \n",
" 58.0 | \n",
"
\n",
" \n",
" 18 | \n",
" duration_in_month | \n",
" [12.0,18.0) | \n",
" 12.0 | \n",
"
\n",
" \n",
" 19 | \n",
" duration_in_month | \n",
" [18.0,48.0) | \n",
" -15.0 | \n",
"
\n",
" \n",
" 20 | \n",
" duration_in_month | \n",
" [48.0,inf) | \n",
" -53.0 | \n",
"
\n",
" \n",
" 21 | \n",
" savings_account_and_bonds | \n",
" ... < 100 DM%,%100 <= ... < 500 DM | \n",
" -14.0 | \n",
"
\n",
" \n",
" 22 | \n",
" savings_account_and_bonds | \n",
" 500 <= ... < 1000 DM%,%... >= 1000 DM | \n",
" 62.0 | \n",
"
\n",
" \n",
" 23 | \n",
" savings_account_and_bonds | \n",
" unknown/ no savings account | \n",
" 31.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" variable \\\n",
"0 basepoints \n",
"0 age_in_years \n",
"1 age_in_years \n",
"2 age_in_years \n",
"3 age_in_years \n",
"4 purpose \n",
"5 purpose \n",
"6 purpose \n",
"7 purpose \n",
"8 credit_history \n",
"9 credit_history \n",
"10 credit_history \n",
"11 credit_history \n",
"12 credit_amount \n",
"13 credit_amount \n",
"14 credit_amount \n",
"15 credit_amount \n",
"16 credit_amount \n",
"17 duration_in_month \n",
"18 duration_in_month \n",
"19 duration_in_month \n",
"20 duration_in_month \n",
"21 savings_account_and_bonds \n",
"22 savings_account_and_bonds \n",
"23 savings_account_and_bonds \n",
"\n",
" bin points \n",
"0 NaN 599.0 \n",
"0 [-inf,27.0) -16.0 \n",
"1 [27.0,34.0) -8.0 \n",
"2 [34.0,58.0) 17.0 \n",
"3 [58.0,inf) -9.0 \n",
"4 retraining%,%car (used) 73.0 \n",
"5 radio/television 31.0 \n",
"6 furniture/equipment%,%business%,%repairs -12.0 \n",
"7 domestic appliances%,%education%,%car (new)%,%... -32.0 \n",
"8 no credits taken/ all credits paid back duly%,... -84.0 \n",
"9 existing credits paid back duly till now -3.0 \n",
"10 delay in paying off in the past -9.0 \n",
"11 critical account/ other credits existing (not ... 43.0 \n",
"12 [-inf,2000.0) 5.0 \n",
"13 [2000.0,3500.0) 14.0 \n",
"14 [3500.0,4000.0) 50.0 \n",
"15 [4000.0,7000.0) -21.0 \n",
"16 [7000.0,inf) -40.0 \n",
"17 [-inf,12.0) 58.0 \n",
"18 [12.0,18.0) 12.0 \n",
"19 [18.0,48.0) -15.0 \n",
"20 [48.0,inf) -53.0 \n",
"21 ... < 100 DM%,%100 <= ... < 500 DM -14.0 \n",
"22 500 <= ... < 1000 DM%,%... >= 1000 DM 62.0 \n",
"23 unknown/ no savings account 31.0 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"模型报告文件储存路径:./cache/train/2025_01_03_14_38_35/模型报告.docx\n"
]
}
],
"source": [
"data = DataSplitEntity(train_data=dat[:709], test_data=dat[709:])\n",
"# 特征处理\n",
"cfg = {\n",
"# jupyter下输出内容\n",
"\"jupyter_print\": True,\n",
"# 是否开启粗分箱\n",
"\"format_bin\": False,\n",
"# 变量切分点搜索采样率\n",
"\"sample_rate\": 0.1,\n",
"# 最多候选变量数\n",
"\"max_feature_num\": 10,\n",
"# 单调性允许变化次数\n",
"\"monto_shift_threshold\":1,\n",
"# 特殊值\n",
"# \"special_values\": {\"age_in_years\": [36]},\n",
"# 手动定义切分点,字符型的变量以'%,%'合并枚举值\n",
"\"breaks_list\": { 'duration_in_month': [12, 18, 48], \n",
" 'credit_amount': [2000, 3500, 4000, 7000], \n",
" 'purpose': ['retraining%,%car (used)', 'radio/television', 'furniture/equipment%,%business%,%repairs', 'domestic appliances%,%education%,%car (new)%,%others'], \n",
" 'age_in_years': [27, 34, 58]},\n",
"# y \n",
"\"y_column\": \"creditability\",\n",
"# 候选变量\n",
"\"x_columns\": [\n",
"\"duration_in_month\",\n",
"\"credit_amount\",\n",
"\"age_in_years\",\n",
"\"purpose\",\n",
"\"credit_history\",\n",
"\"savings_account_and_bonds\"\n",
" ]\n",
"}\n",
"\n",
"# 选择特征筛选策略\n",
"feature_strategy_clazz = FilterStrategyFactory.get_strategy(\"iv\")\n",
"feature_strategy = feature_strategy_clazz(**cfg)\n",
"\n",
"\n",
"# 选择模型\n",
"model_clazz = ModelFactory.get_model(\"lr\")\n",
"model = model_clazz()\n",
"\n",
"# 训练并生成报告\n",
"train_pipeline = Pipeline(feature_strategy, model, data)\n",
"train_pipeline.train()\n",
"train_pipeline.report()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:easy_ml]",
"language": "python",
"name": "conda-env-easy_ml-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.15"
}
},
"nbformat": 4,
"nbformat_minor": 5
}