{
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"id": "151b3ecc",
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"%matplotlib agg\n",
"# Setup cell: select the Agg matplotlib backend, import dependencies, and build\n",
"# the demo DataFrame `dat` from scorecardpy's German credit dataset.\n",
"import sys\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import scorecardpy as sc\n",
"\n",
"# easy_ml is imported from the project directory (presumably it is not installed\n",
"# as a package - TODO confirm), so that directory must be on sys.path first.\n",
"# The membership check keeps sys.path free of duplicates when the cell is re-run.\n",
"PROJECT_ROOT = \"/root/project\"\n",
"if PROJECT_ROOT not in sys.path:\n",
"    sys.path.append(PROJECT_ROOT)\n",
"from easy_ml import DataSplitEntity, Pipeline\n",
"\n",
"# Load the demo data.\n",
"dat = sc.germancredit()\n",
"# Replace dots in the column names with underscores.\n",
"dat_columns = [c.replace(\".\", \"_\") for c in dat.columns.tolist()]\n",
"dat.columns = dat_columns\n",
"# Encode the target: 1 when the label is \"bad\", 0 otherwise.\n",
"dat[\"creditability\"] = dat[\"creditability\"].apply(lambda x: 1 if x == \"bad\" else 0)\n",
"\n",
"# Two exact multiples of credit_amount, i.e. perfectly correlated copies\n",
"# (presumably added to exercise the correlation filter - TODO confirm).\n",
"dat[\"credit_amount_corr1\"] = dat[\"credit_amount\"] * 2\n",
"dat[\"credit_amount_corr2\"] = dat[\"credit_amount\"] * 3\n",
"\n",
"# Optional: uncomment to put the string \"missing\" into a numeric column.\n",
"# duration_in_month = list(dat[\"duration_in_month\"])\n",
"# duration_in_month[0] = \"missing\"\n",
"# dat[\"duration_in_month\"] = duration_in_month\n",
"\n",
"# Optional: uncomment to put \"missing\" and None into a categorical column.\n",
"# purpose = list(dat[\"purpose\"])\n",
"# purpose[0] = \"missing\"\n",
"# purpose[1] = None\n",
"# dat[\"purpose\"] = purpose\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "dda26a85",
"metadata": {
"code_folding": [],
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"项目路径:【./cache/train/demo】\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 7/7 [00:13<00:00, 1.86s/it]\n"
]
},
{
"data": {
"text/html": [
"
样本分布
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 样本 | \n",
" 样本数 | \n",
" 样本占比 | \n",
" 坏样本数 | \n",
" 坏样本比例 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 训练集 | \n",
" 709 | \n",
" 70.90% | \n",
" 211 | \n",
" 29.76% | \n",
"
\n",
" \n",
" 1 | \n",
" 测试集 | \n",
" 291 | \n",
" 29.10% | \n",
" 89 | \n",
" 30.58% | \n",
"
\n",
" \n",
" 2 | \n",
" 合计 | \n",
" 1000 | \n",
" 100% | \n",
" 300 | \n",
" 30.00% | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 样本 样本数 样本占比 坏样本数 坏样本比例\n",
"0 训练集 709 70.90% 211 29.76%\n",
"1 测试集 291 29.10% 89 30.58%\n",
"2 合计 1000 100% 300 30.00%"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"变量iv
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 变量 | \n",
" iv | \n",
" psi | \n",
" vif | \n",
" 释义 | \n",
"
\n",
" \n",
" \n",
" \n",
" 4 | \n",
" credit_history | \n",
" 0.272 | \n",
" 0.016 | \n",
" 1.029 | \n",
" 借贷历史 | \n",
"
\n",
" \n",
" 1 | \n",
" duration_in_month | \n",
" 0.262 | \n",
" 0.036 | \n",
" 1.100 | \n",
" - | \n",
"
\n",
" \n",
" 0 | \n",
" purpose | \n",
" 0.152 | \n",
" 0.013 | \n",
" 1.005 | \n",
" - | \n",
"
\n",
" \n",
" 3 | \n",
" age_in_years | \n",
" 0.105 | \n",
" 0.014 | \n",
" 1.025 | \n",
" 年龄 | \n",
"
\n",
" \n",
" 2 | \n",
" credit_amount_corr1 | \n",
" 0.077 | \n",
" 0.005 | \n",
" 1.091 | \n",
" - | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 变量 iv psi vif 释义\n",
"4 credit_history 0.272 0.016 1.029 借贷历史\n",
"1 duration_in_month 0.262 0.036 1.100 -\n",
"0 purpose 0.152 0.013 1.005 -\n",
"3 age_in_years 0.105 0.014 1.025 年龄\n",
"2 credit_amount_corr1 0.077 0.005 1.091 -"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"变量趋势
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"变量切分点:\n",
"{\n",
" \"credit_history\": [\n",
" \"no credits taken/ all credits paid back duly%,%all credits at this bank paid back duly\",\n",
" \"existing credits paid back duly till now\",\n",
" \"delay in paying off in the past\",\n",
" \"critical account/ other credits existing (not at this bank)\"\n",
" ],\n",
" \"duration_in_month\": [\n",
" 8,\n",
" 15,\n",
" 30\n",
" ],\n",
" \"purpose\": [\n",
" \"retraining%,%car (used)\",\n",
" \"radio/television\",\n",
" \"furniture/equipment%,%business%,%repairs\",\n",
" \"domestic appliances%,%education%,%car (new)%,%others\"\n",
" ],\n",
" \"credit_amount_corr1\": [\n",
" 15000.0,\n",
" 20000.0\n",
" ],\n",
" \"age_in_years\": [\n",
" 25,\n",
" 30,\n",
" 35\n",
" ]\n",
"}\n",
"选中变量不同分箱数下变量的推荐切分点:\n",
"-----【duration_in_month】不同分箱数下变量的推荐切分点-----\n",
"[35]\n",
"[8, 35]\n",
"[8, 15, 30]\n",
"[6, 15, 25, 35]\n",
"[8, 10, 15, 35]\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----【credit_amount_corr1】不同分箱数下变量的推荐切分点-----\n",
"[15000.0]\n",
"[15000.0, 20000.0]\n",
"[5000.0, 15000.0, 20000.0]\n",
"[5000.0, 10000.0, 20000.0]\n",
"[5000.0, 10000.0, 15000.0, 20000.0]\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----【age_in_years】不同分箱数下变量的推荐切分点-----\n",
"[30]\n",
"[30, 35]\n",
"[25, 30, 35]\n",
"[20, 25, 30, 35]\n",
"[20, 30, 35, 40]\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"快速筛选过程
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"剔除train_iv小于阈值\n",
"筛选前变量数量:7\n",
"['duration_in_month', 'credit_amount', 'age_in_years', 'purpose', 'credit_history', 'credit_amount_corr1', 'credit_amount_corr2']\n",
"快速筛选剔除变量数量:0\n",
"\n"
]
},
{
"data": {
"text/html": [
"数值变量筛选过程
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n"
]
},
{
"data": {
"text/html": [
"相关性筛选过程
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"duration_in_month: 【credit_amount_iv0.299_corr0.483】 【credit_amount_corr2_iv0.24_corr0.511】 \n",
"\n",
"-----相关性筛选保留的【duration_in_month】-----\n",
"-----【duration_in_month】不同分箱数下变量的推荐切分点-----\n",
"[35]\n",
"[8, 35]\n",
"[8, 15, 30]\n",
"[6, 15, 25, 35]\n",
"[8, 10, 15, 35]\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----【credit_amount】不同分箱数下变量的推荐切分点-----\n",
"[4000.0]\n",
"[4000.0, 9000.0]\n",
"[3000.0, 7000.0, 9000.0]\n",
"[4000.0, 5000.0, 7000.0]\n",
"[2000.0, 3000.0, 7000.0, 9000.0]\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----【credit_amount_corr2】不同分箱数下变量的推荐切分点-----\n",
"[25000.0]\n",
"[20000.0, 25000.0]\n",
"[10000.0, 20000.0, 25000.0]\n",
"[10000.0, 15000.0, 20000.0, 25000.0]\n",
"[5000.0, 10000.0, 20000.0, 25000.0]\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"vif筛选过程
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n"
]
},
{
"data": {
"text/html": [
"ivtop筛选过程
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"iv = train_iv + test_iv\n",
"\n"
]
},
{
"data": {
"text/html": [
"模型结果
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 样本集 | \n",
" AUC | \n",
" KS | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 训练集 | \n",
" 0.7495 | \n",
" 0.3900 | \n",
"
\n",
" \n",
" 1 | \n",
" 测试集 | \n",
" 0.7494 | \n",
" 0.4074 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 样本集 AUC KS\n",
"0 训练集 0.7495 0.3900\n",
"1 测试集 0.7494 0.4074"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"

"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"加入规则后:\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 样本集 | \n",
" AUC | \n",
" KS | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 训练集 | \n",
" 0.7518 | \n",
" 0.3935 | \n",
"
\n",
" \n",
" 1 | \n",
" 测试集 | \n",
" 0.7564 | \n",
" 0.3985 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 样本集 AUC KS\n",
"0 训练集 0.7518 0.3935\n",
"1 测试集 0.7564 0.3985"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"

"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"模型变量系数
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" Generalized Linear Model Regression Results \n",
"==============================================================================\n",
"Dep. Variable: creditability No. Observations: 709\n",
"Model: GLM Df Residuals: 704\n",
"Model Family: Binomial Df Model: 4\n",
"Link Function: logit Scale: 1.0000\n",
"Method: IRLS Log-Likelihood: -425.51\n",
"Date: Tue, 25 Feb 2025 Deviance: 851.02\n",
"Time: 10:54:04 Pearson chi2: 731.\n",
"No. Iterations: 4 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" var | \n",
" coef | \n",
" std err | \n",
" z | \n",
" P>|z| | \n",
" [0.025 | \n",
" 0.975] | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" purpose_woe | \n",
" 1.1976 | \n",
" 0.213 | \n",
" 5.633 | \n",
" 0.000 | \n",
" 0.781 | \n",
" 1.614 | \n",
"
\n",
" \n",
" 1 | \n",
" duration_in_month_woe | \n",
" 0.9058 | \n",
" 0.172 | \n",
" 5.252 | \n",
" 0.000 | \n",
" 0.568 | \n",
" 1.244 | \n",
"
\n",
" \n",
" 2 | \n",
" credit_amount_corr1_woe | \n",
" 0.6386 | \n",
" 0.357 | \n",
" 1.789 | \n",
" 0.074 | \n",
" -0.061 | \n",
" 1.338 | \n",
"
\n",
" \n",
" 3 | \n",
" age_in_years_woe | \n",
" 0.9041 | \n",
" 0.256 | \n",
" 3.531 | \n",
" 0.000 | \n",
" 0.402 | \n",
" 1.406 | \n",
"
\n",
" \n",
" 4 | \n",
" credit_history_woe | \n",
" 0.9796 | \n",
" 0.175 | \n",
" 5.599 | \n",
" 0.000 | \n",
" 0.637 | \n",
" 1.323 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" var coef std err z P>|z| \\\n",
"0 purpose_woe 1.1976 0.213 5.633 0.000 \n",
"1 duration_in_month_woe 0.9058 0.172 5.252 0.000 \n",
"2 credit_amount_corr1_woe 0.6386 0.357 1.789 0.074 \n",
"3 age_in_years_woe 0.9041 0.256 3.531 0.000 \n",
"4 credit_history_woe 0.9796 0.175 5.599 0.000 \n",
"\n",
" [0.025 0.975] \n",
"0 0.781 1.614 \n",
"1 0.568 1.244 \n",
"2 -0.061 1.338 \n",
"3 0.402 1.406 \n",
"4 0.637 1.323 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"模型psi
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MODEL_SCORE_BIN | \n",
" psi | \n",
" 训练样本数 | \n",
" 测试样本数 | \n",
" 训练样本数比例 | \n",
" 测试样本数比例 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" (-inf, 453.0] | \n",
" 0.003 | \n",
" 72 | \n",
" 25 | \n",
" 0.102 | \n",
" 0.086 | \n",
"
\n",
" \n",
" 1 | \n",
" (453.0, 497.0] | \n",
" 0.008 | \n",
" 77 | \n",
" 41 | \n",
" 0.109 | \n",
" 0.141 | \n",
"
\n",
" \n",
" 2 | \n",
" (497.0, 516.0] | \n",
" 0.000 | \n",
" 70 | \n",
" 29 | \n",
" 0.099 | \n",
" 0.100 | \n",
"
\n",
" \n",
" 3 | \n",
" (516.0, 534.0] | \n",
" 0.003 | \n",
" 70 | \n",
" 24 | \n",
" 0.099 | \n",
" 0.082 | \n",
"
\n",
" \n",
" 4 | \n",
" (534.0, 548.0] | \n",
" 0.000 | \n",
" 67 | \n",
" 29 | \n",
" 0.094 | \n",
" 0.100 | \n",
"
\n",
" \n",
" 5 | \n",
" (548.0, 563.0] | \n",
" 0.004 | \n",
" 71 | \n",
" 24 | \n",
" 0.100 | \n",
" 0.082 | \n",
"
\n",
" \n",
" 6 | \n",
" (563.0, 588.0] | \n",
" 0.000 | \n",
" 90 | \n",
" 37 | \n",
" 0.127 | \n",
" 0.127 | \n",
"
\n",
" \n",
" 7 | \n",
" (588.0, 603.4] | \n",
" 0.000 | \n",
" 50 | \n",
" 20 | \n",
" 0.071 | \n",
" 0.069 | \n",
"
\n",
" \n",
" 8 | \n",
" (603.4, 638.0] | \n",
" 0.002 | \n",
" 72 | \n",
" 34 | \n",
" 0.102 | \n",
" 0.117 | \n",
"
\n",
" \n",
" 9 | \n",
" (638.0, inf] | \n",
" 0.000 | \n",
" 70 | \n",
" 28 | \n",
" 0.099 | \n",
" 0.096 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" MODEL_SCORE_BIN psi 训练样本数 测试样本数 训练样本数比例 测试样本数比例\n",
"0 (-inf, 453.0] 0.003 72 25 0.102 0.086\n",
"1 (453.0, 497.0] 0.008 77 41 0.109 0.141\n",
"2 (497.0, 516.0] 0.000 70 29 0.099 0.100\n",
"3 (516.0, 534.0] 0.003 70 24 0.099 0.082\n",
"4 (534.0, 548.0] 0.000 67 29 0.094 0.100\n",
"5 (548.0, 563.0] 0.004 71 24 0.100 0.082\n",
"6 (563.0, 588.0] 0.000 90 37 0.127 0.127\n",
"7 (588.0, 603.4] 0.000 50 20 0.071 0.069\n",
"8 (603.4, 638.0] 0.002 72 34 0.102 0.117\n",
"9 (638.0, inf] 0.000 70 28 0.099 0.096"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"模型psi: 0.02\n",
"加入规则后:\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MODEL_SCORE_BIN | \n",
" psi | \n",
" 训练样本数 | \n",
" 测试样本数 | \n",
" 训练样本数比例 | \n",
" 测试样本数比例 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" (-inf, 452.0] | \n",
" 0.000 | \n",
" 75 | \n",
" 29 | \n",
" 0.106 | \n",
" 0.100 | \n",
"
\n",
" \n",
" 1 | \n",
" (452.0, 496.0] | \n",
" 0.005 | \n",
" 68 | \n",
" 35 | \n",
" 0.096 | \n",
" 0.120 | \n",
"
\n",
" \n",
" 2 | \n",
" (496.0, 513.0] | \n",
" 0.000 | \n",
" 71 | \n",
" 28 | \n",
" 0.100 | \n",
" 0.096 | \n",
"
\n",
" \n",
" 3 | \n",
" (513.0, 532.0] | \n",
" 0.000 | \n",
" 73 | \n",
" 29 | \n",
" 0.103 | \n",
" 0.100 | \n",
"
\n",
" \n",
" 4 | \n",
" (532.0, 548.0] | \n",
" 0.000 | \n",
" 73 | \n",
" 30 | \n",
" 0.103 | \n",
" 0.103 | \n",
"
\n",
" \n",
" 5 | \n",
" (548.0, 563.0] | \n",
" 0.005 | \n",
" 70 | \n",
" 23 | \n",
" 0.099 | \n",
" 0.079 | \n",
"
\n",
" \n",
" 6 | \n",
" (563.0, 588.0] | \n",
" 0.000 | \n",
" 87 | \n",
" 36 | \n",
" 0.123 | \n",
" 0.124 | \n",
"
\n",
" \n",
" 7 | \n",
" (588.0, 603.4] | \n",
" 0.001 | \n",
" 50 | \n",
" 19 | \n",
" 0.071 | \n",
" 0.065 | \n",
"
\n",
" \n",
" 8 | \n",
" (603.4, 638.0] | \n",
" 0.002 | \n",
" 72 | \n",
" 34 | \n",
" 0.102 | \n",
" 0.117 | \n",
"
\n",
" \n",
" 9 | \n",
" (638.0, inf] | \n",
" 0.000 | \n",
" 70 | \n",
" 28 | \n",
" 0.099 | \n",
" 0.096 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" MODEL_SCORE_BIN psi 训练样本数 测试样本数 训练样本数比例 测试样本数比例\n",
"0 (-inf, 452.0] 0.000 75 29 0.106 0.100\n",
"1 (452.0, 496.0] 0.005 68 35 0.096 0.120\n",
"2 (496.0, 513.0] 0.000 71 28 0.100 0.096\n",
"3 (513.0, 532.0] 0.000 73 29 0.103 0.100\n",
"4 (532.0, 548.0] 0.000 73 30 0.103 0.103\n",
"5 (548.0, 563.0] 0.005 70 23 0.099 0.079\n",
"6 (563.0, 588.0] 0.000 87 36 0.123 0.124\n",
"7 (588.0, 603.4] 0.001 50 19 0.071 0.065\n",
"8 (603.4, 638.0] 0.002 72 34 0.102 0.117\n",
"9 (638.0, inf] 0.000 70 28 0.099 0.096"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"模型psi: 0.013\n"
]
},
{
"data": {
"text/html": [
"分数分箱
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"训练集-分数分箱\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MODEL_SCORE_BIN | \n",
" 样本数 | \n",
" 坏样本数 | \n",
" 好样本数 | \n",
" 坏样本比例 | \n",
" 样本数比例 | \n",
" 总坏样本数 | \n",
" 总好样本数 | \n",
" 平均坏样本率 | \n",
" 累计坏样本数 | \n",
" 累计好样本数 | \n",
" 累计样本数 | \n",
" 累计坏样本比例 | \n",
" 累计好样本比例 | \n",
" KS | \n",
" LIFT | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" (-inf, 453.0] | \n",
" 72 | \n",
" 45 | \n",
" 27 | \n",
" 0.625 | \n",
" 0.102 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 45 | \n",
" 27 | \n",
" 72 | \n",
" 0.213 | \n",
" 0.054 | \n",
" 0.159 | \n",
" 2.097 | \n",
"
\n",
" \n",
" 1 | \n",
" (453.0, 497.0] | \n",
" 77 | \n",
" 41 | \n",
" 36 | \n",
" 0.532 | \n",
" 0.109 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 86 | \n",
" 63 | \n",
" 149 | \n",
" 0.408 | \n",
" 0.127 | \n",
" 0.281 | \n",
" 1.937 | \n",
"
\n",
" \n",
" 2 | \n",
" (497.0, 516.0] | \n",
" 70 | \n",
" 32 | \n",
" 38 | \n",
" 0.457 | \n",
" 0.099 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 118 | \n",
" 101 | \n",
" 219 | \n",
" 0.559 | \n",
" 0.203 | \n",
" 0.356 | \n",
" 1.808 | \n",
"
\n",
" \n",
" 3 | \n",
" (516.0, 534.0] | \n",
" 70 | \n",
" 24 | \n",
" 46 | \n",
" 0.343 | \n",
" 0.099 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 142 | \n",
" 147 | \n",
" 289 | \n",
" 0.673 | \n",
" 0.295 | \n",
" 0.378 | \n",
" 1.649 | \n",
"
\n",
" \n",
" 4 | \n",
" (534.0, 548.0] | \n",
" 67 | \n",
" 20 | \n",
" 47 | \n",
" 0.299 | \n",
" 0.094 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 162 | \n",
" 194 | \n",
" 356 | \n",
" 0.768 | \n",
" 0.390 | \n",
" 0.378 | \n",
" 1.527 | \n",
"
\n",
" \n",
" 5 | \n",
" (548.0, 563.0] | \n",
" 71 | \n",
" 15 | \n",
" 56 | \n",
" 0.211 | \n",
" 0.100 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 177 | \n",
" 250 | \n",
" 427 | \n",
" 0.839 | \n",
" 0.502 | \n",
" 0.337 | \n",
" 1.391 | \n",
"
\n",
" \n",
" 6 | \n",
" (563.0, 588.0] | \n",
" 90 | \n",
" 16 | \n",
" 74 | \n",
" 0.178 | \n",
" 0.127 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 193 | \n",
" 324 | \n",
" 517 | \n",
" 0.915 | \n",
" 0.651 | \n",
" 0.264 | \n",
" 1.253 | \n",
"
\n",
" \n",
" 7 | \n",
" (588.0, 603.4] | \n",
" 50 | \n",
" 6 | \n",
" 44 | \n",
" 0.120 | \n",
" 0.071 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 199 | \n",
" 368 | \n",
" 567 | \n",
" 0.943 | \n",
" 0.739 | \n",
" 0.204 | \n",
" 1.178 | \n",
"
\n",
" \n",
" 8 | \n",
" (603.4, 638.0] | \n",
" 72 | \n",
" 10 | \n",
" 62 | \n",
" 0.139 | \n",
" 0.102 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 209 | \n",
" 430 | \n",
" 639 | \n",
" 0.991 | \n",
" 0.863 | \n",
" 0.128 | \n",
" 1.098 | \n",
"
\n",
" \n",
" 9 | \n",
" (638.0, inf] | \n",
" 70 | \n",
" 2 | \n",
" 68 | \n",
" 0.029 | \n",
" 0.099 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 211 | \n",
" 498 | \n",
" 709 | \n",
" 1.000 | \n",
" 1.000 | \n",
" 0.000 | \n",
" 0.999 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" MODEL_SCORE_BIN 样本数 坏样本数 好样本数 坏样本比例 样本数比例 总坏样本数 总好样本数 平均坏样本率 \\\n",
"0 (-inf, 453.0] 72 45 27 0.625 0.102 211 498 0.298 \n",
"1 (453.0, 497.0] 77 41 36 0.532 0.109 211 498 0.298 \n",
"2 (497.0, 516.0] 70 32 38 0.457 0.099 211 498 0.298 \n",
"3 (516.0, 534.0] 70 24 46 0.343 0.099 211 498 0.298 \n",
"4 (534.0, 548.0] 67 20 47 0.299 0.094 211 498 0.298 \n",
"5 (548.0, 563.0] 71 15 56 0.211 0.100 211 498 0.298 \n",
"6 (563.0, 588.0] 90 16 74 0.178 0.127 211 498 0.298 \n",
"7 (588.0, 603.4] 50 6 44 0.120 0.071 211 498 0.298 \n",
"8 (603.4, 638.0] 72 10 62 0.139 0.102 211 498 0.298 \n",
"9 (638.0, inf] 70 2 68 0.029 0.099 211 498 0.298 \n",
"\n",
" 累计坏样本数 累计好样本数 累计样本数 累计坏样本比例 累计好样本比例 KS LIFT \n",
"0 45 27 72 0.213 0.054 0.159 2.097 \n",
"1 86 63 149 0.408 0.127 0.281 1.937 \n",
"2 118 101 219 0.559 0.203 0.356 1.808 \n",
"3 142 147 289 0.673 0.295 0.378 1.649 \n",
"4 162 194 356 0.768 0.390 0.378 1.527 \n",
"5 177 250 427 0.839 0.502 0.337 1.391 \n",
"6 193 324 517 0.915 0.651 0.264 1.253 \n",
"7 199 368 567 0.943 0.739 0.204 1.178 \n",
"8 209 430 639 0.991 0.863 0.128 1.098 \n",
"9 211 498 709 1.000 1.000 0.000 0.999 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"加入规则后:\n",
"训练集-分数分箱\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MODEL_SCORE_BIN | \n",
" 样本数 | \n",
" 坏样本数 | \n",
" 好样本数 | \n",
" 坏样本比例 | \n",
" 样本数比例 | \n",
" 总坏样本数 | \n",
" 总好样本数 | \n",
" 平均坏样本率 | \n",
" 累计坏样本数 | \n",
" 累计好样本数 | \n",
" 累计样本数 | \n",
" 累计坏样本比例 | \n",
" 累计好样本比例 | \n",
" KS | \n",
" LIFT | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" (-inf, 452.0] | \n",
" 75 | \n",
" 46 | \n",
" 29 | \n",
" 0.613 | \n",
" 0.106 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 46 | \n",
" 29 | \n",
" 75 | \n",
" 0.218 | \n",
" 0.058 | \n",
" 0.160 | \n",
" 2.058 | \n",
"
\n",
" \n",
" 1 | \n",
" (452.0, 496.0] | \n",
" 68 | \n",
" 39 | \n",
" 29 | \n",
" 0.574 | \n",
" 0.096 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 85 | \n",
" 58 | \n",
" 143 | \n",
" 0.403 | \n",
" 0.116 | \n",
" 0.287 | \n",
" 1.995 | \n",
"
\n",
" \n",
" 2 | \n",
" (496.0, 513.0] | \n",
" 71 | \n",
" 31 | \n",
" 40 | \n",
" 0.437 | \n",
" 0.100 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 116 | \n",
" 98 | \n",
" 214 | \n",
" 0.550 | \n",
" 0.197 | \n",
" 0.353 | \n",
" 1.819 | \n",
"
\n",
" \n",
" 3 | \n",
" (513.0, 532.0] | \n",
" 73 | \n",
" 26 | \n",
" 47 | \n",
" 0.356 | \n",
" 0.103 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 142 | \n",
" 145 | \n",
" 287 | \n",
" 0.673 | \n",
" 0.291 | \n",
" 0.382 | \n",
" 1.660 | \n",
"
\n",
" \n",
" 4 | \n",
" (532.0, 548.0] | \n",
" 73 | \n",
" 22 | \n",
" 51 | \n",
" 0.301 | \n",
" 0.103 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 164 | \n",
" 196 | \n",
" 360 | \n",
" 0.777 | \n",
" 0.394 | \n",
" 0.383 | \n",
" 1.529 | \n",
"
\n",
" \n",
" 5 | \n",
" (548.0, 563.0] | \n",
" 70 | \n",
" 15 | \n",
" 55 | \n",
" 0.214 | \n",
" 0.099 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 179 | \n",
" 251 | \n",
" 430 | \n",
" 0.848 | \n",
" 0.504 | \n",
" 0.344 | \n",
" 1.397 | \n",
"
\n",
" \n",
" 6 | \n",
" (563.0, 588.0] | \n",
" 87 | \n",
" 14 | \n",
" 73 | \n",
" 0.161 | \n",
" 0.123 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 193 | \n",
" 324 | \n",
" 517 | \n",
" 0.915 | \n",
" 0.651 | \n",
" 0.264 | \n",
" 1.253 | \n",
"
\n",
" \n",
" 7 | \n",
" (588.0, 603.4] | \n",
" 50 | \n",
" 6 | \n",
" 44 | \n",
" 0.120 | \n",
" 0.071 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 199 | \n",
" 368 | \n",
" 567 | \n",
" 0.943 | \n",
" 0.739 | \n",
" 0.204 | \n",
" 1.178 | \n",
"
\n",
" \n",
" 8 | \n",
" (603.4, 638.0] | \n",
" 72 | \n",
" 10 | \n",
" 62 | \n",
" 0.139 | \n",
" 0.102 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 209 | \n",
" 430 | \n",
" 639 | \n",
" 0.991 | \n",
" 0.863 | \n",
" 0.128 | \n",
" 1.098 | \n",
"
\n",
" \n",
" 9 | \n",
" (638.0, inf] | \n",
" 70 | \n",
" 2 | \n",
" 68 | \n",
" 0.029 | \n",
" 0.099 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 211 | \n",
" 498 | \n",
" 709 | \n",
" 1.000 | \n",
" 1.000 | \n",
" 0.000 | \n",
" 0.999 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" MODEL_SCORE_BIN 样本数 坏样本数 好样本数 坏样本比例 样本数比例 总坏样本数 总好样本数 平均坏样本率 \\\n",
"0 (-inf, 452.0] 75 46 29 0.613 0.106 211 498 0.298 \n",
"1 (452.0, 496.0] 68 39 29 0.574 0.096 211 498 0.298 \n",
"2 (496.0, 513.0] 71 31 40 0.437 0.100 211 498 0.298 \n",
"3 (513.0, 532.0] 73 26 47 0.356 0.103 211 498 0.298 \n",
"4 (532.0, 548.0] 73 22 51 0.301 0.103 211 498 0.298 \n",
"5 (548.0, 563.0] 70 15 55 0.214 0.099 211 498 0.298 \n",
"6 (563.0, 588.0] 87 14 73 0.161 0.123 211 498 0.298 \n",
"7 (588.0, 603.4] 50 6 44 0.120 0.071 211 498 0.298 \n",
"8 (603.4, 638.0] 72 10 62 0.139 0.102 211 498 0.298 \n",
"9 (638.0, inf] 70 2 68 0.029 0.099 211 498 0.298 \n",
"\n",
" 累计坏样本数 累计好样本数 累计样本数 累计坏样本比例 累计好样本比例 KS LIFT \n",
"0 46 29 75 0.218 0.058 0.160 2.058 \n",
"1 85 58 143 0.403 0.116 0.287 1.995 \n",
"2 116 98 214 0.550 0.197 0.353 1.819 \n",
"3 142 145 287 0.673 0.291 0.382 1.660 \n",
"4 164 196 360 0.777 0.394 0.383 1.529 \n",
"5 179 251 430 0.848 0.504 0.344 1.397 \n",
"6 193 324 517 0.915 0.651 0.264 1.253 \n",
"7 199 368 567 0.943 0.739 0.204 1.178 \n",
"8 209 430 639 0.991 0.863 0.128 1.098 \n",
"9 211 498 709 1.000 1.000 0.000 0.999 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"测试集-分数分箱\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MODEL_SCORE_BIN | \n",
" 样本数 | \n",
" 坏样本数 | \n",
" 好样本数 | \n",
" 坏样本比例 | \n",
" 样本数比例 | \n",
" 总坏样本数 | \n",
" 总好样本数 | \n",
" 平均坏样本率 | \n",
" 累计坏样本数 | \n",
" 累计好样本数 | \n",
" 累计样本数 | \n",
" 累计坏样本比例 | \n",
" 累计好样本比例 | \n",
" KS | \n",
" LIFT | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" (-inf, 453.0] | \n",
" 25 | \n",
" 17 | \n",
" 8 | \n",
" 0.680 | \n",
" 0.086 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 17 | \n",
" 8 | \n",
" 25 | \n",
" 0.191 | \n",
" 0.040 | \n",
" 0.151 | \n",
" 2.222 | \n",
"
\n",
" \n",
" 1 | \n",
" (453.0, 497.0] | \n",
" 41 | \n",
" 21 | \n",
" 20 | \n",
" 0.512 | \n",
" 0.141 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 38 | \n",
" 28 | \n",
" 66 | \n",
" 0.427 | \n",
" 0.139 | \n",
" 0.288 | \n",
" 1.882 | \n",
"
\n",
" \n",
" 2 | \n",
" (497.0, 516.0] | \n",
" 29 | \n",
" 8 | \n",
" 21 | \n",
" 0.276 | \n",
" 0.100 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 46 | \n",
" 49 | \n",
" 95 | \n",
" 0.517 | \n",
" 0.243 | \n",
" 0.274 | \n",
" 1.582 | \n",
"
\n",
" \n",
" 3 | \n",
" (516.0, 534.0] | \n",
" 24 | \n",
" 9 | \n",
" 15 | \n",
" 0.375 | \n",
" 0.082 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 55 | \n",
" 64 | \n",
" 119 | \n",
" 0.618 | \n",
" 0.317 | \n",
" 0.301 | \n",
" 1.510 | \n",
"
\n",
" \n",
" 4 | \n",
" (534.0, 548.0] | \n",
" 29 | \n",
" 11 | \n",
" 18 | \n",
" 0.379 | \n",
" 0.100 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 66 | \n",
" 82 | \n",
" 148 | \n",
" 0.742 | \n",
" 0.406 | \n",
" 0.336 | \n",
" 1.457 | \n",
"
\n",
" \n",
" 5 | \n",
" (548.0, 563.0] | \n",
" 24 | \n",
" 11 | \n",
" 13 | \n",
" 0.458 | \n",
" 0.082 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 77 | \n",
" 95 | \n",
" 172 | \n",
" 0.865 | \n",
" 0.470 | \n",
" 0.395 | \n",
" 1.463 | \n",
"
\n",
" \n",
" 6 | \n",
" (563.0, 588.0] | \n",
" 37 | \n",
" 7 | \n",
" 30 | \n",
" 0.189 | \n",
" 0.127 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 84 | \n",
" 125 | \n",
" 209 | \n",
" 0.944 | \n",
" 0.619 | \n",
" 0.325 | \n",
" 1.313 | \n",
"
\n",
" \n",
" 7 | \n",
" (588.0, 603.4] | \n",
" 20 | \n",
" 3 | \n",
" 17 | \n",
" 0.150 | \n",
" 0.069 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 87 | \n",
" 142 | \n",
" 229 | \n",
" 0.978 | \n",
" 0.703 | \n",
" 0.275 | \n",
" 1.242 | \n",
"
\n",
" \n",
" 8 | \n",
" (603.4, 638.0] | \n",
" 34 | \n",
" 2 | \n",
" 32 | \n",
" 0.059 | \n",
" 0.117 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 89 | \n",
" 174 | \n",
" 263 | \n",
" 1.000 | \n",
" 0.861 | \n",
" 0.139 | \n",
" 1.106 | \n",
"
\n",
" \n",
" 9 | \n",
" (638.0, inf] | \n",
" 28 | \n",
" 0 | \n",
" 28 | \n",
" 0.000 | \n",
" 0.096 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 89 | \n",
" 202 | \n",
" 291 | \n",
" 1.000 | \n",
" 1.000 | \n",
" 0.000 | \n",
" 0.999 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" MODEL_SCORE_BIN 样本数 坏样本数 好样本数 坏样本比例 样本数比例 总坏样本数 总好样本数 平均坏样本率 \\\n",
"0 (-inf, 453.0] 25 17 8 0.680 0.086 89 202 0.306 \n",
"1 (453.0, 497.0] 41 21 20 0.512 0.141 89 202 0.306 \n",
"2 (497.0, 516.0] 29 8 21 0.276 0.100 89 202 0.306 \n",
"3 (516.0, 534.0] 24 9 15 0.375 0.082 89 202 0.306 \n",
"4 (534.0, 548.0] 29 11 18 0.379 0.100 89 202 0.306 \n",
"5 (548.0, 563.0] 24 11 13 0.458 0.082 89 202 0.306 \n",
"6 (563.0, 588.0] 37 7 30 0.189 0.127 89 202 0.306 \n",
"7 (588.0, 603.4] 20 3 17 0.150 0.069 89 202 0.306 \n",
"8 (603.4, 638.0] 34 2 32 0.059 0.117 89 202 0.306 \n",
"9 (638.0, inf] 28 0 28 0.000 0.096 89 202 0.306 \n",
"\n",
" 累计坏样本数 累计好样本数 累计样本数 累计坏样本比例 累计好样本比例 KS LIFT \n",
"0 17 8 25 0.191 0.040 0.151 2.222 \n",
"1 38 28 66 0.427 0.139 0.288 1.882 \n",
"2 46 49 95 0.517 0.243 0.274 1.582 \n",
"3 55 64 119 0.618 0.317 0.301 1.510 \n",
"4 66 82 148 0.742 0.406 0.336 1.457 \n",
"5 77 95 172 0.865 0.470 0.395 1.463 \n",
"6 84 125 209 0.944 0.619 0.325 1.313 \n",
"7 87 142 229 0.978 0.703 0.275 1.242 \n",
"8 89 174 263 1.000 0.861 0.139 1.106 \n",
"9 89 202 291 1.000 1.000 0.000 0.999 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"加入规则后:\n",
"测试集-分数分箱\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MODEL_SCORE_BIN | \n",
" 样本数 | \n",
" 坏样本数 | \n",
" 好样本数 | \n",
" 坏样本比例 | \n",
" 样本数比例 | \n",
" 总坏样本数 | \n",
" 总好样本数 | \n",
" 平均坏样本率 | \n",
" 累计坏样本数 | \n",
" 累计好样本数 | \n",
" 累计样本数 | \n",
" 累计坏样本比例 | \n",
" 累计好样本比例 | \n",
" KS | \n",
" LIFT | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" (-inf, 452.0] | \n",
" 29 | \n",
" 18 | \n",
" 11 | \n",
" 0.621 | \n",
" 0.100 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 18 | \n",
" 11 | \n",
" 29 | \n",
" 0.202 | \n",
" 0.054 | \n",
" 0.148 | \n",
" 2.028 | \n",
"
\n",
" \n",
" 1 | \n",
" (452.0, 496.0] | \n",
" 35 | \n",
" 21 | \n",
" 14 | \n",
" 0.600 | \n",
" 0.120 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 39 | \n",
" 25 | \n",
" 64 | \n",
" 0.438 | \n",
" 0.124 | \n",
" 0.314 | \n",
" 1.991 | \n",
"
\n",
" \n",
" 2 | \n",
" (496.0, 513.0] | \n",
" 28 | \n",
" 10 | \n",
" 18 | \n",
" 0.357 | \n",
" 0.096 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 49 | \n",
" 43 | \n",
" 92 | \n",
" 0.551 | \n",
" 0.213 | \n",
" 0.338 | \n",
" 1.741 | \n",
"
\n",
" \n",
" 3 | \n",
" (513.0, 532.0] | \n",
" 29 | \n",
" 8 | \n",
" 21 | \n",
" 0.276 | \n",
" 0.100 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 57 | \n",
" 64 | \n",
" 121 | \n",
" 0.640 | \n",
" 0.317 | \n",
" 0.323 | \n",
" 1.539 | \n",
"
\n",
" \n",
" 4 | \n",
" (532.0, 548.0] | \n",
" 30 | \n",
" 11 | \n",
" 19 | \n",
" 0.367 | \n",
" 0.103 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 68 | \n",
" 83 | \n",
" 151 | \n",
" 0.764 | \n",
" 0.411 | \n",
" 0.353 | \n",
" 1.472 | \n",
"
\n",
" \n",
" 5 | \n",
" (548.0, 563.0] | \n",
" 23 | \n",
" 9 | \n",
" 14 | \n",
" 0.391 | \n",
" 0.079 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 77 | \n",
" 97 | \n",
" 174 | \n",
" 0.865 | \n",
" 0.480 | \n",
" 0.385 | \n",
" 1.446 | \n",
"
\n",
" \n",
" 6 | \n",
" (563.0, 588.0] | \n",
" 36 | \n",
" 7 | \n",
" 29 | \n",
" 0.194 | \n",
" 0.124 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 84 | \n",
" 126 | \n",
" 210 | \n",
" 0.944 | \n",
" 0.624 | \n",
" 0.320 | \n",
" 1.307 | \n",
"
\n",
" \n",
" 7 | \n",
" (588.0, 603.4] | \n",
" 19 | \n",
" 3 | \n",
" 16 | \n",
" 0.158 | \n",
" 0.065 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 87 | \n",
" 142 | \n",
" 229 | \n",
" 0.978 | \n",
" 0.703 | \n",
" 0.275 | \n",
" 1.242 | \n",
"
\n",
" \n",
" 8 | \n",
" (603.4, 638.0] | \n",
" 34 | \n",
" 2 | \n",
" 32 | \n",
" 0.059 | \n",
" 0.117 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 89 | \n",
" 174 | \n",
" 263 | \n",
" 1.000 | \n",
" 0.861 | \n",
" 0.139 | \n",
" 1.106 | \n",
"
\n",
" \n",
" 9 | \n",
" (638.0, inf] | \n",
" 28 | \n",
" 0 | \n",
" 28 | \n",
" 0.000 | \n",
" 0.096 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 89 | \n",
" 202 | \n",
" 291 | \n",
" 1.000 | \n",
" 1.000 | \n",
" 0.000 | \n",
" 0.999 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" MODEL_SCORE_BIN 样本数 坏样本数 好样本数 坏样本比例 样本数比例 总坏样本数 总好样本数 平均坏样本率 \\\n",
"0 (-inf, 452.0] 29 18 11 0.621 0.100 89 202 0.306 \n",
"1 (452.0, 496.0] 35 21 14 0.600 0.120 89 202 0.306 \n",
"2 (496.0, 513.0] 28 10 18 0.357 0.096 89 202 0.306 \n",
"3 (513.0, 532.0] 29 8 21 0.276 0.100 89 202 0.306 \n",
"4 (532.0, 548.0] 30 11 19 0.367 0.103 89 202 0.306 \n",
"5 (548.0, 563.0] 23 9 14 0.391 0.079 89 202 0.306 \n",
"6 (563.0, 588.0] 36 7 29 0.194 0.124 89 202 0.306 \n",
"7 (588.0, 603.4] 19 3 16 0.158 0.065 89 202 0.306 \n",
"8 (603.4, 638.0] 34 2 32 0.059 0.117 89 202 0.306 \n",
"9 (638.0, inf] 28 0 28 0.000 0.096 89 202 0.306 \n",
"\n",
" 累计坏样本数 累计好样本数 累计样本数 累计坏样本比例 累计好样本比例 KS LIFT \n",
"0 18 11 29 0.202 0.054 0.148 2.028 \n",
"1 39 25 64 0.438 0.124 0.314 1.991 \n",
"2 49 43 92 0.551 0.213 0.338 1.741 \n",
"3 57 64 121 0.640 0.317 0.323 1.539 \n",
"4 68 83 151 0.764 0.411 0.353 1.472 \n",
"5 77 97 174 0.865 0.480 0.385 1.446 \n",
"6 84 126 210 0.944 0.624 0.320 1.307 \n",
"7 87 142 229 0.978 0.703 0.275 1.242 \n",
"8 89 174 263 1.000 0.861 0.139 1.106 \n",
"9 89 202 291 1.000 1.000 0.000 0.999 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"评分卡
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"评分卡不包含规则\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" variable | \n",
" bin | \n",
" points | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" basepoints | \n",
" NaN | \n",
" 538.0 | \n",
"
\n",
" \n",
" 0 | \n",
" purpose | \n",
" retraining%,%car (used) | \n",
" 75.0 | \n",
"
\n",
" \n",
" 1 | \n",
" purpose | \n",
" radio/television | \n",
" 32.0 | \n",
"
\n",
" \n",
" 2 | \n",
" purpose | \n",
" furniture/equipment%,%business%,%repairs | \n",
" -12.0 | \n",
"
\n",
" \n",
" 3 | \n",
" purpose | \n",
" domestic appliances%,%education%,%car (new)%,%... | \n",
" -33.0 | \n",
"
\n",
" \n",
" 4 | \n",
" duration_in_month | \n",
" [-inf,8.0) | \n",
" 95.0 | \n",
"
\n",
" \n",
" 5 | \n",
" duration_in_month | \n",
" [8.0,15.0) | \n",
" 15.0 | \n",
"
\n",
" \n",
" 6 | \n",
" duration_in_month | \n",
" [15.0,30.0) | \n",
" -0.0 | \n",
"
\n",
" \n",
" 7 | \n",
" duration_in_month | \n",
" [30.0,inf) | \n",
" -45.0 | \n",
"
\n",
" \n",
" 8 | \n",
" credit_amount_corr1 | \n",
" [-inf,15000.0) | \n",
" 4.0 | \n",
"
\n",
" \n",
" 9 | \n",
" credit_amount_corr1 | \n",
" [15000.0,20000.0) | \n",
" -25.0 | \n",
"
\n",
" \n",
" 10 | \n",
" credit_amount_corr1 | \n",
" [20000.0,inf) | \n",
" -54.0 | \n",
"
\n",
" \n",
" 11 | \n",
" age_in_years | \n",
" 36 | \n",
" 69.0 | \n",
"
\n",
" \n",
" 12 | \n",
" age_in_years | \n",
" [-inf,25.0) | \n",
" -30.0 | \n",
"
\n",
" \n",
" 13 | \n",
" age_in_years | \n",
" [25.0,30.0) | \n",
" -10.0 | \n",
"
\n",
" \n",
" 14 | \n",
" age_in_years | \n",
" [30.0,35.0) | \n",
" -8.0 | \n",
"
\n",
" \n",
" 15 | \n",
" age_in_years | \n",
" [35.0,inf) | \n",
" 17.0 | \n",
"
\n",
" \n",
" 16 | \n",
" credit_history | \n",
" no credits taken/ all credits paid back duly%,... | \n",
" -92.0 | \n",
"
\n",
" \n",
" 17 | \n",
" credit_history | \n",
" existing credits paid back duly till now | \n",
" -3.0 | \n",
"
\n",
" \n",
" 18 | \n",
" credit_history | \n",
" delay in paying off in the past | \n",
" -10.0 | \n",
"
\n",
" \n",
" 19 | \n",
" credit_history | \n",
" critical account/ other credits existing (not ... | \n",
" 47.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" variable bin \\\n",
"0 basepoints NaN \n",
"0 purpose retraining%,%car (used) \n",
"1 purpose radio/television \n",
"2 purpose furniture/equipment%,%business%,%repairs \n",
"3 purpose domestic appliances%,%education%,%car (new)%,%... \n",
"4 duration_in_month [-inf,8.0) \n",
"5 duration_in_month [8.0,15.0) \n",
"6 duration_in_month [15.0,30.0) \n",
"7 duration_in_month [30.0,inf) \n",
"8 credit_amount_corr1 [-inf,15000.0) \n",
"9 credit_amount_corr1 [15000.0,20000.0) \n",
"10 credit_amount_corr1 [20000.0,inf) \n",
"11 age_in_years 36 \n",
"12 age_in_years [-inf,25.0) \n",
"13 age_in_years [25.0,30.0) \n",
"14 age_in_years [30.0,35.0) \n",
"15 age_in_years [35.0,inf) \n",
"16 credit_history no credits taken/ all credits paid back duly%,... \n",
"17 credit_history existing credits paid back duly till now \n",
"18 credit_history delay in paying off in the past \n",
"19 credit_history critical account/ other credits existing (not ... \n",
"\n",
" points \n",
"0 538.0 \n",
"0 75.0 \n",
"1 32.0 \n",
"2 -12.0 \n",
"3 -33.0 \n",
"4 95.0 \n",
"5 15.0 \n",
"6 -0.0 \n",
"7 -45.0 \n",
"8 4.0 \n",
"9 -25.0 \n",
"10 -54.0 \n",
"11 69.0 \n",
"12 -30.0 \n",
"13 -10.0 \n",
"14 -8.0 \n",
"15 17.0 \n",
"16 -92.0 \n",
"17 -3.0 \n",
"18 -10.0 \n",
"19 47.0 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"压力测试
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 违约率 | \n",
" 抽样次数 | \n",
" 样本数 | \n",
" 好样本数 | \n",
" 坏样本数 | \n",
" 平均AUC | \n",
" 最大AUC | \n",
" 最小AUC | \n",
" AUC标准差 | \n",
" 95%置信区间AUC | \n",
" 平均KS | \n",
" 最大KS | \n",
" 最小KS | \n",
" KS标准差 | \n",
" 95%置信区间KS | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.010 | \n",
" 10 | \n",
" 8900 | \n",
" 8811 | \n",
" 89 | \n",
" 0.756364 | \n",
" 0.756667 | \n",
" 0.756099 | \n",
" 0.000173 | \n",
" 0.7560 - 0.7567 | \n",
" 0.3830 | \n",
" 0.384 | \n",
" 0.382 | \n",
" 0.000447 | \n",
" 0.3821 - 0.3839 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.071 | \n",
" 10 | \n",
" 1253 | \n",
" 1164 | \n",
" 89 | \n",
" 0.755881 | \n",
" 0.757698 | \n",
" 0.753601 | \n",
" 0.001254 | \n",
" 0.7534 - 0.7583 | \n",
" 0.3833 | \n",
" 0.389 | \n",
" 0.377 | \n",
" 0.003348 | \n",
" 0.3767 - 0.3899 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.132 | \n",
" 10 | \n",
" 674 | \n",
" 585 | \n",
" 89 | \n",
" 0.756997 | \n",
" 0.758869 | \n",
" 0.754019 | \n",
" 0.001557 | \n",
" 0.7539 - 0.7600 | \n",
" 0.3853 | \n",
" 0.396 | \n",
" 0.354 | \n",
" 0.012626 | \n",
" 0.3606 - 0.4100 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.194 | \n",
" 10 | \n",
" 458 | \n",
" 369 | \n",
" 89 | \n",
" 0.755071 | \n",
" 0.761883 | \n",
" 0.751287 | \n",
" 0.003245 | \n",
" 0.7487 - 0.7614 | \n",
" 0.3577 | \n",
" 0.365 | \n",
" 0.349 | \n",
" 0.004859 | \n",
" 0.3482 - 0.3672 | \n",
"
\n",
" \n",
" 4 | \n",
" 0.255 | \n",
" 10 | \n",
" 349 | \n",
" 260 | \n",
" 89 | \n",
" 0.757679 | \n",
" 0.768669 | \n",
" 0.742740 | \n",
" 0.007634 | \n",
" 0.7427 - 0.7726 | \n",
" 0.3801 | \n",
" 0.388 | \n",
" 0.369 | \n",
" 0.007190 | \n",
" 0.3660 - 0.3942 | \n",
"
\n",
" \n",
" 5 | \n",
" 0.316 | \n",
" 10 | \n",
" 281 | \n",
" 192 | \n",
" 89 | \n",
" 0.755914 | \n",
" 0.764133 | \n",
" 0.747601 | \n",
" 0.004598 | \n",
" 0.7469 - 0.7649 | \n",
" 0.3850 | \n",
" 0.402 | \n",
" 0.370 | \n",
" 0.009767 | \n",
" 0.3659 - 0.4041 | \n",
"
\n",
" \n",
" 6 | \n",
" 0.377 | \n",
" 10 | \n",
" 236 | \n",
" 147 | \n",
" 89 | \n",
" 0.753474 | \n",
" 0.769395 | \n",
" 0.733165 | \n",
" 0.011501 | \n",
" 0.7309 - 0.7760 | \n",
" 0.3848 | \n",
" 0.423 | \n",
" 0.351 | \n",
" 0.022737 | \n",
" 0.3402 - 0.4294 | \n",
"
\n",
" \n",
" 7 | \n",
" 0.438 | \n",
" 10 | \n",
" 203 | \n",
" 114 | \n",
" 89 | \n",
" 0.754716 | \n",
" 0.783757 | \n",
" 0.726296 | \n",
" 0.016984 | \n",
" 0.7214 - 0.7880 | \n",
" 0.3727 | \n",
" 0.450 | \n",
" 0.309 | \n",
" 0.038210 | \n",
" 0.2978 - 0.4476 | \n",
"
\n",
" \n",
" 8 | \n",
" 0.499 | \n",
" 10 | \n",
" 178 | \n",
" 89 | \n",
" 89 | \n",
" 0.754419 | \n",
" 0.779258 | \n",
" 0.711589 | \n",
" 0.020627 | \n",
" 0.7140 - 0.7948 | \n",
" 0.3764 | \n",
" 0.427 | \n",
" 0.337 | \n",
" 0.028161 | \n",
" 0.3212 - 0.4316 | \n",
"
\n",
" \n",
" 9 | \n",
" 0.561 | \n",
" 10 | \n",
" 158 | \n",
" 69 | \n",
" 89 | \n",
" 0.753395 | \n",
" 0.797916 | \n",
" 0.685963 | \n",
" 0.028749 | \n",
" 0.6970 - 0.8097 | \n",
" 0.3894 | \n",
" 0.455 | \n",
" 0.347 | \n",
" 0.037487 | \n",
" 0.3159 - 0.4629 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 违约率 抽样次数 样本数 好样本数 坏样本数 平均AUC 最大AUC 最小AUC AUC标准差 \\\n",
"0 0.010 10 8900 8811 89 0.756364 0.756667 0.756099 0.000173 \n",
"1 0.071 10 1253 1164 89 0.755881 0.757698 0.753601 0.001254 \n",
"2 0.132 10 674 585 89 0.756997 0.758869 0.754019 0.001557 \n",
"3 0.194 10 458 369 89 0.755071 0.761883 0.751287 0.003245 \n",
"4 0.255 10 349 260 89 0.757679 0.768669 0.742740 0.007634 \n",
"5 0.316 10 281 192 89 0.755914 0.764133 0.747601 0.004598 \n",
"6 0.377 10 236 147 89 0.753474 0.769395 0.733165 0.011501 \n",
"7 0.438 10 203 114 89 0.754716 0.783757 0.726296 0.016984 \n",
"8 0.499 10 178 89 89 0.754419 0.779258 0.711589 0.020627 \n",
"9 0.561 10 158 69 89 0.753395 0.797916 0.685963 0.028749 \n",
"\n",
" 95%置信区间AUC 平均KS 最大KS 最小KS KS标准差 95%置信区间KS \n",
"0 0.7560 - 0.7567 0.3830 0.384 0.382 0.000447 0.3821 - 0.3839 \n",
"1 0.7534 - 0.7583 0.3833 0.389 0.377 0.003348 0.3767 - 0.3899 \n",
"2 0.7539 - 0.7600 0.3853 0.396 0.354 0.012626 0.3606 - 0.4100 \n",
"3 0.7487 - 0.7614 0.3577 0.365 0.349 0.004859 0.3482 - 0.3672 \n",
"4 0.7427 - 0.7726 0.3801 0.388 0.369 0.007190 0.3660 - 0.3942 \n",
"5 0.7469 - 0.7649 0.3850 0.402 0.370 0.009767 0.3659 - 0.4041 \n",
"6 0.7309 - 0.7760 0.3848 0.423 0.351 0.022737 0.3402 - 0.4294 \n",
"7 0.7214 - 0.7880 0.3727 0.450 0.309 0.038210 0.2978 - 0.4476 \n",
"8 0.7140 - 0.7948 0.3764 0.427 0.337 0.028161 0.3212 - 0.4316 \n",
"9 0.6970 - 0.8097 0.3894 0.455 0.347 0.037487 0.3159 - 0.4629 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"模型报告文件储存路径:./cache/train/demo/模型报告.docx\n",
"mlcfg save to【./cache/train/demo/mlcfg.json】success. \n",
"feature save to【./cache/train/demo/feature.csv】success. \n",
"model save to【./cache/train/demo/model.pkl】success. \n",
"model save to【./cache/train/demo/card.csv】success. \n"
]
}
],
"source": [
"train_data=dat[:709]\n",
"test_data=dat[709:]\n",
"data = DataSplitEntity(train_data=train_data, test_data=test_data)\n",
"# 特征处理\n",
"cfg = {\n",
"\"project_name\": \"demo\",\n",
"# jupyter下输出内容\n",
"\"jupyter_print\": True,\n",
"\"bin_detail_print\": True,\n",
"# 是否开启粗分箱\n",
"\"format_bin\": True,\n",
"# 变量切分点搜索采样率\n",
"\"bin_sample_rate\": 0.01,\n",
"# 最多保留候选变量数\n",
"\"max_feature_num\": 10,\n",
"# 单调性允许变化次数\n",
"\"monto_shift_threshold\":1,\n",
"\"iv_threshold\": 0.01,\n",
"\"corr_threshold\": 0.4,\n",
"\"psi_threshold\": 0.2,\n",
"\"vif_threshold\": 10,\n",
"# 压力测试\n",
"\"stress_test\":True,\n",
"\"stress_sample_times\": 10,\n",
"# 特殊值\n",
"\"special_values\": {\"age_in_years\": [36]},\n",
"# 手动定义切分点,字符型的变量以'%,%'合并枚举值\n",
"\"breaks_list\": { \n",
"# 'duration_in_month': [12, 18, 48], \n",
"# 'credit_amount': [2000, 3500, 4000, 7000], \n",
" 'purpose': ['retraining%,%car (used)', 'radio/television', 'furniture/equipment%,%business%,%repairs', 'domestic appliances%,%education%,%car (new)%,%others'], \n",
"# 'age_in_years': [27, 34, 58]\n",
" },\n",
"# y \n",
"\"y_column\": \"creditability\",\n",
"# 候选变量\n",
"\"x_columns\": [\n",
"\"duration_in_month\",\n",
"\"credit_amount\",\n",
"\"age_in_years\",\n",
"\"purpose\",\n",
"\"credit_history\",\n",
" \n",
"\"credit_amount_corr1\",\n",
"\"credit_amount_corr2\",\n",
" ],\n",
"\"columns_anns\":{\n",
" \"age_in_years\": \"年龄\",\n",
" \"credit_history\": \"借贷历史\"\n",
"},\n",
"\"columns_exclude\": [],\n",
"# \"columns_include\": [\"credit_amount\"],\n",
"\"rules\":[\"df.loc[df['credit_amount']>=9000,'SCORE'] += -50\"]\n",
"}\n",
"\n",
"# 训练并生成报告\n",
"pipeline = Pipeline(data=data, **cfg)\n",
"pipeline.train()\n",
"pipeline.report()\n",
"pipeline.save()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "c50e771f",
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"只能针对数值型变量进行分析。\n"
]
}
],
"source": [
"pipeline.variable_analyse(\"age_in_years\", format_bin=True)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "0804cdca",
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"项目路径:【./cache/train/demo】\n",
"feature load from【./cache/train/demo/feature.csv】success.\n",
"model load from【./cache/train/demo】success.\n"
]
},
{
"data": {
"text/plain": [
"{'KS': 0.328,\n",
" 'AUC': 0.7145,\n",
" 'Gini': 0.429,\n",
" 'pic': }"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pipeline2 = Pipeline.load(\"./cache/train/demo\")\n",
"score = pipeline2.score(test_data)\n",
"# score\n",
"sc.perf_eva(test_data[\"creditability\"], score, title=\"train\", show_plot=True)"
]
}
],
"metadata": {
"celltoolbar": "编辑元数据",
"kernelspec": {
"display_name": "Python [conda env:analysis]",
"language": "python",
"name": "conda-env-analysis-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.13"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {
"height": "calc(100% - 180px)",
"left": "10px",
"top": "150px",
"width": "372.364px"
},
"toc_section_display": true,
"toc_window_display": true
}
},
"nbformat": 4,
"nbformat_minor": 5
}