{ "cells": [ { "cell_type": "code", "execution_count": 5, "id": "151b3ecc", "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "%matplotlib agg\n", "import os\n", "import sys\n", "\n", "import matplotlib.pyplot as plt\n", "import scorecardpy as sc\n", "\n", "# Location added to sys.path before importing easy_ml. Defaults to the original\n", "# /root/project; set EASY_ML_PROJECT_ROOT to point somewhere else.\n", "PROJECT_ROOT = os.environ.get(\"EASY_ML_PROJECT_ROOT\", \"/root/project\")\n", "# Guarded so that re-running this cell does not append duplicate entries.\n", "if PROJECT_ROOT not in sys.path:\n", "    sys.path.append(PROJECT_ROOT)\n", "from easy_ml import DataSplitEntity, Pipeline\n", "\n", "# Load the demo data via scorecardpy's germancredit().\n", "dat = sc.germancredit()\n", "# Replace \".\" with \"_\" in every column name.\n", "dat_columns = [c.replace(\".\", \"_\") for c in dat.columns]\n", "dat.columns = dat_columns\n", "# Encode the target as 1 for \"bad\" and 0 for any other value.\n", "dat[\"creditability\"] = dat[\"creditability\"].apply(lambda x: 1 if x == \"bad\" else 0)\n", "\n", "# Two exact multiples of credit_amount.\n", "# NOTE(review): presumably added to exercise correlation/VIF screening - confirm.\n", "dat[\"credit_amount_corr1\"] = dat[\"credit_amount\"] * 2\n", "dat[\"credit_amount_corr2\"] = dat[\"credit_amount\"] * 3\n", "\n", "# Disabled: overwrite the first duration_in_month value with \"missing\".\n", "# duration_in_month = list(dat[\"duration_in_month\"])\n", "# duration_in_month[0] = \"missing\"\n", "# dat[\"duration_in_month\"] = duration_in_month\n", "\n", "# Disabled: overwrite the first two purpose values with \"missing\" and None.\n", "# purpose = list(dat[\"purpose\"])\n", "# purpose[0] = \"missing\"\n", "# purpose[1] = None\n", "# dat[\"purpose\"] = purpose\n" ] }, { "cell_type": "code", "execution_count": 7, "id": "dda26a85", "metadata": { "code_folding": [], "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "项目路径:【./cache/train/demo】\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 7/7 [00:13<00:00, 1.86s/it]\n" ] }, { "data": { "text/html": [ "

样本分布

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
样本样本数样本占比坏样本数坏样本比例
0训练集70970.90%21129.76%
1测试集29129.10%8930.58%
2合计1000100%30030.00%
\n", "
" ], "text/plain": [ " 样本 样本数 样本占比 坏样本数 坏样本比例\n", "0 训练集 709 70.90% 211 29.76%\n", "1 测试集 291 29.10% 89 30.58%\n", "2 合计 1000 100% 300 30.00%" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

变量iv

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
变量ivpsivif释义
4credit_history0.2720.0161.029借贷历史
1duration_in_month0.2620.0361.100-
0purpose0.1520.0131.005-
3age_in_years0.1050.0141.025年龄
2credit_amount_corr10.0770.0051.091-
\n", "
" ], "text/plain": [ " 变量 iv psi vif 释义\n", "4 credit_history 0.272 0.016 1.029 借贷历史\n", "1 duration_in_month 0.262 0.036 1.100 -\n", "0 purpose 0.152 0.013 1.005 -\n", "3 age_in_years 0.105 0.014 1.025 年龄\n", "2 credit_amount_corr1 0.077 0.005 1.091 -" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

变量趋势

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
训练集
测试集
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "变量切分点:\n", "{\n", " \"credit_history\": [\n", " \"no credits taken/ all credits paid back duly%,%all credits at this bank paid back duly\",\n", " \"existing credits paid back duly till now\",\n", " \"delay in paying off in the past\",\n", " \"critical account/ other credits existing (not at this bank)\"\n", " ],\n", " \"duration_in_month\": [\n", " 8,\n", " 15,\n", " 30\n", " ],\n", " \"purpose\": [\n", " \"retraining%,%car (used)\",\n", " \"radio/television\",\n", " \"furniture/equipment%,%business%,%repairs\",\n", " \"domestic appliances%,%education%,%car (new)%,%others\"\n", " ],\n", " \"credit_amount_corr1\": [\n", " 15000.0,\n", " 20000.0\n", " ],\n", " \"age_in_years\": [\n", " 25,\n", " 30,\n", " 35\n", " ]\n", "}\n", "选中变量不同分箱数下变量的推荐切分点:\n", "-----【duration_in_month】不同分箱数下变量的推荐切分点-----\n", "[35]\n", "[8, 35]\n", "[8, 15, 30]\n", "[6, 15, 25, 35]\n", "[8, 10, 15, 35]\n" ] }, { "data": { "text/html": [ "
训练集
测试集
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "-----【credit_amount_corr1】不同分箱数下变量的推荐切分点-----\n", "[15000.0]\n", "[15000.0, 20000.0]\n", "[5000.0, 15000.0, 20000.0]\n", "[5000.0, 10000.0, 20000.0]\n", "[5000.0, 10000.0, 15000.0, 20000.0]\n" ] }, { "data": { "text/html": [ "
训练集
测试集
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "-----【age_in_years】不同分箱数下变量的推荐切分点-----\n", "[30]\n", "[30, 35]\n", "[25, 30, 35]\n", "[20, 25, 30, 35]\n", "[20, 30, 35, 40]\n" ] }, { "data": { "text/html": [ "
训练集
测试集
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

快速筛选过程

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "剔除train_iv小于阈值\n", "筛选前变量数量:7\n", "['duration_in_month', 'credit_amount', 'age_in_years', 'purpose', 'credit_history', 'credit_amount_corr1', 'credit_amount_corr2']\n", "快速筛选剔除变量数量:0\n", "\n" ] }, { "data": { "text/html": [ "

数值变量筛选过程

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "\n" ] }, { "data": { "text/html": [ "

相关性筛选过程

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "duration_in_month: 【credit_amount_iv0.299_corr0.483】 【credit_amount_corr2_iv0.24_corr0.511】 \n", "\n", "-----相关性筛选保留的【duration_in_month】-----\n", "-----【duration_in_month】不同分箱数下变量的推荐切分点-----\n", "[35]\n", "[8, 35]\n", "[8, 15, 30]\n", "[6, 15, 25, 35]\n", "[8, 10, 15, 35]\n" ] }, { "data": { "text/html": [ "
训练集
测试集
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "-----【credit_amount】不同分箱数下变量的推荐切分点-----\n", "[4000.0]\n", "[4000.0, 9000.0]\n", "[3000.0, 7000.0, 9000.0]\n", "[4000.0, 5000.0, 7000.0]\n", "[2000.0, 3000.0, 7000.0, 9000.0]\n" ] }, { "data": { "text/html": [ "
训练集
测试集
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "-----【credit_amount_corr2】不同分箱数下变量的推荐切分点-----\n", "[25000.0]\n", "[20000.0, 25000.0]\n", "[10000.0, 20000.0, 25000.0]\n", "[10000.0, 15000.0, 20000.0, 25000.0]\n", "[5000.0, 10000.0, 20000.0, 25000.0]\n" ] }, { "data": { "text/html": [ "
训练集
测试集
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

vif筛选过程

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "\n" ] }, { "data": { "text/html": [ "

ivtop筛选过程

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "iv = train_iv + test_iv\n", "\n" ] }, { "data": { "text/html": [ "

模型结果

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
样本集AUCKS
0训练集0.74950.3900
1测试集0.74940.4074
\n", "
" ], "text/plain": [ " 样本集 AUC KS\n", "0 训练集 0.7495 0.3900\n", "1 测试集 0.7494 0.4074" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "加入规则后:\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
样本集AUCKS
0训练集0.75180.3935
1测试集0.75640.3985
\n", "
" ], "text/plain": [ " 样本集 AUC KS\n", "0 训练集 0.7518 0.3935\n", "1 测试集 0.7564 0.3985" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

模型变量系数

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ " Generalized Linear Model Regression Results \n", "==============================================================================\n", "Dep. Variable: creditability No. Observations: 709\n", "Model: GLM Df Residuals: 704\n", "Model Family: Binomial Df Model: 4\n", "Link Function: logit Scale: 1.0000\n", "Method: IRLS Log-Likelihood: -425.51\n", "Date: Tue, 25 Feb 2025 Deviance: 851.02\n", "Time: 10:54:04 Pearson chi2: 731.\n", "No. Iterations: 4 \n", "Covariance Type: nonrobust \n", "==============================================================================\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
varcoefstd errzP>|z|[0.0250.975]
0purpose_woe1.19760.2135.6330.0000.7811.614
1duration_in_month_woe0.90580.1725.2520.0000.5681.244
2credit_amount_corr1_woe0.63860.3571.7890.074-0.0611.338
3age_in_years_woe0.90410.2563.5310.0000.4021.406
4credit_history_woe0.97960.1755.5990.0000.6371.323
\n", "
" ], "text/plain": [ " var coef std err z P>|z| \\\n", "0 purpose_woe 1.1976 0.213 5.633 0.000 \n", "1 duration_in_month_woe 0.9058 0.172 5.252 0.000 \n", "2 credit_amount_corr1_woe 0.6386 0.357 1.789 0.074 \n", "3 age_in_years_woe 0.9041 0.256 3.531 0.000 \n", "4 credit_history_woe 0.9796 0.175 5.599 0.000 \n", "\n", " [0.025 0.975] \n", "0 0.781 1.614 \n", "1 0.568 1.244 \n", "2 -0.061 1.338 \n", "3 0.402 1.406 \n", "4 0.637 1.323 " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

模型psi

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MODEL_SCORE_BINpsi训练样本数测试样本数训练样本数比例测试样本数比例
0(-inf, 453.0]0.00372250.1020.086
1(453.0, 497.0]0.00877410.1090.141
2(497.0, 516.0]0.00070290.0990.100
3(516.0, 534.0]0.00370240.0990.082
4(534.0, 548.0]0.00067290.0940.100
5(548.0, 563.0]0.00471240.1000.082
6(563.0, 588.0]0.00090370.1270.127
7(588.0, 603.4]0.00050200.0710.069
8(603.4, 638.0]0.00272340.1020.117
9(638.0, inf]0.00070280.0990.096
\n", "
" ], "text/plain": [ " MODEL_SCORE_BIN psi 训练样本数 测试样本数 训练样本数比例 测试样本数比例\n", "0 (-inf, 453.0] 0.003 72 25 0.102 0.086\n", "1 (453.0, 497.0] 0.008 77 41 0.109 0.141\n", "2 (497.0, 516.0] 0.000 70 29 0.099 0.100\n", "3 (516.0, 534.0] 0.003 70 24 0.099 0.082\n", "4 (534.0, 548.0] 0.000 67 29 0.094 0.100\n", "5 (548.0, 563.0] 0.004 71 24 0.100 0.082\n", "6 (563.0, 588.0] 0.000 90 37 0.127 0.127\n", "7 (588.0, 603.4] 0.000 50 20 0.071 0.069\n", "8 (603.4, 638.0] 0.002 72 34 0.102 0.117\n", "9 (638.0, inf] 0.000 70 28 0.099 0.096" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "模型psi: 0.02\n", "加入规则后:\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MODEL_SCORE_BINpsi训练样本数测试样本数训练样本数比例测试样本数比例
0(-inf, 452.0]0.00075290.1060.100
1(452.0, 496.0]0.00568350.0960.120
2(496.0, 513.0]0.00071280.1000.096
3(513.0, 532.0]0.00073290.1030.100
4(532.0, 548.0]0.00073300.1030.103
5(548.0, 563.0]0.00570230.0990.079
6(563.0, 588.0]0.00087360.1230.124
7(588.0, 603.4]0.00150190.0710.065
8(603.4, 638.0]0.00272340.1020.117
9(638.0, inf]0.00070280.0990.096
\n", "
" ], "text/plain": [ " MODEL_SCORE_BIN psi 训练样本数 测试样本数 训练样本数比例 测试样本数比例\n", "0 (-inf, 452.0] 0.000 75 29 0.106 0.100\n", "1 (452.0, 496.0] 0.005 68 35 0.096 0.120\n", "2 (496.0, 513.0] 0.000 71 28 0.100 0.096\n", "3 (513.0, 532.0] 0.000 73 29 0.103 0.100\n", "4 (532.0, 548.0] 0.000 73 30 0.103 0.103\n", "5 (548.0, 563.0] 0.005 70 23 0.099 0.079\n", "6 (563.0, 588.0] 0.000 87 36 0.123 0.124\n", "7 (588.0, 603.4] 0.001 50 19 0.071 0.065\n", "8 (603.4, 638.0] 0.002 72 34 0.102 0.117\n", "9 (638.0, inf] 0.000 70 28 0.099 0.096" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "模型psi: 0.013\n" ] }, { "data": { "text/html": [ "

分数分箱

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "训练集-分数分箱\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MODEL_SCORE_BIN样本数坏样本数好样本数坏样本比例样本数比例总坏样本数总好样本数平均坏样本率累计坏样本数累计好样本数累计样本数累计坏样本比例累计好样本比例KSLIFT
0(-inf, 453.0]7245270.6250.1022114980.2984527720.2130.0540.1592.097
1(453.0, 497.0]7741360.5320.1092114980.29886631490.4080.1270.2811.937
2(497.0, 516.0]7032380.4570.0992114980.2981181012190.5590.2030.3561.808
3(516.0, 534.0]7024460.3430.0992114980.2981421472890.6730.2950.3781.649
4(534.0, 548.0]6720470.2990.0942114980.2981621943560.7680.3900.3781.527
5(548.0, 563.0]7115560.2110.1002114980.2981772504270.8390.5020.3371.391
6(563.0, 588.0]9016740.1780.1272114980.2981933245170.9150.6510.2641.253
7(588.0, 603.4]506440.1200.0712114980.2981993685670.9430.7390.2041.178
8(603.4, 638.0]7210620.1390.1022114980.2982094306390.9910.8630.1281.098
9(638.0, inf]702680.0290.0992114980.2982114987091.0001.0000.0000.999
\n", "
" ], "text/plain": [ " MODEL_SCORE_BIN 样本数 坏样本数 好样本数 坏样本比例 样本数比例 总坏样本数 总好样本数 平均坏样本率 \\\n", "0 (-inf, 453.0] 72 45 27 0.625 0.102 211 498 0.298 \n", "1 (453.0, 497.0] 77 41 36 0.532 0.109 211 498 0.298 \n", "2 (497.0, 516.0] 70 32 38 0.457 0.099 211 498 0.298 \n", "3 (516.0, 534.0] 70 24 46 0.343 0.099 211 498 0.298 \n", "4 (534.0, 548.0] 67 20 47 0.299 0.094 211 498 0.298 \n", "5 (548.0, 563.0] 71 15 56 0.211 0.100 211 498 0.298 \n", "6 (563.0, 588.0] 90 16 74 0.178 0.127 211 498 0.298 \n", "7 (588.0, 603.4] 50 6 44 0.120 0.071 211 498 0.298 \n", "8 (603.4, 638.0] 72 10 62 0.139 0.102 211 498 0.298 \n", "9 (638.0, inf] 70 2 68 0.029 0.099 211 498 0.298 \n", "\n", " 累计坏样本数 累计好样本数 累计样本数 累计坏样本比例 累计好样本比例 KS LIFT \n", "0 45 27 72 0.213 0.054 0.159 2.097 \n", "1 86 63 149 0.408 0.127 0.281 1.937 \n", "2 118 101 219 0.559 0.203 0.356 1.808 \n", "3 142 147 289 0.673 0.295 0.378 1.649 \n", "4 162 194 356 0.768 0.390 0.378 1.527 \n", "5 177 250 427 0.839 0.502 0.337 1.391 \n", "6 193 324 517 0.915 0.651 0.264 1.253 \n", "7 199 368 567 0.943 0.739 0.204 1.178 \n", "8 209 430 639 0.991 0.863 0.128 1.098 \n", "9 211 498 709 1.000 1.000 0.000 0.999 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "加入规则后:\n", "训练集-分数分箱\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MODEL_SCORE_BIN样本数坏样本数好样本数坏样本比例样本数比例总坏样本数总好样本数平均坏样本率累计坏样本数累计好样本数累计样本数累计坏样本比例累计好样本比例KSLIFT
0(-inf, 452.0]7546290.6130.1062114980.2984629750.2180.0580.1602.058
1(452.0, 496.0]6839290.5740.0962114980.29885581430.4030.1160.2871.995
2(496.0, 513.0]7131400.4370.1002114980.298116982140.5500.1970.3531.819
3(513.0, 532.0]7326470.3560.1032114980.2981421452870.6730.2910.3821.660
4(532.0, 548.0]7322510.3010.1032114980.2981641963600.7770.3940.3831.529
5(548.0, 563.0]7015550.2140.0992114980.2981792514300.8480.5040.3441.397
6(563.0, 588.0]8714730.1610.1232114980.2981933245170.9150.6510.2641.253
7(588.0, 603.4]506440.1200.0712114980.2981993685670.9430.7390.2041.178
8(603.4, 638.0]7210620.1390.1022114980.2982094306390.9910.8630.1281.098
9(638.0, inf]702680.0290.0992114980.2982114987091.0001.0000.0000.999
\n", "
" ], "text/plain": [ " MODEL_SCORE_BIN 样本数 坏样本数 好样本数 坏样本比例 样本数比例 总坏样本数 总好样本数 平均坏样本率 \\\n", "0 (-inf, 452.0] 75 46 29 0.613 0.106 211 498 0.298 \n", "1 (452.0, 496.0] 68 39 29 0.574 0.096 211 498 0.298 \n", "2 (496.0, 513.0] 71 31 40 0.437 0.100 211 498 0.298 \n", "3 (513.0, 532.0] 73 26 47 0.356 0.103 211 498 0.298 \n", "4 (532.0, 548.0] 73 22 51 0.301 0.103 211 498 0.298 \n", "5 (548.0, 563.0] 70 15 55 0.214 0.099 211 498 0.298 \n", "6 (563.0, 588.0] 87 14 73 0.161 0.123 211 498 0.298 \n", "7 (588.0, 603.4] 50 6 44 0.120 0.071 211 498 0.298 \n", "8 (603.4, 638.0] 72 10 62 0.139 0.102 211 498 0.298 \n", "9 (638.0, inf] 70 2 68 0.029 0.099 211 498 0.298 \n", "\n", " 累计坏样本数 累计好样本数 累计样本数 累计坏样本比例 累计好样本比例 KS LIFT \n", "0 46 29 75 0.218 0.058 0.160 2.058 \n", "1 85 58 143 0.403 0.116 0.287 1.995 \n", "2 116 98 214 0.550 0.197 0.353 1.819 \n", "3 142 145 287 0.673 0.291 0.382 1.660 \n", "4 164 196 360 0.777 0.394 0.383 1.529 \n", "5 179 251 430 0.848 0.504 0.344 1.397 \n", "6 193 324 517 0.915 0.651 0.264 1.253 \n", "7 199 368 567 0.943 0.739 0.204 1.178 \n", "8 209 430 639 0.991 0.863 0.128 1.098 \n", "9 211 498 709 1.000 1.000 0.000 0.999 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "测试集-分数分箱\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MODEL_SCORE_BIN样本数坏样本数好样本数坏样本比例样本数比例总坏样本数总好样本数平均坏样本率累计坏样本数累计好样本数累计样本数累计坏样本比例累计好样本比例KSLIFT
0(-inf, 453.0]251780.6800.086892020.306178250.1910.0400.1512.222
1(453.0, 497.0]4121200.5120.141892020.3063828660.4270.1390.2881.882
2(497.0, 516.0]298210.2760.100892020.3064649950.5170.2430.2741.582
3(516.0, 534.0]249150.3750.082892020.30655641190.6180.3170.3011.510
4(534.0, 548.0]2911180.3790.100892020.30666821480.7420.4060.3361.457
5(548.0, 563.0]2411130.4580.082892020.30677951720.8650.4700.3951.463
6(563.0, 588.0]377300.1890.127892020.306841252090.9440.6190.3251.313
7(588.0, 603.4]203170.1500.069892020.306871422290.9780.7030.2751.242
8(603.4, 638.0]342320.0590.117892020.306891742631.0000.8610.1391.106
9(638.0, inf]280280.0000.096892020.306892022911.0001.0000.0000.999
\n", "
" ], "text/plain": [ " MODEL_SCORE_BIN 样本数 坏样本数 好样本数 坏样本比例 样本数比例 总坏样本数 总好样本数 平均坏样本率 \\\n", "0 (-inf, 453.0] 25 17 8 0.680 0.086 89 202 0.306 \n", "1 (453.0, 497.0] 41 21 20 0.512 0.141 89 202 0.306 \n", "2 (497.0, 516.0] 29 8 21 0.276 0.100 89 202 0.306 \n", "3 (516.0, 534.0] 24 9 15 0.375 0.082 89 202 0.306 \n", "4 (534.0, 548.0] 29 11 18 0.379 0.100 89 202 0.306 \n", "5 (548.0, 563.0] 24 11 13 0.458 0.082 89 202 0.306 \n", "6 (563.0, 588.0] 37 7 30 0.189 0.127 89 202 0.306 \n", "7 (588.0, 603.4] 20 3 17 0.150 0.069 89 202 0.306 \n", "8 (603.4, 638.0] 34 2 32 0.059 0.117 89 202 0.306 \n", "9 (638.0, inf] 28 0 28 0.000 0.096 89 202 0.306 \n", "\n", " 累计坏样本数 累计好样本数 累计样本数 累计坏样本比例 累计好样本比例 KS LIFT \n", "0 17 8 25 0.191 0.040 0.151 2.222 \n", "1 38 28 66 0.427 0.139 0.288 1.882 \n", "2 46 49 95 0.517 0.243 0.274 1.582 \n", "3 55 64 119 0.618 0.317 0.301 1.510 \n", "4 66 82 148 0.742 0.406 0.336 1.457 \n", "5 77 95 172 0.865 0.470 0.395 1.463 \n", "6 84 125 209 0.944 0.619 0.325 1.313 \n", "7 87 142 229 0.978 0.703 0.275 1.242 \n", "8 89 174 263 1.000 0.861 0.139 1.106 \n", "9 89 202 291 1.000 1.000 0.000 0.999 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "加入规则后:\n", "测试集-分数分箱\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MODEL_SCORE_BIN样本数坏样本数好样本数坏样本比例样本数比例总坏样本数总好样本数平均坏样本率累计坏样本数累计好样本数累计样本数累计坏样本比例累计好样本比例KSLIFT
0(-inf, 452.0]2918110.6210.100892020.3061811290.2020.0540.1482.028
1(452.0, 496.0]3521140.6000.120892020.3063925640.4380.1240.3141.991
2(496.0, 513.0]2810180.3570.096892020.3064943920.5510.2130.3381.741
3(513.0, 532.0]298210.2760.100892020.30657641210.6400.3170.3231.539
4(532.0, 548.0]3011190.3670.103892020.30668831510.7640.4110.3531.472
5(548.0, 563.0]239140.3910.079892020.30677971740.8650.4800.3851.446
6(563.0, 588.0]367290.1940.124892020.306841262100.9440.6240.3201.307
7(588.0, 603.4]193160.1580.065892020.306871422290.9780.7030.2751.242
8(603.4, 638.0]342320.0590.117892020.306891742631.0000.8610.1391.106
9(638.0, inf]280280.0000.096892020.306892022911.0001.0000.0000.999
\n", "
" ], "text/plain": [ " MODEL_SCORE_BIN 样本数 坏样本数 好样本数 坏样本比例 样本数比例 总坏样本数 总好样本数 平均坏样本率 \\\n", "0 (-inf, 452.0] 29 18 11 0.621 0.100 89 202 0.306 \n", "1 (452.0, 496.0] 35 21 14 0.600 0.120 89 202 0.306 \n", "2 (496.0, 513.0] 28 10 18 0.357 0.096 89 202 0.306 \n", "3 (513.0, 532.0] 29 8 21 0.276 0.100 89 202 0.306 \n", "4 (532.0, 548.0] 30 11 19 0.367 0.103 89 202 0.306 \n", "5 (548.0, 563.0] 23 9 14 0.391 0.079 89 202 0.306 \n", "6 (563.0, 588.0] 36 7 29 0.194 0.124 89 202 0.306 \n", "7 (588.0, 603.4] 19 3 16 0.158 0.065 89 202 0.306 \n", "8 (603.4, 638.0] 34 2 32 0.059 0.117 89 202 0.306 \n", "9 (638.0, inf] 28 0 28 0.000 0.096 89 202 0.306 \n", "\n", " 累计坏样本数 累计好样本数 累计样本数 累计坏样本比例 累计好样本比例 KS LIFT \n", "0 18 11 29 0.202 0.054 0.148 2.028 \n", "1 39 25 64 0.438 0.124 0.314 1.991 \n", "2 49 43 92 0.551 0.213 0.338 1.741 \n", "3 57 64 121 0.640 0.317 0.323 1.539 \n", "4 68 83 151 0.764 0.411 0.353 1.472 \n", "5 77 97 174 0.865 0.480 0.385 1.446 \n", "6 84 126 210 0.944 0.624 0.320 1.307 \n", "7 87 142 229 0.978 0.703 0.275 1.242 \n", "8 89 174 263 1.000 0.861 0.139 1.106 \n", "9 89 202 291 1.000 1.000 0.000 0.999 " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

评分卡

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "评分卡不包含规则\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
variablebinpoints
0basepointsNaN538.0
0purposeretraining%,%car (used)75.0
1purposeradio/television32.0
2purposefurniture/equipment%,%business%,%repairs-12.0
3purposedomestic appliances%,%education%,%car (new)%,%...-33.0
4duration_in_month[-inf,8.0)95.0
5duration_in_month[8.0,15.0)15.0
6duration_in_month[15.0,30.0)-0.0
7duration_in_month[30.0,inf)-45.0
8credit_amount_corr1[-inf,15000.0)4.0
9credit_amount_corr1[15000.0,20000.0)-25.0
10credit_amount_corr1[20000.0,inf)-54.0
11age_in_years3669.0
12age_in_years[-inf,25.0)-30.0
13age_in_years[25.0,30.0)-10.0
14age_in_years[30.0,35.0)-8.0
15age_in_years[35.0,inf)17.0
16credit_historyno credits taken/ all credits paid back duly%,...-92.0
17credit_historyexisting credits paid back duly till now-3.0
18credit_historydelay in paying off in the past-10.0
19credit_historycritical account/ other credits existing (not ...47.0
\n", "
" ], "text/plain": [ " variable bin \\\n", "0 basepoints NaN \n", "0 purpose retraining%,%car (used) \n", "1 purpose radio/television \n", "2 purpose furniture/equipment%,%business%,%repairs \n", "3 purpose domestic appliances%,%education%,%car (new)%,%... \n", "4 duration_in_month [-inf,8.0) \n", "5 duration_in_month [8.0,15.0) \n", "6 duration_in_month [15.0,30.0) \n", "7 duration_in_month [30.0,inf) \n", "8 credit_amount_corr1 [-inf,15000.0) \n", "9 credit_amount_corr1 [15000.0,20000.0) \n", "10 credit_amount_corr1 [20000.0,inf) \n", "11 age_in_years 36 \n", "12 age_in_years [-inf,25.0) \n", "13 age_in_years [25.0,30.0) \n", "14 age_in_years [30.0,35.0) \n", "15 age_in_years [35.0,inf) \n", "16 credit_history no credits taken/ all credits paid back duly%,... \n", "17 credit_history existing credits paid back duly till now \n", "18 credit_history delay in paying off in the past \n", "19 credit_history critical account/ other credits existing (not ... \n", "\n", " points \n", "0 538.0 \n", "0 75.0 \n", "1 32.0 \n", "2 -12.0 \n", "3 -33.0 \n", "4 95.0 \n", "5 15.0 \n", "6 -0.0 \n", "7 -45.0 \n", "8 4.0 \n", "9 -25.0 \n", "10 -54.0 \n", "11 69.0 \n", "12 -30.0 \n", "13 -10.0 \n", "14 -8.0 \n", "15 17.0 \n", "16 -92.0 \n", "17 -3.0 \n", "18 -10.0 \n", "19 47.0 " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

压力测试

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
违约率抽样次数样本数好样本数坏样本数平均AUC最大AUC最小AUCAUC标准差95%置信区间AUC平均KS最大KS最小KSKS标准差95%置信区间KS
00.0101089008811890.7563640.7566670.7560990.0001730.7560 - 0.75670.38300.3840.3820.0004470.3821 - 0.3839
10.0711012531164890.7558810.7576980.7536010.0012540.7534 - 0.75830.38330.3890.3770.0033480.3767 - 0.3899
20.13210674585890.7569970.7588690.7540190.0015570.7539 - 0.76000.38530.3960.3540.0126260.3606 - 0.4100
30.19410458369890.7550710.7618830.7512870.0032450.7487 - 0.76140.35770.3650.3490.0048590.3482 - 0.3672
40.25510349260890.7576790.7686690.7427400.0076340.7427 - 0.77260.38010.3880.3690.0071900.3660 - 0.3942
50.31610281192890.7559140.7641330.7476010.0045980.7469 - 0.76490.38500.4020.3700.0097670.3659 - 0.4041
60.37710236147890.7534740.7693950.7331650.0115010.7309 - 0.77600.38480.4230.3510.0227370.3402 - 0.4294
70.43810203114890.7547160.7837570.7262960.0169840.7214 - 0.78800.37270.4500.3090.0382100.2978 - 0.4476
80.4991017889890.7544190.7792580.7115890.0206270.7140 - 0.79480.37640.4270.3370.0281610.3212 - 0.4316
90.5611015869890.7533950.7979160.6859630.0287490.6970 - 0.80970.38940.4550.3470.0374870.3159 - 0.4629
\n", "
" ], "text/plain": [ " 违约率 抽样次数 样本数 好样本数 坏样本数 平均AUC 最大AUC 最小AUC AUC标准差 \\\n", "0 0.010 10 8900 8811 89 0.756364 0.756667 0.756099 0.000173 \n", "1 0.071 10 1253 1164 89 0.755881 0.757698 0.753601 0.001254 \n", "2 0.132 10 674 585 89 0.756997 0.758869 0.754019 0.001557 \n", "3 0.194 10 458 369 89 0.755071 0.761883 0.751287 0.003245 \n", "4 0.255 10 349 260 89 0.757679 0.768669 0.742740 0.007634 \n", "5 0.316 10 281 192 89 0.755914 0.764133 0.747601 0.004598 \n", "6 0.377 10 236 147 89 0.753474 0.769395 0.733165 0.011501 \n", "7 0.438 10 203 114 89 0.754716 0.783757 0.726296 0.016984 \n", "8 0.499 10 178 89 89 0.754419 0.779258 0.711589 0.020627 \n", "9 0.561 10 158 69 89 0.753395 0.797916 0.685963 0.028749 \n", "\n", " 95%置信区间AUC 平均KS 最大KS 最小KS KS标准差 95%置信区间KS \n", "0 0.7560 - 0.7567 0.3830 0.384 0.382 0.000447 0.3821 - 0.3839 \n", "1 0.7534 - 0.7583 0.3833 0.389 0.377 0.003348 0.3767 - 0.3899 \n", "2 0.7539 - 0.7600 0.3853 0.396 0.354 0.012626 0.3606 - 0.4100 \n", "3 0.7487 - 0.7614 0.3577 0.365 0.349 0.004859 0.3482 - 0.3672 \n", "4 0.7427 - 0.7726 0.3801 0.388 0.369 0.007190 0.3660 - 0.3942 \n", "5 0.7469 - 0.7649 0.3850 0.402 0.370 0.009767 0.3659 - 0.4041 \n", "6 0.7309 - 0.7760 0.3848 0.423 0.351 0.022737 0.3402 - 0.4294 \n", "7 0.7214 - 0.7880 0.3727 0.450 0.309 0.038210 0.2978 - 0.4476 \n", "8 0.7140 - 0.7948 0.3764 0.427 0.337 0.028161 0.3212 - 0.4316 \n", "9 0.6970 - 0.8097 0.3894 0.455 0.347 0.037487 0.3159 - 0.4629 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "模型报告文件储存路径:./cache/train/demo/模型报告.docx\n", "mlcfg save to【./cache/train/demo/mlcfg.json】success. \n", "feature save to【./cache/train/demo/feature.csv】success. \n", "model save to【./cache/train/demo/model.pkl】success. \n", "model save to【./cache/train/demo/card.csv】success. 
\n" ] } ], "source": [ "train_data=dat[:709]\n", "test_data=dat[709:]\n", "data = DataSplitEntity(train_data=train_data, test_data=test_data)\n", "# 特征处理\n", "cfg = {\n", "\"project_name\": \"demo\",\n", "# jupyter下输出内容\n", "\"jupyter_print\": True,\n", "\"bin_detail_print\": True,\n", "# 是否开启粗分箱\n", "\"format_bin\": True,\n", "# 变量切分点搜索采样率\n", "\"bin_sample_rate\": 0.01,\n", "# 最多保留候选变量数\n", "\"max_feature_num\": 10,\n", "# 单调性允许变化次数\n", "\"monto_shift_threshold\":1,\n", "\"iv_threshold\": 0.01,\n", "\"corr_threshold\": 0.4,\n", "\"psi_threshold\": 0.2,\n", "\"vif_threshold\": 10,\n", "# 压力测试\n", "\"stress_test\":True,\n", "\"stress_sample_times\": 10,\n", "# 特殊值\n", "\"special_values\": {\"age_in_years\": [36]},\n", "# 手动定义切分点,字符型的变量以'%,%'合并枚举值\n", "\"breaks_list\": { \n", "# 'duration_in_month': [12, 18, 48], \n", "# 'credit_amount': [2000, 3500, 4000, 7000], \n", " 'purpose': ['retraining%,%car (used)', 'radio/television', 'furniture/equipment%,%business%,%repairs', 'domestic appliances%,%education%,%car (new)%,%others'], \n", "# 'age_in_years': [27, 34, 58]\n", " },\n", "# y \n", "\"y_column\": \"creditability\",\n", "# 候选变量\n", "\"x_columns\": [\n", "\"duration_in_month\",\n", "\"credit_amount\",\n", "\"age_in_years\",\n", "\"purpose\",\n", "\"credit_history\",\n", " \n", "\"credit_amount_corr1\",\n", "\"credit_amount_corr2\",\n", " ],\n", "\"columns_anns\":{\n", " \"age_in_years\": \"年龄\",\n", " \"credit_history\": \"借贷历史\"\n", "},\n", "\"columns_exclude\": [],\n", "# \"columns_include\": [\"credit_amount\"],\n", "\"rules\":[\"df.loc[df['credit_amount']>=9000,'SCORE'] += -50\"]\n", "}\n", "\n", "# 训练并生成报告\n", "pipeline = Pipeline(data=data, **cfg)\n", "pipeline.train()\n", "pipeline.report()\n", "pipeline.save()" ] }, { "cell_type": "code", "execution_count": 5, "id": "c50e771f", "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "只能针对数值型变量进行分析。\n" ] } ], "source": [ 
"pipeline.variable_analyse(\"age_in_years\", format_bin=True)" ] }, { "cell_type": "code", "execution_count": 3, "id": "0804cdca", "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "项目路径:【./cache/train/demo】\n", "feature load from【./cache/train/demo/feature.csv】success.\n", "model load from【./cache/train/demo】success.\n" ] }, { "data": { "text/plain": [ "{'KS': 0.328,\n", " 'AUC': 0.7145,\n", " 'Gini': 0.429,\n", " 'pic':
}" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Reload the saved pipeline from disk and score the held-out rows with it.\n", "pipeline2 = Pipeline.load(\"./cache/train/demo\")\n", "score = pipeline2.score(test_data)\n", "# Labels and scores both come from test_data, so the evaluation plot is titled test, not train.\n", "sc.perf_eva(test_data[\"creditability\"], score, title=\"test\", show_plot=True)" ] } ], "metadata": { "celltoolbar": "编辑元数据", "kernelspec": { "display_name": "Python [conda env:analysis]", "language": "python", "name": "conda-env-analysis-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.13" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": { "height": "calc(100% - 180px)", "left": "10px", "top": "150px", "width": "372.364px" }, "toc_section_display": true, "toc_window_display": true } }, "nbformat": 4, "nbformat_minor": 5 }