{
"cells": [
{
"cell_type": "code",
"execution_count": 6,
"id": "a40fae48",
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"%matplotlib agg\n",
"# Setup cell: imports, project path, and the demo dataset used by the rest of\n",
"# the notebook. Written so that re-running it yields the same `dat` every time.\n",
"\n",
"# Standard library\n",
"import random\n",
"import sys\n",
"\n",
"# Third-party\n",
"import matplotlib.pyplot as plt\n",
"import scorecardpy as sc\n",
"\n",
"# easy_ml is imported from a source checkout, so its parent directory must be\n",
"# on sys.path before the import. The membership check stops repeated runs of\n",
"# this cell from appending the same entry over and over.\n",
"PROJECT_ROOT = \"/root/project\"  # NOTE(review): machine-specific absolute path\n",
"if PROJECT_ROOT not in sys.path:\n",
"    sys.path.append(PROJECT_ROOT)\n",
"from easy_ml import DataSplitEntity, Pipeline\n",
"\n",
"# Seed for the synthetic noise feature below; change it to draw a different\n",
"# (but still reproducible) noise column.\n",
"RANDOM_SEED = 42\n",
"\n",
"# Load the demo data (German credit dataset bundled with scorecardpy).\n",
"dat = sc.germancredit()\n",
"\n",
"# Column names contain dots; swap them for underscores so every name is a\n",
"# plain identifier. `dat_columns` is kept as a cell-level name on purpose.\n",
"dat_columns = [c.replace(\".\", \"_\") for c in dat.columns.tolist()]\n",
"dat.columns = dat_columns\n",
"\n",
"# Encode the target as an integer flag: 1 for \"bad\", 0 for everything else.\n",
"dat[\"creditability\"] = dat[\"creditability\"].eq(\"bad\").astype(\"int64\")\n",
"\n",
"# Optional scenario: add exact linear copies of credit_amount (perfectly\n",
"# correlated features). Uncomment to enable.\n",
"# dat[\"credit_amount_corr1\"] = dat[\"credit_amount\"]*2\n",
"# dat[\"credit_amount_corr2\"] = dat[\"credit_amount\"]*3\n",
"\n",
"# Uninformative noise feature. A dedicated, seeded generator is used instead\n",
"# of the module-level one so the column is identical on every run and the\n",
"# global random state is left untouched.\n",
"noise_rng = random.Random(RANDOM_SEED)\n",
"dat[\"random\"] = [noise_rng.random() for _ in range(len(dat))]\n",
"\n",
"# Optional scenario: put the string \"missing\" into the first row of the\n",
"# duration_in_month column. Uncomment to enable.\n",
"# duration_in_month = list(dat[\"duration_in_month\"])\n",
"# duration_in_month[0] = \"missing\"\n",
"# dat[\"duration_in_month\"] = duration_in_month\n",
"\n",
"# Optional scenario: put the string \"missing\" and a None into the first two\n",
"# rows of the purpose column. Uncomment to enable.\n",
"# purpose = list(dat[\"purpose\"])\n",
"# purpose[0] = \"missing\"\n",
"# purpose[1] = None\n",
"# dat[\"purpose\"] = purpose\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "261f1f07",
"metadata": {
"code_folding": [],
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"项目路径:【./cache/train/demo】\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 8/8 [00:23<00:00, 2.89s/it]\n"
]
},
{
"data": {
"text/html": [
"
样本分布
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 样本 | \n",
" 样本数 | \n",
" 样本占比 | \n",
" 坏样本数 | \n",
" 坏样本比例 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 训练集 | \n",
" 709 | \n",
" 70.90% | \n",
" 211 | \n",
" 29.76% | \n",
"
\n",
" \n",
" 1 | \n",
" 测试集 | \n",
" 291 | \n",
" 29.10% | \n",
" 89 | \n",
" 30.58% | \n",
"
\n",
" \n",
" 2 | \n",
" 合计 | \n",
" 1000 | \n",
" 100% | \n",
" 300 | \n",
" 30.00% | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 样本 样本数 样本占比 坏样本数 坏样本比例\n",
"0 训练集 709 70.90% 211 29.76%\n",
"1 测试集 291 29.10% 89 30.58%\n",
"2 合计 1000 100% 300 30.00%"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"变量iv
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 变量 | \n",
" iv | \n",
" psi | \n",
" vif | \n",
" 释义 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" status_of_existing_checking_account | \n",
" 0.629 | \n",
" 0.006 | \n",
" 1.114 | \n",
" - | \n",
"
\n",
" \n",
" 6 | \n",
" credit_history | \n",
" 0.272 | \n",
" 0.016 | \n",
" 1.057 | \n",
" 借贷历史 | \n",
"
\n",
" \n",
" 5 | \n",
" duration_in_month | \n",
" 0.270 | \n",
" 0.039 | \n",
" 1.034 | \n",
" - | \n",
"
\n",
" \n",
" 4 | \n",
" purpose | \n",
" 0.152 | \n",
" 0.013 | \n",
" 1.029 | \n",
" - | \n",
"
\n",
" \n",
" 1 | \n",
" savings_account_and_bonds | \n",
" 0.144 | \n",
" 0.015 | \n",
" 1.064 | \n",
" - | \n",
"
\n",
" \n",
" 3 | \n",
" age_in_years | \n",
" 0.105 | \n",
" 0.014 | \n",
" 1.065 | \n",
" 年龄 | \n",
"
\n",
" \n",
" 2 | \n",
" present_employment_since | \n",
" 0.104 | \n",
" 0.007 | \n",
" 1.057 | \n",
" - | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 变量 iv psi vif 释义\n",
"0 status_of_existing_checking_account 0.629 0.006 1.114 -\n",
"6 credit_history 0.272 0.016 1.057 借贷历史\n",
"5 duration_in_month 0.270 0.039 1.034 -\n",
"4 purpose 0.152 0.013 1.029 -\n",
"1 savings_account_and_bonds 0.144 0.015 1.064 -\n",
"3 age_in_years 0.105 0.014 1.065 年龄\n",
"2 present_employment_since 0.104 0.007 1.057 -"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"变量趋势
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"变量切分点:\n",
"{\n",
" \"status_of_existing_checking_account\": [\n",
" \"... < 0 DM%,%0 <= ... < 200 DM\",\n",
" \"... >= 200 DM / salary assignments for at least 1 year\",\n",
" \"no checking account\"\n",
" ],\n",
" \"credit_history\": [\n",
" \"no credits taken/ all credits paid back duly%,%all credits at this bank paid back duly\",\n",
" \"existing credits paid back duly till now\",\n",
" \"delay in paying off in the past\",\n",
" \"critical account/ other credits existing (not at this bank)\"\n",
" ],\n",
" \"savings_account_and_bonds\": [\n",
" \"... < 100 DM%,%100 <= ... < 500 DM\",\n",
" \"500 <= ... < 1000 DM%,%... >= 1000 DM\",\n",
" \"unknown/ no savings account\"\n",
" ],\n",
" \"duration_in_month\": [\n",
" 8,\n",
" 15,\n",
" 25,\n",
" 35\n",
" ],\n",
" \"purpose\": [\n",
" \"retraining%,%car (used)\",\n",
" \"radio/television\",\n",
" \"furniture/equipment%,%business%,%repairs\",\n",
" \"domestic appliances%,%education%,%car (new)%,%others\"\n",
" ],\n",
" \"age_in_years\": [\n",
" 25,\n",
" 30,\n",
" 35\n",
" ],\n",
" \"present_employment_since\": [\n",
" \"unemployed%,%... < 1 year\",\n",
" \"1 <= ... < 4 years\",\n",
" \"4 <= ... < 7 years\",\n",
" \"... >= 7 years\"\n",
" ]\n",
"}\n",
"选中变量不同分箱数下变量的推荐切分点:\n",
"-----【duration_in_month】不同分箱数下变量的推荐切分点-----\n",
"[35]\n",
"[8, 35]\n",
"[8, 15, 35]\n",
"[8, 15, 25, 35]\n",
"[8, 10, 15, 35]\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----【age_in_years】不同分箱数下变量的推荐切分点-----\n",
"[30]\n",
"[30, 35]\n",
"[25, 30, 35]\n",
"[20, 30, 60]\n",
"[20, 25, 30, 35]\n",
"[20, 30, 35, 60]\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"快速筛选过程
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"剔除train_iv小于阈值\n",
"筛选前变量数量:21\n",
"['status_of_existing_checking_account', 'duration_in_month', 'credit_history', 'purpose', 'credit_amount', 'savings_account_and_bonds', 'present_employment_since', 'installment_rate_in_percentage_of_disposable_income', 'personal_status_and_sex', 'other_debtors_or_guarantors', 'present_residence_since', 'property', 'age_in_years', 'other_installment_plans', 'housing', 'number_of_existing_credits_at_this_bank', 'job', 'number_of_people_being_liable_to_provide_maintenance_for', 'telephone', 'foreign_worker', 'random']\n",
"快速筛选剔除变量数量:13\n",
"housing 因为train_iv【0.042】小于阈值被剔除\n",
"other_debtors_or_guarantors 因为train_iv【0.017】小于阈值被剔除\n",
"number_of_people_being_liable_to_provide_maintenance_for 因为train_iv【0.0】小于阈值被剔除\n",
"present_residence_since 因为train_iv【0.001】小于阈值被剔除\n",
"personal_status_and_sex 因为train_iv【0.01】小于阈值被剔除\n",
"property 因为train_iv【0.08】小于阈值被剔除\n",
"job 因为train_iv【0.021】小于阈值被剔除\n",
"other_installment_plans 因为train_iv【0.064】小于阈值被剔除\n",
"foreign_worker 因为train_iv【0.0】小于阈值被剔除\n",
"telephone 因为train_iv【0.001】小于阈值被剔除\n",
"random 因为train_iv【0.042】小于阈值被剔除\n",
"number_of_existing_credits_at_this_bank 因为train_iv【0.003】小于阈值被剔除\n",
"installment_rate_in_percentage_of_disposable_income 因为train_iv【0.033】小于阈值被剔除\n",
"\n"
]
},
{
"data": {
"text/html": [
"数值变量筛选过程
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n"
]
},
{
"data": {
"text/html": [
"相关性筛选过程
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"duration_in_month: 【credit_amount_iv0.299_corr0.495】 \n",
"\n",
"-----相关性筛选保留的【duration_in_month】-----\n",
"-----【duration_in_month】不同分箱数下变量的推荐切分点-----\n",
"[35]\n",
"[8, 35]\n",
"[8, 15, 35]\n",
"[8, 15, 25, 35]\n",
"[8, 10, 15, 35]\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----【credit_amount】不同分箱数下变量的推荐切分点-----\n",
"[4000.0]\n",
"[4000.0, 9000.0]\n",
"[3000.0, 6000.0, 9000.0]\n",
"[3000.0, 4000.0, 9000.0]\n",
"[2000.0, 3000.0, 4000.0, 9000.0]\n",
"[3000.0, 4000.0, 5000.0, 9000.0]\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"vif筛选过程
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n"
]
},
{
"data": {
"text/html": [
"ivtop筛选过程
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"iv = train_iv + test_iv\n",
"\n"
]
},
{
"data": {
"text/html": [
"模型结果
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 样本集 | \n",
" AUC | \n",
" KS | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 训练集 | \n",
" 0.8036 | \n",
" 0.4718 | \n",
"
\n",
" \n",
" 1 | \n",
" 测试集 | \n",
" 0.7948 | \n",
" 0.4691 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 样本集 AUC KS\n",
"0 训练集 0.8036 0.4718\n",
"1 测试集 0.7948 0.4691"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"

"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"加入规则后:\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 样本集 | \n",
" AUC | \n",
" KS | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 训练集 | \n",
" 0.8065 | \n",
" 0.4711 | \n",
"
\n",
" \n",
" 1 | \n",
" 测试集 | \n",
" 0.8023 | \n",
" 0.4918 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 样本集 AUC KS\n",
"0 训练集 0.8065 0.4711\n",
"1 测试集 0.8023 0.4918"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"

"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"模型变量系数
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" Generalized Linear Model Regression Results \n",
"==============================================================================\n",
"Dep. Variable: creditability No. Observations: 709\n",
"Model: GLM Df Residuals: 702\n",
"Model Family: Binomial Df Model: 6\n",
"Link Function: logit Scale: 1.0000\n",
"Method: IRLS Log-Likelihood: -384.55\n",
"Date: Wed, 26 Feb 2025 Deviance: 769.09\n",
"Time: 11:28:17 Pearson chi2: 689.\n",
"No. Iterations: 5 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" var | \n",
" coef | \n",
" std err | \n",
" z | \n",
" P>|z| | \n",
" [0.025 | \n",
" 0.975] | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" present_employment_since_woe | \n",
" 0.8461 | \n",
" 0.282 | \n",
" 2.997 | \n",
" 0.003 | \n",
" 0.293 | \n",
" 1.399 | \n",
"
\n",
" \n",
" 1 | \n",
" savings_account_and_bonds_woe | \n",
" 0.5619 | \n",
" 0.230 | \n",
" 2.443 | \n",
" 0.015 | \n",
" 0.111 | \n",
" 1.013 | \n",
"
\n",
" \n",
" 2 | \n",
" status_of_existing_checking_account_woe | \n",
" 0.7664 | \n",
" 0.107 | \n",
" 7.175 | \n",
" 0.000 | \n",
" 0.557 | \n",
" 0.976 | \n",
"
\n",
" \n",
" 3 | \n",
" age_in_years_woe | \n",
" 0.6160 | \n",
" 0.272 | \n",
" 2.264 | \n",
" 0.024 | \n",
" 0.083 | \n",
" 1.149 | \n",
"
\n",
" \n",
" 4 | \n",
" purpose_woe | \n",
" 1.0363 | \n",
" 0.226 | \n",
" 4.588 | \n",
" 0.000 | \n",
" 0.594 | \n",
" 1.479 | \n",
"
\n",
" \n",
" 5 | \n",
" duration_in_month_woe | \n",
" 1.0201 | \n",
" 0.180 | \n",
" 5.667 | \n",
" 0.000 | \n",
" 0.667 | \n",
" 1.373 | \n",
"
\n",
" \n",
" 6 | \n",
" credit_history_woe | \n",
" 0.8273 | \n",
" 0.187 | \n",
" 4.427 | \n",
" 0.000 | \n",
" 0.461 | \n",
" 1.194 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" var coef std err z \\\n",
"0 present_employment_since_woe 0.8461 0.282 2.997 \n",
"1 savings_account_and_bonds_woe 0.5619 0.230 2.443 \n",
"2 status_of_existing_checking_account_woe 0.7664 0.107 7.175 \n",
"3 age_in_years_woe 0.6160 0.272 2.264 \n",
"4 purpose_woe 1.0363 0.226 4.588 \n",
"5 duration_in_month_woe 1.0201 0.180 5.667 \n",
"6 credit_history_woe 0.8273 0.187 4.427 \n",
"\n",
" P>|z| [0.025 0.975] \n",
"0 0.003 0.293 1.399 \n",
"1 0.015 0.111 1.013 \n",
"2 0.000 0.557 0.976 \n",
"3 0.024 0.083 1.149 \n",
"4 0.000 0.594 1.479 \n",
"5 0.000 0.667 1.373 \n",
"6 0.000 0.461 1.194 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"模型psi
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MODEL_SCORE_BIN | \n",
" psi | \n",
" 训练样本数 | \n",
" 测试样本数 | \n",
" 训练样本数比例 | \n",
" 测试样本数比例 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" (-inf, 436.0] | \n",
" 0.004 | \n",
" 73 | \n",
" 36 | \n",
" 0.103 | \n",
" 0.124 | \n",
"
\n",
" \n",
" 1 | \n",
" (436.0, 476.0] | \n",
" 0.003 | \n",
" 72 | \n",
" 25 | \n",
" 0.102 | \n",
" 0.086 | \n",
"
\n",
" \n",
" 2 | \n",
" (476.0, 504.4] | \n",
" 0.000 | \n",
" 68 | \n",
" 27 | \n",
" 0.096 | \n",
" 0.093 | \n",
"
\n",
" \n",
" 3 | \n",
" (504.4, 531.0] | \n",
" 0.000 | \n",
" 74 | \n",
" 29 | \n",
" 0.104 | \n",
" 0.100 | \n",
"
\n",
" \n",
" 4 | \n",
" (531.0, 559.0] | \n",
" 0.002 | \n",
" 69 | \n",
" 33 | \n",
" 0.097 | \n",
" 0.113 | \n",
"
\n",
" \n",
" 5 | \n",
" (559.0, 591.0] | \n",
" 0.000 | \n",
" 70 | \n",
" 30 | \n",
" 0.099 | \n",
" 0.103 | \n",
"
\n",
" \n",
" 6 | \n",
" (591.0, 618.0] | \n",
" 0.004 | \n",
" 72 | \n",
" 24 | \n",
" 0.102 | \n",
" 0.082 | \n",
"
\n",
" \n",
" 7 | \n",
" (618.0, 646.8] | \n",
" 0.000 | \n",
" 69 | \n",
" 27 | \n",
" 0.097 | \n",
" 0.093 | \n",
"
\n",
" \n",
" 8 | \n",
" (646.8, 681.0] | \n",
" 0.010 | \n",
" 72 | \n",
" 21 | \n",
" 0.102 | \n",
" 0.072 | \n",
"
\n",
" \n",
" 9 | \n",
" (681.0, inf] | \n",
" 0.011 | \n",
" 70 | \n",
" 39 | \n",
" 0.099 | \n",
" 0.134 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" MODEL_SCORE_BIN psi 训练样本数 测试样本数 训练样本数比例 测试样本数比例\n",
"0 (-inf, 436.0] 0.004 73 36 0.103 0.124\n",
"1 (436.0, 476.0] 0.003 72 25 0.102 0.086\n",
"2 (476.0, 504.4] 0.000 68 27 0.096 0.093\n",
"3 (504.4, 531.0] 0.000 74 29 0.104 0.100\n",
"4 (531.0, 559.0] 0.002 69 33 0.097 0.113\n",
"5 (559.0, 591.0] 0.000 70 30 0.099 0.103\n",
"6 (591.0, 618.0] 0.004 72 24 0.102 0.082\n",
"7 (618.0, 646.8] 0.000 69 27 0.097 0.093\n",
"8 (646.8, 681.0] 0.010 72 21 0.102 0.072\n",
"9 (681.0, inf] 0.011 70 39 0.099 0.134"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"模型psi: 0.034\n",
"加入规则后:\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MODEL_SCORE_BIN | \n",
" psi | \n",
" 训练样本数 | \n",
" 测试样本数 | \n",
" 训练样本数比例 | \n",
" 测试样本数比例 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" (-inf, 433.0] | \n",
" 0.001 | \n",
" 72 | \n",
" 33 | \n",
" 0.102 | \n",
" 0.113 | \n",
"
\n",
" \n",
" 1 | \n",
" (433.0, 474.0] | \n",
" 0.000 | \n",
" 71 | \n",
" 28 | \n",
" 0.100 | \n",
" 0.096 | \n",
"
\n",
" \n",
" 2 | \n",
" (474.0, 502.4] | \n",
" 0.000 | \n",
" 70 | \n",
" 30 | \n",
" 0.099 | \n",
" 0.103 | \n",
"
\n",
" \n",
" 3 | \n",
" (502.4, 524.2] | \n",
" 0.004 | \n",
" 71 | \n",
" 24 | \n",
" 0.100 | \n",
" 0.082 | \n",
"
\n",
" \n",
" 4 | \n",
" (524.2, 554.0] | \n",
" 0.001 | \n",
" 72 | \n",
" 32 | \n",
" 0.102 | \n",
" 0.110 | \n",
"
\n",
" \n",
" 5 | \n",
" (554.0, 585.0] | \n",
" 0.000 | \n",
" 70 | \n",
" 29 | \n",
" 0.099 | \n",
" 0.100 | \n",
"
\n",
" \n",
" 6 | \n",
" (585.0, 615.0] | \n",
" 0.001 | \n",
" 71 | \n",
" 27 | \n",
" 0.100 | \n",
" 0.093 | \n",
"
\n",
" \n",
" 7 | \n",
" (615.0, 646.0] | \n",
" 0.000 | \n",
" 71 | \n",
" 30 | \n",
" 0.100 | \n",
" 0.103 | \n",
"
\n",
" \n",
" 8 | \n",
" (646.0, 681.0] | \n",
" 0.009 | \n",
" 71 | \n",
" 21 | \n",
" 0.100 | \n",
" 0.072 | \n",
"
\n",
" \n",
" 9 | \n",
" (681.0, inf] | \n",
" 0.007 | \n",
" 70 | \n",
" 37 | \n",
" 0.099 | \n",
" 0.127 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" MODEL_SCORE_BIN psi 训练样本数 测试样本数 训练样本数比例 测试样本数比例\n",
"0 (-inf, 433.0] 0.001 72 33 0.102 0.113\n",
"1 (433.0, 474.0] 0.000 71 28 0.100 0.096\n",
"2 (474.0, 502.4] 0.000 70 30 0.099 0.103\n",
"3 (502.4, 524.2] 0.004 71 24 0.100 0.082\n",
"4 (524.2, 554.0] 0.001 72 32 0.102 0.110\n",
"5 (554.0, 585.0] 0.000 70 29 0.099 0.100\n",
"6 (585.0, 615.0] 0.001 71 27 0.100 0.093\n",
"7 (615.0, 646.0] 0.000 71 30 0.100 0.103\n",
"8 (646.0, 681.0] 0.009 71 21 0.100 0.072\n",
"9 (681.0, inf] 0.007 70 37 0.099 0.127"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"模型psi: 0.023\n"
]
},
{
"data": {
"text/html": [
"分数分箱
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"训练集-分数分箱\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MODEL_SCORE_BIN | \n",
" 样本数 | \n",
" 坏样本数 | \n",
" 好样本数 | \n",
" 坏样本比例 | \n",
" 样本数比例 | \n",
" 总坏样本数 | \n",
" 总好样本数 | \n",
" 平均坏样本率 | \n",
" 累计坏样本数 | \n",
" 累计好样本数 | \n",
" 累计样本数 | \n",
" 累计坏样本比例 | \n",
" 累计好样本比例 | \n",
" KS | \n",
" LIFT | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" (-inf, 436.0] | \n",
" 73 | \n",
" 50 | \n",
" 23 | \n",
" 0.685 | \n",
" 0.103 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 50 | \n",
" 23 | \n",
" 73 | \n",
" 0.237 | \n",
" 0.046 | \n",
" 0.191 | \n",
" 2.298 | \n",
"
\n",
" \n",
" 1 | \n",
" (436.0, 476.0] | \n",
" 72 | \n",
" 44 | \n",
" 28 | \n",
" 0.611 | \n",
" 0.102 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 94 | \n",
" 51 | \n",
" 145 | \n",
" 0.445 | \n",
" 0.102 | \n",
" 0.343 | \n",
" 2.175 | \n",
"
\n",
" \n",
" 2 | \n",
" (476.0, 504.4] | \n",
" 68 | \n",
" 31 | \n",
" 37 | \n",
" 0.456 | \n",
" 0.096 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 125 | \n",
" 88 | \n",
" 213 | \n",
" 0.592 | \n",
" 0.177 | \n",
" 0.415 | \n",
" 1.969 | \n",
"
\n",
" \n",
" 3 | \n",
" (504.4, 531.0] | \n",
" 74 | \n",
" 24 | \n",
" 50 | \n",
" 0.324 | \n",
" 0.104 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 149 | \n",
" 138 | \n",
" 287 | \n",
" 0.706 | \n",
" 0.277 | \n",
" 0.429 | \n",
" 1.742 | \n",
"
\n",
" \n",
" 4 | \n",
" (531.0, 559.0] | \n",
" 69 | \n",
" 26 | \n",
" 43 | \n",
" 0.377 | \n",
" 0.097 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 175 | \n",
" 181 | \n",
" 356 | \n",
" 0.829 | \n",
" 0.363 | \n",
" 0.466 | \n",
" 1.650 | \n",
"
\n",
" \n",
" 5 | \n",
" (559.0, 591.0] | \n",
" 70 | \n",
" 14 | \n",
" 56 | \n",
" 0.200 | \n",
" 0.099 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 189 | \n",
" 237 | \n",
" 426 | \n",
" 0.896 | \n",
" 0.476 | \n",
" 0.420 | \n",
" 1.489 | \n",
"
\n",
" \n",
" 6 | \n",
" (591.0, 618.0] | \n",
" 72 | \n",
" 9 | \n",
" 63 | \n",
" 0.125 | \n",
" 0.102 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 198 | \n",
" 300 | \n",
" 498 | \n",
" 0.938 | \n",
" 0.602 | \n",
" 0.336 | \n",
" 1.334 | \n",
"
\n",
" \n",
" 7 | \n",
" (618.0, 646.8] | \n",
" 69 | \n",
" 10 | \n",
" 59 | \n",
" 0.145 | \n",
" 0.097 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 208 | \n",
" 359 | \n",
" 567 | \n",
" 0.986 | \n",
" 0.721 | \n",
" 0.265 | \n",
" 1.231 | \n",
"
\n",
" \n",
" 8 | \n",
" (646.8, 681.0] | \n",
" 72 | \n",
" 2 | \n",
" 70 | \n",
" 0.028 | \n",
" 0.102 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 210 | \n",
" 429 | \n",
" 639 | \n",
" 0.995 | \n",
" 0.861 | \n",
" 0.134 | \n",
" 1.103 | \n",
"
\n",
" \n",
" 9 | \n",
" (681.0, inf] | \n",
" 70 | \n",
" 1 | \n",
" 69 | \n",
" 0.014 | \n",
" 0.099 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 211 | \n",
" 498 | \n",
" 709 | \n",
" 1.000 | \n",
" 1.000 | \n",
" 0.000 | \n",
" 0.999 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" MODEL_SCORE_BIN 样本数 坏样本数 好样本数 坏样本比例 样本数比例 总坏样本数 总好样本数 平均坏样本率 \\\n",
"0 (-inf, 436.0] 73 50 23 0.685 0.103 211 498 0.298 \n",
"1 (436.0, 476.0] 72 44 28 0.611 0.102 211 498 0.298 \n",
"2 (476.0, 504.4] 68 31 37 0.456 0.096 211 498 0.298 \n",
"3 (504.4, 531.0] 74 24 50 0.324 0.104 211 498 0.298 \n",
"4 (531.0, 559.0] 69 26 43 0.377 0.097 211 498 0.298 \n",
"5 (559.0, 591.0] 70 14 56 0.200 0.099 211 498 0.298 \n",
"6 (591.0, 618.0] 72 9 63 0.125 0.102 211 498 0.298 \n",
"7 (618.0, 646.8] 69 10 59 0.145 0.097 211 498 0.298 \n",
"8 (646.8, 681.0] 72 2 70 0.028 0.102 211 498 0.298 \n",
"9 (681.0, inf] 70 1 69 0.014 0.099 211 498 0.298 \n",
"\n",
" 累计坏样本数 累计好样本数 累计样本数 累计坏样本比例 累计好样本比例 KS LIFT \n",
"0 50 23 73 0.237 0.046 0.191 2.298 \n",
"1 94 51 145 0.445 0.102 0.343 2.175 \n",
"2 125 88 213 0.592 0.177 0.415 1.969 \n",
"3 149 138 287 0.706 0.277 0.429 1.742 \n",
"4 175 181 356 0.829 0.363 0.466 1.650 \n",
"5 189 237 426 0.896 0.476 0.420 1.489 \n",
"6 198 300 498 0.938 0.602 0.336 1.334 \n",
"7 208 359 567 0.986 0.721 0.265 1.231 \n",
"8 210 429 639 0.995 0.861 0.134 1.103 \n",
"9 211 498 709 1.000 1.000 0.000 0.999 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"加入规则后:\n",
"训练集-分数分箱\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MODEL_SCORE_BIN | \n",
" 样本数 | \n",
" 坏样本数 | \n",
" 好样本数 | \n",
" 坏样本比例 | \n",
" 样本数比例 | \n",
" 总坏样本数 | \n",
" 总好样本数 | \n",
" 平均坏样本率 | \n",
" 累计坏样本数 | \n",
" 累计好样本数 | \n",
" 累计样本数 | \n",
" 累计坏样本比例 | \n",
" 累计好样本比例 | \n",
" KS | \n",
" LIFT | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" (-inf, 433.0] | \n",
" 72 | \n",
" 52 | \n",
" 20 | \n",
" 0.722 | \n",
" 0.102 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 52 | \n",
" 20 | \n",
" 72 | \n",
" 0.246 | \n",
" 0.040 | \n",
" 0.206 | \n",
" 2.424 | \n",
"
\n",
" \n",
" 1 | \n",
" (433.0, 474.0] | \n",
" 71 | \n",
" 42 | \n",
" 29 | \n",
" 0.592 | \n",
" 0.100 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 94 | \n",
" 49 | \n",
" 143 | \n",
" 0.445 | \n",
" 0.098 | \n",
" 0.347 | \n",
" 2.206 | \n",
"
\n",
" \n",
" 2 | \n",
" (474.0, 502.4] | \n",
" 70 | \n",
" 31 | \n",
" 39 | \n",
" 0.443 | \n",
" 0.099 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 125 | \n",
" 88 | \n",
" 213 | \n",
" 0.592 | \n",
" 0.177 | \n",
" 0.415 | \n",
" 1.969 | \n",
"
\n",
" \n",
" 3 | \n",
" (502.4, 524.2] | \n",
" 71 | \n",
" 26 | \n",
" 45 | \n",
" 0.366 | \n",
" 0.100 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 151 | \n",
" 133 | \n",
" 284 | \n",
" 0.716 | \n",
" 0.267 | \n",
" 0.449 | \n",
" 1.784 | \n",
"
\n",
" \n",
" 4 | \n",
" (524.2, 554.0] | \n",
" 72 | \n",
" 22 | \n",
" 50 | \n",
" 0.306 | \n",
" 0.102 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 173 | \n",
" 183 | \n",
" 356 | \n",
" 0.820 | \n",
" 0.367 | \n",
" 0.453 | \n",
" 1.631 | \n",
"
\n",
" \n",
" 5 | \n",
" (554.0, 585.0] | \n",
" 70 | \n",
" 18 | \n",
" 52 | \n",
" 0.257 | \n",
" 0.099 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 191 | \n",
" 235 | \n",
" 426 | \n",
" 0.905 | \n",
" 0.472 | \n",
" 0.433 | \n",
" 1.505 | \n",
"
\n",
" \n",
" 6 | \n",
" (585.0, 615.0] | \n",
" 71 | \n",
" 8 | \n",
" 63 | \n",
" 0.113 | \n",
" 0.100 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 199 | \n",
" 298 | \n",
" 497 | \n",
" 0.943 | \n",
" 0.598 | \n",
" 0.345 | \n",
" 1.344 | \n",
"
\n",
" \n",
" 7 | \n",
" (615.0, 646.0] | \n",
" 71 | \n",
" 9 | \n",
" 62 | \n",
" 0.127 | \n",
" 0.100 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 208 | \n",
" 360 | \n",
" 568 | \n",
" 0.986 | \n",
" 0.723 | \n",
" 0.263 | \n",
" 1.229 | \n",
"
\n",
" \n",
" 8 | \n",
" (646.0, 681.0] | \n",
" 71 | \n",
" 2 | \n",
" 69 | \n",
" 0.028 | \n",
" 0.100 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 210 | \n",
" 429 | \n",
" 639 | \n",
" 0.995 | \n",
" 0.861 | \n",
" 0.134 | \n",
" 1.103 | \n",
"
\n",
" \n",
" 9 | \n",
" (681.0, inf] | \n",
" 70 | \n",
" 1 | \n",
" 69 | \n",
" 0.014 | \n",
" 0.099 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 211 | \n",
" 498 | \n",
" 709 | \n",
" 1.000 | \n",
" 1.000 | \n",
" 0.000 | \n",
" 0.999 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" MODEL_SCORE_BIN 样本数 坏样本数 好样本数 坏样本比例 样本数比例 总坏样本数 总好样本数 平均坏样本率 \\\n",
"0 (-inf, 433.0] 72 52 20 0.722 0.102 211 498 0.298 \n",
"1 (433.0, 474.0] 71 42 29 0.592 0.100 211 498 0.298 \n",
"2 (474.0, 502.4] 70 31 39 0.443 0.099 211 498 0.298 \n",
"3 (502.4, 524.2] 71 26 45 0.366 0.100 211 498 0.298 \n",
"4 (524.2, 554.0] 72 22 50 0.306 0.102 211 498 0.298 \n",
"5 (554.0, 585.0] 70 18 52 0.257 0.099 211 498 0.298 \n",
"6 (585.0, 615.0] 71 8 63 0.113 0.100 211 498 0.298 \n",
"7 (615.0, 646.0] 71 9 62 0.127 0.100 211 498 0.298 \n",
"8 (646.0, 681.0] 71 2 69 0.028 0.100 211 498 0.298 \n",
"9 (681.0, inf] 70 1 69 0.014 0.099 211 498 0.298 \n",
"\n",
" 累计坏样本数 累计好样本数 累计样本数 累计坏样本比例 累计好样本比例 KS LIFT \n",
"0 52 20 72 0.246 0.040 0.206 2.424 \n",
"1 94 49 143 0.445 0.098 0.347 2.206 \n",
"2 125 88 213 0.592 0.177 0.415 1.969 \n",
"3 151 133 284 0.716 0.267 0.449 1.784 \n",
"4 173 183 356 0.820 0.367 0.453 1.631 \n",
"5 191 235 426 0.905 0.472 0.433 1.505 \n",
"6 199 298 497 0.943 0.598 0.345 1.344 \n",
"7 208 360 568 0.986 0.723 0.263 1.229 \n",
"8 210 429 639 0.995 0.861 0.134 1.103 \n",
"9 211 498 709 1.000 1.000 0.000 0.999 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"测试集-分数分箱\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MODEL_SCORE_BIN | \n",
" 样本数 | \n",
" 坏样本数 | \n",
" 好样本数 | \n",
" 坏样本比例 | \n",
" 样本数比例 | \n",
" 总坏样本数 | \n",
" 总好样本数 | \n",
" 平均坏样本率 | \n",
" 累计坏样本数 | \n",
" 累计好样本数 | \n",
" 累计样本数 | \n",
" 累计坏样本比例 | \n",
" 累计好样本比例 | \n",
" KS | \n",
" LIFT | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" (-inf, 436.0] | \n",
" 36 | \n",
" 24 | \n",
" 12 | \n",
" 0.667 | \n",
" 0.124 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 24 | \n",
" 12 | \n",
" 36 | \n",
" 0.270 | \n",
" 0.059 | \n",
" 0.211 | \n",
" 2.179 | \n",
"
\n",
" \n",
" 1 | \n",
" (436.0, 476.0] | \n",
" 25 | \n",
" 14 | \n",
" 11 | \n",
" 0.560 | \n",
" 0.086 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 38 | \n",
" 23 | \n",
" 61 | \n",
" 0.427 | \n",
" 0.114 | \n",
" 0.313 | \n",
" 2.036 | \n",
"
\n",
" \n",
" 2 | \n",
" (476.0, 504.4] | \n",
" 27 | \n",
" 14 | \n",
" 13 | \n",
" 0.519 | \n",
" 0.093 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 52 | \n",
" 36 | \n",
" 88 | \n",
" 0.584 | \n",
" 0.178 | \n",
" 0.406 | \n",
" 1.931 | \n",
"
\n",
" \n",
" 3 | \n",
" (504.4, 531.0] | \n",
" 29 | \n",
" 10 | \n",
" 19 | \n",
" 0.345 | \n",
" 0.100 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 62 | \n",
" 55 | \n",
" 117 | \n",
" 0.697 | \n",
" 0.272 | \n",
" 0.425 | \n",
" 1.732 | \n",
"
\n",
" \n",
" 4 | \n",
" (531.0, 559.0] | \n",
" 33 | \n",
" 11 | \n",
" 22 | \n",
" 0.333 | \n",
" 0.113 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 73 | \n",
" 77 | \n",
" 150 | \n",
" 0.820 | \n",
" 0.381 | \n",
" 0.439 | \n",
" 1.590 | \n",
"
\n",
" \n",
" 5 | \n",
" (559.0, 591.0] | \n",
" 30 | \n",
" 6 | \n",
" 24 | \n",
" 0.200 | \n",
" 0.103 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 79 | \n",
" 101 | \n",
" 180 | \n",
" 0.888 | \n",
" 0.500 | \n",
" 0.388 | \n",
" 1.434 | \n",
"
\n",
" \n",
" 6 | \n",
" (591.0, 618.0] | \n",
" 24 | \n",
" 5 | \n",
" 19 | \n",
" 0.208 | \n",
" 0.082 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 84 | \n",
" 120 | \n",
" 204 | \n",
" 0.944 | \n",
" 0.594 | \n",
" 0.350 | \n",
" 1.346 | \n",
"
\n",
" \n",
" 7 | \n",
" (618.0, 646.8] | \n",
" 27 | \n",
" 2 | \n",
" 25 | \n",
" 0.074 | \n",
" 0.093 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 86 | \n",
" 145 | \n",
" 231 | \n",
" 0.966 | \n",
" 0.718 | \n",
" 0.248 | \n",
" 1.217 | \n",
"
\n",
" \n",
" 8 | \n",
" (646.8, 681.0] | \n",
" 21 | \n",
" 2 | \n",
" 19 | \n",
" 0.095 | \n",
" 0.072 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 88 | \n",
" 164 | \n",
" 252 | \n",
" 0.989 | \n",
" 0.812 | \n",
" 0.177 | \n",
" 1.141 | \n",
"
\n",
" \n",
" 9 | \n",
" (681.0, inf] | \n",
" 39 | \n",
" 1 | \n",
" 38 | \n",
" 0.026 | \n",
" 0.134 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 89 | \n",
" 202 | \n",
" 291 | \n",
" 1.000 | \n",
" 1.000 | \n",
" 0.000 | \n",
" 0.999 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" MODEL_SCORE_BIN 样本数 坏样本数 好样本数 坏样本比例 样本数比例 总坏样本数 总好样本数 平均坏样本率 \\\n",
"0 (-inf, 436.0] 36 24 12 0.667 0.124 89 202 0.306 \n",
"1 (436.0, 476.0] 25 14 11 0.560 0.086 89 202 0.306 \n",
"2 (476.0, 504.4] 27 14 13 0.519 0.093 89 202 0.306 \n",
"3 (504.4, 531.0] 29 10 19 0.345 0.100 89 202 0.306 \n",
"4 (531.0, 559.0] 33 11 22 0.333 0.113 89 202 0.306 \n",
"5 (559.0, 591.0] 30 6 24 0.200 0.103 89 202 0.306 \n",
"6 (591.0, 618.0] 24 5 19 0.208 0.082 89 202 0.306 \n",
"7 (618.0, 646.8] 27 2 25 0.074 0.093 89 202 0.306 \n",
"8 (646.8, 681.0] 21 2 19 0.095 0.072 89 202 0.306 \n",
"9 (681.0, inf] 39 1 38 0.026 0.134 89 202 0.306 \n",
"\n",
" 累计坏样本数 累计好样本数 累计样本数 累计坏样本比例 累计好样本比例 KS LIFT \n",
"0 24 12 36 0.270 0.059 0.211 2.179 \n",
"1 38 23 61 0.427 0.114 0.313 2.036 \n",
"2 52 36 88 0.584 0.178 0.406 1.931 \n",
"3 62 55 117 0.697 0.272 0.425 1.732 \n",
"4 73 77 150 0.820 0.381 0.439 1.590 \n",
"5 79 101 180 0.888 0.500 0.388 1.434 \n",
"6 84 120 204 0.944 0.594 0.350 1.346 \n",
"7 86 145 231 0.966 0.718 0.248 1.217 \n",
"8 88 164 252 0.989 0.812 0.177 1.141 \n",
"9 89 202 291 1.000 1.000 0.000 0.999 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"加入规则后:\n",
"测试集-分数分箱\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MODEL_SCORE_BIN | \n",
" 样本数 | \n",
" 坏样本数 | \n",
" 好样本数 | \n",
" 坏样本比例 | \n",
" 样本数比例 | \n",
" 总坏样本数 | \n",
" 总好样本数 | \n",
" 平均坏样本率 | \n",
" 累计坏样本数 | \n",
" 累计好样本数 | \n",
" 累计样本数 | \n",
" 累计坏样本比例 | \n",
" 累计好样本比例 | \n",
" KS | \n",
" LIFT | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" (-inf, 433.0] | \n",
" 33 | \n",
" 23 | \n",
" 10 | \n",
" 0.697 | \n",
" 0.113 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 23 | \n",
" 10 | \n",
" 33 | \n",
" 0.258 | \n",
" 0.050 | \n",
" 0.208 | \n",
" 2.278 | \n",
"
\n",
" \n",
" 1 | \n",
" (433.0, 474.0] | \n",
" 28 | \n",
" 16 | \n",
" 12 | \n",
" 0.571 | \n",
" 0.096 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 39 | \n",
" 22 | \n",
" 61 | \n",
" 0.438 | \n",
" 0.109 | \n",
" 0.329 | \n",
" 2.089 | \n",
"
\n",
" \n",
" 2 | \n",
" (474.0, 502.4] | \n",
" 30 | \n",
" 15 | \n",
" 15 | \n",
" 0.500 | \n",
" 0.103 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 54 | \n",
" 37 | \n",
" 91 | \n",
" 0.607 | \n",
" 0.183 | \n",
" 0.424 | \n",
" 1.939 | \n",
"
\n",
" \n",
" 3 | \n",
" (502.4, 524.2] | \n",
" 24 | \n",
" 10 | \n",
" 14 | \n",
" 0.417 | \n",
" 0.082 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 64 | \n",
" 51 | \n",
" 115 | \n",
" 0.719 | \n",
" 0.252 | \n",
" 0.467 | \n",
" 1.819 | \n",
"
\n",
" \n",
" 4 | \n",
" (524.2, 554.0] | \n",
" 32 | \n",
" 10 | \n",
" 22 | \n",
" 0.312 | \n",
" 0.110 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 74 | \n",
" 73 | \n",
" 147 | \n",
" 0.831 | \n",
" 0.361 | \n",
" 0.470 | \n",
" 1.645 | \n",
"
\n",
" \n",
" 5 | \n",
" (554.0, 585.0] | \n",
" 29 | \n",
" 6 | \n",
" 23 | \n",
" 0.207 | \n",
" 0.100 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 80 | \n",
" 96 | \n",
" 176 | \n",
" 0.899 | \n",
" 0.475 | \n",
" 0.424 | \n",
" 1.485 | \n",
"
\n",
" \n",
" 6 | \n",
" (585.0, 615.0] | \n",
" 27 | \n",
" 5 | \n",
" 22 | \n",
" 0.185 | \n",
" 0.093 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 85 | \n",
" 118 | \n",
" 203 | \n",
" 0.955 | \n",
" 0.584 | \n",
" 0.371 | \n",
" 1.368 | \n",
"
\n",
" \n",
" 7 | \n",
" (615.0, 646.0] | \n",
" 30 | \n",
" 1 | \n",
" 29 | \n",
" 0.033 | \n",
" 0.103 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 86 | \n",
" 147 | \n",
" 233 | \n",
" 0.966 | \n",
" 0.728 | \n",
" 0.238 | \n",
" 1.206 | \n",
"
\n",
" \n",
" 8 | \n",
" (646.0, 681.0] | \n",
" 21 | \n",
" 2 | \n",
" 19 | \n",
" 0.095 | \n",
" 0.072 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 88 | \n",
" 166 | \n",
" 254 | \n",
" 0.989 | \n",
" 0.822 | \n",
" 0.167 | \n",
" 1.132 | \n",
"
\n",
" \n",
" 9 | \n",
" (681.0, inf] | \n",
" 37 | \n",
" 1 | \n",
" 36 | \n",
" 0.027 | \n",
" 0.127 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 89 | \n",
" 202 | \n",
" 291 | \n",
" 1.000 | \n",
" 1.000 | \n",
" 0.000 | \n",
" 0.999 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" MODEL_SCORE_BIN 样本数 坏样本数 好样本数 坏样本比例 样本数比例 总坏样本数 总好样本数 平均坏样本率 \\\n",
"0 (-inf, 433.0] 33 23 10 0.697 0.113 89 202 0.306 \n",
"1 (433.0, 474.0] 28 16 12 0.571 0.096 89 202 0.306 \n",
"2 (474.0, 502.4] 30 15 15 0.500 0.103 89 202 0.306 \n",
"3 (502.4, 524.2] 24 10 14 0.417 0.082 89 202 0.306 \n",
"4 (524.2, 554.0] 32 10 22 0.312 0.110 89 202 0.306 \n",
"5 (554.0, 585.0] 29 6 23 0.207 0.100 89 202 0.306 \n",
"6 (585.0, 615.0] 27 5 22 0.185 0.093 89 202 0.306 \n",
"7 (615.0, 646.0] 30 1 29 0.033 0.103 89 202 0.306 \n",
"8 (646.0, 681.0] 21 2 19 0.095 0.072 89 202 0.306 \n",
"9 (681.0, inf] 37 1 36 0.027 0.127 89 202 0.306 \n",
"\n",
" 累计坏样本数 累计好样本数 累计样本数 累计坏样本比例 累计好样本比例 KS LIFT \n",
"0 23 10 33 0.258 0.050 0.208 2.278 \n",
"1 39 22 61 0.438 0.109 0.329 2.089 \n",
"2 54 37 91 0.607 0.183 0.424 1.939 \n",
"3 64 51 115 0.719 0.252 0.467 1.819 \n",
"4 74 73 147 0.831 0.361 0.470 1.645 \n",
"5 80 96 176 0.899 0.475 0.424 1.485 \n",
"6 85 118 203 0.955 0.584 0.371 1.368 \n",
"7 86 147 233 0.966 0.728 0.238 1.206 \n",
"8 88 166 254 0.989 0.822 0.167 1.132 \n",
"9 89 202 291 1.000 1.000 0.000 0.999 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"评分卡
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"评分卡不包含规则\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" variable | \n",
" bin | \n",
" points | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" basepoints | \n",
" NaN | \n",
" 538.0 | \n",
"
\n",
" \n",
" 6 | \n",
" present_employment_since | \n",
" unemployed%,%... < 1 year | \n",
" -30.0 | \n",
"
\n",
" \n",
" 7 | \n",
" present_employment_since | \n",
" 1 <= ... < 4 years | \n",
" -0.0 | \n",
"
\n",
" \n",
" 8 | \n",
" present_employment_since | \n",
" 4 <= ... < 7 years | \n",
" 28.0 | \n",
"
\n",
" \n",
" 9 | \n",
" present_employment_since | \n",
" ... >= 7 years | \n",
" 13.0 | \n",
"
\n",
" \n",
" 3 | \n",
" savings_account_and_bonds | \n",
" ... < 100 DM%,%100 <= ... < 500 DM | \n",
" -8.0 | \n",
"
\n",
" \n",
" 4 | \n",
" savings_account_and_bonds | \n",
" 500 <= ... < 1000 DM%,%... >= 1000 DM | \n",
" 38.0 | \n",
"
\n",
" \n",
" 5 | \n",
" savings_account_and_bonds | \n",
" unknown/ no savings account | \n",
" 19.0 | \n",
"
\n",
" \n",
" 0 | \n",
" status_of_existing_checking_account | \n",
" ... < 0 DM%,%0 <= ... < 200 DM | \n",
" -34.0 | \n",
"
\n",
" \n",
" 1 | \n",
" status_of_existing_checking_account | \n",
" ... >= 200 DM / salary assignments for at leas... | \n",
" 26.0 | \n",
"
\n",
" \n",
" 2 | \n",
" status_of_existing_checking_account | \n",
" no checking account | \n",
" 65.0 | \n",
"
\n",
" \n",
" 10 | \n",
" age_in_years | \n",
" 36 | \n",
" 47.0 | \n",
"
\n",
" \n",
" 11 | \n",
" age_in_years | \n",
" [-inf,25.0) | \n",
" -20.0 | \n",
"
\n",
" \n",
" 12 | \n",
" age_in_years | \n",
" [25.0,30.0) | \n",
" -7.0 | \n",
"
\n",
" \n",
" 13 | \n",
" age_in_years | \n",
" [30.0,35.0) | \n",
" -6.0 | \n",
"
\n",
" \n",
" 14 | \n",
" age_in_years | \n",
" [35.0,inf) | \n",
" 11.0 | \n",
"
\n",
" \n",
" 15 | \n",
" purpose | \n",
" retraining%,%car (used) | \n",
" 65.0 | \n",
"
\n",
" \n",
" 16 | \n",
" purpose | \n",
" radio/television | \n",
" 28.0 | \n",
"
\n",
" \n",
" 17 | \n",
" purpose | \n",
" furniture/equipment%,%business%,%repairs | \n",
" -10.0 | \n",
"
\n",
" \n",
" 18 | \n",
" purpose | \n",
" domestic appliances%,%education%,%car (new)%,%... | \n",
" -28.0 | \n",
"
\n",
" \n",
" 19 | \n",
" duration_in_month | \n",
" [-inf,8.0) | \n",
" 107.0 | \n",
"
\n",
" \n",
" 20 | \n",
" duration_in_month | \n",
" [8.0,15.0) | \n",
" 17.0 | \n",
"
\n",
" \n",
" 21 | \n",
" duration_in_month | \n",
" [15.0,25.0) | \n",
" -0.0 | \n",
"
\n",
" \n",
" 22 | \n",
" duration_in_month | \n",
" [25.0,35.0) | \n",
" -12.0 | \n",
"
\n",
" \n",
" 23 | \n",
" duration_in_month | \n",
" [35.0,inf) | \n",
" -57.0 | \n",
"
\n",
" \n",
" 24 | \n",
" credit_history | \n",
" no credits taken/ all credits paid back duly%,... | \n",
" -78.0 | \n",
"
\n",
" \n",
" 25 | \n",
" credit_history | \n",
" existing credits paid back duly till now | \n",
" -2.0 | \n",
"
\n",
" \n",
" 26 | \n",
" credit_history | \n",
" delay in paying off in the past | \n",
" -9.0 | \n",
"
\n",
" \n",
" 27 | \n",
" credit_history | \n",
" critical account/ other credits existing (not ... | \n",
" 40.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" variable \\\n",
"0 basepoints \n",
"6 present_employment_since \n",
"7 present_employment_since \n",
"8 present_employment_since \n",
"9 present_employment_since \n",
"3 savings_account_and_bonds \n",
"4 savings_account_and_bonds \n",
"5 savings_account_and_bonds \n",
"0 status_of_existing_checking_account \n",
"1 status_of_existing_checking_account \n",
"2 status_of_existing_checking_account \n",
"10 age_in_years \n",
"11 age_in_years \n",
"12 age_in_years \n",
"13 age_in_years \n",
"14 age_in_years \n",
"15 purpose \n",
"16 purpose \n",
"17 purpose \n",
"18 purpose \n",
"19 duration_in_month \n",
"20 duration_in_month \n",
"21 duration_in_month \n",
"22 duration_in_month \n",
"23 duration_in_month \n",
"24 credit_history \n",
"25 credit_history \n",
"26 credit_history \n",
"27 credit_history \n",
"\n",
" bin points \n",
"0 NaN 538.0 \n",
"6 unemployed%,%... < 1 year -30.0 \n",
"7 1 <= ... < 4 years -0.0 \n",
"8 4 <= ... < 7 years 28.0 \n",
"9 ... >= 7 years 13.0 \n",
"3 ... < 100 DM%,%100 <= ... < 500 DM -8.0 \n",
"4 500 <= ... < 1000 DM%,%... >= 1000 DM 38.0 \n",
"5 unknown/ no savings account 19.0 \n",
"0 ... < 0 DM%,%0 <= ... < 200 DM -34.0 \n",
"1 ... >= 200 DM / salary assignments for at leas... 26.0 \n",
"2 no checking account 65.0 \n",
"10 36 47.0 \n",
"11 [-inf,25.0) -20.0 \n",
"12 [25.0,30.0) -7.0 \n",
"13 [30.0,35.0) -6.0 \n",
"14 [35.0,inf) 11.0 \n",
"15 retraining%,%car (used) 65.0 \n",
"16 radio/television 28.0 \n",
"17 furniture/equipment%,%business%,%repairs -10.0 \n",
"18 domestic appliances%,%education%,%car (new)%,%... -28.0 \n",
"19 [-inf,8.0) 107.0 \n",
"20 [8.0,15.0) 17.0 \n",
"21 [15.0,25.0) -0.0 \n",
"22 [25.0,35.0) -12.0 \n",
"23 [35.0,inf) -57.0 \n",
"24 no credits taken/ all credits paid back duly%,... -78.0 \n",
"25 existing credits paid back duly till now -2.0 \n",
"26 delay in paying off in the past -9.0 \n",
"27 critical account/ other credits existing (not ... 40.0 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"压力测试
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 违约率 | \n",
" 抽样次数 | \n",
" 样本数 | \n",
" 好样本数 | \n",
" 坏样本数 | \n",
" 平均AUC | \n",
" 最大AUC | \n",
" 最小AUC | \n",
" AUC标准差 | \n",
" 95%置信区间AUC | \n",
" 平均KS | \n",
" 最大KS | \n",
" 最小KS | \n",
" KS标准差 | \n",
" 95%置信区间KS | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.010 | \n",
" 10 | \n",
" 8900 | \n",
" 8811 | \n",
" 89 | \n",
" 0.802218 | \n",
" 0.802486 | \n",
" 0.801957 | \n",
" 0.000182 | \n",
" 0.8019 - 0.8026 | \n",
" 0.4690 | \n",
" 0.470 | \n",
" 0.468 | \n",
" 0.000447 | \n",
" 0.4681 - 0.4699 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.071 | \n",
" 10 | \n",
" 1253 | \n",
" 1164 | \n",
" 89 | \n",
" 0.801699 | \n",
" 0.803385 | \n",
" 0.798993 | \n",
" 0.001380 | \n",
" 0.7990 - 0.8044 | \n",
" 0.4620 | \n",
" 0.463 | \n",
" 0.459 | \n",
" 0.001342 | \n",
" 0.4594 - 0.4646 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.132 | \n",
" 10 | \n",
" 674 | \n",
" 585 | \n",
" 89 | \n",
" 0.802659 | \n",
" 0.806041 | \n",
" 0.798876 | \n",
" 0.001980 | \n",
" 0.7988 - 0.8065 | \n",
" 0.4535 | \n",
" 0.470 | \n",
" 0.449 | \n",
" 0.005766 | \n",
" 0.4422 - 0.4648 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.194 | \n",
" 10 | \n",
" 458 | \n",
" 369 | \n",
" 89 | \n",
" 0.800866 | \n",
" 0.807527 | \n",
" 0.794860 | \n",
" 0.003649 | \n",
" 0.7937 - 0.8080 | \n",
" 0.4736 | \n",
" 0.488 | \n",
" 0.464 | \n",
" 0.007889 | \n",
" 0.4581 - 0.4891 | \n",
"
\n",
" \n",
" 4 | \n",
" 0.255 | \n",
" 10 | \n",
" 349 | \n",
" 260 | \n",
" 89 | \n",
" 0.805873 | \n",
" 0.816551 | \n",
" 0.794274 | \n",
" 0.006607 | \n",
" 0.7929 - 0.8188 | \n",
" 0.4863 | \n",
" 0.503 | \n",
" 0.479 | \n",
" 0.006100 | \n",
" 0.4743 - 0.4983 | \n",
"
\n",
" \n",
" 5 | \n",
" 0.316 | \n",
" 10 | \n",
" 281 | \n",
" 192 | \n",
" 89 | \n",
" 0.802314 | \n",
" 0.811622 | \n",
" 0.793071 | \n",
" 0.004908 | \n",
" 0.7927 - 0.8119 | \n",
" 0.4673 | \n",
" 0.482 | \n",
" 0.448 | \n",
" 0.011314 | \n",
" 0.4451 - 0.4895 | \n",
"
\n",
" \n",
" 6 | \n",
" 0.377 | \n",
" 10 | \n",
" 236 | \n",
" 147 | \n",
" 89 | \n",
" 0.799400 | \n",
" 0.814492 | \n",
" 0.780631 | \n",
" 0.011334 | \n",
" 0.7772 - 0.8216 | \n",
" 0.4650 | \n",
" 0.490 | \n",
" 0.445 | \n",
" 0.012157 | \n",
" 0.4412 - 0.4888 | \n",
"
\n",
" \n",
" 7 | \n",
" 0.438 | \n",
" 10 | \n",
" 203 | \n",
" 114 | \n",
" 89 | \n",
" 0.801414 | \n",
" 0.830179 | \n",
" 0.777104 | \n",
" 0.015877 | \n",
" 0.7703 - 0.8325 | \n",
" 0.4713 | \n",
" 0.522 | \n",
" 0.405 | \n",
" 0.027893 | \n",
" 0.4166 - 0.5260 | \n",
"
\n",
" \n",
" 8 | \n",
" 0.499 | \n",
" 10 | \n",
" 178 | \n",
" 89 | \n",
" 89 | \n",
" 0.806521 | \n",
" 0.828683 | \n",
" 0.774145 | \n",
" 0.017369 | \n",
" 0.7725 - 0.8406 | \n",
" 0.4822 | \n",
" 0.528 | \n",
" 0.405 | \n",
" 0.036742 | \n",
" 0.4102 - 0.5542 | \n",
"
\n",
" \n",
" 9 | \n",
" 0.561 | \n",
" 10 | \n",
" 158 | \n",
" 69 | \n",
" 89 | \n",
" 0.808280 | \n",
" 0.852467 | \n",
" 0.754600 | \n",
" 0.030642 | \n",
" 0.7482 - 0.8683 | \n",
" 0.4818 | \n",
" 0.605 | \n",
" 0.422 | \n",
" 0.053630 | \n",
" 0.3767 - 0.5869 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 违约率 抽样次数 样本数 好样本数 坏样本数 平均AUC 最大AUC 最小AUC AUC标准差 \\\n",
"0 0.010 10 8900 8811 89 0.802218 0.802486 0.801957 0.000182 \n",
"1 0.071 10 1253 1164 89 0.801699 0.803385 0.798993 0.001380 \n",
"2 0.132 10 674 585 89 0.802659 0.806041 0.798876 0.001980 \n",
"3 0.194 10 458 369 89 0.800866 0.807527 0.794860 0.003649 \n",
"4 0.255 10 349 260 89 0.805873 0.816551 0.794274 0.006607 \n",
"5 0.316 10 281 192 89 0.802314 0.811622 0.793071 0.004908 \n",
"6 0.377 10 236 147 89 0.799400 0.814492 0.780631 0.011334 \n",
"7 0.438 10 203 114 89 0.801414 0.830179 0.777104 0.015877 \n",
"8 0.499 10 178 89 89 0.806521 0.828683 0.774145 0.017369 \n",
"9 0.561 10 158 69 89 0.808280 0.852467 0.754600 0.030642 \n",
"\n",
" 95%置信区间AUC 平均KS 最大KS 最小KS KS标准差 95%置信区间KS \n",
"0 0.8019 - 0.8026 0.4690 0.470 0.468 0.000447 0.4681 - 0.4699 \n",
"1 0.7990 - 0.8044 0.4620 0.463 0.459 0.001342 0.4594 - 0.4646 \n",
"2 0.7988 - 0.8065 0.4535 0.470 0.449 0.005766 0.4422 - 0.4648 \n",
"3 0.7937 - 0.8080 0.4736 0.488 0.464 0.007889 0.4581 - 0.4891 \n",
"4 0.7929 - 0.8188 0.4863 0.503 0.479 0.006100 0.4743 - 0.4983 \n",
"5 0.7927 - 0.8119 0.4673 0.482 0.448 0.011314 0.4451 - 0.4895 \n",
"6 0.7772 - 0.8216 0.4650 0.490 0.445 0.012157 0.4412 - 0.4888 \n",
"7 0.7703 - 0.8325 0.4713 0.522 0.405 0.027893 0.4166 - 0.5260 \n",
"8 0.7725 - 0.8406 0.4822 0.528 0.405 0.036742 0.4102 - 0.5542 \n",
"9 0.7482 - 0.8683 0.4818 0.605 0.422 0.053630 0.3767 - 0.5869 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"模型报告文件储存路径:./cache/train/demo/模型报告.docx\n",
"mlcfg save to【./cache/train/demo/mlcfg.json】success. \n",
"feature save to【./cache/train/demo/feature.csv】success. \n",
"model save to【./cache/train/demo/model.pkl】success. \n",
"model save to【./cache/train/demo/card.csv】success. \n"
]
}
],
"source": [
"# Positional boundary between the training rows and the test rows.\n",
"TRAIN_SIZE = 709\n",
"train_data = dat.iloc[:TRAIN_SIZE]\n",
"test_data = dat.iloc[TRAIN_SIZE:]\n",
"data = DataSplitEntity(train_data=train_data, test_data=test_data)\n",
"\n",
"# Pipeline configuration; every entry is forwarded to Pipeline as a keyword argument.\n",
"cfg = {\n",
"    # Project name; affects where the data is stored.\n",
"    \"project_name\": \"demo\",\n",
"    # Print output when running under Jupyter.\n",
"    \"jupyter_print\": True,\n",
"    # Print the details of the recommended binning.\n",
"    \"bin_detail_print\": True,\n",
"    # Whether to enable coarse binning.\n",
"    \"format_bin\": True,\n",
"    # Sampling rate for the split-point search; without coarse binning,\n",
"    # a value no greater than 0.2 is recommended.\n",
"    \"bin_sample_rate\": 0.1,\n",
"    # Keep the N variables with the highest IV.\n",
"    \"max_feature_num\": 10,\n",
"    # Number of monotonicity changes allowed.\n",
"    \"monto_shift_threshold\": 1,\n",
"    # IV threshold; this refers to the IV on the training set.\n",
"    \"iv_threshold\": 0.1,\n",
"    # Correlation threshold after the WOE transformation.\n",
"    \"corr_threshold\": 0.4,\n",
"    # PSI threshold after the variables are binned.\n",
"    \"psi_threshold\": 0.2,\n",
"    # VIF threshold after the WOE transformation.\n",
"    \"vif_threshold\": 10,\n",
"    # Stress test switch.\n",
"    \"stress_test\": True,\n",
"    # Number of sampling rounds for the stress test.\n",
"    \"stress_sample_times\": 10,\n",
"    # Special values; each one gets a bin of its own.\n",
"    \"special_values\": {\"age_in_years\": [36]},\n",
"    # Manually defined split points; for string variables, enumerated\n",
"    # values are merged with '%,%'.\n",
"    \"breaks_list\": {\n",
"        # 'duration_in_month': [12, 18, 48],\n",
"        # 'credit_amount': [2000, 3500, 4000, 7000],\n",
"        # 'purpose': ['retraining%,%car (used)', 'radio/television', 'furniture/equipment%,%business%,%repairs', 'domestic appliances%,%education%,%car (new)%,%others'],\n",
"        # 'age_in_years': [27, 34, 58]\n",
"    },\n",
"    # Target (y) column.\n",
"    \"y_column\": \"creditability\",\n",
"    # Candidate variables for modelling.\n",
"    # \"x_columns\": [\n",
"    #     \"duration_in_month\",\n",
"    #     \"credit_amount\",\n",
"    #     \"age_in_years\",\n",
"    #     \"purpose\",\n",
"    #     \"credit_history\",\n",
"    #     \"random\",\n",
"    #\n",
"    #     \"credit_amount_corr1\",\n",
"    #     \"credit_amount_corr2\",\n",
"    # ],\n",
"    # Variable descriptions.\n",
"    \"columns_anns\": {\n",
"        \"age_in_years\": \"年龄\",\n",
"        \"credit_history\": \"借贷历史\"\n",
"    },\n",
"    # Variables to exclude.\n",
"    \"columns_exclude\": [],\n",
"    # Variables that are forced into the model.\n",
"    # \"columns_include\": [\"credit_amount\"],\n",
"    # Rules that add or subtract points.\n",
"    # Follow the format of the example; the names SCORE and df must not be changed.\n",
"    \"rules\": [\"df.loc[df['credit_amount']>=9000,'SCORE'] += -50\"]\n",
"}\n",
"\n",
"# Train, generate the report, then save the artifacts.\n",
"pipeline = Pipeline(data=data, **cfg)\n",
"pipeline.train()\n",
"pipeline.report()\n",
"pipeline.save()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "3405fc86",
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----【age_in_years】不同分箱数下变量的推荐切分点-----\n",
"[30]\n",
"[30, 35]\n",
"[25, 30, 35]\n",
"[20, 30, 60]\n",
"[20, 25, 30, 35]\n",
"[20, 30, 35, 60]\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Show the recommended split points for age_in_years at different bin counts.\n",
"pipeline.variable_analyse(\n",
"    \"age_in_years\",\n",
"    format_bin=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "7c1f971d",
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"mlcfg load to【./cache/train/demo/mlcfg.json】success. \n",
"项目路径:【./cache/train/demo】\n",
"feature load from【./cache/train/demo/feature.csv】success.\n",
"model load from【./cache/train/demo/model.pkl】success.\n",
"model load from【./cache/train/demo/card.csv】success.\n"
]
},
{
"data": {
"text/plain": [
"{'KS': 0.3836,\n",
" 'AUC': 0.7481,\n",
" 'Gini': 0.4962,\n",
" 'pic': }"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the pipeline saved under the project directory and score test_data.\n",
"pipeline2 = Pipeline.load(\"./cache/train/demo\")\n",
"score = pipeline2.score(test_data)\n",
"score_rule = pipeline2.score_rule(test_data)\n",
"# Both evaluations run on test_data, so the plots are titled \"test\" (not \"train\")\n",
"# and carry distinct titles so the two results can be told apart.\n",
"# Only the return value of the last call is rendered as the cell result.\n",
"sc.perf_eva(test_data[\"creditability\"], score, title=\"test\", show_plot=True)\n",
"sc.perf_eva(test_data[\"creditability\"], score_rule, title=\"test (score_rule)\", show_plot=True)"
]
}
],
"metadata": {
"celltoolbar": "编辑元数据",
"kernelspec": {
"display_name": "Python [conda env:analysis]",
"language": "python",
"name": "conda-env-analysis-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.13"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {
"height": "calc(100% - 180px)",
"left": "10px",
"top": "150px",
"width": "372.364px"
},
"toc_section_display": true,
"toc_window_display": true
},
"toc-autonumbering": false,
"toc-showcode": false,
"toc-showmarkdowntxt": false,
"toc-showtags": false
},
"nbformat": 4,
"nbformat_minor": 5
}