{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"id": "429e05ad",
"metadata": {},
"outputs": [],
"source": [
"%matplotlib agg\n",
"# Setup cell: imports, configuration, and the demo train/test frames used below.\n",
"\n",
"# Standard library\n",
"import os\n",
"import random\n",
"import sys\n",
"\n",
"# Third-party\n",
"import matplotlib.pyplot as plt\n",
"import scorecardpy as sc\n",
"\n",
"# easy_ml is imported from a project checkout that is added to sys.path.\n",
"# EASY_ML_ROOT overrides the default location; the membership check keeps\n",
"# sys.path free of duplicate entries when this cell is re-run.\n",
"PROJECT_ROOT = os.environ.get(\"EASY_ML_ROOT\", \"/root/project\")\n",
"if PROJECT_ROOT not in sys.path:\n",
"    sys.path.append(PROJECT_ROOT)\n",
"from easy_ml import DataSplitEntity, Pipeline\n",
"\n",
"# Configuration\n",
"SEED = 42         # fixes the values of the synthetic `random` column across runs\n",
"TRAIN_SIZE = 709  # the first TRAIN_SIZE rows are used for training, the rest for testing\n",
"random.seed(SEED)\n",
"\n",
"# Load the demo data\n",
"dat = sc.germancredit()\n",
"# Replace dots in column names with underscores.\n",
"dat.columns = [c.replace(\".\", \"_\") for c in dat.columns]\n",
"# Binary target: 1 when creditability is \"bad\", 0 otherwise.\n",
"dat[\"creditability\"] = (dat[\"creditability\"] == \"bad\").astype(int)\n",
"\n",
"# Optional toggle: multiples of credit_amount\n",
"# (presumably for exercising the correlation filter - confirm before enabling).\n",
"# dat[\"credit_amount_corr1\"] = dat[\"credit_amount\"]*2\n",
"# dat[\"credit_amount_corr2\"] = dat[\"credit_amount\"]*3\n",
"\n",
"# Uniform [0, 1) noise feature, one value per row\n",
"# (presumably a sanity check that feature screening discards it).\n",
"dat[\"random\"] = [random.random() for _ in range(len(dat))]\n",
"\n",
"# Optional toggle: put string values into a numeric column\n",
"# (presumably for exercising missing/dirty value handling - confirm before enabling).\n",
"# duration_in_month = list(dat[\"duration_in_month\"])\n",
"# duration_in_month[0] = \"missing\"\n",
"# duration_in_month[1] = \"1\"\n",
"# dat[\"duration_in_month\"] = duration_in_month\n",
"\n",
"# Optional toggle: put a \"missing\" label and a None into a categorical column.\n",
"# purpose = list(dat[\"purpose\"])\n",
"# purpose[0] = \"missing\"\n",
"# purpose[1] = None\n",
"# dat[\"purpose\"] = purpose\n",
"\n",
"# Positional split in file order (no shuffling).\n",
"train_data = dat.iloc[:TRAIN_SIZE]\n",
"test_data = dat.iloc[TRAIN_SIZE:]"
]
},
{
"cell_type": "markdown",
"id": "8d339650",
"metadata": {},
"source": [
"# LR"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "d30f58f6",
"metadata": {
"code_folding": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"项目路径:【./cache/train/demo】\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 8/8 [00:20<00:00, 2.62s/it]\n"
]
},
{
"data": {
"text/html": [
"
样本分布
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 样本 | \n",
" 样本数 | \n",
" 样本占比 | \n",
" 坏样本数 | \n",
" 坏样本比例 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 训练集 | \n",
" 709 | \n",
" 70.90% | \n",
" 211 | \n",
" 29.76% | \n",
"
\n",
" \n",
" 1 | \n",
" 测试集 | \n",
" 291 | \n",
" 29.10% | \n",
" 89 | \n",
" 30.58% | \n",
"
\n",
" \n",
" 2 | \n",
" 合计 | \n",
" 1000 | \n",
" 100% | \n",
" 300 | \n",
" 30.00% | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 样本 样本数 样本占比 坏样本数 坏样本比例\n",
"0 训练集 709 70.90% 211 29.76%\n",
"1 测试集 291 29.10% 89 30.58%\n",
"2 合计 1000 100% 300 30.00%"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"变量iv
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 变量 | \n",
" iv | \n",
" psi | \n",
" vif | \n",
" 释义 | \n",
"
\n",
" \n",
" \n",
" \n",
" 5 | \n",
" status_of_existing_checking_account | \n",
" 0.629 | \n",
" 0.006 | \n",
" 1.114 | \n",
" - | \n",
"
\n",
" \n",
" 2 | \n",
" credit_history | \n",
" 0.272 | \n",
" 0.016 | \n",
" 1.057 | \n",
" 借贷历史 | \n",
"
\n",
" \n",
" 1 | \n",
" duration_in_month | \n",
" 0.270 | \n",
" 0.039 | \n",
" 1.034 | \n",
" - | \n",
"
\n",
" \n",
" 6 | \n",
" purpose | \n",
" 0.152 | \n",
" 0.013 | \n",
" 1.029 | \n",
" - | \n",
"
\n",
" \n",
" 4 | \n",
" savings_account_and_bonds | \n",
" 0.144 | \n",
" 0.015 | \n",
" 1.064 | \n",
" - | \n",
"
\n",
" \n",
" 0 | \n",
" age_in_years | \n",
" 0.105 | \n",
" 0.014 | \n",
" 1.065 | \n",
" 年龄 | \n",
"
\n",
" \n",
" 3 | \n",
" present_employment_since | \n",
" 0.104 | \n",
" 0.007 | \n",
" 1.057 | \n",
" - | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 变量 iv psi vif 释义\n",
"5 status_of_existing_checking_account 0.629 0.006 1.114 -\n",
"2 credit_history 0.272 0.016 1.057 借贷历史\n",
"1 duration_in_month 0.270 0.039 1.034 -\n",
"6 purpose 0.152 0.013 1.029 -\n",
"4 savings_account_and_bonds 0.144 0.015 1.064 -\n",
"0 age_in_years 0.105 0.014 1.065 年龄\n",
"3 present_employment_since 0.104 0.007 1.057 -"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"变量趋势
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"变量切分点:\n",
"{\n",
" \"status_of_existing_checking_account\": [\n",
" \"... < 0 DM%,%0 <= ... < 200 DM\",\n",
" \"... >= 200 DM / salary assignments for at least 1 year\",\n",
" \"no checking account\"\n",
" ],\n",
" \"credit_history\": [\n",
" \"no credits taken/ all credits paid back duly%,%all credits at this bank paid back duly\",\n",
" \"existing credits paid back duly till now\",\n",
" \"delay in paying off in the past\",\n",
" \"critical account/ other credits existing (not at this bank)\"\n",
" ],\n",
" \"savings_account_and_bonds\": [\n",
" \"... < 100 DM%,%100 <= ... < 500 DM\",\n",
" \"500 <= ... < 1000 DM%,%... >= 1000 DM\",\n",
" \"unknown/ no savings account\"\n",
" ],\n",
" \"duration_in_month\": [\n",
" 8,\n",
" 15,\n",
" 25,\n",
" 35\n",
" ],\n",
" \"purpose\": [\n",
" \"retraining%,%car (used)\",\n",
" \"radio/television\",\n",
" \"furniture/equipment%,%business%,%repairs\",\n",
" \"domestic appliances%,%education%,%car (new)%,%others\"\n",
" ],\n",
" \"age_in_years\": [\n",
" 25,\n",
" 30,\n",
" 35\n",
" ],\n",
" \"present_employment_since\": [\n",
" \"unemployed%,%... < 1 year\",\n",
" \"1 <= ... < 4 years\",\n",
" \"4 <= ... < 7 years\",\n",
" \"... >= 7 years\"\n",
" ]\n",
"}\n",
"选中变量不同分箱数下变量的推荐切分点:\n",
"-----【duration_in_month】不同分箱数下变量的推荐切分点-----\n",
"[35]\n",
"[8, 35]\n",
"[8, 15, 35]\n",
"[8, 15, 25, 35]\n",
"[8, 10, 15, 35]\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----【age_in_years】不同分箱数下变量的推荐切分点-----\n",
"[30]\n",
"[30, 35]\n",
"[25, 30, 35]\n",
"[20, 30, 60]\n",
"[20, 25, 30, 35]\n",
"[20, 30, 35, 60]\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"快速筛选过程
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"剔除train_iv小于阈值\n",
"筛选前变量数量:21\n",
"['status_of_existing_checking_account', 'duration_in_month', 'credit_history', 'purpose', 'credit_amount', 'savings_account_and_bonds', 'present_employment_since', 'installment_rate_in_percentage_of_disposable_income', 'personal_status_and_sex', 'other_debtors_or_guarantors', 'present_residence_since', 'property', 'age_in_years', 'other_installment_plans', 'housing', 'number_of_existing_credits_at_this_bank', 'job', 'number_of_people_being_liable_to_provide_maintenance_for', 'telephone', 'foreign_worker', 'random']\n",
"快速筛选剔除变量数量:13\n",
"number_of_people_being_liable_to_provide_maintenance_for 因为train_iv【0.0】小于阈值被剔除\n",
"housing 因为train_iv【0.042】小于阈值被剔除\n",
"property 因为train_iv【0.08】小于阈值被剔除\n",
"personal_status_and_sex 因为train_iv【0.01】小于阈值被剔除\n",
"other_debtors_or_guarantors 因为train_iv【0.017】小于阈值被剔除\n",
"foreign_worker 因为train_iv【0.0】小于阈值被剔除\n",
"number_of_existing_credits_at_this_bank 因为train_iv【0.003】小于阈值被剔除\n",
"random 因为train_iv【0.039】小于阈值被剔除\n",
"other_installment_plans 因为train_iv【0.064】小于阈值被剔除\n",
"present_residence_since 因为train_iv【0.001】小于阈值被剔除\n",
"telephone 因为train_iv【0.001】小于阈值被剔除\n",
"installment_rate_in_percentage_of_disposable_income 因为train_iv【0.033】小于阈值被剔除\n",
"job 因为train_iv【0.021】小于阈值被剔除\n",
"\n"
]
},
{
"data": {
"text/html": [
"数值变量筛选过程
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n"
]
},
{
"data": {
"text/html": [
"相关性筛选过程
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"duration_in_month: 【credit_amount_iv0.299_corr0.495】 \n",
"\n",
"-----相关性筛选保留的【duration_in_month】-----\n",
"-----【duration_in_month】不同分箱数下变量的推荐切分点-----\n",
"[35]\n",
"[8, 35]\n",
"[8, 15, 35]\n",
"[8, 15, 25, 35]\n",
"[8, 10, 15, 35]\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----【credit_amount】不同分箱数下变量的推荐切分点-----\n",
"[4000.0]\n",
"[4000.0, 9000.0]\n",
"[3000.0, 6000.0, 9000.0]\n",
"[3000.0, 4000.0, 9000.0]\n",
"[2000.0, 3000.0, 4000.0, 9000.0]\n",
"[3000.0, 4000.0, 5000.0, 9000.0]\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"vif筛选过程
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n"
]
},
{
"data": {
"text/html": [
"ivtop筛选过程
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"iv = train_iv + test_iv\n",
"\n"
]
},
{
"data": {
"text/html": [
"模型结果
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 样本集 | \n",
" AUC | \n",
" KS | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 训练集 | \n",
" 0.8036 | \n",
" 0.4718 | \n",
"
\n",
" \n",
" 1 | \n",
" 测试集 | \n",
" 0.7948 | \n",
" 0.4691 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 样本集 AUC KS\n",
"0 训练集 0.8036 0.4718\n",
"1 测试集 0.7948 0.4691"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"

"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"加入规则后:\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 样本集 | \n",
" AUC | \n",
" KS | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 训练集 | \n",
" 0.8065 | \n",
" 0.4711 | \n",
"
\n",
" \n",
" 1 | \n",
" 测试集 | \n",
" 0.8023 | \n",
" 0.4918 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 样本集 AUC KS\n",
"0 训练集 0.8065 0.4711\n",
"1 测试集 0.8023 0.4918"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"

"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"模型变量系数
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" Generalized Linear Model Regression Results \n",
"==============================================================================\n",
"Dep. Variable: creditability No. Observations: 709\n",
"Model: GLM Df Residuals: 702\n",
"Model Family: Binomial Df Model: 6\n",
"Link Function: logit Scale: 1.0000\n",
"Method: IRLS Log-Likelihood: -384.55\n",
"Date: Thu, 06 Mar 2025 Deviance: 769.09\n",
"Time: 14:01:08 Pearson chi2: 689.\n",
"No. Iterations: 5 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" var | \n",
" coef | \n",
" std err | \n",
" z | \n",
" P>|z| | \n",
" [0.025 | \n",
" 0.975] | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" age_in_years_woe | \n",
" 0.6160 | \n",
" 0.272 | \n",
" 2.264 | \n",
" 0.024 | \n",
" 0.083 | \n",
" 1.149 | \n",
"
\n",
" \n",
" 1 | \n",
" duration_in_month_woe | \n",
" 1.0201 | \n",
" 0.180 | \n",
" 5.667 | \n",
" 0.000 | \n",
" 0.667 | \n",
" 1.373 | \n",
"
\n",
" \n",
" 2 | \n",
" credit_history_woe | \n",
" 0.8273 | \n",
" 0.187 | \n",
" 4.427 | \n",
" 0.000 | \n",
" 0.461 | \n",
" 1.194 | \n",
"
\n",
" \n",
" 3 | \n",
" status_of_existing_checking_account_woe | \n",
" 0.7664 | \n",
" 0.107 | \n",
" 7.175 | \n",
" 0.000 | \n",
" 0.557 | \n",
" 0.976 | \n",
"
\n",
" \n",
" 4 | \n",
" savings_account_and_bonds_woe | \n",
" 0.5619 | \n",
" 0.230 | \n",
" 2.443 | \n",
" 0.015 | \n",
" 0.111 | \n",
" 1.013 | \n",
"
\n",
" \n",
" 5 | \n",
" present_employment_since_woe | \n",
" 0.8461 | \n",
" 0.282 | \n",
" 2.997 | \n",
" 0.003 | \n",
" 0.293 | \n",
" 1.399 | \n",
"
\n",
" \n",
" 6 | \n",
" purpose_woe | \n",
" 1.0363 | \n",
" 0.226 | \n",
" 4.588 | \n",
" 0.000 | \n",
" 0.594 | \n",
" 1.479 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" var coef std err z \\\n",
"0 age_in_years_woe 0.6160 0.272 2.264 \n",
"1 duration_in_month_woe 1.0201 0.180 5.667 \n",
"2 credit_history_woe 0.8273 0.187 4.427 \n",
"3 status_of_existing_checking_account_woe 0.7664 0.107 7.175 \n",
"4 savings_account_and_bonds_woe 0.5619 0.230 2.443 \n",
"5 present_employment_since_woe 0.8461 0.282 2.997 \n",
"6 purpose_woe 1.0363 0.226 4.588 \n",
"\n",
" P>|z| [0.025 0.975] \n",
"0 0.024 0.083 1.149 \n",
"1 0.000 0.667 1.373 \n",
"2 0.000 0.461 1.194 \n",
"3 0.000 0.557 0.976 \n",
"4 0.015 0.111 1.013 \n",
"5 0.003 0.293 1.399 \n",
"6 0.000 0.594 1.479 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"模型psi
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MODEL_SCORE_BIN | \n",
" psi | \n",
" 训练样本数 | \n",
" 测试样本数 | \n",
" 训练样本数比例 | \n",
" 测试样本数比例 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" (-inf, 436.0] | \n",
" 0.004 | \n",
" 73 | \n",
" 36 | \n",
" 0.103 | \n",
" 0.124 | \n",
"
\n",
" \n",
" 1 | \n",
" (436.0, 476.0] | \n",
" 0.003 | \n",
" 72 | \n",
" 25 | \n",
" 0.102 | \n",
" 0.086 | \n",
"
\n",
" \n",
" 2 | \n",
" (476.0, 504.4] | \n",
" 0.000 | \n",
" 68 | \n",
" 27 | \n",
" 0.096 | \n",
" 0.093 | \n",
"
\n",
" \n",
" 3 | \n",
" (504.4, 531.0] | \n",
" 0.000 | \n",
" 74 | \n",
" 29 | \n",
" 0.104 | \n",
" 0.100 | \n",
"
\n",
" \n",
" 4 | \n",
" (531.0, 559.0] | \n",
" 0.002 | \n",
" 69 | \n",
" 33 | \n",
" 0.097 | \n",
" 0.113 | \n",
"
\n",
" \n",
" 5 | \n",
" (559.0, 591.0] | \n",
" 0.000 | \n",
" 70 | \n",
" 30 | \n",
" 0.099 | \n",
" 0.103 | \n",
"
\n",
" \n",
" 6 | \n",
" (591.0, 618.0] | \n",
" 0.004 | \n",
" 72 | \n",
" 24 | \n",
" 0.102 | \n",
" 0.082 | \n",
"
\n",
" \n",
" 7 | \n",
" (618.0, 646.8] | \n",
" 0.000 | \n",
" 69 | \n",
" 27 | \n",
" 0.097 | \n",
" 0.093 | \n",
"
\n",
" \n",
" 8 | \n",
" (646.8, 681.0] | \n",
" 0.010 | \n",
" 72 | \n",
" 21 | \n",
" 0.102 | \n",
" 0.072 | \n",
"
\n",
" \n",
" 9 | \n",
" (681.0, inf] | \n",
" 0.011 | \n",
" 70 | \n",
" 39 | \n",
" 0.099 | \n",
" 0.134 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" MODEL_SCORE_BIN psi 训练样本数 测试样本数 训练样本数比例 测试样本数比例\n",
"0 (-inf, 436.0] 0.004 73 36 0.103 0.124\n",
"1 (436.0, 476.0] 0.003 72 25 0.102 0.086\n",
"2 (476.0, 504.4] 0.000 68 27 0.096 0.093\n",
"3 (504.4, 531.0] 0.000 74 29 0.104 0.100\n",
"4 (531.0, 559.0] 0.002 69 33 0.097 0.113\n",
"5 (559.0, 591.0] 0.000 70 30 0.099 0.103\n",
"6 (591.0, 618.0] 0.004 72 24 0.102 0.082\n",
"7 (618.0, 646.8] 0.000 69 27 0.097 0.093\n",
"8 (646.8, 681.0] 0.010 72 21 0.102 0.072\n",
"9 (681.0, inf] 0.011 70 39 0.099 0.134"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"模型psi: 0.034\n",
"加入规则后:\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MODEL_SCORE_BIN | \n",
" psi | \n",
" 训练样本数 | \n",
" 测试样本数 | \n",
" 训练样本数比例 | \n",
" 测试样本数比例 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" (-inf, 433.0] | \n",
" 0.001 | \n",
" 72 | \n",
" 33 | \n",
" 0.102 | \n",
" 0.113 | \n",
"
\n",
" \n",
" 1 | \n",
" (433.0, 474.0] | \n",
" 0.000 | \n",
" 71 | \n",
" 28 | \n",
" 0.100 | \n",
" 0.096 | \n",
"
\n",
" \n",
" 2 | \n",
" (474.0, 502.4] | \n",
" 0.000 | \n",
" 70 | \n",
" 30 | \n",
" 0.099 | \n",
" 0.103 | \n",
"
\n",
" \n",
" 3 | \n",
" (502.4, 524.2] | \n",
" 0.004 | \n",
" 71 | \n",
" 24 | \n",
" 0.100 | \n",
" 0.082 | \n",
"
\n",
" \n",
" 4 | \n",
" (524.2, 554.0] | \n",
" 0.001 | \n",
" 72 | \n",
" 32 | \n",
" 0.102 | \n",
" 0.110 | \n",
"
\n",
" \n",
" 5 | \n",
" (554.0, 585.0] | \n",
" 0.000 | \n",
" 70 | \n",
" 29 | \n",
" 0.099 | \n",
" 0.100 | \n",
"
\n",
" \n",
" 6 | \n",
" (585.0, 615.0] | \n",
" 0.001 | \n",
" 71 | \n",
" 27 | \n",
" 0.100 | \n",
" 0.093 | \n",
"
\n",
" \n",
" 7 | \n",
" (615.0, 646.0] | \n",
" 0.000 | \n",
" 71 | \n",
" 30 | \n",
" 0.100 | \n",
" 0.103 | \n",
"
\n",
" \n",
" 8 | \n",
" (646.0, 681.0] | \n",
" 0.009 | \n",
" 71 | \n",
" 21 | \n",
" 0.100 | \n",
" 0.072 | \n",
"
\n",
" \n",
" 9 | \n",
" (681.0, inf] | \n",
" 0.007 | \n",
" 70 | \n",
" 37 | \n",
" 0.099 | \n",
" 0.127 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" MODEL_SCORE_BIN psi 训练样本数 测试样本数 训练样本数比例 测试样本数比例\n",
"0 (-inf, 433.0] 0.001 72 33 0.102 0.113\n",
"1 (433.0, 474.0] 0.000 71 28 0.100 0.096\n",
"2 (474.0, 502.4] 0.000 70 30 0.099 0.103\n",
"3 (502.4, 524.2] 0.004 71 24 0.100 0.082\n",
"4 (524.2, 554.0] 0.001 72 32 0.102 0.110\n",
"5 (554.0, 585.0] 0.000 70 29 0.099 0.100\n",
"6 (585.0, 615.0] 0.001 71 27 0.100 0.093\n",
"7 (615.0, 646.0] 0.000 71 30 0.100 0.103\n",
"8 (646.0, 681.0] 0.009 71 21 0.100 0.072\n",
"9 (681.0, inf] 0.007 70 37 0.099 0.127"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"模型psi: 0.023\n"
]
},
{
"data": {
"text/html": [
"分数分箱
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"训练集-分数分箱\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MODEL_SCORE_BIN | \n",
" 样本数 | \n",
" 坏样本数 | \n",
" 好样本数 | \n",
" 坏样本比例 | \n",
" 样本数比例 | \n",
" 总坏样本数 | \n",
" 总好样本数 | \n",
" 平均坏样本率 | \n",
" 累计坏样本数 | \n",
" 累计好样本数 | \n",
" 累计样本数 | \n",
" 累计坏样本比例 | \n",
" 累计好样本比例 | \n",
" KS | \n",
" LIFT | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" (-inf, 436.0] | \n",
" 73 | \n",
" 50 | \n",
" 23 | \n",
" 0.685 | \n",
" 0.103 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 50 | \n",
" 23 | \n",
" 73 | \n",
" 0.237 | \n",
" 0.046 | \n",
" 0.191 | \n",
" 2.298 | \n",
"
\n",
" \n",
" 1 | \n",
" (436.0, 476.0] | \n",
" 72 | \n",
" 44 | \n",
" 28 | \n",
" 0.611 | \n",
" 0.102 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 94 | \n",
" 51 | \n",
" 145 | \n",
" 0.445 | \n",
" 0.102 | \n",
" 0.343 | \n",
" 2.175 | \n",
"
\n",
" \n",
" 2 | \n",
" (476.0, 504.4] | \n",
" 68 | \n",
" 31 | \n",
" 37 | \n",
" 0.456 | \n",
" 0.096 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 125 | \n",
" 88 | \n",
" 213 | \n",
" 0.592 | \n",
" 0.177 | \n",
" 0.415 | \n",
" 1.969 | \n",
"
\n",
" \n",
" 3 | \n",
" (504.4, 531.0] | \n",
" 74 | \n",
" 24 | \n",
" 50 | \n",
" 0.324 | \n",
" 0.104 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 149 | \n",
" 138 | \n",
" 287 | \n",
" 0.706 | \n",
" 0.277 | \n",
" 0.429 | \n",
" 1.742 | \n",
"
\n",
" \n",
" 4 | \n",
" (531.0, 559.0] | \n",
" 69 | \n",
" 26 | \n",
" 43 | \n",
" 0.377 | \n",
" 0.097 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 175 | \n",
" 181 | \n",
" 356 | \n",
" 0.829 | \n",
" 0.363 | \n",
" 0.466 | \n",
" 1.650 | \n",
"
\n",
" \n",
" 5 | \n",
" (559.0, 591.0] | \n",
" 70 | \n",
" 14 | \n",
" 56 | \n",
" 0.200 | \n",
" 0.099 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 189 | \n",
" 237 | \n",
" 426 | \n",
" 0.896 | \n",
" 0.476 | \n",
" 0.420 | \n",
" 1.489 | \n",
"
\n",
" \n",
" 6 | \n",
" (591.0, 618.0] | \n",
" 72 | \n",
" 9 | \n",
" 63 | \n",
" 0.125 | \n",
" 0.102 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 198 | \n",
" 300 | \n",
" 498 | \n",
" 0.938 | \n",
" 0.602 | \n",
" 0.336 | \n",
" 1.334 | \n",
"
\n",
" \n",
" 7 | \n",
" (618.0, 646.8] | \n",
" 69 | \n",
" 10 | \n",
" 59 | \n",
" 0.145 | \n",
" 0.097 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 208 | \n",
" 359 | \n",
" 567 | \n",
" 0.986 | \n",
" 0.721 | \n",
" 0.265 | \n",
" 1.231 | \n",
"
\n",
" \n",
" 8 | \n",
" (646.8, 681.0] | \n",
" 72 | \n",
" 2 | \n",
" 70 | \n",
" 0.028 | \n",
" 0.102 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 210 | \n",
" 429 | \n",
" 639 | \n",
" 0.995 | \n",
" 0.861 | \n",
" 0.134 | \n",
" 1.103 | \n",
"
\n",
" \n",
" 9 | \n",
" (681.0, inf] | \n",
" 70 | \n",
" 1 | \n",
" 69 | \n",
" 0.014 | \n",
" 0.099 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 211 | \n",
" 498 | \n",
" 709 | \n",
" 1.000 | \n",
" 1.000 | \n",
" 0.000 | \n",
" 0.999 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" MODEL_SCORE_BIN 样本数 坏样本数 好样本数 坏样本比例 样本数比例 总坏样本数 总好样本数 平均坏样本率 \\\n",
"0 (-inf, 436.0] 73 50 23 0.685 0.103 211 498 0.298 \n",
"1 (436.0, 476.0] 72 44 28 0.611 0.102 211 498 0.298 \n",
"2 (476.0, 504.4] 68 31 37 0.456 0.096 211 498 0.298 \n",
"3 (504.4, 531.0] 74 24 50 0.324 0.104 211 498 0.298 \n",
"4 (531.0, 559.0] 69 26 43 0.377 0.097 211 498 0.298 \n",
"5 (559.0, 591.0] 70 14 56 0.200 0.099 211 498 0.298 \n",
"6 (591.0, 618.0] 72 9 63 0.125 0.102 211 498 0.298 \n",
"7 (618.0, 646.8] 69 10 59 0.145 0.097 211 498 0.298 \n",
"8 (646.8, 681.0] 72 2 70 0.028 0.102 211 498 0.298 \n",
"9 (681.0, inf] 70 1 69 0.014 0.099 211 498 0.298 \n",
"\n",
" 累计坏样本数 累计好样本数 累计样本数 累计坏样本比例 累计好样本比例 KS LIFT \n",
"0 50 23 73 0.237 0.046 0.191 2.298 \n",
"1 94 51 145 0.445 0.102 0.343 2.175 \n",
"2 125 88 213 0.592 0.177 0.415 1.969 \n",
"3 149 138 287 0.706 0.277 0.429 1.742 \n",
"4 175 181 356 0.829 0.363 0.466 1.650 \n",
"5 189 237 426 0.896 0.476 0.420 1.489 \n",
"6 198 300 498 0.938 0.602 0.336 1.334 \n",
"7 208 359 567 0.986 0.721 0.265 1.231 \n",
"8 210 429 639 0.995 0.861 0.134 1.103 \n",
"9 211 498 709 1.000 1.000 0.000 0.999 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"加入规则后:\n",
"训练集-分数分箱\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MODEL_SCORE_BIN | \n",
" 样本数 | \n",
" 坏样本数 | \n",
" 好样本数 | \n",
" 坏样本比例 | \n",
" 样本数比例 | \n",
" 总坏样本数 | \n",
" 总好样本数 | \n",
" 平均坏样本率 | \n",
" 累计坏样本数 | \n",
" 累计好样本数 | \n",
" 累计样本数 | \n",
" 累计坏样本比例 | \n",
" 累计好样本比例 | \n",
" KS | \n",
" LIFT | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" (-inf, 433.0] | \n",
" 72 | \n",
" 52 | \n",
" 20 | \n",
" 0.722 | \n",
" 0.102 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 52 | \n",
" 20 | \n",
" 72 | \n",
" 0.246 | \n",
" 0.040 | \n",
" 0.206 | \n",
" 2.424 | \n",
"
\n",
" \n",
" 1 | \n",
" (433.0, 474.0] | \n",
" 71 | \n",
" 42 | \n",
" 29 | \n",
" 0.592 | \n",
" 0.100 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 94 | \n",
" 49 | \n",
" 143 | \n",
" 0.445 | \n",
" 0.098 | \n",
" 0.347 | \n",
" 2.206 | \n",
"
\n",
" \n",
" 2 | \n",
" (474.0, 502.4] | \n",
" 70 | \n",
" 31 | \n",
" 39 | \n",
" 0.443 | \n",
" 0.099 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 125 | \n",
" 88 | \n",
" 213 | \n",
" 0.592 | \n",
" 0.177 | \n",
" 0.415 | \n",
" 1.969 | \n",
"
\n",
" \n",
" 3 | \n",
" (502.4, 524.2] | \n",
" 71 | \n",
" 26 | \n",
" 45 | \n",
" 0.366 | \n",
" 0.100 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 151 | \n",
" 133 | \n",
" 284 | \n",
" 0.716 | \n",
" 0.267 | \n",
" 0.449 | \n",
" 1.784 | \n",
"
\n",
" \n",
" 4 | \n",
" (524.2, 554.0] | \n",
" 72 | \n",
" 22 | \n",
" 50 | \n",
" 0.306 | \n",
" 0.102 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 173 | \n",
" 183 | \n",
" 356 | \n",
" 0.820 | \n",
" 0.367 | \n",
" 0.453 | \n",
" 1.631 | \n",
"
\n",
" \n",
" 5 | \n",
" (554.0, 585.0] | \n",
" 70 | \n",
" 18 | \n",
" 52 | \n",
" 0.257 | \n",
" 0.099 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 191 | \n",
" 235 | \n",
" 426 | \n",
" 0.905 | \n",
" 0.472 | \n",
" 0.433 | \n",
" 1.505 | \n",
"
\n",
" \n",
" 6 | \n",
" (585.0, 615.0] | \n",
" 71 | \n",
" 8 | \n",
" 63 | \n",
" 0.113 | \n",
" 0.100 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 199 | \n",
" 298 | \n",
" 497 | \n",
" 0.943 | \n",
" 0.598 | \n",
" 0.345 | \n",
" 1.344 | \n",
"
\n",
" \n",
" 7 | \n",
" (615.0, 646.0] | \n",
" 71 | \n",
" 9 | \n",
" 62 | \n",
" 0.127 | \n",
" 0.100 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 208 | \n",
" 360 | \n",
" 568 | \n",
" 0.986 | \n",
" 0.723 | \n",
" 0.263 | \n",
" 1.229 | \n",
"
\n",
" \n",
" 8 | \n",
" (646.0, 681.0] | \n",
" 71 | \n",
" 2 | \n",
" 69 | \n",
" 0.028 | \n",
" 0.100 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 210 | \n",
" 429 | \n",
" 639 | \n",
" 0.995 | \n",
" 0.861 | \n",
" 0.134 | \n",
" 1.103 | \n",
"
\n",
" \n",
" 9 | \n",
" (681.0, inf] | \n",
" 70 | \n",
" 1 | \n",
" 69 | \n",
" 0.014 | \n",
" 0.099 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 211 | \n",
" 498 | \n",
" 709 | \n",
" 1.000 | \n",
" 1.000 | \n",
" 0.000 | \n",
" 0.999 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" MODEL_SCORE_BIN 样本数 坏样本数 好样本数 坏样本比例 样本数比例 总坏样本数 总好样本数 平均坏样本率 \\\n",
"0 (-inf, 433.0] 72 52 20 0.722 0.102 211 498 0.298 \n",
"1 (433.0, 474.0] 71 42 29 0.592 0.100 211 498 0.298 \n",
"2 (474.0, 502.4] 70 31 39 0.443 0.099 211 498 0.298 \n",
"3 (502.4, 524.2] 71 26 45 0.366 0.100 211 498 0.298 \n",
"4 (524.2, 554.0] 72 22 50 0.306 0.102 211 498 0.298 \n",
"5 (554.0, 585.0] 70 18 52 0.257 0.099 211 498 0.298 \n",
"6 (585.0, 615.0] 71 8 63 0.113 0.100 211 498 0.298 \n",
"7 (615.0, 646.0] 71 9 62 0.127 0.100 211 498 0.298 \n",
"8 (646.0, 681.0] 71 2 69 0.028 0.100 211 498 0.298 \n",
"9 (681.0, inf] 70 1 69 0.014 0.099 211 498 0.298 \n",
"\n",
" 累计坏样本数 累计好样本数 累计样本数 累计坏样本比例 累计好样本比例 KS LIFT \n",
"0 52 20 72 0.246 0.040 0.206 2.424 \n",
"1 94 49 143 0.445 0.098 0.347 2.206 \n",
"2 125 88 213 0.592 0.177 0.415 1.969 \n",
"3 151 133 284 0.716 0.267 0.449 1.784 \n",
"4 173 183 356 0.820 0.367 0.453 1.631 \n",
"5 191 235 426 0.905 0.472 0.433 1.505 \n",
"6 199 298 497 0.943 0.598 0.345 1.344 \n",
"7 208 360 568 0.986 0.723 0.263 1.229 \n",
"8 210 429 639 0.995 0.861 0.134 1.103 \n",
"9 211 498 709 1.000 1.000 0.000 0.999 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"测试集-分数分箱\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MODEL_SCORE_BIN | \n",
" 样本数 | \n",
" 坏样本数 | \n",
" 好样本数 | \n",
" 坏样本比例 | \n",
" 样本数比例 | \n",
" 总坏样本数 | \n",
" 总好样本数 | \n",
" 平均坏样本率 | \n",
" 累计坏样本数 | \n",
" 累计好样本数 | \n",
" 累计样本数 | \n",
" 累计坏样本比例 | \n",
" 累计好样本比例 | \n",
" KS | \n",
" LIFT | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" (-inf, 436.0] | \n",
" 36 | \n",
" 24 | \n",
" 12 | \n",
" 0.667 | \n",
" 0.124 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 24 | \n",
" 12 | \n",
" 36 | \n",
" 0.270 | \n",
" 0.059 | \n",
" 0.211 | \n",
" 2.179 | \n",
"
\n",
" \n",
" 1 | \n",
" (436.0, 476.0] | \n",
" 25 | \n",
" 14 | \n",
" 11 | \n",
" 0.560 | \n",
" 0.086 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 38 | \n",
" 23 | \n",
" 61 | \n",
" 0.427 | \n",
" 0.114 | \n",
" 0.313 | \n",
" 2.036 | \n",
"
\n",
" \n",
" 2 | \n",
" (476.0, 504.4] | \n",
" 27 | \n",
" 14 | \n",
" 13 | \n",
" 0.519 | \n",
" 0.093 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 52 | \n",
" 36 | \n",
" 88 | \n",
" 0.584 | \n",
" 0.178 | \n",
" 0.406 | \n",
" 1.931 | \n",
"
\n",
" \n",
" 3 | \n",
" (504.4, 531.0] | \n",
" 29 | \n",
" 10 | \n",
" 19 | \n",
" 0.345 | \n",
" 0.100 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 62 | \n",
" 55 | \n",
" 117 | \n",
" 0.697 | \n",
" 0.272 | \n",
" 0.425 | \n",
" 1.732 | \n",
"
\n",
" \n",
" 4 | \n",
" (531.0, 559.0] | \n",
" 33 | \n",
" 11 | \n",
" 22 | \n",
" 0.333 | \n",
" 0.113 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 73 | \n",
" 77 | \n",
" 150 | \n",
" 0.820 | \n",
" 0.381 | \n",
" 0.439 | \n",
" 1.590 | \n",
"
\n",
" \n",
" 5 | \n",
" (559.0, 591.0] | \n",
" 30 | \n",
" 6 | \n",
" 24 | \n",
" 0.200 | \n",
" 0.103 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 79 | \n",
" 101 | \n",
" 180 | \n",
" 0.888 | \n",
" 0.500 | \n",
" 0.388 | \n",
" 1.434 | \n",
"
\n",
" \n",
" 6 | \n",
" (591.0, 618.0] | \n",
" 24 | \n",
" 5 | \n",
" 19 | \n",
" 0.208 | \n",
" 0.082 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 84 | \n",
" 120 | \n",
" 204 | \n",
" 0.944 | \n",
" 0.594 | \n",
" 0.350 | \n",
" 1.346 | \n",
"
\n",
" \n",
" 7 | \n",
" (618.0, 646.8] | \n",
" 27 | \n",
" 2 | \n",
" 25 | \n",
" 0.074 | \n",
" 0.093 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 86 | \n",
" 145 | \n",
" 231 | \n",
" 0.966 | \n",
" 0.718 | \n",
" 0.248 | \n",
" 1.217 | \n",
"
\n",
" \n",
" 8 | \n",
" (646.8, 681.0] | \n",
" 21 | \n",
" 2 | \n",
" 19 | \n",
" 0.095 | \n",
" 0.072 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 88 | \n",
" 164 | \n",
" 252 | \n",
" 0.989 | \n",
" 0.812 | \n",
" 0.177 | \n",
" 1.141 | \n",
"
\n",
" \n",
" 9 | \n",
" (681.0, inf] | \n",
" 39 | \n",
" 1 | \n",
" 38 | \n",
" 0.026 | \n",
" 0.134 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 89 | \n",
" 202 | \n",
" 291 | \n",
" 1.000 | \n",
" 1.000 | \n",
" 0.000 | \n",
" 0.999 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" MODEL_SCORE_BIN 样本数 坏样本数 好样本数 坏样本比例 样本数比例 总坏样本数 总好样本数 平均坏样本率 \\\n",
"0 (-inf, 436.0] 36 24 12 0.667 0.124 89 202 0.306 \n",
"1 (436.0, 476.0] 25 14 11 0.560 0.086 89 202 0.306 \n",
"2 (476.0, 504.4] 27 14 13 0.519 0.093 89 202 0.306 \n",
"3 (504.4, 531.0] 29 10 19 0.345 0.100 89 202 0.306 \n",
"4 (531.0, 559.0] 33 11 22 0.333 0.113 89 202 0.306 \n",
"5 (559.0, 591.0] 30 6 24 0.200 0.103 89 202 0.306 \n",
"6 (591.0, 618.0] 24 5 19 0.208 0.082 89 202 0.306 \n",
"7 (618.0, 646.8] 27 2 25 0.074 0.093 89 202 0.306 \n",
"8 (646.8, 681.0] 21 2 19 0.095 0.072 89 202 0.306 \n",
"9 (681.0, inf] 39 1 38 0.026 0.134 89 202 0.306 \n",
"\n",
" 累计坏样本数 累计好样本数 累计样本数 累计坏样本比例 累计好样本比例 KS LIFT \n",
"0 24 12 36 0.270 0.059 0.211 2.179 \n",
"1 38 23 61 0.427 0.114 0.313 2.036 \n",
"2 52 36 88 0.584 0.178 0.406 1.931 \n",
"3 62 55 117 0.697 0.272 0.425 1.732 \n",
"4 73 77 150 0.820 0.381 0.439 1.590 \n",
"5 79 101 180 0.888 0.500 0.388 1.434 \n",
"6 84 120 204 0.944 0.594 0.350 1.346 \n",
"7 86 145 231 0.966 0.718 0.248 1.217 \n",
"8 88 164 252 0.989 0.812 0.177 1.141 \n",
"9 89 202 291 1.000 1.000 0.000 0.999 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"加入规则后:\n",
"测试集-分数分箱\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MODEL_SCORE_BIN | \n",
" 样本数 | \n",
" 坏样本数 | \n",
" 好样本数 | \n",
" 坏样本比例 | \n",
" 样本数比例 | \n",
" 总坏样本数 | \n",
" 总好样本数 | \n",
" 平均坏样本率 | \n",
" 累计坏样本数 | \n",
" 累计好样本数 | \n",
" 累计样本数 | \n",
" 累计坏样本比例 | \n",
" 累计好样本比例 | \n",
" KS | \n",
" LIFT | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" (-inf, 433.0] | \n",
" 33 | \n",
" 23 | \n",
" 10 | \n",
" 0.697 | \n",
" 0.113 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 23 | \n",
" 10 | \n",
" 33 | \n",
" 0.258 | \n",
" 0.050 | \n",
" 0.208 | \n",
" 2.278 | \n",
"
\n",
" \n",
" 1 | \n",
" (433.0, 474.0] | \n",
" 28 | \n",
" 16 | \n",
" 12 | \n",
" 0.571 | \n",
" 0.096 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 39 | \n",
" 22 | \n",
" 61 | \n",
" 0.438 | \n",
" 0.109 | \n",
" 0.329 | \n",
" 2.089 | \n",
"
\n",
" \n",
" 2 | \n",
" (474.0, 502.4] | \n",
" 30 | \n",
" 15 | \n",
" 15 | \n",
" 0.500 | \n",
" 0.103 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 54 | \n",
" 37 | \n",
" 91 | \n",
" 0.607 | \n",
" 0.183 | \n",
" 0.424 | \n",
" 1.939 | \n",
"
\n",
" \n",
" 3 | \n",
" (502.4, 524.2] | \n",
" 24 | \n",
" 10 | \n",
" 14 | \n",
" 0.417 | \n",
" 0.082 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 64 | \n",
" 51 | \n",
" 115 | \n",
" 0.719 | \n",
" 0.252 | \n",
" 0.467 | \n",
" 1.819 | \n",
"
\n",
" \n",
" 4 | \n",
" (524.2, 554.0] | \n",
" 32 | \n",
" 10 | \n",
" 22 | \n",
" 0.312 | \n",
" 0.110 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 74 | \n",
" 73 | \n",
" 147 | \n",
" 0.831 | \n",
" 0.361 | \n",
" 0.470 | \n",
" 1.645 | \n",
"
\n",
" \n",
" 5 | \n",
" (554.0, 585.0] | \n",
" 29 | \n",
" 6 | \n",
" 23 | \n",
" 0.207 | \n",
" 0.100 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 80 | \n",
" 96 | \n",
" 176 | \n",
" 0.899 | \n",
" 0.475 | \n",
" 0.424 | \n",
" 1.485 | \n",
"
\n",
" \n",
" 6 | \n",
" (585.0, 615.0] | \n",
" 27 | \n",
" 5 | \n",
" 22 | \n",
" 0.185 | \n",
" 0.093 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 85 | \n",
" 118 | \n",
" 203 | \n",
" 0.955 | \n",
" 0.584 | \n",
" 0.371 | \n",
" 1.368 | \n",
"
\n",
" \n",
" 7 | \n",
" (615.0, 646.0] | \n",
" 30 | \n",
" 1 | \n",
" 29 | \n",
" 0.033 | \n",
" 0.103 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 86 | \n",
" 147 | \n",
" 233 | \n",
" 0.966 | \n",
" 0.728 | \n",
" 0.238 | \n",
" 1.206 | \n",
"
\n",
" \n",
" 8 | \n",
" (646.0, 681.0] | \n",
" 21 | \n",
" 2 | \n",
" 19 | \n",
" 0.095 | \n",
" 0.072 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 88 | \n",
" 166 | \n",
" 254 | \n",
" 0.989 | \n",
" 0.822 | \n",
" 0.167 | \n",
" 1.132 | \n",
"
\n",
" \n",
" 9 | \n",
" (681.0, inf] | \n",
" 37 | \n",
" 1 | \n",
" 36 | \n",
" 0.027 | \n",
" 0.127 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 89 | \n",
" 202 | \n",
" 291 | \n",
" 1.000 | \n",
" 1.000 | \n",
" 0.000 | \n",
" 0.999 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" MODEL_SCORE_BIN 样本数 坏样本数 好样本数 坏样本比例 样本数比例 总坏样本数 总好样本数 平均坏样本率 \\\n",
"0 (-inf, 433.0] 33 23 10 0.697 0.113 89 202 0.306 \n",
"1 (433.0, 474.0] 28 16 12 0.571 0.096 89 202 0.306 \n",
"2 (474.0, 502.4] 30 15 15 0.500 0.103 89 202 0.306 \n",
"3 (502.4, 524.2] 24 10 14 0.417 0.082 89 202 0.306 \n",
"4 (524.2, 554.0] 32 10 22 0.312 0.110 89 202 0.306 \n",
"5 (554.0, 585.0] 29 6 23 0.207 0.100 89 202 0.306 \n",
"6 (585.0, 615.0] 27 5 22 0.185 0.093 89 202 0.306 \n",
"7 (615.0, 646.0] 30 1 29 0.033 0.103 89 202 0.306 \n",
"8 (646.0, 681.0] 21 2 19 0.095 0.072 89 202 0.306 \n",
"9 (681.0, inf] 37 1 36 0.027 0.127 89 202 0.306 \n",
"\n",
" 累计坏样本数 累计好样本数 累计样本数 累计坏样本比例 累计好样本比例 KS LIFT \n",
"0 23 10 33 0.258 0.050 0.208 2.278 \n",
"1 39 22 61 0.438 0.109 0.329 2.089 \n",
"2 54 37 91 0.607 0.183 0.424 1.939 \n",
"3 64 51 115 0.719 0.252 0.467 1.819 \n",
"4 74 73 147 0.831 0.361 0.470 1.645 \n",
"5 80 96 176 0.899 0.475 0.424 1.485 \n",
"6 85 118 203 0.955 0.584 0.371 1.368 \n",
"7 86 147 233 0.966 0.728 0.238 1.206 \n",
"8 88 166 254 0.989 0.822 0.167 1.132 \n",
"9 89 202 291 1.000 1.000 0.000 0.999 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"评分卡
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"评分卡不包含规则\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" variable | \n",
" bin | \n",
" points | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" basepoints | \n",
" NaN | \n",
" 538.0 | \n",
"
\n",
" \n",
" 0 | \n",
" age_in_years | \n",
" 36 | \n",
" 47.0 | \n",
"
\n",
" \n",
" 1 | \n",
" age_in_years | \n",
" [-inf,25.0) | \n",
" -20.0 | \n",
"
\n",
" \n",
" 2 | \n",
" age_in_years | \n",
" [25.0,30.0) | \n",
" -7.0 | \n",
"
\n",
" \n",
" 3 | \n",
" age_in_years | \n",
" [30.0,35.0) | \n",
" -6.0 | \n",
"
\n",
" \n",
" 4 | \n",
" age_in_years | \n",
" [35.0,inf) | \n",
" 11.0 | \n",
"
\n",
" \n",
" 5 | \n",
" duration_in_month | \n",
" [-inf,8.0) | \n",
" 107.0 | \n",
"
\n",
" \n",
" 6 | \n",
" duration_in_month | \n",
" [8.0,15.0) | \n",
" 17.0 | \n",
"
\n",
" \n",
" 7 | \n",
" duration_in_month | \n",
" [15.0,25.0) | \n",
" -0.0 | \n",
"
\n",
" \n",
" 8 | \n",
" duration_in_month | \n",
" [25.0,35.0) | \n",
" -12.0 | \n",
"
\n",
" \n",
" 9 | \n",
" duration_in_month | \n",
" [35.0,inf) | \n",
" -57.0 | \n",
"
\n",
" \n",
" 10 | \n",
" credit_history | \n",
" no credits taken/ all credits paid back duly%,... | \n",
" -78.0 | \n",
"
\n",
" \n",
" 11 | \n",
" credit_history | \n",
" existing credits paid back duly till now | \n",
" -2.0 | \n",
"
\n",
" \n",
" 12 | \n",
" credit_history | \n",
" delay in paying off in the past | \n",
" -9.0 | \n",
"
\n",
" \n",
" 13 | \n",
" credit_history | \n",
" critical account/ other credits existing (not ... | \n",
" 40.0 | \n",
"
\n",
" \n",
" 21 | \n",
" status_of_existing_checking_account | \n",
" ... < 0 DM%,%0 <= ... < 200 DM | \n",
" -34.0 | \n",
"
\n",
" \n",
" 22 | \n",
" status_of_existing_checking_account | \n",
" ... >= 200 DM / salary assignments for at leas... | \n",
" 26.0 | \n",
"
\n",
" \n",
" 23 | \n",
" status_of_existing_checking_account | \n",
" no checking account | \n",
" 65.0 | \n",
"
\n",
" \n",
" 18 | \n",
" savings_account_and_bonds | \n",
" ... < 100 DM%,%100 <= ... < 500 DM | \n",
" -8.0 | \n",
"
\n",
" \n",
" 19 | \n",
" savings_account_and_bonds | \n",
" 500 <= ... < 1000 DM%,%... >= 1000 DM | \n",
" 38.0 | \n",
"
\n",
" \n",
" 20 | \n",
" savings_account_and_bonds | \n",
" unknown/ no savings account | \n",
" 19.0 | \n",
"
\n",
" \n",
" 14 | \n",
" present_employment_since | \n",
" unemployed%,%... < 1 year | \n",
" -30.0 | \n",
"
\n",
" \n",
" 15 | \n",
" present_employment_since | \n",
" 1 <= ... < 4 years | \n",
" -0.0 | \n",
"
\n",
" \n",
" 16 | \n",
" present_employment_since | \n",
" 4 <= ... < 7 years | \n",
" 28.0 | \n",
"
\n",
" \n",
" 17 | \n",
" present_employment_since | \n",
" ... >= 7 years | \n",
" 13.0 | \n",
"
\n",
" \n",
" 24 | \n",
" purpose | \n",
" retraining%,%car (used) | \n",
" 65.0 | \n",
"
\n",
" \n",
" 25 | \n",
" purpose | \n",
" radio/television | \n",
" 28.0 | \n",
"
\n",
" \n",
" 26 | \n",
" purpose | \n",
" furniture/equipment%,%business%,%repairs | \n",
" -10.0 | \n",
"
\n",
" \n",
" 27 | \n",
" purpose | \n",
" domestic appliances%,%education%,%car (new)%,%... | \n",
" -28.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" variable \\\n",
"0 basepoints \n",
"0 age_in_years \n",
"1 age_in_years \n",
"2 age_in_years \n",
"3 age_in_years \n",
"4 age_in_years \n",
"5 duration_in_month \n",
"6 duration_in_month \n",
"7 duration_in_month \n",
"8 duration_in_month \n",
"9 duration_in_month \n",
"10 credit_history \n",
"11 credit_history \n",
"12 credit_history \n",
"13 credit_history \n",
"21 status_of_existing_checking_account \n",
"22 status_of_existing_checking_account \n",
"23 status_of_existing_checking_account \n",
"18 savings_account_and_bonds \n",
"19 savings_account_and_bonds \n",
"20 savings_account_and_bonds \n",
"14 present_employment_since \n",
"15 present_employment_since \n",
"16 present_employment_since \n",
"17 present_employment_since \n",
"24 purpose \n",
"25 purpose \n",
"26 purpose \n",
"27 purpose \n",
"\n",
" bin points \n",
"0 NaN 538.0 \n",
"0 36 47.0 \n",
"1 [-inf,25.0) -20.0 \n",
"2 [25.0,30.0) -7.0 \n",
"3 [30.0,35.0) -6.0 \n",
"4 [35.0,inf) 11.0 \n",
"5 [-inf,8.0) 107.0 \n",
"6 [8.0,15.0) 17.0 \n",
"7 [15.0,25.0) -0.0 \n",
"8 [25.0,35.0) -12.0 \n",
"9 [35.0,inf) -57.0 \n",
"10 no credits taken/ all credits paid back duly%,... -78.0 \n",
"11 existing credits paid back duly till now -2.0 \n",
"12 delay in paying off in the past -9.0 \n",
"13 critical account/ other credits existing (not ... 40.0 \n",
"21 ... < 0 DM%,%0 <= ... < 200 DM -34.0 \n",
"22 ... >= 200 DM / salary assignments for at leas... 26.0 \n",
"23 no checking account 65.0 \n",
"18 ... < 100 DM%,%100 <= ... < 500 DM -8.0 \n",
"19 500 <= ... < 1000 DM%,%... >= 1000 DM 38.0 \n",
"20 unknown/ no savings account 19.0 \n",
"14 unemployed%,%... < 1 year -30.0 \n",
"15 1 <= ... < 4 years -0.0 \n",
"16 4 <= ... < 7 years 28.0 \n",
"17 ... >= 7 years 13.0 \n",
"24 retraining%,%car (used) 65.0 \n",
"25 radio/television 28.0 \n",
"26 furniture/equipment%,%business%,%repairs -10.0 \n",
"27 domestic appliances%,%education%,%car (new)%,%... -28.0 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"压力测试
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 违约率 | \n",
" 抽样次数 | \n",
" 样本数 | \n",
" 好样本数 | \n",
" 坏样本数 | \n",
" 平均AUC | \n",
" 最大AUC | \n",
" 最小AUC | \n",
" AUC标准差 | \n",
" 95%置信区间AUC | \n",
" 平均KS | \n",
" 最大KS | \n",
" 最小KS | \n",
" KS标准差 | \n",
" 95%置信区间KS | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.010 | \n",
" 10 | \n",
" 8900 | \n",
" 8811 | \n",
" 89 | \n",
" 0.802218 | \n",
" 0.802486 | \n",
" 0.801957 | \n",
" 0.000182 | \n",
" 0.8019 - 0.8026 | \n",
" 0.4690 | \n",
" 0.470 | \n",
" 0.468 | \n",
" 0.000447 | \n",
" 0.4681 - 0.4699 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.071 | \n",
" 10 | \n",
" 1253 | \n",
" 1164 | \n",
" 89 | \n",
" 0.801699 | \n",
" 0.803385 | \n",
" 0.798993 | \n",
" 0.001380 | \n",
" 0.7990 - 0.8044 | \n",
" 0.4620 | \n",
" 0.463 | \n",
" 0.459 | \n",
" 0.001342 | \n",
" 0.4594 - 0.4646 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.132 | \n",
" 10 | \n",
" 674 | \n",
" 585 | \n",
" 89 | \n",
" 0.802659 | \n",
" 0.806041 | \n",
" 0.798876 | \n",
" 0.001980 | \n",
" 0.7988 - 0.8065 | \n",
" 0.4535 | \n",
" 0.470 | \n",
" 0.449 | \n",
" 0.005766 | \n",
" 0.4422 - 0.4648 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.194 | \n",
" 10 | \n",
" 458 | \n",
" 369 | \n",
" 89 | \n",
" 0.800866 | \n",
" 0.807527 | \n",
" 0.794860 | \n",
" 0.003649 | \n",
" 0.7937 - 0.8080 | \n",
" 0.4736 | \n",
" 0.488 | \n",
" 0.464 | \n",
" 0.007889 | \n",
" 0.4581 - 0.4891 | \n",
"
\n",
" \n",
" 4 | \n",
" 0.255 | \n",
" 10 | \n",
" 349 | \n",
" 260 | \n",
" 89 | \n",
" 0.805873 | \n",
" 0.816551 | \n",
" 0.794274 | \n",
" 0.006607 | \n",
" 0.7929 - 0.8188 | \n",
" 0.4863 | \n",
" 0.503 | \n",
" 0.479 | \n",
" 0.006100 | \n",
" 0.4743 - 0.4983 | \n",
"
\n",
" \n",
" 5 | \n",
" 0.316 | \n",
" 10 | \n",
" 281 | \n",
" 192 | \n",
" 89 | \n",
" 0.802314 | \n",
" 0.811622 | \n",
" 0.793071 | \n",
" 0.004908 | \n",
" 0.7927 - 0.8119 | \n",
" 0.4673 | \n",
" 0.482 | \n",
" 0.448 | \n",
" 0.011314 | \n",
" 0.4451 - 0.4895 | \n",
"
\n",
" \n",
" 6 | \n",
" 0.377 | \n",
" 10 | \n",
" 236 | \n",
" 147 | \n",
" 89 | \n",
" 0.799400 | \n",
" 0.814492 | \n",
" 0.780631 | \n",
" 0.011334 | \n",
" 0.7772 - 0.8216 | \n",
" 0.4650 | \n",
" 0.490 | \n",
" 0.445 | \n",
" 0.012157 | \n",
" 0.4412 - 0.4888 | \n",
"
\n",
" \n",
" 7 | \n",
" 0.438 | \n",
" 10 | \n",
" 203 | \n",
" 114 | \n",
" 89 | \n",
" 0.801414 | \n",
" 0.830179 | \n",
" 0.777104 | \n",
" 0.015877 | \n",
" 0.7703 - 0.8325 | \n",
" 0.4713 | \n",
" 0.522 | \n",
" 0.405 | \n",
" 0.027893 | \n",
" 0.4166 - 0.5260 | \n",
"
\n",
" \n",
" 8 | \n",
" 0.499 | \n",
" 10 | \n",
" 178 | \n",
" 89 | \n",
" 89 | \n",
" 0.806521 | \n",
" 0.828683 | \n",
" 0.774145 | \n",
" 0.017369 | \n",
" 0.7725 - 0.8406 | \n",
" 0.4822 | \n",
" 0.528 | \n",
" 0.405 | \n",
" 0.036742 | \n",
" 0.4102 - 0.5542 | \n",
"
\n",
" \n",
" 9 | \n",
" 0.561 | \n",
" 10 | \n",
" 158 | \n",
" 69 | \n",
" 89 | \n",
" 0.808280 | \n",
" 0.852467 | \n",
" 0.754600 | \n",
" 0.030642 | \n",
" 0.7482 - 0.8683 | \n",
" 0.4818 | \n",
" 0.605 | \n",
" 0.422 | \n",
" 0.053630 | \n",
" 0.3767 - 0.5869 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 违约率 抽样次数 样本数 好样本数 坏样本数 平均AUC 最大AUC 最小AUC AUC标准差 \\\n",
"0 0.010 10 8900 8811 89 0.802218 0.802486 0.801957 0.000182 \n",
"1 0.071 10 1253 1164 89 0.801699 0.803385 0.798993 0.001380 \n",
"2 0.132 10 674 585 89 0.802659 0.806041 0.798876 0.001980 \n",
"3 0.194 10 458 369 89 0.800866 0.807527 0.794860 0.003649 \n",
"4 0.255 10 349 260 89 0.805873 0.816551 0.794274 0.006607 \n",
"5 0.316 10 281 192 89 0.802314 0.811622 0.793071 0.004908 \n",
"6 0.377 10 236 147 89 0.799400 0.814492 0.780631 0.011334 \n",
"7 0.438 10 203 114 89 0.801414 0.830179 0.777104 0.015877 \n",
"8 0.499 10 178 89 89 0.806521 0.828683 0.774145 0.017369 \n",
"9 0.561 10 158 69 89 0.808280 0.852467 0.754600 0.030642 \n",
"\n",
" 95%置信区间AUC 平均KS 最大KS 最小KS KS标准差 95%置信区间KS \n",
"0 0.8019 - 0.8026 0.4690 0.470 0.468 0.000447 0.4681 - 0.4699 \n",
"1 0.7990 - 0.8044 0.4620 0.463 0.459 0.001342 0.4594 - 0.4646 \n",
"2 0.7988 - 0.8065 0.4535 0.470 0.449 0.005766 0.4422 - 0.4648 \n",
"3 0.7937 - 0.8080 0.4736 0.488 0.464 0.007889 0.4581 - 0.4891 \n",
"4 0.7929 - 0.8188 0.4863 0.503 0.479 0.006100 0.4743 - 0.4983 \n",
"5 0.7927 - 0.8119 0.4673 0.482 0.448 0.011314 0.4451 - 0.4895 \n",
"6 0.7772 - 0.8216 0.4650 0.490 0.445 0.012157 0.4412 - 0.4888 \n",
"7 0.7703 - 0.8325 0.4713 0.522 0.405 0.027893 0.4166 - 0.5260 \n",
"8 0.7725 - 0.8406 0.4822 0.528 0.405 0.036742 0.4102 - 0.5542 \n",
"9 0.7482 - 0.8683 0.4818 0.605 0.422 0.053630 0.3767 - 0.5869 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"模型报告文件储存路径:./cache/train/demo/模型报告.docx\n",
"mlcfg save to【./cache/train/demo/mlcfg.json】success. \n",
"feature save to【./cache/train/demo/feature.csv】success. \n",
"model save to【./cache/train/demo/model.pkl】success. \n",
"model save to【./cache/train/demo/coef.json】success. \n",
"model save to【./cache/train/demo/card.csv】success. \n",
"model save to【./cache/train/demo/card.cfg】success. \n"
]
}
],
"source": [
"data = DataSplitEntity(train_data=train_data, test_data=test_data)\n",
"# 特征处理\n",
"cfg = {\n",
"# 项目名称,影响数据存储位置\n",
"\"project_name\": \"demo\",\n",
"# jupyter下输出内容\n",
"\"jupyter_print\": True,\n",
"# 打印推荐分箱细节\n",
"\"bin_detail_print\": True,\n",
"# 是否开启粗分箱\n",
"\"format_bin\": True,\n",
"# 变量切分点搜索采样率,没有粗分箱的话建议不超过0.2\n",
"\"bin_sample_rate\": 0.1,\n",
"# 保留iv值最大的N个变量\n",
"\"max_feature_num\": 10,\n",
"# 单调性允许变化次数\n",
"\"monto_shift_threshold\":1,\n",
"# iv阈值,这里指训练集的iv\n",
"\"iv_threshold\": 0.1,\n",
"# woe后相关性阈值, \n",
"\"corr_threshold\": 0.4,\n",
"# 变量分箱后的psi阈值\n",
"\"psi_threshold\": 0.2,\n",
"# woe后vif阈值, \n",
"\"vif_threshold\": 10,\n",
"# 压力测试\n",
"\"stress_test\":True,\n",
"# 压力测试抽样次数\n",
"\"stress_sample_times\": 10,\n",
"# 特殊值,单独一箱\n",
"\"special_values\": {\"age_in_years\": [36]},\n",
"# 手动定义切分点,字符型的变量以'%,%'合并枚举值\n",
"\"breaks_list\": { \n",
"# 'duration_in_month': [12, 18, 48], \n",
"# 'credit_amount': [2000, 3500, 4000, 7000], \n",
"# 'purpose': ['retraining%,%car (used)', 'radio/television', 'furniture/equipment%,%business%,%repairs', 'domestic appliances%,%education%,%car (new)%,%others'], \n",
"# 'age_in_years': [27, 34, 58]\n",
" },\n",
"# y \n",
"\"y_column\": \"creditability\",\n",
"# 参与建模的候选变量\n",
"# \"x_columns\": [\n",
"# \"duration_in_month\",\n",
"# \"credit_amount\",\n",
"# \"age_in_years\",\n",
"# \"purpose\",\n",
"# \"credit_history\",\n",
"# \"random\",\n",
" \n",
"# \"credit_amount_corr1\",\n",
"# \"credit_amount_corr2\",\n",
"# ],\n",
"# 变量释义\n",
"\"columns_anns\":{\n",
" \"age_in_years\": \"年龄\",\n",
" \"credit_history\": \"借贷历史\"\n",
"},\n",
"# 被排除的变量\n",
"\"columns_exclude\": [],\n",
"# 强制使用的变量 \n",
"# \"columns_include\": [\"credit_amount\"],\n",
"# 加减分规则\n",
"# 格式如例子,SCORE和df不能变\n",
"\"rules\":[\"df.loc[df['credit_amount']>=9000,'SCORE'] += -50\"],\n",
"\"model_type\": \"lr\",\n",
"}\n",
"\n",
"# 训练并生成报告\n",
"pipeline = Pipeline(data=data, **cfg)\n",
"# 规则表达式语法测试\n",
"# pipeline.rules_test()\n",
"pipeline.train()\n",
"pipeline.report()\n",
"pipeline.save()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "6311d7f1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----【age_in_years】不同分箱数下变量的推荐切分点-----\n",
"[26.0]\n",
"[26.0, 35.0]\n",
"[27.0, 30.0, 35.0]\n",
"[27.0, 35.0, 39.0]\n",
"[23.0, 24.0, 30.0, 35.0]\n",
"[24.0, 26.0, 35.0, 39.0]\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"pipeline.variable_analyse(\"age_in_years\", format_bin=False)"
]
},
{
"cell_type": "markdown",
"id": "a586b1b9",
"metadata": {},
"source": [
"## 模型加载"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "f05b6b43",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"mlcfg load from【./cache/train/demo/mlcfg.json】success. \n",
"项目路径:【./cache/train/demo】\n",
"feature load from【./cache/train/demo/feature.csv】success.\n",
"model load from【./cache/train/demo/model.pkl】success.\n",
"model load from【./cache/train/demo/card.csv】success.\n"
]
},
{
"data": {
"text/plain": [
"{'KS': 0.4918,\n",
" 'AUC': 0.8023,\n",
" 'Gini': 0.6045,\n",
" 'pic': }"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pipeline2 = Pipeline.load(\"./cache/train/demo\")\n",
"score = pipeline2.score(test_data)\n",
"score_rule = pipeline2.score_rule(test_data)\n",
"# score\n",
"sc.perf_eva(test_data[\"creditability\"], score, title=\"train\", show_plot=True)\n",
"sc.perf_eva(test_data[\"creditability\"], score_rule, title=\"train\", show_plot=True)"
]
},
{
"cell_type": "markdown",
"id": "9ec5b63f",
"metadata": {},
"source": [
"## 模型psi"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "25a71be4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"模型psi: 0.023\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MODEL_SCORE_BIN | \n",
" psi | \n",
" 训练样本数 | \n",
" 测试样本数 | \n",
" 训练样本数比例 | \n",
" 测试样本数比例 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" (-inf, 433.0] | \n",
" 0.001 | \n",
" 72 | \n",
" 33 | \n",
" 0.102 | \n",
" 0.113 | \n",
"
\n",
" \n",
" 1 | \n",
" (433.0, 474.0] | \n",
" 0.000 | \n",
" 71 | \n",
" 28 | \n",
" 0.100 | \n",
" 0.096 | \n",
"
\n",
" \n",
" 2 | \n",
" (474.0, 502.4] | \n",
" 0.000 | \n",
" 70 | \n",
" 30 | \n",
" 0.099 | \n",
" 0.103 | \n",
"
\n",
" \n",
" 3 | \n",
" (502.4, 524.2] | \n",
" 0.004 | \n",
" 71 | \n",
" 24 | \n",
" 0.100 | \n",
" 0.082 | \n",
"
\n",
" \n",
" 4 | \n",
" (524.2, 554.0] | \n",
" 0.001 | \n",
" 72 | \n",
" 32 | \n",
" 0.102 | \n",
" 0.110 | \n",
"
\n",
" \n",
" 5 | \n",
" (554.0, 585.0] | \n",
" 0.000 | \n",
" 70 | \n",
" 29 | \n",
" 0.099 | \n",
" 0.100 | \n",
"
\n",
" \n",
" 6 | \n",
" (585.0, 615.0] | \n",
" 0.001 | \n",
" 71 | \n",
" 27 | \n",
" 0.100 | \n",
" 0.093 | \n",
"
\n",
" \n",
" 7 | \n",
" (615.0, 646.0] | \n",
" 0.000 | \n",
" 71 | \n",
" 30 | \n",
" 0.100 | \n",
" 0.103 | \n",
"
\n",
" \n",
" 8 | \n",
" (646.0, 681.0] | \n",
" 0.009 | \n",
" 71 | \n",
" 21 | \n",
" 0.100 | \n",
" 0.072 | \n",
"
\n",
" \n",
" 9 | \n",
" (681.0, inf] | \n",
" 0.007 | \n",
" 70 | \n",
" 37 | \n",
" 0.099 | \n",
" 0.127 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" MODEL_SCORE_BIN psi 训练样本数 测试样本数 训练样本数比例 测试样本数比例\n",
"0 (-inf, 433.0] 0.001 72 33 0.102 0.113\n",
"1 (433.0, 474.0] 0.000 71 28 0.100 0.096\n",
"2 (474.0, 502.4] 0.000 70 30 0.099 0.103\n",
"3 (502.4, 524.2] 0.004 71 24 0.100 0.082\n",
"4 (524.2, 554.0] 0.001 72 32 0.102 0.110\n",
"5 (554.0, 585.0] 0.000 70 29 0.099 0.100\n",
"6 (585.0, 615.0] 0.001 71 27 0.100 0.093\n",
"7 (615.0, 646.0] 0.000 71 30 0.100 0.103\n",
"8 (646.0, 681.0] 0.009 71 21 0.100 0.072\n",
"9 (681.0, inf] 0.007 70 37 0.099 0.127"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pipeline2.psi(train_data, test_data)"
]
},
{
"cell_type": "markdown",
"id": "eb109a81",
"metadata": {},
"source": [
"# xgb"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "61e7f86a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"项目路径:【./cache/train/demo】\n"
]
},
{
"data": {
"text/html": [
"样本分布
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 样本 | \n",
" 样本数 | \n",
" 样本占比 | \n",
" 坏样本数 | \n",
" 坏样本比例 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 训练集 | \n",
" 709 | \n",
" 70.90% | \n",
" 211 | \n",
" 29.76% | \n",
"
\n",
" \n",
" 1 | \n",
" 测试集 | \n",
" 291 | \n",
" 29.10% | \n",
" 89 | \n",
" 30.58% | \n",
"
\n",
" \n",
" 2 | \n",
" 合计 | \n",
" 1000 | \n",
" 100% | \n",
" 300 | \n",
" 30.00% | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 样本 样本数 样本占比 坏样本数 坏样本比例\n",
"0 训练集 709 70.90% 211 29.76%\n",
"1 测试集 291 29.10% 89 30.58%\n",
"2 合计 1000 100% 300 30.00%"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"入模变量
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" feature | \n",
" importance_weight | \n",
" 释义 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" credit_amount | \n",
" 0.166667 | \n",
" - | \n",
"
\n",
" \n",
" 1 | \n",
" age_in_years | \n",
" 0.098901 | \n",
" 年龄 | \n",
"
\n",
" \n",
" 2 | \n",
" status_of_existing_checking_account | \n",
" 0.091575 | \n",
" - | \n",
"
\n",
" \n",
" 3 | \n",
" duration_in_month | \n",
" 0.086081 | \n",
" - | \n",
"
\n",
" \n",
" 4 | \n",
" credit_history | \n",
" 0.082418 | \n",
" 借贷历史 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" feature importance_weight 释义\n",
"0 credit_amount 0.166667 -\n",
"1 age_in_years 0.098901 年龄\n",
"2 status_of_existing_checking_account 0.091575 -\n",
"3 duration_in_month 0.086081 -\n",
"4 credit_history 0.082418 借贷历史"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"快速筛选过程
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"剔除变量重要性排名5以后的变量\n",
"筛选前变量数量:20\n",
"['status_of_existing_checking_account', 'duration_in_month', 'credit_history', 'purpose', 'credit_amount', 'savings_account_and_bonds', 'present_employment_since', 'installment_rate_in_percentage_of_disposable_income', 'personal_status_and_sex', 'other_debtors_or_guarantors', 'present_residence_since', 'property', 'age_in_years', 'other_installment_plans', 'housing', 'number_of_existing_credits_at_this_bank', 'job', 'number_of_people_being_liable_to_provide_maintenance_for', 'telephone', 'foreign_worker']\n",
"快速筛选剔除变量数量:15\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" feature | \n",
" importance_weight | \n",
" 释义 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" credit_amount | \n",
" 0.166667 | \n",
" - | \n",
"
\n",
" \n",
" 1 | \n",
" age_in_years | \n",
" 0.098901 | \n",
" 年龄 | \n",
"
\n",
" \n",
" 2 | \n",
" status_of_existing_checking_account | \n",
" 0.091575 | \n",
" - | \n",
"
\n",
" \n",
" 3 | \n",
" duration_in_month | \n",
" 0.086081 | \n",
" - | \n",
"
\n",
" \n",
" 4 | \n",
" credit_history | \n",
" 0.082418 | \n",
" 借贷历史 | \n",
"
\n",
" \n",
" 5 | \n",
" purpose | \n",
" 0.080586 | \n",
" - | \n",
"
\n",
" \n",
" 6 | \n",
" present_employment_since | \n",
" 0.075092 | \n",
" - | \n",
"
\n",
" \n",
" 7 | \n",
" other_installment_plans | \n",
" 0.069597 | \n",
" - | \n",
"
\n",
" \n",
" 8 | \n",
" installment_rate_in_percentage_of_disposable_i... | \n",
" 0.040293 | \n",
" - | \n",
"
\n",
" \n",
" 9 | \n",
" savings_account_and_bonds | \n",
" 0.038462 | \n",
" - | \n",
"
\n",
" \n",
" 10 | \n",
" other_debtors_or_guarantors | \n",
" 0.029304 | \n",
" - | \n",
"
\n",
" \n",
" 11 | \n",
" property | \n",
" 0.029304 | \n",
" - | \n",
"
\n",
" \n",
" 12 | \n",
" housing | \n",
" 0.023810 | \n",
" - | \n",
"
\n",
" \n",
" 13 | \n",
" present_residence_since | \n",
" 0.021978 | \n",
" - | \n",
"
\n",
" \n",
" 14 | \n",
" number_of_existing_credits_at_this_bank | \n",
" 0.018315 | \n",
" - | \n",
"
\n",
" \n",
" 15 | \n",
" personal_status_and_sex | \n",
" 0.018315 | \n",
" - | \n",
"
\n",
" \n",
" 16 | \n",
" number_of_people_being_liable_to_provide_maint... | \n",
" 0.010989 | \n",
" - | \n",
"
\n",
" \n",
" 17 | \n",
" foreign_worker | \n",
" 0.010989 | \n",
" - | \n",
"
\n",
" \n",
" 18 | \n",
" job | \n",
" 0.007326 | \n",
" - | \n",
"
\n",
" \n",
" 19 | \n",
" telephone | \n",
" 0.000000 | \n",
" - | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" feature importance_weight 释义\n",
"0 credit_amount 0.166667 -\n",
"1 age_in_years 0.098901 年龄\n",
"2 status_of_existing_checking_account 0.091575 -\n",
"3 duration_in_month 0.086081 -\n",
"4 credit_history 0.082418 借贷历史\n",
"5 purpose 0.080586 -\n",
"6 present_employment_since 0.075092 -\n",
"7 other_installment_plans 0.069597 -\n",
"8 installment_rate_in_percentage_of_disposable_i... 0.040293 -\n",
"9 savings_account_and_bonds 0.038462 -\n",
"10 other_debtors_or_guarantors 0.029304 -\n",
"11 property 0.029304 -\n",
"12 housing 0.023810 -\n",
"13 present_residence_since 0.021978 -\n",
"14 number_of_existing_credits_at_this_bank 0.018315 -\n",
"15 personal_status_and_sex 0.018315 -\n",
"16 number_of_people_being_liable_to_provide_maint... 0.010989 -\n",
"17 foreign_worker 0.010989 -\n",
"18 job 0.007326 -\n",
"19 telephone 0.000000 -"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"--------------------------------------------------开始训练--------------------------------------------------\n",
"[0]\tvalidation_0-auc:0.75831\tvalidation_1-auc:0.72756\n",
"Multiple eval metrics have been passed: 'validation_1-auc' will be used for early stopping.\n",
"\n",
"Will train until validation_1-auc hasn't improved in 20 rounds.\n",
"[10]\tvalidation_0-auc:0.81092\tvalidation_1-auc:0.74266\n",
"[20]\tvalidation_0-auc:0.83770\tvalidation_1-auc:0.76494\n",
"[30]\tvalidation_0-auc:0.85714\tvalidation_1-auc:0.77606\n",
"[40]\tvalidation_0-auc:0.86472\tvalidation_1-auc:0.77898\n",
"[50]\tvalidation_0-auc:0.87123\tvalidation_1-auc:0.78129\n",
"[60]\tvalidation_0-auc:0.87962\tvalidation_1-auc:0.78524\n",
"[70]\tvalidation_0-auc:0.88838\tvalidation_1-auc:0.78521\n",
"[80]\tvalidation_0-auc:0.89547\tvalidation_1-auc:0.78693\n",
"[90]\tvalidation_0-auc:0.90393\tvalidation_1-auc:0.78552\n",
"Stopping. Best iteration:\n",
"[79]\tvalidation_0-auc:0.89452\tvalidation_1-auc:0.78871\n",
"\n",
"model save to【./cache/train/demo/model.pmml】success. \n",
"pmml模型结果一致率(误差小于0.001):100.0%\n"
]
},
{
"data": {
"text/html": [
"模型结果
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 样本集 | \n",
" AUC | \n",
" KS | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 训练集 | \n",
" 0.8945 | \n",
" 0.6335 | \n",
"
\n",
" \n",
" 1 | \n",
" 测试集 | \n",
" 0.7887 | \n",
" 0.4931 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 样本集 AUC KS\n",
"0 训练集 0.8945 0.6335\n",
"1 测试集 0.7887 0.4931"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"

"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"模型超参数
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"objective\": \"binary:logistic\",\n",
" \"base_score\": 0.5,\n",
" \"booster\": null,\n",
" \"colsample_bylevel\": 1,\n",
" \"colsample_bynode\": 1,\n",
" \"colsample_bytree\": 1,\n",
" \"gamma\": 0,\n",
" \"gpu_id\": -1,\n",
" \"importance_type\": \"weight\",\n",
" \"interaction_constraints\": null,\n",
" \"learning_rate\": 0.1,\n",
" \"max_delta_step\": 0,\n",
" \"max_depth\": 3,\n",
" \"min_child_weight\": 1,\n",
" \"missing\": NaN,\n",
" \"monotone_constraints\": null,\n",
" \"n_estimators\": 500,\n",
" \"n_jobs\": 0,\n",
" \"num_parallel_tree\": 1,\n",
" \"random_state\": 2025,\n",
" \"reg_alpha\": 0,\n",
" \"reg_lambda\": 1,\n",
" \"scale_pos_weight\": 1,\n",
" \"subsample\": 1,\n",
" \"tree_method\": null,\n",
" \"validate_parameters\": false,\n",
" \"verbosity\": null\n",
"}\n"
]
},
{
"data": {
"text/html": [
"模型psi
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MODEL_SCORE_BIN | \n",
" psi | \n",
" 训练样本数 | \n",
" 测试样本数 | \n",
" 训练样本数比例 | \n",
" 测试样本数比例 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" (0.622, inf] | \n",
" 0.000 | \n",
" 71 | \n",
" 29 | \n",
" 0.100 | \n",
" 0.100 | \n",
"
\n",
" \n",
" 1 | \n",
" (0.5, 0.622] | \n",
" 0.004 | \n",
" 71 | \n",
" 35 | \n",
" 0.100 | \n",
" 0.120 | \n",
"
\n",
" \n",
" 2 | \n",
" (0.405, 0.5] | \n",
" 0.000 | \n",
" 71 | \n",
" 28 | \n",
" 0.100 | \n",
" 0.096 | \n",
"
\n",
" \n",
" 3 | \n",
" (0.331, 0.405] | \n",
" 0.001 | \n",
" 70 | \n",
" 26 | \n",
" 0.099 | \n",
" 0.089 | \n",
"
\n",
" \n",
" 4 | \n",
" (0.253, 0.331] | \n",
" 0.024 | \n",
" 71 | \n",
" 45 | \n",
" 0.100 | \n",
" 0.155 | \n",
"
\n",
" \n",
" 5 | \n",
" (0.176, 0.253] | \n",
" 0.000 | \n",
" 71 | \n",
" 29 | \n",
" 0.100 | \n",
" 0.100 | \n",
"
\n",
" \n",
" 6 | \n",
" (0.124, 0.176] | \n",
" 0.001 | \n",
" 71 | \n",
" 27 | \n",
" 0.100 | \n",
" 0.093 | \n",
"
\n",
" \n",
" 7 | \n",
" (0.0894, 0.124] | \n",
" 0.060 | \n",
" 71 | \n",
" 11 | \n",
" 0.100 | \n",
" 0.038 | \n",
"
\n",
" \n",
" 8 | \n",
" (0.0641, 0.0894] | \n",
" 0.001 | \n",
" 71 | \n",
" 26 | \n",
" 0.100 | \n",
" 0.089 | \n",
"
\n",
" \n",
" 9 | \n",
" (-inf, 0.0641] | \n",
" 0.004 | \n",
" 71 | \n",
" 35 | \n",
" 0.100 | \n",
" 0.120 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" MODEL_SCORE_BIN psi 训练样本数 测试样本数 训练样本数比例 测试样本数比例\n",
"0 (0.622, inf] 0.000 71 29 0.100 0.100\n",
"1 (0.5, 0.622] 0.004 71 35 0.100 0.120\n",
"2 (0.405, 0.5] 0.000 71 28 0.100 0.096\n",
"3 (0.331, 0.405] 0.001 70 26 0.099 0.089\n",
"4 (0.253, 0.331] 0.024 71 45 0.100 0.155\n",
"5 (0.176, 0.253] 0.000 71 29 0.100 0.100\n",
"6 (0.124, 0.176] 0.001 71 27 0.100 0.093\n",
"7 (0.0894, 0.124] 0.060 71 11 0.100 0.038\n",
"8 (0.0641, 0.0894] 0.001 71 26 0.100 0.089\n",
"9 (-inf, 0.0641] 0.004 71 35 0.100 0.120"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"模型psi: 0.095\n"
]
},
{
"data": {
"text/html": [
"分数分箱
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"训练集-分数分箱\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MODEL_SCORE_BIN | \n",
" 样本数 | \n",
" 坏样本数 | \n",
" 好样本数 | \n",
" 坏样本比例 | \n",
" 样本数比例 | \n",
" 总坏样本数 | \n",
" 总好样本数 | \n",
" 平均坏样本率 | \n",
" 累计坏样本数 | \n",
" 累计好样本数 | \n",
" 累计样本数 | \n",
" 累计坏样本比例 | \n",
" 累计好样本比例 | \n",
" KS | \n",
" LIFT | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" (0.622, inf] | \n",
" 71 | \n",
" 67 | \n",
" 4 | \n",
" 0.944 | \n",
" 0.100 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 67 | \n",
" 4 | \n",
" 71 | \n",
" 0.318 | \n",
" 0.008 | \n",
" 0.310 | \n",
" 3.167 | \n",
"
\n",
" \n",
" 1 | \n",
" (0.5, 0.622] | \n",
" 71 | \n",
" 53 | \n",
" 18 | \n",
" 0.746 | \n",
" 0.100 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 120 | \n",
" 22 | \n",
" 142 | \n",
" 0.569 | \n",
" 0.044 | \n",
" 0.525 | \n",
" 2.836 | \n",
"
\n",
" \n",
" 2 | \n",
" (0.405, 0.5] | \n",
" 71 | \n",
" 30 | \n",
" 41 | \n",
" 0.423 | \n",
" 0.100 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 150 | \n",
" 63 | \n",
" 213 | \n",
" 0.711 | \n",
" 0.127 | \n",
" 0.584 | \n",
" 2.363 | \n",
"
\n",
" \n",
" 3 | \n",
" (0.331, 0.405] | \n",
" 70 | \n",
" 26 | \n",
" 44 | \n",
" 0.371 | \n",
" 0.099 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 176 | \n",
" 107 | \n",
" 283 | \n",
" 0.834 | \n",
" 0.215 | \n",
" 0.619 | \n",
" 2.087 | \n",
"
\n",
" \n",
" 4 | \n",
" (0.253, 0.331] | \n",
" 71 | \n",
" 13 | \n",
" 58 | \n",
" 0.183 | \n",
" 0.100 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 189 | \n",
" 165 | \n",
" 354 | \n",
" 0.896 | \n",
" 0.331 | \n",
" 0.565 | \n",
" 1.792 | \n",
"
\n",
" \n",
" 5 | \n",
" (0.176, 0.253] | \n",
" 71 | \n",
" 12 | \n",
" 59 | \n",
" 0.169 | \n",
" 0.100 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 201 | \n",
" 224 | \n",
" 425 | \n",
" 0.953 | \n",
" 0.450 | \n",
" 0.503 | \n",
" 1.587 | \n",
"
\n",
" \n",
" 6 | \n",
" (0.124, 0.176] | \n",
" 71 | \n",
" 4 | \n",
" 67 | \n",
" 0.056 | \n",
" 0.100 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 205 | \n",
" 291 | \n",
" 496 | \n",
" 0.972 | \n",
" 0.584 | \n",
" 0.388 | \n",
" 1.387 | \n",
"
\n",
" \n",
" 7 | \n",
" (0.0894, 0.124] | \n",
" 71 | \n",
" 5 | \n",
" 66 | \n",
" 0.070 | \n",
" 0.100 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 210 | \n",
" 357 | \n",
" 567 | \n",
" 0.995 | \n",
" 0.717 | \n",
" 0.278 | \n",
" 1.243 | \n",
"
\n",
" \n",
" 8 | \n",
" (0.0641, 0.0894] | \n",
" 71 | \n",
" 1 | \n",
" 70 | \n",
" 0.014 | \n",
" 0.100 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 211 | \n",
" 427 | \n",
" 638 | \n",
" 1.000 | \n",
" 0.857 | \n",
" 0.143 | \n",
" 1.110 | \n",
"
\n",
" \n",
" 9 | \n",
" (-inf, 0.0641] | \n",
" 71 | \n",
" 0 | \n",
" 71 | \n",
" 0.000 | \n",
" 0.100 | \n",
" 211 | \n",
" 498 | \n",
" 0.298 | \n",
" 211 | \n",
" 498 | \n",
" 709 | \n",
" 1.000 | \n",
" 1.000 | \n",
" 0.000 | \n",
" 0.999 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" MODEL_SCORE_BIN 样本数 坏样本数 好样本数 坏样本比例 样本数比例 总坏样本数 总好样本数 平均坏样本率 \\\n",
"0 (0.622, inf] 71 67 4 0.944 0.100 211 498 0.298 \n",
"1 (0.5, 0.622] 71 53 18 0.746 0.100 211 498 0.298 \n",
"2 (0.405, 0.5] 71 30 41 0.423 0.100 211 498 0.298 \n",
"3 (0.331, 0.405] 70 26 44 0.371 0.099 211 498 0.298 \n",
"4 (0.253, 0.331] 71 13 58 0.183 0.100 211 498 0.298 \n",
"5 (0.176, 0.253] 71 12 59 0.169 0.100 211 498 0.298 \n",
"6 (0.124, 0.176] 71 4 67 0.056 0.100 211 498 0.298 \n",
"7 (0.0894, 0.124] 71 5 66 0.070 0.100 211 498 0.298 \n",
"8 (0.0641, 0.0894] 71 1 70 0.014 0.100 211 498 0.298 \n",
"9 (-inf, 0.0641] 71 0 71 0.000 0.100 211 498 0.298 \n",
"\n",
" 累计坏样本数 累计好样本数 累计样本数 累计坏样本比例 累计好样本比例 KS LIFT \n",
"0 67 4 71 0.318 0.008 0.310 3.167 \n",
"1 120 22 142 0.569 0.044 0.525 2.836 \n",
"2 150 63 213 0.711 0.127 0.584 2.363 \n",
"3 176 107 283 0.834 0.215 0.619 2.087 \n",
"4 189 165 354 0.896 0.331 0.565 1.792 \n",
"5 201 224 425 0.953 0.450 0.503 1.587 \n",
"6 205 291 496 0.972 0.584 0.388 1.387 \n",
"7 210 357 567 0.995 0.717 0.278 1.243 \n",
"8 211 427 638 1.000 0.857 0.143 1.110 \n",
"9 211 498 709 1.000 1.000 0.000 0.999 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"测试集-分数分箱\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MODEL_SCORE_BIN | \n",
" 样本数 | \n",
" 坏样本数 | \n",
" 好样本数 | \n",
" 坏样本比例 | \n",
" 样本数比例 | \n",
" 总坏样本数 | \n",
" 总好样本数 | \n",
" 平均坏样本率 | \n",
" 累计坏样本数 | \n",
" 累计好样本数 | \n",
" 累计样本数 | \n",
" 累计坏样本比例 | \n",
" 累计好样本比例 | \n",
" KS | \n",
" LIFT | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" (0.622, inf] | \n",
" 29 | \n",
" 22 | \n",
" 7 | \n",
" 0.759 | \n",
" 0.100 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 22 | \n",
" 7 | \n",
" 29 | \n",
" 0.247 | \n",
" 0.035 | \n",
" 0.212 | \n",
" 2.479 | \n",
"
\n",
" \n",
" 1 | \n",
" (0.5, 0.622] | \n",
" 35 | \n",
" 19 | \n",
" 16 | \n",
" 0.543 | \n",
" 0.120 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 41 | \n",
" 23 | \n",
" 64 | \n",
" 0.461 | \n",
" 0.114 | \n",
" 0.347 | \n",
" 2.094 | \n",
"
\n",
" \n",
" 2 | \n",
" (0.405, 0.5] | \n",
" 28 | \n",
" 14 | \n",
" 14 | \n",
" 0.500 | \n",
" 0.096 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 55 | \n",
" 37 | \n",
" 92 | \n",
" 0.618 | \n",
" 0.183 | \n",
" 0.435 | \n",
" 1.954 | \n",
"
\n",
" \n",
" 3 | \n",
" (0.331, 0.405] | \n",
" 26 | \n",
" 9 | \n",
" 17 | \n",
" 0.346 | \n",
" 0.089 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 64 | \n",
" 54 | \n",
" 118 | \n",
" 0.719 | \n",
" 0.267 | \n",
" 0.452 | \n",
" 1.772 | \n",
"
\n",
" \n",
" 4 | \n",
" (0.253, 0.331] | \n",
" 45 | \n",
" 8 | \n",
" 37 | \n",
" 0.178 | \n",
" 0.155 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 72 | \n",
" 91 | \n",
" 163 | \n",
" 0.809 | \n",
" 0.450 | \n",
" 0.359 | \n",
" 1.444 | \n",
"
\n",
" \n",
" 5 | \n",
" (0.176, 0.253] | \n",
" 29 | \n",
" 9 | \n",
" 20 | \n",
" 0.310 | \n",
" 0.100 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 81 | \n",
" 111 | \n",
" 192 | \n",
" 0.910 | \n",
" 0.550 | \n",
" 0.360 | \n",
" 1.379 | \n",
"
\n",
" \n",
" 6 | \n",
" (0.124, 0.176] | \n",
" 27 | \n",
" 1 | \n",
" 26 | \n",
" 0.037 | \n",
" 0.093 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 82 | \n",
" 137 | \n",
" 219 | \n",
" 0.921 | \n",
" 0.678 | \n",
" 0.243 | \n",
" 1.224 | \n",
"
\n",
" \n",
" 7 | \n",
" (0.0894, 0.124] | \n",
" 11 | \n",
" 2 | \n",
" 9 | \n",
" 0.182 | \n",
" 0.038 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 84 | \n",
" 146 | \n",
" 230 | \n",
" 0.944 | \n",
" 0.723 | \n",
" 0.221 | \n",
" 1.194 | \n",
"
\n",
" \n",
" 8 | \n",
" (0.0641, 0.0894] | \n",
" 26 | \n",
" 5 | \n",
" 21 | \n",
" 0.192 | \n",
" 0.089 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 89 | \n",
" 167 | \n",
" 256 | \n",
" 1.000 | \n",
" 0.827 | \n",
" 0.173 | \n",
" 1.136 | \n",
"
\n",
" \n",
" 9 | \n",
" (-inf, 0.0641] | \n",
" 35 | \n",
" 0 | \n",
" 35 | \n",
" 0.000 | \n",
" 0.120 | \n",
" 89 | \n",
" 202 | \n",
" 0.306 | \n",
" 89 | \n",
" 202 | \n",
" 291 | \n",
" 1.000 | \n",
" 1.000 | \n",
" 0.000 | \n",
" 0.999 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" MODEL_SCORE_BIN 样本数 坏样本数 好样本数 坏样本比例 样本数比例 总坏样本数 总好样本数 平均坏样本率 \\\n",
"0 (0.622, inf] 29 22 7 0.759 0.100 89 202 0.306 \n",
"1 (0.5, 0.622] 35 19 16 0.543 0.120 89 202 0.306 \n",
"2 (0.405, 0.5] 28 14 14 0.500 0.096 89 202 0.306 \n",
"3 (0.331, 0.405] 26 9 17 0.346 0.089 89 202 0.306 \n",
"4 (0.253, 0.331] 45 8 37 0.178 0.155 89 202 0.306 \n",
"5 (0.176, 0.253] 29 9 20 0.310 0.100 89 202 0.306 \n",
"6 (0.124, 0.176] 27 1 26 0.037 0.093 89 202 0.306 \n",
"7 (0.0894, 0.124] 11 2 9 0.182 0.038 89 202 0.306 \n",
"8 (0.0641, 0.0894] 26 5 21 0.192 0.089 89 202 0.306 \n",
"9 (-inf, 0.0641] 35 0 35 0.000 0.120 89 202 0.306 \n",
"\n",
" 累计坏样本数 累计好样本数 累计样本数 累计坏样本比例 累计好样本比例 KS LIFT \n",
"0 22 7 29 0.247 0.035 0.212 2.479 \n",
"1 41 23 64 0.461 0.114 0.347 2.094 \n",
"2 55 37 92 0.618 0.183 0.435 1.954 \n",
"3 64 54 118 0.719 0.267 0.452 1.772 \n",
"4 72 91 163 0.809 0.450 0.359 1.444 \n",
"5 81 111 192 0.910 0.550 0.360 1.379 \n",
"6 82 137 219 0.921 0.678 0.243 1.224 \n",
"7 84 146 230 0.944 0.723 0.221 1.194 \n",
"8 89 167 256 1.000 0.827 0.173 1.136 \n",
"9 89 202 291 1.000 1.000 0.000 0.999 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"模型报告文件储存路径:./cache/train/demo/模型报告.docx\n",
"mlcfg save to【./cache/train/demo/mlcfg.json】success. \n",
"feature save to【./cache/train/demo/feature.pkl】success. \n",
"model save to【./cache/train/demo/model.pkl】success. \n"
]
}
],
"source": [
"%matplotlib agg\n",
"import matplotlib.pyplot as plt\n",
"import sys\n",
"sys.path.append(\"/root/project\")\n",
"import os\n",
"# NOTE(review): presumably only needed when the JDK used for PMML export is not on PATH -- confirm.\n",
"# os.environ['PATH'] = f\"{os.environ['PATH']}:/usr/local/jdk1.8/bin\"\n",
"# print(os.environ['PATH'])\n",
"\n",
"from easy_ml import DataSplitEntity, Pipeline\n",
"\n",
"\n",
"# Load the demo data.\n",
"import random\n",
"import scorecardpy as sc\n",
"\n",
"# Number of leading rows used for training; all remaining rows form the test set.\n",
"TRAIN_SIZE = 709\n",
"\n",
"dat = sc.germancredit()\n",
"# Replace dots in the column names with underscores.\n",
"dat_columns = [c.replace(\".\", \"_\") for c in dat.columns.tolist()]\n",
"dat.columns = dat_columns\n",
"# Encode the target: 1 for 'bad', 0 for anything else.\n",
"dat[\"creditability\"] = dat[\"creditability\"].apply(lambda x: 1 if x == \"bad\" else 0)\n",
"\n",
"train_data = dat[:TRAIN_SIZE]\n",
"test_data = dat[TRAIN_SIZE:]\n",
"\n",
"data = DataSplitEntity(train_data=train_data, test_data=test_data)\n",
"\n",
"# Pipeline configuration.\n",
"cfg = {\n",
"    # Project name; determines where the data is stored.\n",
"    \"project_name\": \"demo\",\n",
"    # Show output when running under Jupyter.\n",
"    \"jupyter_print\": True,\n",
"    # Whether to enable coarse binning.\n",
"    \"format_bin\": False,\n",
"    \"max_feature_num\": 5,\n",
"    # Stress test switch.\n",
"    \"stress_test\": False,\n",
"    # Number of sampling rounds for the stress test.\n",
"    \"stress_sample_times\": 10,\n",
"    # Target column.\n",
"    \"y_column\": \"creditability\",\n",
"    # Candidate variables for modelling (optional).\n",
"    # \"x_columns\": [\n",
"    #     \"duration_in_month\",\n",
"    # ],\n",
"    # Human-readable annotations for variables.\n",
"    \"columns_anns\": {\n",
"        \"age_in_years\": \"年龄\",\n",
"        \"credit_history\": \"借贷历史\",\n",
"    },\n",
"    # Variables to exclude.\n",
"    \"columns_exclude\": [],\n",
"    # Variables that are forced into the model (optional).\n",
"    # \"columns_include\": [\"credit_amount\"],\n",
"    \"model_type\": \"xgb\",\n",
"    \"params_xgb\": {\n",
"        'objective': 'binary:logistic',\n",
"        'eval_metric': 'auc',\n",
"        'learning_rate': 0.1,\n",
"        'max_depth': 3,\n",
"        'subsample': None,\n",
"        'colsample_bytree': None,\n",
"        'alpha': None,\n",
"        'num_boost_round': 500,\n",
"        'early_stopping_rounds': 20,\n",
"        'verbose_eval': 10,\n",
"        'random_state': 2025,\n",
"        'save_pmml': True,\n",
"        'trees_print': False,\n",
"    },\n",
"}\n",
"\n",
"# Train, generate the report, then save the pipeline.\n",
"pipeline = Pipeline(data=data, **cfg)\n",
"pipeline.train()\n",
"pipeline.report()\n",
"pipeline.save()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "4e508928",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"mlcfg load from【./cache/train/demo/mlcfg.json】success. \n",
"项目路径:【./cache/train/demo】\n",
"feature load from【./cache/train/demo/feature.pkl】success.\n",
"model load from【./cache/train/demo/model.pkl】success.\n"
]
},
{
"data": {
"text/plain": [
"{'KS': 0.4918,\n",
" 'AUC': 0.8069,\n",
" 'Gini': 0.6138,\n",
" 'pic': }"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Reload the saved pipeline from disk and score the test split with it.\n",
"pipeline2 = Pipeline.load(\"./cache/train/demo\")\n",
"score = pipeline2.prob(test_data)\n",
"# Labels and scores both come from test_data, so the plot is titled \"test\".\n",
"sc.perf_eva(test_data[\"creditability\"], score, title=\"test\", show_plot=True)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "443d45a3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"模型psi: 0.026000000000000002\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MODEL_SCORE_BIN | \n",
" psi | \n",
" 训练样本数 | \n",
" 测试样本数 | \n",
" 训练样本数比例 | \n",
" 测试样本数比例 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" (0.597, inf] | \n",
" 0.001 | \n",
" 71 | \n",
" 26 | \n",
" 0.100 | \n",
" 0.089 | \n",
"
\n",
" \n",
" 1 | \n",
" (0.468, 0.597] | \n",
" 0.000 | \n",
" 71 | \n",
" 31 | \n",
" 0.100 | \n",
" 0.107 | \n",
"
\n",
" \n",
" 2 | \n",
" (0.4, 0.468] | \n",
" 0.004 | \n",
" 71 | \n",
" 35 | \n",
" 0.100 | \n",
" 0.120 | \n",
"
\n",
" \n",
" 3 | \n",
" (0.352, 0.4] | \n",
" 0.000 | \n",
" 71 | \n",
" 29 | \n",
" 0.100 | \n",
" 0.100 | \n",
"
\n",
" \n",
" 4 | \n",
" (0.269, 0.352] | \n",
" 0.006 | \n",
" 70 | \n",
" 22 | \n",
" 0.099 | \n",
" 0.076 | \n",
"
\n",
" \n",
" 5 | \n",
" (0.191, 0.269] | \n",
" 0.002 | \n",
" 71 | \n",
" 33 | \n",
" 0.100 | \n",
" 0.113 | \n",
"
\n",
" \n",
" 6 | \n",
" (0.132, 0.191] | \n",
" 0.000 | \n",
" 70 | \n",
" 27 | \n",
" 0.099 | \n",
" 0.093 | \n",
"
\n",
" \n",
" 7 | \n",
" (0.096, 0.132] | \n",
" 0.000 | \n",
" 72 | \n",
" 29 | \n",
" 0.102 | \n",
" 0.100 | \n",
"
\n",
" \n",
" 8 | \n",
" (0.0649, 0.096] | \n",
" 0.007 | \n",
" 71 | \n",
" 22 | \n",
" 0.100 | \n",
" 0.076 | \n",
"
\n",
" \n",
" 9 | \n",
" (-inf, 0.0649] | \n",
" 0.006 | \n",
" 71 | \n",
" 37 | \n",
" 0.100 | \n",
" 0.127 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" MODEL_SCORE_BIN psi 训练样本数 测试样本数 训练样本数比例 测试样本数比例\n",
"0 (0.597, inf] 0.001 71 26 0.100 0.089\n",
"1 (0.468, 0.597] 0.000 71 31 0.100 0.107\n",
"2 (0.4, 0.468] 0.004 71 35 0.100 0.120\n",
"3 (0.352, 0.4] 0.000 71 29 0.100 0.100\n",
"4 (0.269, 0.352] 0.006 70 22 0.099 0.076\n",
"5 (0.191, 0.269] 0.002 71 33 0.100 0.113\n",
"6 (0.132, 0.191] 0.000 70 27 0.099 0.093\n",
"7 (0.096, 0.132] 0.000 72 29 0.102 0.100\n",
"8 (0.0649, 0.096] 0.007 71 22 0.100 0.076\n",
"9 (-inf, 0.0649] 0.006 71 37 0.100 0.127"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# PSI of the reloaded pipeline, computed from the train split and the test split.\n",
"psi_report = pipeline2.psi(train_data, test_data)\n",
"psi_report"
]
}
],
"metadata": {
"celltoolbar": "编辑元数据",
"kernelspec": {
"display_name": "Python [conda env:analysis]",
"language": "python",
"name": "conda-env-analysis-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.13"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {
"height": "calc(100% - 180px)",
"left": "10px",
"top": "150px",
"width": "372.364px"
},
"toc_section_display": true,
"toc_window_display": true
},
"toc-autonumbering": false,
"toc-showcode": false,
"toc-showmarkdowntxt": false,
"toc-showtags": false
},
"nbformat": 4,
"nbformat_minor": 5
}