{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "e796cb12", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "日志路径:/root/project/easy_ml/logs/app.log\n" ] } ], "source": [ "%matplotlib agg\n", "import matplotlib.pyplot as plt\n", "import os\n", "import sys\n", "\n", "# Directory that contains the easy_ml package; set EASY_ML_ROOT to override the default.\n", "PROJECT_ROOT = os.environ.get(\"EASY_ML_ROOT\", \"/root/project\")\n", "# Guard the append so re-running this cell does not add duplicate sys.path entries.\n", "if PROJECT_ROOT not in sys.path:\n", "    sys.path.append(PROJECT_ROOT)\n", "from easy_ml import DataSplitEntity, OnlineLearningTrainer\n", "\n", "\n", "import scorecardpy as sc\n", "\n", "# Load the German credit demo dataset bundled with scorecardpy\n", "dat = sc.germancredit()\n", "# Replace \".\" with \"_\" in every column name\n", "dat_columns = dat.columns.tolist()\n", "dat_columns = [c.replace(\".\",\"_\") for c in dat_columns]\n", "dat.columns = dat_columns\n", "\n", "# Encode the target as 1 for \"bad\" and 0 otherwise\n", "dat[\"creditability\"] = dat[\"creditability\"].apply(lambda x: 1 if x == \"bad\" else 0)\n", "# Positional (unshuffled) split: the first TRAIN_SIZE rows train, the remaining rows test\n", "TRAIN_SIZE = 709\n", "train_data=dat[:TRAIN_SIZE]\n", "test_data=dat[TRAIN_SIZE:]\n", "data = DataSplitEntity(train_data=train_data, test_data=test_data)\n" ] }, { "cell_type": "code", "execution_count": 4, "id": "b40219b8", "metadata": { "code_folding": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "项目路径:【./cache/train/OnlineLearningDemo】\n", "coef load from【/root/notebook/ol_resources_demo/coef.json】success.\n", "card.cfg load from【/root/notebook/ol_resources_demo/coef.json】success.\n", "feature load from【/root/notebook/ol_resources_demo/feature.csv】success.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 20/20 [00:04<00:00, 4.45it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "选择epoch:【3】的参数:\n", "{'age_in_years': 0.3448204833214924, 'credit_history': 0.38666791361493663, 'duration_in_month': 0.31717857388239995, 'present_employment_since': 0.3182701082371682, 'purpose': 0.34568628455181505, 'savings_account_and_bonds': 0.3074829183742356, 'status_of_existing_checking_account': 0.3877371389194942, 'auc_test': 0.7911, 'ks_test': 0.4584, 'epoch': 3, 'loss_train': 0.579820491150681, 'loss_test': 0.5821033735284096}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { 
"data": { "text/html": [ "

模型系数优化过程

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
age_in_yearscredit_historyduration_in_monthpresent_employment_sincepurposesavings_account_and_bondsstatus_of_existing_checking_accountauc_testks_testepochloss_trainloss_test
00.0616040.0827350.0201250.0846090.0363480.0561910.0766430.76910.4632000.666412
10.1722760.1931740.1264860.1834260.1480740.1532880.1897400.78630.458310.6274160.628051
20.2650290.2953150.2258020.2573400.2511000.2365570.2946500.78940.467020.598720.600769
30.3448200.3866680.3171790.3182700.3456860.3074830.3877370.79110.458430.579820.582103
40.4106540.4655630.3993730.3672320.4308450.3654130.4666320.79250.461140.5680890.569897
50.4632810.5322370.4724390.4059710.5067930.4113260.5311020.79270.463050.5610460.562113
60.5044780.5879590.5371200.4365730.5743350.4470770.5823140.79220.463060.5568340.557198
70.5363030.6343300.5943600.4609620.6344100.4746820.6220720.79290.465370.5542420.554094
80.5606770.6729000.6450710.4807020.6878830.4959520.6523300.79320.463080.5525470.552126
90.5792280.7050280.6900490.4969770.7354960.5123760.6749320.79370.467690.551340.550872
100.5932720.7318460.7299690.5106610.7778710.5251280.6915000.79380.4742100.55040.550071
110.6038470.7542850.7654020.5223840.8155400.5351100.7034020.79420.4742110.5496150.549563
120.6117610.7731040.7968400.5326030.8489650.5430040.7117590.79470.4693120.5489310.549247
130.6176410.7889210.8247070.5416430.8785620.5493260.7174710.79520.4748130.5483230.549058
140.6219670.8022430.8493810.5497430.9047030.5544610.7212430.79490.4748140.5477810.548955
150.6251080.8134850.8711960.5570720.9277340.5586940.7236190.79500.4811150.5473010.548909
160.6273480.8229870.8904540.5637560.9479720.5622380.7250120.79530.4860160.5468810.548903
170.6289040.8310310.9074290.5698860.9657110.5652480.7257300.79490.4811170.5465170.548922
180.6299390.8378500.9223660.5755300.9812190.5678380.7259970.79490.4923180.5462060.548958
190.6305800.8436360.9354890.5807390.9947460.5700930.7259750.79530.4923190.5459450.549003
200.6309240.8485500.9470000.5855541.0065180.5720750.7257780.79560.4923200.5457290.549054
\n", "
" ], "text/plain": [ " age_in_years credit_history duration_in_month present_employment_since \\\n", "0 0.061604 0.082735 0.020125 0.084609 \n", "1 0.172276 0.193174 0.126486 0.183426 \n", "2 0.265029 0.295315 0.225802 0.257340 \n", "3 0.344820 0.386668 0.317179 0.318270 \n", "4 0.410654 0.465563 0.399373 0.367232 \n", "5 0.463281 0.532237 0.472439 0.405971 \n", "6 0.504478 0.587959 0.537120 0.436573 \n", "7 0.536303 0.634330 0.594360 0.460962 \n", "8 0.560677 0.672900 0.645071 0.480702 \n", "9 0.579228 0.705028 0.690049 0.496977 \n", "10 0.593272 0.731846 0.729969 0.510661 \n", "11 0.603847 0.754285 0.765402 0.522384 \n", "12 0.611761 0.773104 0.796840 0.532603 \n", "13 0.617641 0.788921 0.824707 0.541643 \n", "14 0.621967 0.802243 0.849381 0.549743 \n", "15 0.625108 0.813485 0.871196 0.557072 \n", "16 0.627348 0.822987 0.890454 0.563756 \n", "17 0.628904 0.831031 0.907429 0.569886 \n", "18 0.629939 0.837850 0.922366 0.575530 \n", "19 0.630580 0.843636 0.935489 0.580739 \n", "20 0.630924 0.848550 0.947000 0.585554 \n", "\n", " purpose savings_account_and_bonds status_of_existing_checking_account \\\n", "0 0.036348 0.056191 0.076643 \n", "1 0.148074 0.153288 0.189740 \n", "2 0.251100 0.236557 0.294650 \n", "3 0.345686 0.307483 0.387737 \n", "4 0.430845 0.365413 0.466632 \n", "5 0.506793 0.411326 0.531102 \n", "6 0.574335 0.447077 0.582314 \n", "7 0.634410 0.474682 0.622072 \n", "8 0.687883 0.495952 0.652330 \n", "9 0.735496 0.512376 0.674932 \n", "10 0.777871 0.525128 0.691500 \n", "11 0.815540 0.535110 0.703402 \n", "12 0.848965 0.543004 0.711759 \n", "13 0.878562 0.549326 0.717471 \n", "14 0.904703 0.554461 0.721243 \n", "15 0.927734 0.558694 0.723619 \n", "16 0.947972 0.562238 0.725012 \n", "17 0.965711 0.565248 0.725730 \n", "18 0.981219 0.567838 0.725997 \n", "19 0.994746 0.570093 0.725975 \n", "20 1.006518 0.572075 0.725778 \n", "\n", " auc_test ks_test epoch loss_train loss_test \n", "0 0.7691 0.4632 0 0 0.666412 \n", "1 0.7863 0.4583 1 0.627416 0.628051 
\n", "2 0.7894 0.4670 2 0.59872 0.600769 \n", "3 0.7911 0.4584 3 0.57982 0.582103 \n", "4 0.7925 0.4611 4 0.568089 0.569897 \n", "5 0.7927 0.4630 5 0.561046 0.562113 \n", "6 0.7922 0.4630 6 0.556834 0.557198 \n", "7 0.7929 0.4653 7 0.554242 0.554094 \n", "8 0.7932 0.4630 8 0.552547 0.552126 \n", "9 0.7937 0.4676 9 0.55134 0.550872 \n", "10 0.7938 0.4742 10 0.5504 0.550071 \n", "11 0.7942 0.4742 11 0.549615 0.549563 \n", "12 0.7947 0.4693 12 0.548931 0.549247 \n", "13 0.7952 0.4748 13 0.548323 0.549058 \n", "14 0.7949 0.4748 14 0.547781 0.548955 \n", "15 0.7950 0.4811 15 0.547301 0.548909 \n", "16 0.7953 0.4860 16 0.546881 0.548903 \n", "17 0.7949 0.4811 17 0.546517 0.548922 \n", "18 0.7949 0.4923 18 0.546206 0.548958 \n", "19 0.7953 0.4923 19 0.545945 0.549003 \n", "20 0.7956 0.4923 20 0.545729 0.549054 " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

样本分布

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
样本样本数样本占比坏样本数坏样本比例
0训练集70970.90%21129.76%
1测试集29129.10%8930.58%
2合计1000100%30030.00%
\n", "
" ], "text/plain": [ " 样本 样本数 样本占比 坏样本数 坏样本比例\n", "0 训练集 709 70.90% 211 29.76%\n", "1 测试集 291 29.10% 89 30.58%\n", "2 合计 1000 100% 300 30.00%" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

模型结果

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "原模型\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
样本集AUCKS
0建模数据0.77790.4224
1训练集0.78150.4335
2测试集0.76910.4632
\n", "
" ], "text/plain": [ " 样本集 AUC KS\n", "0 建模数据 0.7779 0.4224\n", "1 训练集 0.7815 0.4335\n", "2 测试集 0.7691 0.4632" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "新模型\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
样本集AUCKS
0建模数据0.79640.4671
1训练集0.79850.4696
2测试集0.79110.4584
\n", "
" ], "text/plain": [ " 样本集 AUC KS\n", "0 建模数据 0.7964 0.4671\n", "1 训练集 0.7985 0.4696\n", "2 测试集 0.7911 0.4584" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

模型系数

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
变量原变量WOE拟合系数新变量WOE拟合系数释义
0age_in_years0.06160.3448年龄
1credit_history0.08270.3867-
2duration_in_month0.02010.3172-
3present_employment_since0.08460.3183-
4purpose0.03630.3457-
5savings_account_and_bonds0.05620.3075-
6status_of_existing_checking_account0.07660.3877-
\n", "
" ], "text/plain": [ " 变量 原变量WOE拟合系数 新变量WOE拟合系数 释义\n", "0 age_in_years 0.0616 0.3448 年龄\n", "1 credit_history 0.0827 0.3867 -\n", "2 duration_in_month 0.0201 0.3172 -\n", "3 present_employment_since 0.0846 0.3183 -\n", "4 purpose 0.0363 0.3457 -\n", "5 savings_account_and_bonds 0.0562 0.3075 -\n", "6 status_of_existing_checking_account 0.0766 0.3877 -" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

分数分箱

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "建模数据上分数分箱\n", "原模型\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MODEL_SCORE_BIN样本数坏样本数好样本数坏样本比例样本数比例总坏样本数总好样本数平均坏样本率累计坏样本数累计好样本数累计样本数累计坏样本比例累计好样本比例KSLIFT
0(0.529, inf]10069310.6900.1003007000.369311000.2300.0440.1862.300
1(0.519, 0.529]10054460.5400.1003007000.3123772000.4100.1100.3002.050
2(0.512, 0.519]9943560.4340.0993007000.31661332990.5530.1900.3631.851
3(0.504, 0.512]10038620.3800.1003007000.32041953990.6800.2790.4011.704
4(0.495, 0.504]10134670.3370.1013007000.32382625000.7930.3740.4191.587
5(0.487, 0.495]10028720.2800.1003007000.32663346000.8870.4770.4101.478
6(0.477, 0.487]1009910.0900.1003007000.32754257000.9170.6070.3101.310
7(0.466, 0.477]10015850.1500.1003007000.32905108000.9670.7290.2381.208
8(0.456, 0.466]1007930.0700.1003007000.32976039000.9900.8610.1291.100
9(-inf, 0.456]1003970.0300.1003007000.330070010001.0001.0000.0001.000
\n", "
" ], "text/plain": [ " MODEL_SCORE_BIN 样本数 坏样本数 好样本数 坏样本比例 样本数比例 总坏样本数 总好样本数 平均坏样本率 \\\n", "0 (0.529, inf] 100 69 31 0.690 0.100 300 700 0.3 \n", "1 (0.519, 0.529] 100 54 46 0.540 0.100 300 700 0.3 \n", "2 (0.512, 0.519] 99 43 56 0.434 0.099 300 700 0.3 \n", "3 (0.504, 0.512] 100 38 62 0.380 0.100 300 700 0.3 \n", "4 (0.495, 0.504] 101 34 67 0.337 0.101 300 700 0.3 \n", "5 (0.487, 0.495] 100 28 72 0.280 0.100 300 700 0.3 \n", "6 (0.477, 0.487] 100 9 91 0.090 0.100 300 700 0.3 \n", "7 (0.466, 0.477] 100 15 85 0.150 0.100 300 700 0.3 \n", "8 (0.456, 0.466] 100 7 93 0.070 0.100 300 700 0.3 \n", "9 (-inf, 0.456] 100 3 97 0.030 0.100 300 700 0.3 \n", "\n", " 累计坏样本数 累计好样本数 累计样本数 累计坏样本比例 累计好样本比例 KS LIFT \n", "0 69 31 100 0.230 0.044 0.186 2.300 \n", "1 123 77 200 0.410 0.110 0.300 2.050 \n", "2 166 133 299 0.553 0.190 0.363 1.851 \n", "3 204 195 399 0.680 0.279 0.401 1.704 \n", "4 238 262 500 0.793 0.374 0.419 1.587 \n", "5 266 334 600 0.887 0.477 0.410 1.478 \n", "6 275 425 700 0.917 0.607 0.310 1.310 \n", "7 290 510 800 0.967 0.729 0.238 1.208 \n", "8 297 603 900 0.990 0.861 0.129 1.100 \n", "9 300 700 1000 1.000 1.000 0.000 1.000 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "新模型\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MODEL_SCORE_BIN样本数坏样本数好样本数坏样本比例样本数比例总坏样本数总好样本数平均坏样本率累计坏样本数累计好样本数累计样本数累计坏样本比例累计好样本比例KSLIFT
0(0.652, inf]10068320.6800.1003007000.368321000.2270.0460.1812.267
1(0.604, 0.652]9963360.6360.0993007000.3131681990.4370.0970.3402.194
2(0.562, 0.604]10142590.4160.1013007000.31731273000.5770.1810.3961.922
3(0.517, 0.562]10039610.3900.1003007000.32121884000.7070.2690.4381.767
4(0.47, 0.517]10033670.3300.1003007000.32452555000.8170.3640.4531.633
5(0.424, 0.47]10023770.2300.1003007000.32683326000.8930.4740.4191.489
6(0.378, 0.424]10012880.1200.1003007000.32804207000.9330.6000.3331.333
7(0.333, 0.378]9911880.1110.0993007000.32915087990.9700.7260.2441.214
8(0.283, 0.333]1017940.0690.1013007000.32986029000.9930.8600.1331.104
9(-inf, 0.283]1002980.0200.1003007000.330070010001.0001.0000.0001.000
\n", "
" ], "text/plain": [ " MODEL_SCORE_BIN 样本数 坏样本数 好样本数 坏样本比例 样本数比例 总坏样本数 总好样本数 平均坏样本率 \\\n", "0 (0.652, inf] 100 68 32 0.680 0.100 300 700 0.3 \n", "1 (0.604, 0.652] 99 63 36 0.636 0.099 300 700 0.3 \n", "2 (0.562, 0.604] 101 42 59 0.416 0.101 300 700 0.3 \n", "3 (0.517, 0.562] 100 39 61 0.390 0.100 300 700 0.3 \n", "4 (0.47, 0.517] 100 33 67 0.330 0.100 300 700 0.3 \n", "5 (0.424, 0.47] 100 23 77 0.230 0.100 300 700 0.3 \n", "6 (0.378, 0.424] 100 12 88 0.120 0.100 300 700 0.3 \n", "7 (0.333, 0.378] 99 11 88 0.111 0.099 300 700 0.3 \n", "8 (0.283, 0.333] 101 7 94 0.069 0.101 300 700 0.3 \n", "9 (-inf, 0.283] 100 2 98 0.020 0.100 300 700 0.3 \n", "\n", " 累计坏样本数 累计好样本数 累计样本数 累计坏样本比例 累计好样本比例 KS LIFT \n", "0 68 32 100 0.227 0.046 0.181 2.267 \n", "1 131 68 199 0.437 0.097 0.340 2.194 \n", "2 173 127 300 0.577 0.181 0.396 1.922 \n", "3 212 188 400 0.707 0.269 0.438 1.767 \n", "4 245 255 500 0.817 0.364 0.453 1.633 \n", "5 268 332 600 0.893 0.474 0.419 1.489 \n", "6 280 420 700 0.933 0.600 0.333 1.333 \n", "7 291 508 799 0.970 0.726 0.244 1.214 \n", "8 298 602 900 0.993 0.860 0.133 1.104 \n", "9 300 700 1000 1.000 1.000 0.000 1.000 " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

变量趋势

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "建模数据上变量趋势\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

压力测试

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
违约率抽样次数样本数好样本数坏样本数平均AUC最大AUC最小AUCAUC标准差95%置信区间AUC平均KS最大KS最小KSKS标准差95%置信区间KS
00.011030000297003000.7963550.7965090.7961800.0001000.7962 - 0.79660.45240.4530.4500.0009170.4506 - 0.4542
10.0710428539853000.7964570.7970910.7955740.0004060.7957 - 0.79730.45200.4530.4480.0014140.4492 - 0.4548
20.1310230720073000.7962760.7981560.7943070.0011120.7941 - 0.79850.46100.4630.4600.0011830.4587 - 0.4633
30.1910157812783000.7965680.8001430.7924470.0020560.7925 - 0.80060.44270.4450.4350.0038740.4351 - 0.4503
40.251012009003000.7963700.8020520.7898240.0032900.7899 - 0.80280.44880.4630.4360.0077050.4337 - 0.4639
50.31109676673000.7953700.7982330.7906900.0021130.7912 - 0.79950.45150.4550.4400.0040800.4435 - 0.4595
60.37108105103000.7975650.8017120.7904670.0028670.7919 - 0.80320.46030.4800.4340.0141000.4327 - 0.4879
70.43106973973000.7977990.8044460.7886400.0048200.7884 - 0.80720.44690.4690.4270.0115970.4242 - 0.4696
80.49106123123000.7939750.8057910.7782260.0083810.7775 - 0.81040.44730.4780.4250.0164990.4150 - 0.4796
90.55105452453000.7947400.8125310.7723880.0123150.7706 - 0.81890.45620.4890.4100.0227540.4116 - 0.5008
\n", "
" ], "text/plain": [ " 违约率 抽样次数 样本数 好样本数 坏样本数 平均AUC 最大AUC 最小AUC AUC标准差 \\\n", "0 0.01 10 30000 29700 300 0.796355 0.796509 0.796180 0.000100 \n", "1 0.07 10 4285 3985 300 0.796457 0.797091 0.795574 0.000406 \n", "2 0.13 10 2307 2007 300 0.796276 0.798156 0.794307 0.001112 \n", "3 0.19 10 1578 1278 300 0.796568 0.800143 0.792447 0.002056 \n", "4 0.25 10 1200 900 300 0.796370 0.802052 0.789824 0.003290 \n", "5 0.31 10 967 667 300 0.795370 0.798233 0.790690 0.002113 \n", "6 0.37 10 810 510 300 0.797565 0.801712 0.790467 0.002867 \n", "7 0.43 10 697 397 300 0.797799 0.804446 0.788640 0.004820 \n", "8 0.49 10 612 312 300 0.793975 0.805791 0.778226 0.008381 \n", "9 0.55 10 545 245 300 0.794740 0.812531 0.772388 0.012315 \n", "\n", " 95%置信区间AUC 平均KS 最大KS 最小KS KS标准差 95%置信区间KS \n", "0 0.7962 - 0.7966 0.4524 0.453 0.450 0.000917 0.4506 - 0.4542 \n", "1 0.7957 - 0.7973 0.4520 0.453 0.448 0.001414 0.4492 - 0.4548 \n", "2 0.7941 - 0.7985 0.4610 0.463 0.460 0.001183 0.4587 - 0.4633 \n", "3 0.7925 - 0.8006 0.4427 0.445 0.435 0.003874 0.4351 - 0.4503 \n", "4 0.7899 - 0.8028 0.4488 0.463 0.436 0.007705 0.4337 - 0.4639 \n", "5 0.7912 - 0.7995 0.4515 0.455 0.440 0.004080 0.4435 - 0.4595 \n", "6 0.7919 - 0.8032 0.4603 0.480 0.434 0.014100 0.4327 - 0.4879 \n", "7 0.7884 - 0.8072 0.4469 0.469 0.427 0.011597 0.4242 - 0.4696 \n", "8 0.7775 - 0.8104 0.4473 0.478 0.425 0.016499 0.4150 - 0.4796 \n", "9 0.7706 - 0.8189 0.4562 0.489 0.410 0.022754 0.4116 - 0.5008 " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

评分卡

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
variablebinpoints
0basepointsNaN538.0
23age_in_years3626.0
24age_in_years[-inf,25.0)-11.0
25age_in_years[25.0,30.0)-4.0
26age_in_years[30.0,35.0)-3.0
27age_in_years[35.0,inf)6.0
11credit_historyno credits taken/ all credits paid back duly%,...-36.0
12credit_historyexisting credits paid back duly till now-1.0
13credit_historydelay in paying off in the past-4.0
14credit_historycritical account/ other credits existing (not ...19.0
6duration_in_month[-inf,8.0)33.0
7duration_in_month[8.0,15.0)5.0
8duration_in_month[15.0,25.0)-0.0
9duration_in_month[25.0,35.0)-4.0
10duration_in_month[35.0,inf)-18.0
15present_employment_sinceunemployed%,%... < 1 year-11.0
16present_employment_since1 <= ... < 4 years-0.0
17present_employment_since4 <= ... < 7 years10.0
18present_employment_since... >= 7 years5.0
19purposeretraining%,%car (used)22.0
20purposeradio/television9.0
21purposefurniture/equipment%,%business%,%repairs-3.0
22purposedomestic appliances%,%education%,%car (new)%,%...-9.0
0savings_account_and_bonds... < 100 DM%,%100 <= ... < 500 DM-5.0
1savings_account_and_bonds500 <= ... < 1000 DM%,%... >= 1000 DM21.0
2savings_account_and_bondsunknown/ no savings account11.0
3status_of_existing_checking_account... < 0 DM%,%0 <= ... < 200 DM-17.0
4status_of_existing_checking_account... >= 200 DM / salary assignments for at leas...13.0
5status_of_existing_checking_accountno checking account33.0
\n", "
" ], "text/plain": [ " variable \\\n", "0 basepoints \n", "23 age_in_years \n", "24 age_in_years \n", "25 age_in_years \n", "26 age_in_years \n", "27 age_in_years \n", "11 credit_history \n", "12 credit_history \n", "13 credit_history \n", "14 credit_history \n", "6 duration_in_month \n", "7 duration_in_month \n", "8 duration_in_month \n", "9 duration_in_month \n", "10 duration_in_month \n", "15 present_employment_since \n", "16 present_employment_since \n", "17 present_employment_since \n", "18 present_employment_since \n", "19 purpose \n", "20 purpose \n", "21 purpose \n", "22 purpose \n", "0 savings_account_and_bonds \n", "1 savings_account_and_bonds \n", "2 savings_account_and_bonds \n", "3 status_of_existing_checking_account \n", "4 status_of_existing_checking_account \n", "5 status_of_existing_checking_account \n", "\n", " bin points \n", "0 NaN 538.0 \n", "23 36 26.0 \n", "24 [-inf,25.0) -11.0 \n", "25 [25.0,30.0) -4.0 \n", "26 [30.0,35.0) -3.0 \n", "27 [35.0,inf) 6.0 \n", "11 no credits taken/ all credits paid back duly%,... -36.0 \n", "12 existing credits paid back duly till now -1.0 \n", "13 delay in paying off in the past -4.0 \n", "14 critical account/ other credits existing (not ... 19.0 \n", "6 [-inf,8.0) 33.0 \n", "7 [8.0,15.0) 5.0 \n", "8 [15.0,25.0) -0.0 \n", "9 [25.0,35.0) -4.0 \n", "10 [35.0,inf) -18.0 \n", "15 unemployed%,%... < 1 year -11.0 \n", "16 1 <= ... < 4 years -0.0 \n", "17 4 <= ... < 7 years 10.0 \n", "18 ... >= 7 years 5.0 \n", "19 retraining%,%car (used) 22.0 \n", "20 radio/television 9.0 \n", "21 furniture/equipment%,%business%,%repairs -3.0 \n", "22 domestic appliances%,%education%,%car (new)%,%... -9.0 \n", "0 ... < 100 DM%,%100 <= ... < 500 DM -5.0 \n", "1 500 <= ... < 1000 DM%,%... >= 1000 DM 21.0 \n", "2 unknown/ no savings account 11.0 \n", "3 ... < 0 DM%,%0 <= ... < 200 DM -17.0 \n", "4 ... >= 200 DM / salary assignments for at leas... 
13.0 \n", "5 no checking account 33.0 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "模型报告文件储存路径:./cache/train/OnlineLearningDemo/OnlineLearning报告.docx\n", "olcfg save to【./cache/train/OnlineLearningDemo/olcfg.json】success. \n", "feature save to【./cache/train/OnlineLearningDemo/feature.csv】success. \n", "model save to【./cache/train/OnlineLearningDemo/coef.json】success. \n", "model save to【./cache/train/OnlineLearningDemo/card.csv】success. \n" ] } ], "source": [ "# Online-learning configuration: resources of the existing model plus training / reporting options\n", "cfg = {\n", "    # Directory holding the existing model's resources (see the files under ol_resources_demo):\n", "    #   coef.json   - model coefficients (use \"const\" as the key for the intercept, if any)\n", "    #   feature.csv - binning information (numeric bins must be listed in ascending order)\n", "    #   card.cfg    - information required to generate the scorecard\n", "    \"path_resources\": \"/root/notebook/ol_resources_demo\",\n", "    # Project name; determines where results are stored\n", "    \"project_name\": \"OnlineLearningDemo\",\n", "    \"y_column\": \"creditability\",\n", "    # Learning rate\n", "    \"lr\": 0.01,\n", "    # Batch size of a single update\n", "    \"batch_size\": 64,\n", "    # Number of training epochs\n", "    \"epochs\": 20,\n", "    \"jupyter_print\": True,\n", "    # Enable the stress test\n", "    \"stress_test\": True,\n", "    # Number of sampling rounds used by the stress test\n", "    \"stress_sample_times\": 10,\n", "    # Human-readable annotations for variables\n", "    \"columns_anns\": {\n", "        \"age_in_years\": \"年龄\"\n", "    }\n", "}\n", "\n", "# Epoch whose parameters are selected for the report\n", "# NOTE(review): the rationale for choosing epoch 3 is not recorded here - confirm\n", "REPORT_EPOCH = 3\n", "\n", "# Train, generate the report, then persist the results\n", "trainer = OnlineLearningTrainer(data=data, **cfg)\n", "trainer.train()\n", "trainer.report(epoch=REPORT_EPOCH)\n", "trainer.save()" ] }, { "cell_type": "markdown", "id": "2421e9cc", "metadata": {}, "source": [ "## 加载模型" ] }, { "cell_type": "code", "execution_count": 5, "id": "2583f08b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "olcfg load from【./cache/train/OnlineLearningDemo/olcfg.json】success. \n", "项目路径:【./cache/train/OnlineLearningDemo】\n", "coef load from【./cache/train/OnlineLearningDemo/coef.json】success.\n", "feature load from【./cache/train/OnlineLearningDemo/feature.csv】success.\n" ] }, { "data": { "text/plain": [ "{'KS': 0.4584,\n", " 'AUC': 0.7911,\n", " 'Gini': 0.5822,\n", " 'pic':
}" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Restore the trainer persisted under the project cache directory\n", "model_dir = \"./cache/train/OnlineLearningDemo/\"\n", "model = OnlineLearningTrainer.load(model_dir)\n", "\n", "# Score the hold-out rows, then evaluate the scores against their labels\n", "prob = model.prob(test_data)\n", "y_test = test_data[\"creditability\"]\n", "sc.perf_eva(y_test, prob, title=\"test\", show_plot=True)" ] }, { "cell_type": "markdown", "id": "aa289b61", "metadata": {}, "source": [ "## 计算psi" ] }, { "cell_type": "code", "execution_count": 6, "id": "225fa7d7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "模型psi: 0.061\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MODEL_SCORE_BINpsi训练样本数测试样本数训练样本数比例测试样本数比例
0(0.649, inf]0.00170310.0990.107
1(0.602, 0.649]0.00172320.1020.110
2(0.562, 0.602]0.00171260.1000.089
3(0.519, 0.562]0.00271250.1000.086
4(0.467, 0.519]0.01469400.0970.137
5(0.421, 0.467]0.00472240.1020.082
6(0.382, 0.421]0.02371170.1000.058
7(0.337, 0.382]0.00171270.1000.093
8(0.293, 0.337]0.00071280.1000.096
9(-inf, 0.293]0.01471410.1000.141
\n", "
" ], "text/plain": [ " MODEL_SCORE_BIN psi 训练样本数 测试样本数 训练样本数比例 测试样本数比例\n", "0 (0.649, inf] 0.001 70 31 0.099 0.107\n", "1 (0.602, 0.649] 0.001 72 32 0.102 0.110\n", "2 (0.562, 0.602] 0.001 71 26 0.100 0.089\n", "3 (0.519, 0.562] 0.002 71 25 0.100 0.086\n", "4 (0.467, 0.519] 0.014 69 40 0.097 0.137\n", "5 (0.421, 0.467] 0.004 72 24 0.102 0.082\n", "6 (0.382, 0.421] 0.023 71 17 0.100 0.058\n", "7 (0.337, 0.382] 0.001 71 27 0.100 0.093\n", "8 (0.293, 0.337] 0.000 71 28 0.100 0.096\n", "9 (-inf, 0.293] 0.014 71 41 0.100 0.141" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Optional explicit cut points for the score bins, e.g. points=[0, 0.2, 0.3, 0.4, 1]\n", "# (NOTE(review): semantics inferred from the parameter name - confirm);\n", "# None leaves the binning to model.psi.\n", "points=None\n", "psi = model.psi(train_data, test_data, points=points)\n", "# The per-bin values in psi[\"psi\"] sum to the overall model PSI printed by model.psi.\n", "psi" ] } ], "metadata": { "celltoolbar": "编辑元数据", "kernelspec": { "display_name": "Python [conda env:analysis]", "language": "python", "name": "conda-env-analysis-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.13" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": { "height": "calc(100% - 180px)", "left": "10px", "top": "150px", "width": "372.364px" }, "toc_section_display": true, "toc_window_display": true }, "toc-autonumbering": false, "toc-showcode": false, "toc-showmarkdowntxt": false, "toc-showtags": false }, "nbformat": 4, "nbformat_minor": 5 }