{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "402afffb", "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "日志路径:/root/project/easy_ml/logs/app.log\n" ] } ], "source": [ "%matplotlib agg\n", "import os\n", "import sys\n", "\n", "import matplotlib.pyplot as plt\n", "import scorecardpy as sc\n", "\n", "# Directory that contains the easy_ml package. The original location is kept as\n", "# the default; set EASY_ML_PROJECT_ROOT to run the notebook from another checkout.\n", "PROJECT_ROOT = os.environ.get(\"EASY_ML_PROJECT_ROOT\", \"/root/project\")\n", "if PROJECT_ROOT not in sys.path:  # avoid duplicate entries when the cell is re-run\n", "    sys.path.append(PROJECT_ROOT)\n", "from easy_ml import DataSplitEntity, OnlineLearningTrainer\n", "\n", "# Rows [0, TRAIN_ROWS) form the training set, the remaining rows the test set.\n", "TRAIN_ROWS = 709\n", "\n", "# Load the demo data shipped with scorecardpy, replace \".\" with \"_\" in the column\n", "# names and encode the target as 1 for \"bad\" and 0 for everything else.\n", "dat = (\n", "    sc.germancredit()\n", "    .rename(columns=lambda name: name.replace(\".\", \"_\"))\n", "    .assign(\n", "        creditability=lambda frame: frame[\"creditability\"].apply(\n", "            lambda label: 1 if label == \"bad\" else 0\n", "        )\n", "    )\n", ")\n", "\n", "train_data = dat.iloc[:TRAIN_ROWS]\n", "test_data = dat.iloc[TRAIN_ROWS:]\n", "data = DataSplitEntity(train_data=train_data, test_data=test_data)\n" ] }, { "cell_type": "code", "execution_count": 2, "id": "71f54feb", "metadata": { "code_folding": [], "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "项目路径:【./cache/train/OnlineLearningDemo】\n", "coef load from【/root/notebook/ol_resources_demo/coef.dict】success.\n", "feature load from【/root/notebook/ol_resources_demo/feature.csv】success.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 20/20 [00:04<00:00, 4.12it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "选择epoch:【3】的参数:\n", "{'age_in_years': 0.30540947796839885, 'credit_history': 0.336316924212706, 'duration_in_month': 0.31766018659713036, 'present_employment_since': 0.28917071986582443, 'purpose': 0.3451668602655048, 'savings_account_and_bonds': 0.3143211829465453, 'status_of_existing_checking_account': 0.37895404100465513, 'auc_test': 0.7922, 'ks_test': 0.4823, 'epoch': 3, 'loss_train': 0.5830171033930547, 'loss_test': 0.5842627196638215}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "data": { 
"text/html": [ "

模型系数优化过程

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
age_in_yearscredit_historyduration_in_monthpresent_employment_sincepurposesavings_account_and_bondsstatus_of_existing_checking_accountauc_testks_testepochloss_trainloss_test
00.0160410.0273480.0201250.0460920.0363480.0619150.0664260.77990.4434000.672917
10.1276370.1389230.1267040.1464610.1478730.1595310.1797270.79070.461110.6340750.632831
20.2226780.2427220.2261550.2241160.2507430.2431260.2851420.79210.477320.6034330.604080
30.3054090.3363170.3176600.2891710.3451670.3143210.3789540.79220.482330.5830170.584263
40.3746890.4180120.3999590.3422150.4301260.3724250.4587610.79290.477340.5701910.571214
50.4310480.4879050.4730890.3847390.5058330.4183840.5242640.79280.467450.5624040.562849
60.4760430.5470940.5377890.4187010.5731080.4540440.5765560.79340.469760.5577120.557549
70.5115540.5970260.5950100.4459950.6329120.4814320.6173770.79350.470270.5548250.554202
80.5393800.6391320.6456720.4682010.6861350.5023850.6486300.79350.465380.5529540.552085
90.5610780.6746810.6905800.4865490.7335340.5184180.6721250.79320.463090.5516460.550746
100.5779340.7047470.7304180.5019640.7757420.5307290.6894670.79430.4676100.550650.549902
110.5909800.7302220.7657660.5151260.8132950.5402430.7020180.79430.4676110.5498350.549376
120.6010390.7518420.7971170.5265370.8466570.5476590.7109010.79480.4748120.5491340.549058
130.6087590.7702190.8249030.5365650.8762360.5535080.7170260.79460.4748130.5485150.548876
140.6146500.7858590.8495020.5454800.9024040.5581850.7211090.79480.4811140.5479630.548785
150.6191130.7991830.8712500.5534840.9254970.5619830.7237110.79480.4811150.5474740.548754
160.6224620.8105440.8904510.5607250.9458250.5651210.7252580.79500.4811160.5470420.548763
170.6249410.8202380.9073770.5673160.9636760.5677570.7260730.79510.4874170.5466660.548798
180.6267430.8285130.9222750.5733420.9793110.5700070.7263930.79520.4874180.5463430.548848
190.6280170.8355790.9353660.5788690.9929750.5719560.7263920.79540.4923190.5460690.548907
200.6288810.8416140.9468530.5839491.0048870.5736650.7261920.79560.4923200.5458390.548970
\n", "
" ], "text/plain": [ " age_in_years credit_history duration_in_month present_employment_since \\\n", "0 0.016041 0.027348 0.020125 0.046092 \n", "1 0.127637 0.138923 0.126704 0.146461 \n", "2 0.222678 0.242722 0.226155 0.224116 \n", "3 0.305409 0.336317 0.317660 0.289171 \n", "4 0.374689 0.418012 0.399959 0.342215 \n", "5 0.431048 0.487905 0.473089 0.384739 \n", "6 0.476043 0.547094 0.537789 0.418701 \n", "7 0.511554 0.597026 0.595010 0.445995 \n", "8 0.539380 0.639132 0.645672 0.468201 \n", "9 0.561078 0.674681 0.690580 0.486549 \n", "10 0.577934 0.704747 0.730418 0.501964 \n", "11 0.590980 0.730222 0.765766 0.515126 \n", "12 0.601039 0.751842 0.797117 0.526537 \n", "13 0.608759 0.770219 0.824903 0.536565 \n", "14 0.614650 0.785859 0.849502 0.545480 \n", "15 0.619113 0.799183 0.871250 0.553484 \n", "16 0.622462 0.810544 0.890451 0.560725 \n", "17 0.624941 0.820238 0.907377 0.567316 \n", "18 0.626743 0.828513 0.922275 0.573342 \n", "19 0.628017 0.835579 0.935366 0.578869 \n", "20 0.628881 0.841614 0.946853 0.583949 \n", "\n", " purpose savings_account_and_bonds status_of_existing_checking_account \\\n", "0 0.036348 0.061915 0.066426 \n", "1 0.147873 0.159531 0.179727 \n", "2 0.250743 0.243126 0.285142 \n", "3 0.345167 0.314321 0.378954 \n", "4 0.430126 0.372425 0.458761 \n", "5 0.505833 0.418384 0.524264 \n", "6 0.573108 0.454044 0.576556 \n", "7 0.632912 0.481432 0.617377 \n", "8 0.686135 0.502385 0.648630 \n", "9 0.733534 0.518418 0.672125 \n", "10 0.775742 0.530729 0.689467 \n", "11 0.813295 0.540243 0.702018 \n", "12 0.846657 0.547659 0.710901 \n", "13 0.876236 0.553508 0.717026 \n", "14 0.902404 0.558185 0.721109 \n", "15 0.925497 0.561983 0.723711 \n", "16 0.945825 0.565121 0.725258 \n", "17 0.963676 0.567757 0.726073 \n", "18 0.979311 0.570007 0.726393 \n", "19 0.992975 0.571956 0.726392 \n", "20 1.004887 0.573665 0.726192 \n", "\n", " auc_test ks_test epoch loss_train loss_test \n", "0 0.7799 0.4434 0 0 0.672917 \n", "1 0.7907 0.4611 1 0.634075 0.632831 
\n", "2 0.7921 0.4773 2 0.603433 0.604080 \n", "3 0.7922 0.4823 3 0.583017 0.584263 \n", "4 0.7929 0.4773 4 0.570191 0.571214 \n", "5 0.7928 0.4674 5 0.562404 0.562849 \n", "6 0.7934 0.4697 6 0.557712 0.557549 \n", "7 0.7935 0.4702 7 0.554825 0.554202 \n", "8 0.7935 0.4653 8 0.552954 0.552085 \n", "9 0.7932 0.4630 9 0.551646 0.550746 \n", "10 0.7943 0.4676 10 0.55065 0.549902 \n", "11 0.7943 0.4676 11 0.549835 0.549376 \n", "12 0.7948 0.4748 12 0.549134 0.549058 \n", "13 0.7946 0.4748 13 0.548515 0.548876 \n", "14 0.7948 0.4811 14 0.547963 0.548785 \n", "15 0.7948 0.4811 15 0.547474 0.548754 \n", "16 0.7950 0.4811 16 0.547042 0.548763 \n", "17 0.7951 0.4874 17 0.546666 0.548798 \n", "18 0.7952 0.4874 18 0.546343 0.548848 \n", "19 0.7954 0.4923 19 0.546069 0.548907 \n", "20 0.7956 0.4923 20 0.545839 0.548970 " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

样本分布

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
样本样本数样本占比坏样本数坏样本比例
0训练集70970.90%21129.76%
1测试集29129.10%8930.58%
2合计1000100%30030.00%
\n", "
" ], "text/plain": [ " 样本 样本数 样本占比 坏样本数 坏样本比例\n", "0 训练集 709 70.90% 211 29.76%\n", "1 测试集 291 29.10% 89 30.58%\n", "2 合计 1000 100% 300 30.00%" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

模型结果

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "原模型\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
样本集AUCKS
0建模数据0.78380.4333
1训练集0.78510.4507
2测试集0.77990.4434
\n", "
" ], "text/plain": [ " 样本集 AUC KS\n", "0 建模数据 0.7838 0.4333\n", "1 训练集 0.7851 0.4507\n", "2 测试集 0.7799 0.4434" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "新模型\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
样本集AUCKS
0建模数据0.79690.4638
1训练集0.79900.4714
2测试集0.79220.4823
\n", "
" ], "text/plain": [ " 样本集 AUC KS\n", "0 建模数据 0.7969 0.4638\n", "1 训练集 0.7990 0.4714\n", "2 测试集 0.7922 0.4823" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

模型系数

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
变量原变量WOE拟合系数新变量WOE拟合系数释义
0age_in_years0.01600.3054年龄
1credit_history0.02730.3363-
2duration_in_month0.02010.3177-
3present_employment_since0.04610.2892-
4purpose0.03630.3452-
5savings_account_and_bonds0.06190.3143-
6status_of_existing_checking_account0.06640.3790-
\n", "
" ], "text/plain": [ " 变量 原变量WOE拟合系数 新变量WOE拟合系数 释义\n", "0 age_in_years 0.0160 0.3054 年龄\n", "1 credit_history 0.0273 0.3363 -\n", "2 duration_in_month 0.0201 0.3177 -\n", "3 present_employment_since 0.0461 0.2892 -\n", "4 purpose 0.0363 0.3452 -\n", "5 savings_account_and_bonds 0.0619 0.3143 -\n", "6 status_of_existing_checking_account 0.0664 0.3790 -" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

分数分箱

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "建模数据上分数分箱\n", "原模型\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MODEL_SCORE_BIN样本数坏样本数好样本数坏样本比例样本数比例总坏样本数总好样本数平均坏样本率累计坏样本数累计好样本数累计样本数累计坏样本比例累计好样本比例KSLIFT
0(0.52, inf]10072280.7200.1003007000.372281000.2400.0400.2002.400
1(0.516, 0.52]10056440.5600.1003007000.3128722000.4270.1030.3242.133
2(0.511, 0.516]9944550.4440.0993007000.31721272990.5730.1810.3921.918
3(0.506, 0.511]10137640.3660.1013007000.32091914000.6970.2730.4241.742
4(0.499, 0.506]10025750.2500.1003007000.32342665000.7800.3800.4001.560
5(0.488, 0.499]10031690.3100.1003007000.32653356000.8830.4790.4041.472
6(0.481, 0.488]10014860.1400.1003007000.32794217000.9300.6010.3291.329
7(0.475, 0.481]10010900.1000.1003007000.32895118000.9630.7300.2331.204
8(0.466, 0.475]1008920.0800.1003007000.32976039000.9900.8610.1291.100
9(-inf, 0.466]1003970.0300.1003007000.330070010001.0001.0000.0001.000
\n", "
" ], "text/plain": [ " MODEL_SCORE_BIN 样本数 坏样本数 好样本数 坏样本比例 样本数比例 总坏样本数 总好样本数 平均坏样本率 \\\n", "0 (0.52, inf] 100 72 28 0.720 0.100 300 700 0.3 \n", "1 (0.516, 0.52] 100 56 44 0.560 0.100 300 700 0.3 \n", "2 (0.511, 0.516] 99 44 55 0.444 0.099 300 700 0.3 \n", "3 (0.506, 0.511] 101 37 64 0.366 0.101 300 700 0.3 \n", "4 (0.499, 0.506] 100 25 75 0.250 0.100 300 700 0.3 \n", "5 (0.488, 0.499] 100 31 69 0.310 0.100 300 700 0.3 \n", "6 (0.481, 0.488] 100 14 86 0.140 0.100 300 700 0.3 \n", "7 (0.475, 0.481] 100 10 90 0.100 0.100 300 700 0.3 \n", "8 (0.466, 0.475] 100 8 92 0.080 0.100 300 700 0.3 \n", "9 (-inf, 0.466] 100 3 97 0.030 0.100 300 700 0.3 \n", "\n", " 累计坏样本数 累计好样本数 累计样本数 累计坏样本比例 累计好样本比例 KS LIFT \n", "0 72 28 100 0.240 0.040 0.200 2.400 \n", "1 128 72 200 0.427 0.103 0.324 2.133 \n", "2 172 127 299 0.573 0.181 0.392 1.918 \n", "3 209 191 400 0.697 0.273 0.424 1.742 \n", "4 234 266 500 0.780 0.380 0.400 1.560 \n", "5 265 335 600 0.883 0.479 0.404 1.472 \n", "6 279 421 700 0.930 0.601 0.329 1.329 \n", "7 289 511 800 0.963 0.730 0.233 1.204 \n", "8 297 603 900 0.990 0.861 0.129 1.100 \n", "9 300 700 1000 1.000 1.000 0.000 1.000 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "新模型\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MODEL_SCORE_BIN样本数坏样本数好样本数坏样本比例样本数比例总坏样本数总好样本数平均坏样本率累计坏样本数累计好样本数累计样本数累计坏样本比例累计好样本比例KSLIFT
0(0.644, inf]9969300.6970.0993007000.36930990.2300.0430.1872.323
1(0.599, 0.644]10160410.5940.1013007000.3129712000.4300.1010.3292.150
2(0.561, 0.599]10043570.4300.1003007000.31721283000.5730.1830.3901.911
3(0.516, 0.561]10039610.3900.1003007000.32111894000.7030.2700.4331.758
4(0.476, 0.516]10034660.3400.1003007000.32452555000.8170.3640.4531.633
5(0.423, 0.476]9923760.2320.0993007000.32683315990.8930.4730.4201.491
6(0.383, 0.423]10112890.1190.1013007000.32804207000.9330.6000.3331.333
7(0.339, 0.383]10012880.1200.1003007000.32925088000.9730.7260.2471.217
8(0.293, 0.339]1006940.0600.1003007000.32986029000.9930.8600.1331.104
9(-inf, 0.293]1002980.0200.1003007000.330070010001.0001.0000.0001.000
\n", "
" ], "text/plain": [ " MODEL_SCORE_BIN 样本数 坏样本数 好样本数 坏样本比例 样本数比例 总坏样本数 总好样本数 平均坏样本率 \\\n", "0 (0.644, inf] 99 69 30 0.697 0.099 300 700 0.3 \n", "1 (0.599, 0.644] 101 60 41 0.594 0.101 300 700 0.3 \n", "2 (0.561, 0.599] 100 43 57 0.430 0.100 300 700 0.3 \n", "3 (0.516, 0.561] 100 39 61 0.390 0.100 300 700 0.3 \n", "4 (0.476, 0.516] 100 34 66 0.340 0.100 300 700 0.3 \n", "5 (0.423, 0.476] 99 23 76 0.232 0.099 300 700 0.3 \n", "6 (0.383, 0.423] 101 12 89 0.119 0.101 300 700 0.3 \n", "7 (0.339, 0.383] 100 12 88 0.120 0.100 300 700 0.3 \n", "8 (0.293, 0.339] 100 6 94 0.060 0.100 300 700 0.3 \n", "9 (-inf, 0.293] 100 2 98 0.020 0.100 300 700 0.3 \n", "\n", " 累计坏样本数 累计好样本数 累计样本数 累计坏样本比例 累计好样本比例 KS LIFT \n", "0 69 30 99 0.230 0.043 0.187 2.323 \n", "1 129 71 200 0.430 0.101 0.329 2.150 \n", "2 172 128 300 0.573 0.183 0.390 1.911 \n", "3 211 189 400 0.703 0.270 0.433 1.758 \n", "4 245 255 500 0.817 0.364 0.453 1.633 \n", "5 268 331 599 0.893 0.473 0.420 1.491 \n", "6 280 420 700 0.933 0.600 0.333 1.333 \n", "7 292 508 800 0.973 0.726 0.247 1.217 \n", "8 298 602 900 0.993 0.860 0.133 1.104 \n", "9 300 700 1000 1.000 1.000 0.000 1.000 " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

变量趋势

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "建模数据上变量趋势\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

压力测试

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
违约率抽样次数样本数好样本数坏样本数平均AUC最大AUC最小AUCAUC标准差95%置信区间AUC平均KS最大KS最小KSKS标准差95%置信区间KS
00.011030000297003000.7969240.7970770.7967540.0000970.7967 - 0.79710.44290.4430.4420.0003000.4423 - 0.4435
10.0710428539853000.7970330.7976630.7961410.0004060.7962 - 0.79780.45090.4540.4490.0014460.4481 - 0.4537
20.1310230720073000.7968750.7988370.7949050.0011210.7947 - 0.79910.45950.4680.4520.0055000.4487 - 0.4703
30.1910157812783000.7971700.8009020.7930670.0020820.7931 - 0.80120.44510.4480.4430.0015130.4421 - 0.4481
40.251012009003000.7969000.8027300.7905980.0032390.7906 - 0.80320.44970.4630.4440.0047130.4405 - 0.4589
50.31109676673000.7960360.7988480.7913340.0021080.7919 - 0.80020.45220.4550.4470.0022270.4478 - 0.4566
60.37108105103000.7981480.8024840.7910230.0029400.7924 - 0.80390.45150.4720.4290.0133660.4253 - 0.4777
70.43106973973000.7982940.8047480.7895970.0046830.7891 - 0.80750.44900.4590.4270.0109820.4275 - 0.4705
80.49106123123000.7942460.8058440.7789530.0081470.7783 - 0.81020.45210.4710.4250.0135970.4254 - 0.4788
90.55105452453000.7950270.8132790.7733950.0121290.7713 - 0.81880.45990.4890.4100.0203690.4200 - 0.4998
\n", "
" ], "text/plain": [ " 违约率 抽样次数 样本数 好样本数 坏样本数 平均AUC 最大AUC 最小AUC AUC标准差 \\\n", "0 0.01 10 30000 29700 300 0.796924 0.797077 0.796754 0.000097 \n", "1 0.07 10 4285 3985 300 0.797033 0.797663 0.796141 0.000406 \n", "2 0.13 10 2307 2007 300 0.796875 0.798837 0.794905 0.001121 \n", "3 0.19 10 1578 1278 300 0.797170 0.800902 0.793067 0.002082 \n", "4 0.25 10 1200 900 300 0.796900 0.802730 0.790598 0.003239 \n", "5 0.31 10 967 667 300 0.796036 0.798848 0.791334 0.002108 \n", "6 0.37 10 810 510 300 0.798148 0.802484 0.791023 0.002940 \n", "7 0.43 10 697 397 300 0.798294 0.804748 0.789597 0.004683 \n", "8 0.49 10 612 312 300 0.794246 0.805844 0.778953 0.008147 \n", "9 0.55 10 545 245 300 0.795027 0.813279 0.773395 0.012129 \n", "\n", " 95%置信区间AUC 平均KS 最大KS 最小KS KS标准差 95%置信区间KS \n", "0 0.7967 - 0.7971 0.4429 0.443 0.442 0.000300 0.4423 - 0.4435 \n", "1 0.7962 - 0.7978 0.4509 0.454 0.449 0.001446 0.4481 - 0.4537 \n", "2 0.7947 - 0.7991 0.4595 0.468 0.452 0.005500 0.4487 - 0.4703 \n", "3 0.7931 - 0.8012 0.4451 0.448 0.443 0.001513 0.4421 - 0.4481 \n", "4 0.7906 - 0.8032 0.4497 0.463 0.444 0.004713 0.4405 - 0.4589 \n", "5 0.7919 - 0.8002 0.4522 0.455 0.447 0.002227 0.4478 - 0.4566 \n", "6 0.7924 - 0.8039 0.4515 0.472 0.429 0.013366 0.4253 - 0.4777 \n", "7 0.7891 - 0.8075 0.4490 0.459 0.427 0.010982 0.4275 - 0.4705 \n", "8 0.7783 - 0.8102 0.4521 0.471 0.425 0.013597 0.4254 - 0.4788 \n", "9 0.7713 - 0.8188 0.4599 0.489 0.410 0.020369 0.4200 - 0.4998 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "模型报告文件储存路径:./cache/train/OnlineLearningDemo/OnlineLearning报告.docx\n", "olcfg save to【./cache/train/OnlineLearningDemo/olcfg.json】success. \n", "feature save to【./cache/train/OnlineLearningDemo/feature.csv】success. \n", "model save to【./cache/train/OnlineLearningDemo/coef.dict】success. 
\n" ] } ], "source": [ "# Trainer configuration.\n", "# path_resources must contain the existing model artefacts (see the files in the\n", "# ol_resources_demo directory for reference):\n", "#   coef.dict   - model coefficients; store the intercept, if any, under the key const\n", "#   feature.csv - binning information; numeric bins must be listed in ascending order\n", "cfg = dict(\n", "    path_resources=\"/root/notebook/ol_resources_demo\",\n", "    # Project name; it affects where the data is stored.\n", "    project_name=\"OnlineLearningDemo\",\n", "    y_column=\"creditability\",\n", "    lr=0.01,  # learning rate\n", "    batch_size=64,  # batch size of a single update\n", "    epochs=20,  # number of training epochs\n", "    jupyter_print=True,\n", "    stress_test=True,  # stress test\n", "    stress_sample_times=10,  # number of sampling rounds of the stress test\n", "    columns_anns={\"age_in_years\": \"年龄\"},\n", ")\n", "\n", "# Train and generate the report.\n", "trainer = OnlineLearningTrainer(data=data, **cfg)\n", "trainer.train()\n", "trainer.report(epoch=3)\n", "trainer.save()" ] }, { "cell_type": "markdown", "id": "b85d0e91", "metadata": { "pycharm": { "name": "#%% md\n" } }, "source": [ "## 加载模型" ] }, { "cell_type": "code", "execution_count": 3, "id": "60bac83d", "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "olcfg load from【./cache/train/OnlineLearningDemo/olcfg.json】success. \n", "项目路径:【./cache/train/OnlineLearningDemo】\n", "coef load from【./cache/train/OnlineLearningDemo/coef.dict】success.\n", "feature load from【./cache/train/OnlineLearningDemo/feature.csv】success.\n" ] }, { "data": { "text/plain": [ "{'KS': 0.4823,\n", " 'AUC': 0.7922,\n", " 'Gini': 0.5843,\n", " 'pic':
}" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Restore the trainer from the project directory and score the test rows.\n", "model_dir = \"./cache/train/OnlineLearningDemo/\"\n", "model = OnlineLearningTrainer.load(model_dir)\n", "prob = model.prob(test_data)\n", "\n", "# Evaluate the reloaded model on the test set with scorecardpy.\n", "y_test = test_data[\"creditability\"]\n", "sc.perf_eva(y_test, prob, title=\"test\", show_plot=True)" ] }, { "cell_type": "markdown", "id": "51025e2d", "metadata": { "pycharm": { "name": "#%% md\n" } }, "source": [ "## 计算psi" ] }, { "cell_type": "code", "execution_count": 4, "id": "08077cb4", "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "模型psi: 0.054\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MODEL_SCORE_BINpsi训练样本数测试样本数训练样本数比例测试样本数比例
0(0.641, inf]0.00370340.0990.117
1(0.599, 0.641]0.00172270.1020.093
2(0.561, 0.599]0.00171260.1000.089
3(0.518, 0.561]0.00171270.1000.093
4(0.473, 0.518]0.00770370.0990.127
5(0.422, 0.473]0.00071280.1000.096
6(0.385, 0.422]0.02371170.1000.058
7(0.342, 0.385]0.00171270.1000.093
8(0.301, 0.342]0.00171260.1000.089
9(-inf, 0.301]0.01671420.1000.144
\n", "
" ], "text/plain": [ " MODEL_SCORE_BIN psi 训练样本数 测试样本数 训练样本数比例 测试样本数比例\n", "0 (0.641, inf] 0.003 70 34 0.099 0.117\n", "1 (0.599, 0.641] 0.001 72 27 0.102 0.093\n", "2 (0.561, 0.599] 0.001 71 26 0.100 0.089\n", "3 (0.518, 0.561] 0.001 71 27 0.100 0.093\n", "4 (0.473, 0.518] 0.007 70 37 0.099 0.127\n", "5 (0.422, 0.473] 0.000 71 28 0.100 0.096\n", "6 (0.385, 0.422] 0.023 71 17 0.100 0.058\n", "7 (0.342, 0.385] 0.001 71 27 0.100 0.093\n", "8 (0.301, 0.342] 0.001 71 26 0.100 0.089\n", "9 (-inf, 0.301] 0.016 71 42 0.100 0.144" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numpy as np\n", "\n", "# Optional explicit points for the PSI bins, e.g. [0, 0.2, 0.3, 0.4, 1]\n", "# (presumably score cut points - confirm against easy_ml).\n", "# None is passed through to model.psi unchanged.\n", "points = None\n", "psi = model.psi(train_data, test_data, points=points)\n", "\n", "# Display the per-bin table; psi[\"psi\"].sum() adds up the psi column.\n", "psi" ] } ], "metadata": { "celltoolbar": "编辑元数据", "kernelspec": { "display_name": "Python [conda env:analysis]", "language": "python", "name": "conda-env-analysis-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.13" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": { "height": "calc(100% - 180px)", "left": "10px", "top": "150px", "width": "372.364px" }, "toc_section_display": true, "toc_window_display": true }, "toc-autonumbering": false, "toc-showcode": false, "toc-showmarkdowntxt": false, "toc-showtags": false }, "nbformat": 4, "nbformat_minor": 5 }