{ "cells": [ { "cell_type": "markdown", "id": "38ecba89", "metadata": {}, "source": [ "# lr" ] }, { "cell_type": "code", "execution_count": 2, "id": "4807cd30", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "日志路径:/root/project/easy_ml/logs/app.log\n" ] } ], "source": [ "%matplotlib agg\n", "import sys\n", "\n", "import matplotlib.pyplot as plt\n", "\n", "# Make the local easy_ml checkout importable. The membership check keeps\n", "# re-runs of this cell from appending the same entry to sys.path again.\n", "PROJECT_ROOT = \"/root/project\"\n", "if PROJECT_ROOT not in sys.path:\n", "    sys.path.append(PROJECT_ROOT)\n", "\n", "from easy_ml import DataSplitEntity, OnlineLearningTrainerLr\n", "import scorecardpy as sc\n", "\n", "# Number of leading rows used for training; the remaining rows are the test set.\n", "TRAIN_SIZE = 709\n", "\n", "# Load the German credit demo data and replace \".\" in column names with \"_\".\n", "dat = sc.germancredit()\n", "dat_columns = [c.replace(\".\", \"_\") for c in dat.columns]\n", "dat.columns = dat_columns\n", "\n", "# Encode the target: \"bad\" -> 1, every other value -> 0 (vectorised, no per-row lambda).\n", "dat[\"creditability\"] = (dat[\"creditability\"] == \"bad\").astype(int)\n", "\n", "# Positional split without shuffling, wrapped in the container the trainer takes.\n", "train_data = dat.iloc[:TRAIN_SIZE]\n", "test_data = dat.iloc[TRAIN_SIZE:]\n", "data = DataSplitEntity(train_data=train_data, test_data=test_data)\n" ] }, { "cell_type": "code", "execution_count": 3, "id": "20ee7fc7", "metadata": { "code_folding": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "项目路径:【./cache/train/OnlineLearningDemo】\n", "coef load from【/root/notebook/ol_resources_demo/coef.json】success.\n", "card.cfg load from【/root/notebook/ol_resources_demo/card.cfg】success.\n", "feature load from【/root/notebook/ol_resources_demo/feature.csv】success.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 20/20 [00:12<00:00, 1.60it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "选择epoch:【3】的参数:\n", "{'age_in_years': 0.3448204833214924, 'credit_history': 0.38666791361493663, 'duration_in_month': 0.31717857388239995, 'present_employment_since': 0.3182701082371682, 'purpose': 0.34568628455181505, 'savings_account_and_bonds': 0.3074829183742356, 'status_of_existing_checking_account': 0.3877371389194942, 'auc_test': 0.7911, 'ks_test': 0.4584, 'psi': 0.061, 'epoch': 3, 'loss_train': 0.579820491150681, 'loss_test': 
0.5821033735284096}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "data": { "text/html": [ "

模型系数优化过程

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
age_in_yearscredit_historyduration_in_monthpresent_employment_sincepurposesavings_account_and_bondsstatus_of_existing_checking_accountauc_testks_testpsiepochloss_trainloss_test
00.0616040.0827350.0201250.0846090.0363480.0561910.0766430.76910.46320.063000.666412
10.1722760.1931740.1264860.1834260.1480740.1532880.1897400.78630.45830.06710.6274160.628051
20.2650290.2953150.2258020.2573400.2511000.2365570.2946500.78940.46700.06420.598720.600769
30.3448200.3866680.3171790.3182700.3456860.3074830.3877370.79110.45840.06130.579820.582103
40.4106540.4655630.3993730.3672320.4308450.3654130.4666320.79250.46110.04440.5680890.569897
50.4632810.5322370.4724390.4059710.5067930.4113260.5311020.79270.46300.04150.5610460.562113
60.5044780.5879590.5371200.4365730.5743350.4470770.5823140.79220.46300.04160.5568340.557198
70.5363030.6343300.5943600.4609620.6344100.4746820.6220720.79290.46530.05170.5542420.554094
80.5606770.6729000.6450710.4807020.6878830.4959520.6523300.79320.46300.04480.5525470.552126
90.5792280.7050280.6900490.4969770.7354960.5123760.6749320.79370.46760.03890.551340.550872
100.5932720.7318460.7299690.5106610.7778710.5251280.6915000.79380.47420.039100.55040.550071
110.6038470.7542850.7654020.5223840.8155400.5351100.7034020.79420.47420.048110.5496150.549563
120.6117610.7731040.7968400.5326030.8489650.5430040.7117590.79470.46930.058120.5489310.549247
130.6176410.7889210.8247070.5416430.8785620.5493260.7174710.79520.47480.070130.5483230.549058
140.6219670.8022430.8493810.5497430.9047030.5544610.7212430.79490.47480.072140.5477810.548955
150.6251080.8134850.8711960.5570720.9277340.5586940.7236190.79500.48110.067150.5473010.548909
160.6273480.8229870.8904540.5637560.9479720.5622380.7250120.79530.48600.067160.5468810.548903
170.6289040.8310310.9074290.5698860.9657110.5652480.7257300.79490.48110.062170.5465170.548922
180.6299390.8378500.9223660.5755300.9812190.5678380.7259970.79490.49230.054180.5462060.548958
190.6305800.8436360.9354890.5807390.9947460.5700930.7259750.79530.49230.042190.5459450.549003
200.6309240.8485500.9470000.5855541.0065180.5720750.7257780.79560.49230.042200.5457290.549054
\n", "
" ], "text/plain": [ " age_in_years credit_history duration_in_month present_employment_since \\\n", "0 0.061604 0.082735 0.020125 0.084609 \n", "1 0.172276 0.193174 0.126486 0.183426 \n", "2 0.265029 0.295315 0.225802 0.257340 \n", "3 0.344820 0.386668 0.317179 0.318270 \n", "4 0.410654 0.465563 0.399373 0.367232 \n", "5 0.463281 0.532237 0.472439 0.405971 \n", "6 0.504478 0.587959 0.537120 0.436573 \n", "7 0.536303 0.634330 0.594360 0.460962 \n", "8 0.560677 0.672900 0.645071 0.480702 \n", "9 0.579228 0.705028 0.690049 0.496977 \n", "10 0.593272 0.731846 0.729969 0.510661 \n", "11 0.603847 0.754285 0.765402 0.522384 \n", "12 0.611761 0.773104 0.796840 0.532603 \n", "13 0.617641 0.788921 0.824707 0.541643 \n", "14 0.621967 0.802243 0.849381 0.549743 \n", "15 0.625108 0.813485 0.871196 0.557072 \n", "16 0.627348 0.822987 0.890454 0.563756 \n", "17 0.628904 0.831031 0.907429 0.569886 \n", "18 0.629939 0.837850 0.922366 0.575530 \n", "19 0.630580 0.843636 0.935489 0.580739 \n", "20 0.630924 0.848550 0.947000 0.585554 \n", "\n", " purpose savings_account_and_bonds status_of_existing_checking_account \\\n", "0 0.036348 0.056191 0.076643 \n", "1 0.148074 0.153288 0.189740 \n", "2 0.251100 0.236557 0.294650 \n", "3 0.345686 0.307483 0.387737 \n", "4 0.430845 0.365413 0.466632 \n", "5 0.506793 0.411326 0.531102 \n", "6 0.574335 0.447077 0.582314 \n", "7 0.634410 0.474682 0.622072 \n", "8 0.687883 0.495952 0.652330 \n", "9 0.735496 0.512376 0.674932 \n", "10 0.777871 0.525128 0.691500 \n", "11 0.815540 0.535110 0.703402 \n", "12 0.848965 0.543004 0.711759 \n", "13 0.878562 0.549326 0.717471 \n", "14 0.904703 0.554461 0.721243 \n", "15 0.927734 0.558694 0.723619 \n", "16 0.947972 0.562238 0.725012 \n", "17 0.965711 0.565248 0.725730 \n", "18 0.981219 0.567838 0.725997 \n", "19 0.994746 0.570093 0.725975 \n", "20 1.006518 0.572075 0.725778 \n", "\n", " auc_test ks_test psi epoch loss_train loss_test \n", "0 0.7691 0.4632 0.063 0 0 0.666412 \n", "1 0.7863 0.4583 0.067 1 
0.627416 0.628051 \n", "2 0.7894 0.4670 0.064 2 0.59872 0.600769 \n", "3 0.7911 0.4584 0.061 3 0.57982 0.582103 \n", "4 0.7925 0.4611 0.044 4 0.568089 0.569897 \n", "5 0.7927 0.4630 0.041 5 0.561046 0.562113 \n", "6 0.7922 0.4630 0.041 6 0.556834 0.557198 \n", "7 0.7929 0.4653 0.051 7 0.554242 0.554094 \n", "8 0.7932 0.4630 0.044 8 0.552547 0.552126 \n", "9 0.7937 0.4676 0.038 9 0.55134 0.550872 \n", "10 0.7938 0.4742 0.039 10 0.5504 0.550071 \n", "11 0.7942 0.4742 0.048 11 0.549615 0.549563 \n", "12 0.7947 0.4693 0.058 12 0.548931 0.549247 \n", "13 0.7952 0.4748 0.070 13 0.548323 0.549058 \n", "14 0.7949 0.4748 0.072 14 0.547781 0.548955 \n", "15 0.7950 0.4811 0.067 15 0.547301 0.548909 \n", "16 0.7953 0.4860 0.067 16 0.546881 0.548903 \n", "17 0.7949 0.4811 0.062 17 0.546517 0.548922 \n", "18 0.7949 0.4923 0.054 18 0.546206 0.548958 \n", "19 0.7953 0.4923 0.042 19 0.545945 0.549003 \n", "20 0.7956 0.4923 0.042 20 0.545729 0.549054 " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

样本分布

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
样本样本数样本占比坏样本数坏样本比例
0训练集70970.90%21129.76%
1测试集29129.10%8930.58%
2合计1000100%30030.00%
\n", "
" ], "text/plain": [ " 样本 样本数 样本占比 坏样本数 坏样本比例\n", "0 训练集 709 70.90% 211 29.76%\n", "1 测试集 291 29.10% 89 30.58%\n", "2 合计 1000 100% 300 30.00%" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

模型结果

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "原模型\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
样本集AUCKS
0建模数据0.77790.4224
1训练集0.78150.4335
2测试集0.76910.4632
\n", "
" ], "text/plain": [ " 样本集 AUC KS\n", "0 建模数据 0.7779 0.4224\n", "1 训练集 0.7815 0.4335\n", "2 测试集 0.7691 0.4632" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "新模型\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
样本集AUCKS
0建模数据0.79640.4671
1训练集0.79850.4696
2测试集0.79110.4584
\n", "
" ], "text/plain": [ " 样本集 AUC KS\n", "0 建模数据 0.7964 0.4671\n", "1 训练集 0.7985 0.4696\n", "2 测试集 0.7911 0.4584" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

模型系数

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
变量原变量WOE拟合系数新变量WOE拟合系数释义
0age_in_years0.06160.3448年龄
1credit_history0.08270.3867-
2duration_in_month0.02010.3172-
3present_employment_since0.08460.3183-
4purpose0.03630.3457-
5savings_account_and_bonds0.05620.3075-
6status_of_existing_checking_account0.07660.3877-
\n", "
" ], "text/plain": [ " 变量 原变量WOE拟合系数 新变量WOE拟合系数 释义\n", "0 age_in_years 0.0616 0.3448 年龄\n", "1 credit_history 0.0827 0.3867 -\n", "2 duration_in_month 0.0201 0.3172 -\n", "3 present_employment_since 0.0846 0.3183 -\n", "4 purpose 0.0363 0.3457 -\n", "5 savings_account_and_bonds 0.0562 0.3075 -\n", "6 status_of_existing_checking_account 0.0766 0.3877 -" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

模型psi

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MODEL_SCORE_BINpsi训练样本数测试样本数训练样本数比例测试样本数比例
0(0.649, inf]0.00170310.0990.107
1(0.602, 0.649]0.00172320.1020.110
2(0.562, 0.602]0.00171260.1000.089
3(0.519, 0.562]0.00271250.1000.086
4(0.467, 0.519]0.01469400.0970.137
5(0.421, 0.467]0.00472240.1020.082
6(0.382, 0.421]0.02371170.1000.058
7(0.337, 0.382]0.00171270.1000.093
8(0.293, 0.337]0.00071280.1000.096
9(-inf, 0.293]0.01471410.1000.141
\n", "
" ], "text/plain": [ " MODEL_SCORE_BIN psi 训练样本数 测试样本数 训练样本数比例 测试样本数比例\n", "0 (0.649, inf] 0.001 70 31 0.099 0.107\n", "1 (0.602, 0.649] 0.001 72 32 0.102 0.110\n", "2 (0.562, 0.602] 0.001 71 26 0.100 0.089\n", "3 (0.519, 0.562] 0.002 71 25 0.100 0.086\n", "4 (0.467, 0.519] 0.014 69 40 0.097 0.137\n", "5 (0.421, 0.467] 0.004 72 24 0.102 0.082\n", "6 (0.382, 0.421] 0.023 71 17 0.100 0.058\n", "7 (0.337, 0.382] 0.001 71 27 0.100 0.093\n", "8 (0.293, 0.337] 0.000 71 28 0.100 0.096\n", "9 (-inf, 0.293] 0.014 71 41 0.100 0.141" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "模型psi: 0.061\n" ] }, { "data": { "text/html": [ "

分数分箱

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "建模数据上分数分箱\n", "原模型\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MODEL_SCORE_BIN样本数坏样本数好样本数坏样本比例样本数比例总坏样本数总好样本数平均坏样本率累计坏样本数累计好样本数累计样本数累计坏样本比例累计好样本比例KSLIFT
0(0.529, inf]10069310.6900.1003007000.369311000.2300.0440.1862.300
1(0.519, 0.529]10054460.5400.1003007000.3123772000.4100.1100.3002.050
2(0.512, 0.519]9943560.4340.0993007000.31661332990.5530.1900.3631.851
3(0.504, 0.512]10038620.3800.1003007000.32041953990.6800.2790.4011.704
4(0.495, 0.504]10134670.3370.1013007000.32382625000.7930.3740.4191.587
5(0.487, 0.495]10028720.2800.1003007000.32663346000.8870.4770.4101.478
6(0.477, 0.487]1009910.0900.1003007000.32754257000.9170.6070.3101.310
7(0.466, 0.477]10015850.1500.1003007000.32905108000.9670.7290.2381.208
8(0.456, 0.466]1007930.0700.1003007000.32976039000.9900.8610.1291.100
9(-inf, 0.456]1003970.0300.1003007000.330070010001.0001.0000.0001.000
\n", "
" ], "text/plain": [ " MODEL_SCORE_BIN 样本数 坏样本数 好样本数 坏样本比例 样本数比例 总坏样本数 总好样本数 平均坏样本率 \\\n", "0 (0.529, inf] 100 69 31 0.690 0.100 300 700 0.3 \n", "1 (0.519, 0.529] 100 54 46 0.540 0.100 300 700 0.3 \n", "2 (0.512, 0.519] 99 43 56 0.434 0.099 300 700 0.3 \n", "3 (0.504, 0.512] 100 38 62 0.380 0.100 300 700 0.3 \n", "4 (0.495, 0.504] 101 34 67 0.337 0.101 300 700 0.3 \n", "5 (0.487, 0.495] 100 28 72 0.280 0.100 300 700 0.3 \n", "6 (0.477, 0.487] 100 9 91 0.090 0.100 300 700 0.3 \n", "7 (0.466, 0.477] 100 15 85 0.150 0.100 300 700 0.3 \n", "8 (0.456, 0.466] 100 7 93 0.070 0.100 300 700 0.3 \n", "9 (-inf, 0.456] 100 3 97 0.030 0.100 300 700 0.3 \n", "\n", " 累计坏样本数 累计好样本数 累计样本数 累计坏样本比例 累计好样本比例 KS LIFT \n", "0 69 31 100 0.230 0.044 0.186 2.300 \n", "1 123 77 200 0.410 0.110 0.300 2.050 \n", "2 166 133 299 0.553 0.190 0.363 1.851 \n", "3 204 195 399 0.680 0.279 0.401 1.704 \n", "4 238 262 500 0.793 0.374 0.419 1.587 \n", "5 266 334 600 0.887 0.477 0.410 1.478 \n", "6 275 425 700 0.917 0.607 0.310 1.310 \n", "7 290 510 800 0.967 0.729 0.238 1.208 \n", "8 297 603 900 0.990 0.861 0.129 1.100 \n", "9 300 700 1000 1.000 1.000 0.000 1.000 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "新模型\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MODEL_SCORE_BIN样本数坏样本数好样本数坏样本比例样本数比例总坏样本数总好样本数平均坏样本率累计坏样本数累计好样本数累计样本数累计坏样本比例累计好样本比例KSLIFT
0(0.652, inf]10068320.6800.1003007000.368321000.2270.0460.1812.267
1(0.604, 0.652]9963360.6360.0993007000.3131681990.4370.0970.3402.194
2(0.562, 0.604]10142590.4160.1013007000.31731273000.5770.1810.3961.922
3(0.517, 0.562]10039610.3900.1003007000.32121884000.7070.2690.4381.767
4(0.47, 0.517]10033670.3300.1003007000.32452555000.8170.3640.4531.633
5(0.424, 0.47]10023770.2300.1003007000.32683326000.8930.4740.4191.489
6(0.378, 0.424]10012880.1200.1003007000.32804207000.9330.6000.3331.333
7(0.333, 0.378]9911880.1110.0993007000.32915087990.9700.7260.2441.214
8(0.283, 0.333]1017940.0690.1013007000.32986029000.9930.8600.1331.104
9(-inf, 0.283]1002980.0200.1003007000.330070010001.0001.0000.0001.000
\n", "
" ], "text/plain": [ " MODEL_SCORE_BIN 样本数 坏样本数 好样本数 坏样本比例 样本数比例 总坏样本数 总好样本数 平均坏样本率 \\\n", "0 (0.652, inf] 100 68 32 0.680 0.100 300 700 0.3 \n", "1 (0.604, 0.652] 99 63 36 0.636 0.099 300 700 0.3 \n", "2 (0.562, 0.604] 101 42 59 0.416 0.101 300 700 0.3 \n", "3 (0.517, 0.562] 100 39 61 0.390 0.100 300 700 0.3 \n", "4 (0.47, 0.517] 100 33 67 0.330 0.100 300 700 0.3 \n", "5 (0.424, 0.47] 100 23 77 0.230 0.100 300 700 0.3 \n", "6 (0.378, 0.424] 100 12 88 0.120 0.100 300 700 0.3 \n", "7 (0.333, 0.378] 99 11 88 0.111 0.099 300 700 0.3 \n", "8 (0.283, 0.333] 101 7 94 0.069 0.101 300 700 0.3 \n", "9 (-inf, 0.283] 100 2 98 0.020 0.100 300 700 0.3 \n", "\n", " 累计坏样本数 累计好样本数 累计样本数 累计坏样本比例 累计好样本比例 KS LIFT \n", "0 68 32 100 0.227 0.046 0.181 2.267 \n", "1 131 68 199 0.437 0.097 0.340 2.194 \n", "2 173 127 300 0.577 0.181 0.396 1.922 \n", "3 212 188 400 0.707 0.269 0.438 1.767 \n", "4 245 255 500 0.817 0.364 0.453 1.633 \n", "5 268 332 600 0.893 0.474 0.419 1.489 \n", "6 280 420 700 0.933 0.600 0.333 1.333 \n", "7 291 508 799 0.970 0.726 0.244 1.214 \n", "8 298 602 900 0.993 0.860 0.133 1.104 \n", "9 300 700 1000 1.000 1.000 0.000 1.000 " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

变量趋势

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "建模数据上变量趋势\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

压力测试

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
违约率抽样次数样本数好样本数坏样本数平均AUC最大AUC最小AUCAUC标准差95%置信区间AUC平均KS最大KS最小KSKS标准差95%置信区间KS
00.011030000297003000.7963550.7965090.7961800.0001000.7962 - 0.79660.45240.4530.4500.0009170.4506 - 0.4542
10.0710428539853000.7964570.7970910.7955740.0004060.7957 - 0.79730.45200.4530.4480.0014140.4492 - 0.4548
20.1310230720073000.7962760.7981560.7943070.0011120.7941 - 0.79850.46100.4630.4600.0011830.4587 - 0.4633
30.1910157812783000.7965680.8001430.7924470.0020560.7925 - 0.80060.44270.4450.4350.0038740.4351 - 0.4503
40.251012009003000.7963700.8020520.7898240.0032900.7899 - 0.80280.44880.4630.4360.0077050.4337 - 0.4639
50.31109676673000.7953700.7982330.7906900.0021130.7912 - 0.79950.45150.4550.4400.0040800.4435 - 0.4595
60.37108105103000.7975650.8017120.7904670.0028670.7919 - 0.80320.46030.4800.4340.0141000.4327 - 0.4879
70.43106973973000.7977990.8044460.7886400.0048200.7884 - 0.80720.44690.4690.4270.0115970.4242 - 0.4696
80.49106123123000.7939750.8057910.7782260.0083810.7775 - 0.81040.44730.4780.4250.0164990.4150 - 0.4796
90.55105452453000.7947400.8125310.7723880.0123150.7706 - 0.81890.45620.4890.4100.0227540.4116 - 0.5008
\n", "
" ], "text/plain": [ " 违约率 抽样次数 样本数 好样本数 坏样本数 平均AUC 最大AUC 最小AUC AUC标准差 \\\n", "0 0.01 10 30000 29700 300 0.796355 0.796509 0.796180 0.000100 \n", "1 0.07 10 4285 3985 300 0.796457 0.797091 0.795574 0.000406 \n", "2 0.13 10 2307 2007 300 0.796276 0.798156 0.794307 0.001112 \n", "3 0.19 10 1578 1278 300 0.796568 0.800143 0.792447 0.002056 \n", "4 0.25 10 1200 900 300 0.796370 0.802052 0.789824 0.003290 \n", "5 0.31 10 967 667 300 0.795370 0.798233 0.790690 0.002113 \n", "6 0.37 10 810 510 300 0.797565 0.801712 0.790467 0.002867 \n", "7 0.43 10 697 397 300 0.797799 0.804446 0.788640 0.004820 \n", "8 0.49 10 612 312 300 0.793975 0.805791 0.778226 0.008381 \n", "9 0.55 10 545 245 300 0.794740 0.812531 0.772388 0.012315 \n", "\n", " 95%置信区间AUC 平均KS 最大KS 最小KS KS标准差 95%置信区间KS \n", "0 0.7962 - 0.7966 0.4524 0.453 0.450 0.000917 0.4506 - 0.4542 \n", "1 0.7957 - 0.7973 0.4520 0.453 0.448 0.001414 0.4492 - 0.4548 \n", "2 0.7941 - 0.7985 0.4610 0.463 0.460 0.001183 0.4587 - 0.4633 \n", "3 0.7925 - 0.8006 0.4427 0.445 0.435 0.003874 0.4351 - 0.4503 \n", "4 0.7899 - 0.8028 0.4488 0.463 0.436 0.007705 0.4337 - 0.4639 \n", "5 0.7912 - 0.7995 0.4515 0.455 0.440 0.004080 0.4435 - 0.4595 \n", "6 0.7919 - 0.8032 0.4603 0.480 0.434 0.014100 0.4327 - 0.4879 \n", "7 0.7884 - 0.8072 0.4469 0.469 0.427 0.011597 0.4242 - 0.4696 \n", "8 0.7775 - 0.8104 0.4473 0.478 0.425 0.016499 0.4150 - 0.4796 \n", "9 0.7706 - 0.8189 0.4562 0.489 0.410 0.022754 0.4116 - 0.5008 " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

评分卡

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
variablebinpoints
0basepointsNaN538.0
23age_in_years3626.0
24age_in_years[-inf,25.0)-11.0
25age_in_years[25.0,30.0)-4.0
26age_in_years[30.0,35.0)-3.0
27age_in_years[35.0,inf)6.0
11credit_historyno credits taken/ all credits paid back duly%,...-36.0
12credit_historyexisting credits paid back duly till now-1.0
13credit_historydelay in paying off in the past-4.0
14credit_historycritical account/ other credits existing (not ...19.0
6duration_in_month[-inf,8.0)33.0
7duration_in_month[8.0,15.0)5.0
8duration_in_month[15.0,25.0)-0.0
9duration_in_month[25.0,35.0)-4.0
10duration_in_month[35.0,inf)-18.0
15present_employment_sinceunemployed%,%... < 1 year-11.0
16present_employment_since1 <= ... < 4 years-0.0
17present_employment_since4 <= ... < 7 years10.0
18present_employment_since... >= 7 years5.0
19purposeretraining%,%car (used)22.0
20purposeradio/television9.0
21purposefurniture/equipment%,%business%,%repairs-3.0
22purposedomestic appliances%,%education%,%car (new)%,%...-9.0
0savings_account_and_bonds... < 100 DM%,%100 <= ... < 500 DM-5.0
1savings_account_and_bonds500 <= ... < 1000 DM%,%... >= 1000 DM21.0
2savings_account_and_bondsunknown/ no savings account11.0
3status_of_existing_checking_account... < 0 DM%,%0 <= ... < 200 DM-17.0
4status_of_existing_checking_account... >= 200 DM / salary assignments for at leas...13.0
5status_of_existing_checking_accountno checking account33.0
\n", "
" ], "text/plain": [ " variable \\\n", "0 basepoints \n", "23 age_in_years \n", "24 age_in_years \n", "25 age_in_years \n", "26 age_in_years \n", "27 age_in_years \n", "11 credit_history \n", "12 credit_history \n", "13 credit_history \n", "14 credit_history \n", "6 duration_in_month \n", "7 duration_in_month \n", "8 duration_in_month \n", "9 duration_in_month \n", "10 duration_in_month \n", "15 present_employment_since \n", "16 present_employment_since \n", "17 present_employment_since \n", "18 present_employment_since \n", "19 purpose \n", "20 purpose \n", "21 purpose \n", "22 purpose \n", "0 savings_account_and_bonds \n", "1 savings_account_and_bonds \n", "2 savings_account_and_bonds \n", "3 status_of_existing_checking_account \n", "4 status_of_existing_checking_account \n", "5 status_of_existing_checking_account \n", "\n", " bin points \n", "0 NaN 538.0 \n", "23 36 26.0 \n", "24 [-inf,25.0) -11.0 \n", "25 [25.0,30.0) -4.0 \n", "26 [30.0,35.0) -3.0 \n", "27 [35.0,inf) 6.0 \n", "11 no credits taken/ all credits paid back duly%,... -36.0 \n", "12 existing credits paid back duly till now -1.0 \n", "13 delay in paying off in the past -4.0 \n", "14 critical account/ other credits existing (not ... 19.0 \n", "6 [-inf,8.0) 33.0 \n", "7 [8.0,15.0) 5.0 \n", "8 [15.0,25.0) -0.0 \n", "9 [25.0,35.0) -4.0 \n", "10 [35.0,inf) -18.0 \n", "15 unemployed%,%... < 1 year -11.0 \n", "16 1 <= ... < 4 years -0.0 \n", "17 4 <= ... < 7 years 10.0 \n", "18 ... >= 7 years 5.0 \n", "19 retraining%,%car (used) 22.0 \n", "20 radio/television 9.0 \n", "21 furniture/equipment%,%business%,%repairs -3.0 \n", "22 domestic appliances%,%education%,%car (new)%,%... -9.0 \n", "0 ... < 100 DM%,%100 <= ... < 500 DM -5.0 \n", "1 500 <= ... < 1000 DM%,%... >= 1000 DM 21.0 \n", "2 unknown/ no savings account 11.0 \n", "3 ... < 0 DM%,%0 <= ... < 200 DM -17.0 \n", "4 ... >= 200 DM / salary assignments for at leas... 
13.0 \n", "5 no checking account 33.0 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "模型报告文件储存路径:./cache/train/OnlineLearningDemo/OnlineLearning报告.docx\n", "olcfg save to【./cache/train/OnlineLearningDemo/olcfg.json】success. \n", "feature save to【./cache/train/OnlineLearningDemo/feature.csv】success. \n", "model save to【./cache/train/OnlineLearningDemo/coef.json】success. \n", "model save to【./cache/train/OnlineLearningDemo/card.csv】success. \n" ] } ], "source": [ "# Trainer settings. For the model coefficients, binning info, etc. see the files\n", "# in the ol_resources_demo directory:\n", "#   coef.json   - model coefficients (use \"const\" as the key for the intercept, if any)\n", "#   feature.csv - binning info (numeric bins must be listed in ascending order)\n", "#   card.cfg    - info needed to build the scorecard (without it no scorecard is generated)\n", "cfg = dict(\n", "    path_resources=\"/root/notebook/ol_resources_demo\",\n", "    # Project name; affects where data is stored.\n", "    project_name=\"OnlineLearningDemo\",\n", "    y_column=\"creditability\",\n", "    # Learning rate.\n", "    lr=0.01,\n", "    # Batch size of a single update.\n", "    batch_size=64,\n", "    # Number of training epochs.\n", "    epochs=20,\n", "    jupyter_print=True,\n", "    # Stress test.\n", "    stress_test=True,\n", "    # Number of sampling rounds for the stress test.\n", "    stress_sample_times=10,\n", "    columns_anns={\"age_in_years\": \"年龄\"},\n", ")\n", "\n", "# Train and generate the report, then save the results.\n", "trainer = OnlineLearningTrainerLr(data=data, **cfg)\n", "trainer.train()\n", "trainer.report(epoch=3)\n", "trainer.save()" ] }, { "cell_type": "markdown", "id": "1c57b8b9", "metadata": {}, "source": [ "## 加载模型" ] }, { "cell_type": "code", "execution_count": 6, "id": "34773917", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "olcfg load from【./cache/train/OnlineLearningDemo/olcfg.json】success. \n", "项目路径:【./cache/train/OnlineLearningDemo】\n", "coef load from【./cache/train/OnlineLearningDemo/coef.json】success.\n", "feature load from【./cache/train/OnlineLearningDemo/feature.csv】success.\n" ] }, { "data": { "text/plain": [ "{'KS': 0.4584,\n", " 'AUC': 0.7911,\n", " 'Gini': 0.5822,\n", " 'pic':
}" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model = OnlineLearningTrainerLr.load(\"./cache/train/OnlineLearningDemo/\")\n", "prob = model.prob(test_data)\n", "sc.perf_eva(test_data[\"creditability\"], prob, title=\"test\", show_plot=True)" ] }, { "cell_type": "markdown", "id": "a6e8bb2c", "metadata": {}, "source": [ "## 计算psi" ] }, { "cell_type": "code", "execution_count": 4, "id": "97e6c7fd", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "模型psi: 0.061\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MODEL_SCORE_BINpsi训练样本数测试样本数训练样本数比例测试样本数比例
0(0.649, inf]0.00170310.0990.107
1(0.602, 0.649]0.00172320.1020.110
2(0.562, 0.602]0.00171260.1000.089
3(0.519, 0.562]0.00271250.1000.086
4(0.467, 0.519]0.01469400.0970.137
5(0.421, 0.467]0.00472240.1020.082
6(0.382, 0.421]0.02371170.1000.058
7(0.337, 0.382]0.00171270.1000.093
8(0.293, 0.337]0.00071280.1000.096
9(-inf, 0.293]0.01471410.1000.141
\n", "
" ], "text/plain": [ " MODEL_SCORE_BIN psi 训练样本数 测试样本数 训练样本数比例 测试样本数比例\n", "0 (0.649, inf] 0.001 70 31 0.099 0.107\n", "1 (0.602, 0.649] 0.001 72 32 0.102 0.110\n", "2 (0.562, 0.602] 0.001 71 26 0.100 0.089\n", "3 (0.519, 0.562] 0.002 71 25 0.100 0.086\n", "4 (0.467, 0.519] 0.014 69 40 0.097 0.137\n", "5 (0.421, 0.467] 0.004 72 24 0.102 0.082\n", "6 (0.382, 0.421] 0.023 71 17 0.100 0.058\n", "7 (0.337, 0.382] 0.001 71 27 0.100 0.093\n", "8 (0.293, 0.337] 0.000 71 28 0.100 0.096\n", "9 (-inf, 0.293] 0.014 71 41 0.100 0.141" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numpy as np\n", "# points=[0, 0.2, 0.3, 0.4, 1]\n", "points=None\n", "psi = model.psi(train_data, test_data, points=points)\n", "psi\n", "# psi[\"psi\"].sum()" ] }, { "cell_type": "markdown", "id": "0cc6d099", "metadata": {}, "source": [ "# xgb" ] }, { "cell_type": "code", "execution_count": 4, "id": "95af6493", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "项目路径:【./cache/train/OnlineLearningDemo】\n", "model load from【/root/notebook/ol_resources_demo/xgb.bin】success.\n", "原模型一共有【80】棵树\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 80/80 [00:24<00:00, 3.31it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "选择ntree:【79】的参数:\n", "{'auc_test': 0.8116, 'ks_test': 0.4959, 'psi': 0.01, 'ntree': 79}\n", "model save to【./cache/train/OnlineLearningDemo/model.pmml】success. \n", "pmml模型结果一致率(误差小于0.001):100.0%\n" ] }, { "data": { "text/html": [ "

模型优化过程

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", "
auc_testks_testpsintree
00.70800.35130.0151
10.70540.33190.0122
20.72200.35350.0333
30.73280.37920.0704
40.73430.37870.0735
50.73910.38650.0786
60.74190.39010.0867
70.74460.39500.0608
80.74880.41070.0979
90.75130.40360.10610
100.75710.42070.09511
110.75560.41480.09312
120.75820.42970.07313
130.76250.42870.08314
140.76410.43650.06215
150.76790.45040.07716
160.76750.43430.11117
170.76780.44050.10118
180.77010.44190.08319
190.77340.46430.06920
200.78000.45540.06221
210.78430.46660.04622
220.78380.47290.05623
230.78570.46340.05024
240.78570.46340.04325
250.78620.46840.03126
260.78810.46840.03827
270.78890.46840.03728
280.79020.47830.04029
290.79120.46840.04930
300.79060.46210.03331
310.79220.46340.03232
320.79520.46890.04133
330.79830.46930.03534
340.79850.46930.03735
350.79810.46300.04236
360.80020.47200.02037
370.80080.46930.02638
380.80210.45440.02439
390.80220.45120.03640
400.80130.45500.03741
410.80100.45630.02842
420.80210.45350.02943
430.80240.45630.02644
440.80260.46930.02945
450.80200.46740.02546
460.80230.46130.03947
470.80320.46130.05748
480.80330.46490.03849
490.80230.46110.02650
500.80310.47100.02551
510.80470.48230.01652
520.80410.47100.02653
530.80540.46970.02954
540.80610.48090.02855
550.80560.48090.02756
560.80580.48090.02957
570.80390.49220.02358
580.80360.48720.02559
590.80530.48720.01860
600.80540.48720.01361
610.80530.48720.01462
620.80580.48470.01963
630.80580.48340.02064
640.80590.48590.01865
650.80680.48970.02066
660.80620.48470.03167
670.80780.48830.01468
680.80800.49220.01369
690.80770.49710.01170
700.80760.48830.01671
710.80710.48830.01972
720.80970.48740.01873
730.81020.48740.01974
740.80920.48600.02175
750.80900.48600.02476
760.80890.48600.02177
770.81150.49590.01878
780.81160.49590.01079
790.81180.49670.01280
\n", "
" ], "text/plain": [ " auc_test ks_test psi ntree\n", "0 0.7080 0.3513 0.015 1\n", "1 0.7054 0.3319 0.012 2\n", "2 0.7220 0.3535 0.033 3\n", "3 0.7328 0.3792 0.070 4\n", "4 0.7343 0.3787 0.073 5\n", "5 0.7391 0.3865 0.078 6\n", "6 0.7419 0.3901 0.086 7\n", "7 0.7446 0.3950 0.060 8\n", "8 0.7488 0.4107 0.097 9\n", "9 0.7513 0.4036 0.106 10\n", "10 0.7571 0.4207 0.095 11\n", "11 0.7556 0.4148 0.093 12\n", "12 0.7582 0.4297 0.073 13\n", "13 0.7625 0.4287 0.083 14\n", "14 0.7641 0.4365 0.062 15\n", "15 0.7679 0.4504 0.077 16\n", "16 0.7675 0.4343 0.111 17\n", "17 0.7678 0.4405 0.101 18\n", "18 0.7701 0.4419 0.083 19\n", "19 0.7734 0.4643 0.069 20\n", "20 0.7800 0.4554 0.062 21\n", "21 0.7843 0.4666 0.046 22\n", "22 0.7838 0.4729 0.056 23\n", "23 0.7857 0.4634 0.050 24\n", "24 0.7857 0.4634 0.043 25\n", "25 0.7862 0.4684 0.031 26\n", "26 0.7881 0.4684 0.038 27\n", "27 0.7889 0.4684 0.037 28\n", "28 0.7902 0.4783 0.040 29\n", "29 0.7912 0.4684 0.049 30\n", "30 0.7906 0.4621 0.033 31\n", "31 0.7922 0.4634 0.032 32\n", "32 0.7952 0.4689 0.041 33\n", "33 0.7983 0.4693 0.035 34\n", "34 0.7985 0.4693 0.037 35\n", "35 0.7981 0.4630 0.042 36\n", "36 0.8002 0.4720 0.020 37\n", "37 0.8008 0.4693 0.026 38\n", "38 0.8021 0.4544 0.024 39\n", "39 0.8022 0.4512 0.036 40\n", "40 0.8013 0.4550 0.037 41\n", "41 0.8010 0.4563 0.028 42\n", "42 0.8021 0.4535 0.029 43\n", "43 0.8024 0.4563 0.026 44\n", "44 0.8026 0.4693 0.029 45\n", "45 0.8020 0.4674 0.025 46\n", "46 0.8023 0.4613 0.039 47\n", "47 0.8032 0.4613 0.057 48\n", "48 0.8033 0.4649 0.038 49\n", "49 0.8023 0.4611 0.026 50\n", "50 0.8031 0.4710 0.025 51\n", "51 0.8047 0.4823 0.016 52\n", "52 0.8041 0.4710 0.026 53\n", "53 0.8054 0.4697 0.029 54\n", "54 0.8061 0.4809 0.028 55\n", "55 0.8056 0.4809 0.027 56\n", "56 0.8058 0.4809 0.029 57\n", "57 0.8039 0.4922 0.023 58\n", "58 0.8036 0.4872 0.025 59\n", "59 0.8053 0.4872 0.018 60\n", "60 0.8054 0.4872 0.013 61\n", "61 0.8053 0.4872 0.014 62\n", "62 0.8058 0.4847 0.019 63\n", "63 0.8058 
0.4834 0.020 64\n", "64 0.8059 0.4859 0.018 65\n", "65 0.8068 0.4897 0.020 66\n", "66 0.8062 0.4847 0.031 67\n", "67 0.8078 0.4883 0.014 68\n", "68 0.8080 0.4922 0.013 69\n", "69 0.8077 0.4971 0.011 70\n", "70 0.8076 0.4883 0.016 71\n", "71 0.8071 0.4883 0.019 72\n", "72 0.8097 0.4874 0.018 73\n", "73 0.8102 0.4874 0.019 74\n", "74 0.8092 0.4860 0.021 75\n", "75 0.8090 0.4860 0.024 76\n", "76 0.8089 0.4860 0.021 77\n", "77 0.8115 0.4959 0.018 78\n", "78 0.8116 0.4959 0.010 79\n", "79 0.8118 0.4967 0.012 80" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

样本分布

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
样本样本数样本占比坏样本数坏样本比例
0训练集70970.90%21129.76%
1测试集29129.10%8930.58%
2合计1000100%30030.00%
\n", "
" ], "text/plain": [ " 样本 样本数 样本占比 坏样本数 坏样本比例\n", "0 训练集 709 70.90% 211 29.76%\n", "1 测试集 291 29.10% 89 30.58%\n", "2 合计 1000 100% 300 30.00%" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

模型结果

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "原模型\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
样本集AUCKS
0建模数据0.88410.6214
1训练集0.91680.6775
2测试集0.80690.4918
\n", "
" ], "text/plain": [ " 样本集 AUC KS\n", "0 建模数据 0.8841 0.6214\n", "1 训练集 0.9168 0.6775\n", "2 测试集 0.8069 0.4918" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "新模型\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
样本集AUCKS
0建模数据0.86470.5586
1训练集0.88730.5935
2测试集0.81160.4959
\n", "
" ], "text/plain": [ " 样本集 AUC KS\n", "0 建模数据 0.8647 0.5586\n", "1 训练集 0.8873 0.5935\n", "2 测试集 0.8116 0.4959" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

模型psi

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MODEL_SCORE_BINpsi训练样本数测试样本数训练样本数比例测试样本数比例
0(0.459, inf]0.00371340.1000.117
1(0.437, 0.459]0.00071300.1000.103
2(0.419, 0.437]0.00271330.1000.113
3(0.407, 0.419]0.00271250.1000.086
4(0.395, 0.407]0.00270250.0990.086
5(0.38, 0.395]0.00171260.1000.089
6(0.358, 0.38]0.00071280.1000.096
7(0.342, 0.358]0.00071300.1000.103
8(0.326, 0.342]0.00071290.1000.100
9(-inf, 0.326]0.00071310.1000.107
\n", "
" ], "text/plain": [ " MODEL_SCORE_BIN psi 训练样本数 测试样本数 训练样本数比例 测试样本数比例\n", "0 (0.459, inf] 0.003 71 34 0.100 0.117\n", "1 (0.437, 0.459] 0.000 71 30 0.100 0.103\n", "2 (0.419, 0.437] 0.002 71 33 0.100 0.113\n", "3 (0.407, 0.419] 0.002 71 25 0.100 0.086\n", "4 (0.395, 0.407] 0.002 70 25 0.099 0.086\n", "5 (0.38, 0.395] 0.001 71 26 0.100 0.089\n", "6 (0.358, 0.38] 0.000 71 28 0.100 0.096\n", "7 (0.342, 0.358] 0.000 71 30 0.100 0.103\n", "8 (0.326, 0.342] 0.000 71 29 0.100 0.100\n", "9 (-inf, 0.326] 0.000 71 31 0.100 0.107" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "模型psi: 0.01\n" ] }, { "data": { "text/html": [ "

分数分箱

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "建模数据上分数分箱\n", "原模型\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MODEL_SCORE_BIN样本数坏样本数好样本数坏样本比例样本数比例总坏样本数总好样本数平均坏样本率累计坏样本数累计好样本数累计样本数累计坏样本比例累计好样本比例KSLIFT
0(0.653, inf]1009190.910.13007000.39191000.3030.0130.2903.033
1(0.524, 0.653]10070300.700.13007000.3161392000.5370.0560.4812.683
2(0.403, 0.524]10052480.520.13007000.3213873000.7100.1240.5862.367
3(0.332, 0.403]10034660.340.13007000.32471534000.8230.2190.6042.058
4(0.249, 0.332]10017830.170.13007000.32642365000.8800.3370.5431.760
5(0.175, 0.249]10017830.170.13007000.32813196000.9370.4560.4811.561
6(0.117, 0.175]10012880.120.13007000.32934077000.9770.5810.3961.395
7(0.0809, 0.117]1003970.030.13007000.32965048000.9870.7200.2671.233
8(0.0445, 0.0809]1003970.030.13007000.32996019000.9970.8590.1381.107
9(-inf, 0.0445]1001990.010.13007000.330070010001.0001.0000.0001.000
\n", "
" ], "text/plain": [ " MODEL_SCORE_BIN 样本数 坏样本数 好样本数 坏样本比例 样本数比例 总坏样本数 总好样本数 平均坏样本率 \\\n", "0 (0.653, inf] 100 91 9 0.91 0.1 300 700 0.3 \n", "1 (0.524, 0.653] 100 70 30 0.70 0.1 300 700 0.3 \n", "2 (0.403, 0.524] 100 52 48 0.52 0.1 300 700 0.3 \n", "3 (0.332, 0.403] 100 34 66 0.34 0.1 300 700 0.3 \n", "4 (0.249, 0.332] 100 17 83 0.17 0.1 300 700 0.3 \n", "5 (0.175, 0.249] 100 17 83 0.17 0.1 300 700 0.3 \n", "6 (0.117, 0.175] 100 12 88 0.12 0.1 300 700 0.3 \n", "7 (0.0809, 0.117] 100 3 97 0.03 0.1 300 700 0.3 \n", "8 (0.0445, 0.0809] 100 3 97 0.03 0.1 300 700 0.3 \n", "9 (-inf, 0.0445] 100 1 99 0.01 0.1 300 700 0.3 \n", "\n", " 累计坏样本数 累计好样本数 累计样本数 累计坏样本比例 累计好样本比例 KS LIFT \n", "0 91 9 100 0.303 0.013 0.290 3.033 \n", "1 161 39 200 0.537 0.056 0.481 2.683 \n", "2 213 87 300 0.710 0.124 0.586 2.367 \n", "3 247 153 400 0.823 0.219 0.604 2.058 \n", "4 264 236 500 0.880 0.337 0.543 1.760 \n", "5 281 319 600 0.937 0.456 0.481 1.561 \n", "6 293 407 700 0.977 0.581 0.396 1.395 \n", "7 296 504 800 0.987 0.720 0.267 1.233 \n", "8 299 601 900 0.997 0.859 0.138 1.107 \n", "9 300 700 1000 1.000 1.000 0.000 1.000 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "新模型\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MODEL_SCORE_BIN样本数坏样本数好样本数坏样本比例样本数比例总坏样本数总好样本数平均坏样本率累计坏样本数累计好样本数累计样本数累计坏样本比例累计好样本比例KSLIFT
0(0.46, inf]1009190.910.13007000.39191000.3030.0130.2903.033
1(0.439, 0.46]10068320.680.13007000.3159412000.5300.0590.4712.650
2(0.421, 0.439]10041590.410.13007000.32001003000.6670.1430.5242.222
3(0.408, 0.421]10033670.330.13007000.32331674000.7770.2390.5381.942
4(0.395, 0.408]10030700.300.13007000.32632375000.8770.3390.5381.753
5(0.379, 0.395]10015850.150.13007000.32783226000.9270.4600.4671.544
6(0.358, 0.379]10011890.110.13007000.32894117000.9630.5870.3761.376
7(0.341, 0.358]1009910.090.13007000.32985028000.9930.7170.2761.242
8(0.326, 0.341]1001990.010.13007000.32996019000.9970.8590.1381.107
9(-inf, 0.326]1001990.010.13007000.330070010001.0001.0000.0001.000
\n", "
" ], "text/plain": [ " MODEL_SCORE_BIN 样本数 坏样本数 好样本数 坏样本比例 样本数比例 总坏样本数 总好样本数 平均坏样本率 \\\n", "0 (0.46, inf] 100 91 9 0.91 0.1 300 700 0.3 \n", "1 (0.439, 0.46] 100 68 32 0.68 0.1 300 700 0.3 \n", "2 (0.421, 0.439] 100 41 59 0.41 0.1 300 700 0.3 \n", "3 (0.408, 0.421] 100 33 67 0.33 0.1 300 700 0.3 \n", "4 (0.395, 0.408] 100 30 70 0.30 0.1 300 700 0.3 \n", "5 (0.379, 0.395] 100 15 85 0.15 0.1 300 700 0.3 \n", "6 (0.358, 0.379] 100 11 89 0.11 0.1 300 700 0.3 \n", "7 (0.341, 0.358] 100 9 91 0.09 0.1 300 700 0.3 \n", "8 (0.326, 0.341] 100 1 99 0.01 0.1 300 700 0.3 \n", "9 (-inf, 0.326] 100 1 99 0.01 0.1 300 700 0.3 \n", "\n", " 累计坏样本数 累计好样本数 累计样本数 累计坏样本比例 累计好样本比例 KS LIFT \n", "0 91 9 100 0.303 0.013 0.290 3.033 \n", "1 159 41 200 0.530 0.059 0.471 2.650 \n", "2 200 100 300 0.667 0.143 0.524 2.222 \n", "3 233 167 400 0.777 0.239 0.538 1.942 \n", "4 263 237 500 0.877 0.339 0.538 1.753 \n", "5 278 322 600 0.927 0.460 0.467 1.544 \n", "6 289 411 700 0.963 0.587 0.376 1.376 \n", "7 298 502 800 0.993 0.717 0.276 1.242 \n", "8 299 601 900 0.997 0.859 0.138 1.107 \n", "9 300 700 1000 1.000 1.000 0.000 1.000 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "olcfg save to【./cache/train/OnlineLearningDemo/olcfg.json】success. \n", "model save to【./cache/train/OnlineLearningDemo/model.pkl】success. \n", "model save to【./cache/train/OnlineLearningDemo/xgb.bin】success. 
\n" ] } ], "source": [ "%matplotlib agg\n", "import matplotlib.pyplot as plt\n", "import sys\n", "sys.path.append(\"/root/project\")\n", "from easy_ml import DataSplitEntity, OnlineLearningTrainerXgb\n", "\n", "\n", "# 加载demo数据\n", "import scorecardpy as sc\n", "\n", "# 加载数据\n", "dat = sc.germancredit()\n", "dat_columns = dat.columns.tolist()\n", "dat_columns = [c.replace(\".\",\"_\") for c in dat_columns]\n", "dat.columns = dat_columns\n", "\n", "dat[\"creditability\"] = dat[\"creditability\"].apply(lambda x: 1 if x == \"bad\" else 0)\n", "train_data=dat[:709]\n", "test_data=dat[709:]\n", "data = DataSplitEntity(train_data=train_data, test_data=test_data)\n", "\n", "cfg = {\n", "# 模型,请参考ol_resources_demo目录下文件\n", "# 模型文件 model.pkl\n", "\"path_resources\": \"/root/notebook/ol_resources_demo\",\n", "# 项目名称,影响数据存储位置\n", "\"project_name\": \"OnlineLearningDemo\",\n", "\"y_column\": \"creditability\",\n", "# 学习率\n", "\"lr\": 0.01,\n", "\"jupyter_print\": True,\n", "# 压力测试\n", "\"stress_test\": False,\n", "# 压力测试抽样次数\n", "\"stress_sample_times\": 10,\n", "\"columns_anns\":{\n", " \"age_in_years\":\"年龄\"\n", "}\n", "}\n", "\n", "# 训练并生成报告\n", "trainer = OnlineLearningTrainerXgb(data=data, **cfg)\n", "trainer.train()\n", "trainer.report(ntree=79)\n", "trainer.save()\n" ] }, { "cell_type": "code", "execution_count": 5, "id": "28a40327", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "olcfg load from【./cache/train/OnlineLearningDemo/olcfg.json】success. \n", "项目路径:【./cache/train/OnlineLearningDemo】\n", "model load from【./cache/train/OnlineLearningDemo/xgb.bin】success.\n" ] }, { "data": { "text/plain": [ "{'KS': 0.4959,\n", " 'AUC': 0.8116,\n", " 'Gini': 0.6232,\n", " 'pic':
}" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model = OnlineLearningTrainerXgb.load(\"./cache/train/OnlineLearningDemo/\")\n", "prob = model.prob(test_data)\n", "sc.perf_eva(test_data[\"creditability\"], prob, title=\"test\", show_plot=True)" ] }, { "cell_type": "code", "execution_count": 6, "id": "8f8addf4", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "模型psi: 0.01\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MODEL_SCORE_BINpsi训练样本数测试样本数训练样本数比例测试样本数比例
0(0.459, inf]0.00371340.1000.117
1(0.437, 0.459]0.00071300.1000.103
2(0.419, 0.437]0.00271330.1000.113
3(0.407, 0.419]0.00271250.1000.086
4(0.395, 0.407]0.00270250.0990.086
5(0.38, 0.395]0.00171260.1000.089
6(0.358, 0.38]0.00071280.1000.096
7(0.342, 0.358]0.00071300.1000.103
8(0.326, 0.342]0.00071290.1000.100
9(-inf, 0.326]0.00071310.1000.107
\n", "
" ], "text/plain": [ " MODEL_SCORE_BIN psi 训练样本数 测试样本数 训练样本数比例 测试样本数比例\n", "0 (0.459, inf] 0.003 71 34 0.100 0.117\n", "1 (0.437, 0.459] 0.000 71 30 0.100 0.103\n", "2 (0.419, 0.437] 0.002 71 33 0.100 0.113\n", "3 (0.407, 0.419] 0.002 71 25 0.100 0.086\n", "4 (0.395, 0.407] 0.002 70 25 0.099 0.086\n", "5 (0.38, 0.395] 0.001 71 26 0.100 0.089\n", "6 (0.358, 0.38] 0.000 71 28 0.100 0.096\n", "7 (0.342, 0.358] 0.000 71 30 0.100 0.103\n", "8 (0.326, 0.342] 0.000 71 29 0.100 0.100\n", "9 (-inf, 0.326] 0.000 71 31 0.100 0.107" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numpy as np\n", "points=None\n", "psi = model.psi(train_data, test_data, points=points)\n", "psi" ] } ], "metadata": { "celltoolbar": "编辑元数据", "kernelspec": { "display_name": "Python [conda env:analysis]", "language": "python", "name": "conda-env-analysis-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.13" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": { "height": "calc(100% - 180px)", "left": "10px", "top": "150px", "width": "372.364px" }, "toc_section_display": true, "toc_window_display": true }, "toc-autonumbering": false, "toc-showcode": false, "toc-showmarkdowntxt": false, "toc-showtags": false }, "nbformat": 4, "nbformat_minor": 5 }