|
@@ -8,10 +8,10 @@ from typing import Dict
|
|
|
|
|
|
import pandas as pd
|
|
|
import scorecardpy as sc
|
|
|
-from matplotlib import pyplot as plt
|
|
|
from sklearn.linear_model import LogisticRegression
|
|
|
|
|
|
-from entitys import TrainConfigEntity, DataPreparedEntity, MetricFucEntity
|
|
|
+from entitys import TrainConfigEntity, DataPreparedEntity, MetricFucEntity, DataSplitEntity
|
|
|
+from feature import f_calcu_model_ks, f_get_model_score_bin, f_calcu_model_psi
|
|
|
from init import f_get_save_path
|
|
|
from .model_base import ModelBase
|
|
|
|
|
@@ -22,48 +22,86 @@ class ModelLr(ModelBase):
|
|
|
self.lr = LogisticRegression(penalty='l1', C=0.9, solver='saga', n_jobs=-1)
|
|
|
|
|
|
def train(self, data: DataPreparedEntity, *args, **kwargs) -> Dict[str, MetricFucEntity]:
    """Fit the logistic regression and collect the report metrics.

    Args:
        data: Prepared data set. ``data.train_data`` is required;
            ``data.test_data`` may be ``None``, in which case every
            test-set metric is skipped or reported as ``"-"``.
        **kwargs: Two entries are required:
            ``bins`` - binning information forwarded to ``sc.scorecard``.
            ``data_split_original`` - a ``DataSplitEntity`` holding the
            train/test data before WOE encoding; it is used for the
            score-bin and PSI tables.

    Returns:
        A dict mapping report section names to ``MetricFucEntity`` objects:
        "评分卡", "变量系数", "模型结果", "训练集分数分箱" and, when test data is
        available, "测试集分数分箱" and "模型稳定性".

    Raises:
        KeyError: If ``bins`` or ``data_split_original`` is missing from
            ``kwargs``.
    """
    bins = kwargs["bins"]
    data_split_original: DataSplitEntity = kwargs["data_split_original"]

    # Data before WOE encoding.
    train_data_original = data_split_original.train_data
    test_data_original = data_split_original.test_data

    train_data = data.train_data
    train_y = train_data.get_Ydata()
    y_column = train_data.y_column

    test_data = data.test_data

    self.lr.fit(train_data.get_Xdata(), train_y)

    metric_value_dict = {}

    # Scorecard: stack every per-variable table into one frame, seeded with
    # the column layout of the base-points table.
    card: Dict = sc.scorecard(bins, self.lr, train_data.x_columns, points0=600,
                              odds0=train_data.get_odds0(), pdo=50)
    card_df = pd.concat([pd.DataFrame(columns=card['basepoints'].keys()), *card.values()])
    metric_value_dict["评分卡"] = MetricFucEntity(table=card_df, table_font_size=12)

    # Model coefficients, one row per input variable.
    coef = dict(zip(train_data.x_columns, self.lr.coef_.reshape(-1)))
    coef_df = pd.DataFrame({
        '变量': list(coef.keys()),
        '变量系数': list(coef.values()),
    })
    metric_value_dict["变量系数"] = MetricFucEntity(table=coef_df, table_font_size=12)

    # Model KS / AUC on the training set.
    train_prob = self.lr.predict_proba(train_data.get_Xdata())[:, 1]
    image_path_list = []
    train_perf = sc.perf_eva(train_y, train_prob, title="train", show_plot=True)
    path = f_get_save_path("train_perf.png")
    train_perf["pic"].savefig(path)
    image_path_list.append(path)

    # Read each metric from its own key (they were previously swapped, so
    # the report showed KS in the AUC column and vice versa).
    train_auc = train_perf["AUC"]
    train_ks = train_perf["KS"]

    # "-" is the placeholder shown in the table when there is no test set.
    test_auc = "-"
    test_ks = "-"
    if test_data is not None:
        test_prob = self.lr.predict_proba(test_data.get_Xdata())[:, 1]
        test_y = test_data.get_Ydata()
        test_perf = sc.perf_eva(test_y, test_prob, title="test", show_plot=True)
        path = f_get_save_path("test_perf.png")
        test_perf["pic"].savefig(path)
        image_path_list.append(path)
        test_auc = test_perf["AUC"]
        test_ks = test_perf["KS"]

    df_auc = pd.DataFrame()
    df_auc["样本集"] = ["训练集", "测试集"]
    df_auc["AUC"] = [train_auc, test_auc]
    df_auc["KS"] = [train_ks, test_ks]
    metric_value_dict["模型结果"] = MetricFucEntity(table=df_auc, image_path=image_path_list, image_size=5,
                                                table_font_size=12)

    # Score bins: the bins derived from the training set are reused for the
    # test set so both tables share the same bin edges.
    train_data_original, score_bins = f_get_model_score_bin(train_data_original, card)
    train_data_gain = f_calcu_model_ks(train_data_original, y_column, sort_ascending=True)
    metric_value_dict["训练集分数分箱"] = MetricFucEntity(table=train_data_gain, table_font_size=12)
    if test_data is not None:
        # The second return value is not needed here; do not rebind `bins`.
        test_data_original, _ = f_get_model_score_bin(test_data_original, card, score_bins)
        test_data_gain = f_calcu_model_ks(test_data_original, y_column, sort_ascending=True)
        metric_value_dict["测试集分数分箱"] = MetricFucEntity(table=test_data_gain,
                                                       table_font_size=12)

        # Model score PSI between the train and test sets.
        # NOTE(review): kept under the test-data guard because PSI needs the
        # score-binned test set - confirm this matches the intended nesting.
        model_psi = f_calcu_model_psi(train_data_original, test_data_original)
        metric_value_dict["模型稳定性"] = MetricFucEntity(value=model_psi["psi"].sum().round(4), table=model_psi,
                                                     table_font_size=12)
    return metric_value_dict
|
|
|
|
|
|
def predict_prob(self, x: pd.DataFrame, *args, **kwargs):
    """Return column 1 of the fitted model's ``predict_proba`` output for ``x``.

    Extra positional and keyword arguments are accepted but not used.
    """
    class_probabilities = self.lr.predict_proba(x)
    return class_probabilities[:, 1]
|
|
|
|
|
|
- def predict(self, x: pd.DataFrame, *args, **kwargs):
|
|
|
- return self.lr.predict(x)
|
|
|
-
|
|
|
def export_model_file(self):
    """Placeholder for exporting the model to a file; currently does nothing."""
    return None
|
|
|
|