|
@@ -16,10 +16,10 @@ import statsmodels.api as sm
|
|
|
|
|
|
from commom import f_df_to_image, f_display_images_by_side, GeneralException, f_display_title
|
|
|
from entitys import MetricFucResultEntity, DataSplitEntity, DataFeatureEntity
|
|
|
-from enums import ContextEnum, ResultCodesEnum
|
|
|
+from enums import ContextEnum, ResultCodesEnum, ConstantEnum
|
|
|
from init import context
|
|
|
from .model_base import ModelBase
|
|
|
-from .model_utils import f_stress_test, f_calcu_model_ks, f_get_model_score_bin, f_calcu_model_psi
|
|
|
+from .model_utils import f_stress_test, f_calcu_model_ks, f_get_model_score_bin, f_calcu_model_psi, f_add_rules
|
|
|
|
|
|
|
|
|
class ModelLr(ModelBase):
|
|
@@ -60,6 +60,11 @@ class ModelLr(ModelBase):
|
|
|
def score(self, x: pd.DataFrame, *args, **kwargs) -> np.array:
    """Score ``x`` with ``self.card`` and return the scores as an array.

    ``x`` is handed to ``sc.scorecard_ply`` together with ``self.card``
    (step printing disabled via ``print_step=0``); the ``"score"`` column of
    the result is converted to a NumPy array. Extra positional and keyword
    arguments are accepted but not used.
    """
    scored = sc.scorecard_ply(x, self.card, print_step=0)
    return np.array(scored["score"])
|
|
|
|
|
|
+ def score_rule(self, x: pd.DataFrame, *args, **kwargs) -> np.array:
|
|
|
+ x[ConstantEnum.SCORE.value] = self.score(x)
|
|
|
+ x = f_add_rules(x, self.ml_config.rules)
|
|
|
+ return np.array(x[ConstantEnum.SCORE.value])
|
|
|
+
|
|
|
def model_save(self):
|
|
|
if self.lr is None:
|
|
|
GeneralException(ResultCodesEnum.NOT_FOUND, message=f"模型不存在")
|
|
@@ -96,13 +101,69 @@ class ModelLr(ModelBase):
|
|
|
|
|
|
def train_report(self, data: DataSplitEntity, *args, **kwargs) -> Dict[str, MetricFucResultEntity]:
|
|
|
|
|
|
+ def _get_auc_ks(data_y, score, title):
|
|
|
+ perf = sc.perf_eva(data_y, score, title=title, show_plot=True)
|
|
|
+ path = self.ml_config.f_get_save_path(f"perf_{title}.png")
|
|
|
+ perf["pic"].savefig(path)
|
|
|
+ auc = perf["AUC"]
|
|
|
+ ks = perf["KS"]
|
|
|
+ return auc, ks, path
|
|
|
+
|
|
|
+ def _get_perf(perf_rule=False):
|
|
|
+ # 模型ks auc
|
|
|
+ img_path_auc_ks = []
|
|
|
+ suffix = ""
|
|
|
+ if perf_rule:
|
|
|
+ suffix = "-规则"
|
|
|
+ train_score = self.score_rule(train_data)
|
|
|
+ test_score = self.score_rule(test_data)
|
|
|
+ else:
|
|
|
+ train_score = self.score(train_data)
|
|
|
+ test_score = self.score(test_data)
|
|
|
+
|
|
|
+ train_auc, train_ks, path = _get_auc_ks(train_data[y_column], train_score, f"train{suffix}")
|
|
|
+ img_path_auc_ks.append(path)
|
|
|
+ test_auc, test_ks, path = _get_auc_ks(test_data[y_column], test_score, f"test{suffix}")
|
|
|
+ img_path_auc_ks.append(path)
|
|
|
+
|
|
|
+ df_auc_ks = pd.DataFrame()
|
|
|
+ df_auc_ks["样本集"] = ["训练集", "测试集"]
|
|
|
+ df_auc_ks["AUC"] = [train_auc, test_auc]
|
|
|
+ df_auc_ks["KS"] = [train_ks, test_ks]
|
|
|
+ metric_value_dict[f"模型结果{suffix}"] = MetricFucResultEntity(table=df_auc_ks, image_path=img_path_auc_ks,
|
|
|
+ image_size=5, table_font_size=10)
|
|
|
+
|
|
|
+ # 评分卡分箱
|
|
|
+ train_score_bin, score_bins = f_get_model_score_bin(train_data, train_score)
|
|
|
+ train_data_gain = f_calcu_model_ks(train_score_bin, y_column, sort_ascending=True)
|
|
|
+ img_path_train_gain = self.ml_config.f_get_save_path(f"train_gain{suffix}.png")
|
|
|
+ f_df_to_image(train_data_gain, img_path_train_gain)
|
|
|
+ metric_value_dict[f"训练集分数分箱{suffix}"] = MetricFucResultEntity(table=train_data_gain,
|
|
|
+ image_path=img_path_train_gain)
|
|
|
+
|
|
|
+ test_score_bin, _ = f_get_model_score_bin(test_data, test_score, score_bins)
|
|
|
+ test_data_gain = f_calcu_model_ks(test_score_bin, y_column, sort_ascending=True)
|
|
|
+ img_path_test_gain = self.ml_config.f_get_save_path(f"test_gain{suffix}.png")
|
|
|
+ f_df_to_image(test_data_gain, img_path_test_gain)
|
|
|
+ metric_value_dict[f"测试集分数分箱{suffix}"] = MetricFucResultEntity(table=test_data_gain,
|
|
|
+ image_path=img_path_test_gain)
|
|
|
+
|
|
|
+ # 模型分psi
|
|
|
+ model_psi = f_calcu_model_psi(train_score_bin, test_score_bin)
|
|
|
+ img_path_psi = self.ml_config.f_get_save_path(f"model_psi{suffix}.png")
|
|
|
+ f_df_to_image(model_psi, img_path_psi)
|
|
|
+ metric_value_dict[f"模型稳定性{suffix}"] = MetricFucResultEntity(table=model_psi,
|
|
|
+ value=model_psi["psi"].sum().round(3),
|
|
|
+ image_path=img_path_psi)
|
|
|
+ return train_score_bin, test_score_bin
|
|
|
+
|
|
|
y_column = self._ml_config.y_column
|
|
|
stress_test = self.ml_config.stress_test
|
|
|
stress_sample_times = self.ml_config.stress_sample_times
|
|
|
stress_bad_rate_list = self.ml_config.stress_bad_rate_list
|
|
|
|
|
|
- train_data = data.train_data.copy()
|
|
|
- test_data = data.test_data.copy()
|
|
|
+ train_data = data.train_data
|
|
|
+ test_data = data.test_data
|
|
|
|
|
|
metric_value_dict = {}
|
|
|
# 评分卡
|
|
@@ -120,55 +181,15 @@ class ModelLr(ModelBase):
|
|
|
f_df_to_image(df_coef, img_path_coef)
|
|
|
metric_value_dict["变量系数"] = MetricFucResultEntity(table=df_coef, image_path=img_path_coef)
|
|
|
|
|
|
- # 模型ks auc
|
|
|
- img_path_perf = []
|
|
|
- train_score = self.score(train_data)
|
|
|
- train_perf = sc.perf_eva(train_data[y_column], train_score, title="train", show_plot=True)
|
|
|
- path = self.ml_config.f_get_save_path(f"train_perf.png")
|
|
|
- train_perf["pic"].savefig(path)
|
|
|
- img_path_perf.append(path)
|
|
|
- train_auc = train_perf["AUC"]
|
|
|
- train_ks = train_perf["KS"]
|
|
|
-
|
|
|
- test_score = self.score(test_data)
|
|
|
- test_perf = sc.perf_eva(test_data[y_column], test_score, title="test", show_plot=True)
|
|
|
- path = self.ml_config.f_get_save_path(f"test_perf.png")
|
|
|
- test_perf["pic"].savefig(path)
|
|
|
- img_path_perf.append(path)
|
|
|
- test_auc = test_perf["AUC"]
|
|
|
- test_ks = test_perf["KS"]
|
|
|
-
|
|
|
- df_auc_ks = pd.DataFrame()
|
|
|
- df_auc_ks["样本集"] = ["训练集", "测试集"]
|
|
|
- df_auc_ks["AUC"] = [train_auc, test_auc]
|
|
|
- df_auc_ks["KS"] = [train_ks, test_ks]
|
|
|
- metric_value_dict["模型结果"] = MetricFucResultEntity(table=df_auc_ks, image_path=img_path_perf, image_size=5,
|
|
|
- table_font_size=10)
|
|
|
-
|
|
|
- # 评分卡分箱
|
|
|
- train_data, score_bins = f_get_model_score_bin(train_data, train_score)
|
|
|
- train_data_gain = f_calcu_model_ks(train_data, y_column, sort_ascending=True)
|
|
|
- img_path_train_gain = self.ml_config.f_get_save_path(f"train_gain.png")
|
|
|
- f_df_to_image(train_data_gain, img_path_train_gain)
|
|
|
- metric_value_dict["训练集分数分箱"] = MetricFucResultEntity(table=train_data_gain, image_path=img_path_train_gain)
|
|
|
-
|
|
|
- test_data, _ = f_get_model_score_bin(test_data, test_score, score_bins)
|
|
|
- test_data_gain = f_calcu_model_ks(test_data, y_column, sort_ascending=True)
|
|
|
- img_path_test_gain = self.ml_config.f_get_save_path(f"tes_gain.png")
|
|
|
- f_df_to_image(test_data_gain, img_path_test_gain)
|
|
|
- metric_value_dict["测试集分数分箱"] = MetricFucResultEntity(table=test_data_gain, image_path=img_path_test_gain)
|
|
|
-
|
|
|
- # 模型分psi
|
|
|
- model_psi = f_calcu_model_psi(train_data, test_data)
|
|
|
- img_path_psi = self.ml_config.f_get_save_path(f"model_psi.png")
|
|
|
- f_df_to_image(model_psi, img_path_psi)
|
|
|
- metric_value_dict["模型稳定性"] = MetricFucResultEntity(table=model_psi, value=model_psi["psi"].sum().round(3),
|
|
|
- image_path=img_path_psi)
|
|
|
+ _, test_score_bin = _get_perf()
|
|
|
+ if len(self.ml_config.rules) != 0:
|
|
|
+ _, test_score_bin = _get_perf(perf_rule=True)
|
|
|
|
|
|
# 压力测试
|
|
|
if stress_test:
|
|
|
- df_stress = f_stress_test(test_data, sample_times=stress_sample_times, bad_rate_list=stress_bad_rate_list,
|
|
|
- target_column=y_column, score_column="score")
|
|
|
+ df_stress = f_stress_test(test_score_bin, sample_times=stress_sample_times,
|
|
|
+ bad_rate_list=stress_bad_rate_list,
|
|
|
+ target_column=y_column, score_column=ConstantEnum.SCORE.value)
|
|
|
|
|
|
img_path_stress = self.ml_config.f_get_save_path(f"stress_test.png")
|
|
|
f_df_to_image(df_stress, img_path_stress)
|
|
@@ -181,11 +202,16 @@ class ModelLr(ModelBase):
|
|
|
|
|
|
def jupyter_print(self, metric_value_dict=Dict[str, MetricFucResultEntity], *args, **kwargs):
|
|
|
from IPython import display
|
|
|
-
|
|
|
+ suffix = "-规则"
|
|
|
f_display_title(display, "模型结果")
|
|
|
display.display(metric_value_dict["模型结果"].table)
|
|
|
f_display_images_by_side(display, metric_value_dict["模型结果"].image_path)
|
|
|
|
|
|
+ if len(self.ml_config.rules) != 0:
|
|
|
+ print("加入规则后:")
|
|
|
+ display.display(metric_value_dict[f"模型结果{suffix}"].table)
|
|
|
+ f_display_images_by_side(display, metric_value_dict[f"模型结果{suffix}"].image_path)
|
|
|
+
|
|
|
f_display_title(display, "模型变量系数")
|
|
|
print(self.lr.summary().tables[0])
|
|
|
display.display(metric_value_dict["变量系数"].table)
|
|
@@ -195,13 +221,30 @@ class ModelLr(ModelBase):
|
|
|
display.display(metric_value_dict["模型稳定性"].table)
|
|
|
print(f"模型psi: {metric_value_dict['模型稳定性'].value}")
|
|
|
|
|
|
+ if len(self.ml_config.rules) != 0:
|
|
|
+ print("加入规则后:")
|
|
|
+ display.display(metric_value_dict[f"模型稳定性{suffix}"].table)
|
|
|
+ print(f"模型psi: {metric_value_dict[f'模型稳定性{suffix}'].value}")
|
|
|
+
|
|
|
f_display_title(display, "分数分箱")
|
|
|
print("训练集-分数分箱")
|
|
|
display.display(metric_value_dict["训练集分数分箱"].table)
|
|
|
+ if len(self.ml_config.rules) != 0:
|
|
|
+ print("加入规则后:")
|
|
|
+ print(f"训练集-分数分箱")
|
|
|
+ display.display(metric_value_dict[f"训练集分数分箱{suffix}"].table)
|
|
|
+
|
|
|
print("测试集-分数分箱")
|
|
|
display.display(metric_value_dict["测试集分数分箱"].table)
|
|
|
+ if len(self.ml_config.rules) != 0:
|
|
|
+ print("加入规则后:")
|
|
|
+ print(f"测试集-分数分箱")
|
|
|
+ display.display(metric_value_dict[f"测试集分数分箱{suffix}"].table)
|
|
|
+
|
|
|
# 评分卡
|
|
|
f_display_title(display, "评分卡")
|
|
|
+ if len(self.ml_config.rules) != 0:
|
|
|
+ print(f"评分卡不包含规则")
|
|
|
display.display(metric_value_dict["评分卡"].table)
|
|
|
|
|
|
if "压力测试" in metric_value_dict.keys():
|