|
@@ -23,9 +23,9 @@ from tqdm import tqdm
|
|
|
from commom import GeneralException, f_image_crop_white_borders, f_df_to_image, f_display_title, \
|
|
|
f_display_images_by_side
|
|
|
from entitys import DataSplitEntity, OnlineLearningConfigEntity, MetricFucResultEntity
|
|
|
-from enums import ResultCodesEnum, ConstantEnum, ContextEnum, FileEnum
|
|
|
+from enums import ResultCodesEnum, ConstantEnum, FileEnum
|
|
|
from feature import f_woebin_load
|
|
|
-from init import init, context
|
|
|
+from init import init
|
|
|
from model import f_get_model_score_bin, f_calcu_model_ks, f_stress_test, f_calcu_model_psi
|
|
|
from monitor import ReportWord
|
|
|
from .utils import LR
|
|
@@ -102,7 +102,7 @@ class OnlineLearningTrainerLr:
|
|
|
else:
|
|
|
print(f"选择epoch:【{epoch}】的参数:\n{df_param[df_param['epoch'] == epoch].iloc[0].to_dict()}")
|
|
|
weight = list(df_param[df_param["epoch"] == epoch].iloc[0])
|
|
|
- weight = nn.Parameter(torch.tensor(np.array(weight[0:-5])))
|
|
|
+ weight = nn.Parameter(torch.tensor(np.array(weight[0:-6])))
|
|
|
return LR(weight)
|
|
|
|
|
|
def _f_get_scorecard(self, ):
|
|
@@ -227,13 +227,14 @@ class OnlineLearningTrainerLr:
|
|
|
def score(self, x: pd.DataFrame) -> np.array:
    """Return the scorecard score of every row in ``x`` as a numpy array.

    The scorecard stored in ``self.card`` is applied to ``x`` through
    ``sc.scorecard_ply`` and the ``"score"`` column of the result is
    converted with ``np.array``.

    :param x: data to be scored.
    :return: array built from the ``"score"`` column of the scored data.
    """
    # print_step=0 presumably silences the per-variable progress output
    # of scorecard_ply -- confirm against the scorecardpy documentation.
    scored = sc.scorecard_ply(x, self.card, print_step=0)
    score_column = scored["score"]
    return np.array(score_column)
|
|
|
|
|
|
def psi(self, x1: pd.DataFrame, x2: pd.DataFrame, points: List[float] = None, print_sum=True) -> pd.DataFrame:
    """Build the model-score PSI table between two datasets.

    Both datasets are passed through ``self.prob``; ``x1`` is binned with
    ``f_get_model_score_bin`` using ``points``, and the bins returned for
    ``x1`` are reused to bin ``x2``. The two binned results are then
    handed to ``f_calcu_model_psi``.

    :param x1: first dataset; its bins are the ones applied to ``x2``.
    :param x2: second dataset, binned with the bins obtained from ``x1``.
    :param points: forwarded to ``f_get_model_score_bin`` for ``x1``
        (presumably explicit cut points; ``None`` by default -- confirm
        with that helper).
    :param print_sum: when true, print the sum of the ``"psi"`` column.
    :return: the table returned by ``f_calcu_model_psi``.
    """
    prob_first = self.prob(x1)
    prob_second = self.prob(x2)

    # The bins come from x1 and are reused for x2 so that both
    # datasets are split on the same boundaries.
    binned_first, bins_from_first = f_get_model_score_bin(x1, prob_first, points)
    binned_second, _ = f_get_model_score_bin(x2, prob_second, bins_from_first)

    psi_table = f_calcu_model_psi(binned_first, binned_second, sort_ascending=False)

    # Guard clause: skip the console output when the caller opted out.
    if not print_sum:
        return psi_table

    print(f"模型psi: {psi_table['psi'].sum()}")
    return psi_table
|
|
|
|
|
|
def train(self, ):
|
|
@@ -247,7 +248,8 @@ class OnlineLearningTrainerLr:
|
|
|
perf = sc.perf_eva(test_y, y_prob, show_plot=False)
|
|
|
auc = perf["AUC"]
|
|
|
ks = perf["KS"]
|
|
|
- row = model.linear.weight.tolist() + [auc, ks, epoch + 1, loss_train, loss_test]
|
|
|
+ psi = round(self.psi(train_data, test_data, print_sum=False)['psi'].sum(), 3)
|
|
|
+ row = model.linear.weight.tolist() + [auc, ks, psi, epoch + 1, loss_train, loss_test]
|
|
|
return dict(zip(df_param_columns, row))
|
|
|
|
|
|
epochs = self._ol_config.epochs
|
|
@@ -262,9 +264,9 @@ class OnlineLearningTrainerLr:
|
|
|
criterion = nn.BCELoss()
|
|
|
optimizer = optim.Adam(self._model_optimized.parameters(), lr=self._ol_config.lr)
|
|
|
|
|
|
- df_param_columns = self._columns + ["auc_test", "ks_test", "epoch", "loss_train", "loss_test"]
|
|
|
+ df_param_columns = self._columns + ["auc_test", "ks_test", "psi", "epoch", "loss_train", "loss_test"]
|
|
|
self._df_param_optimized = pd.DataFrame(columns=df_param_columns)
|
|
|
-
|
|
|
+
|
|
|
# 优化前
|
|
|
loss_train = 0
|
|
|
self._df_param_optimized.loc[len(self._df_param_optimized)] = _get_param_optimized(self._model_original, -1)
|
|
@@ -281,7 +283,8 @@ class OnlineLearningTrainerLr:
|
|
|
optimizer.step()
|
|
|
loss_train = loss.detach().item()
|
|
|
# 测试集评估
|
|
|
- self._df_param_optimized.loc[len(self._df_param_optimized)] = _get_param_optimized(self._model_optimized, epoch)
|
|
|
+ self._df_param_optimized.loc[len(self._df_param_optimized)] = _get_param_optimized(self._model_optimized,
|
|
|
+ epoch)
|
|
|
|
|
|
def save(self):
|
|
|
|
|
@@ -316,6 +319,10 @@ class OnlineLearningTrainerLr:
|
|
|
return OnlineLearningTrainerLr(ol_config=ol_config)
|
|
|
|
|
|
def report(self, epoch: int = None):
|
|
|
+
|
|
|
+ train_data = self._data.train_data
|
|
|
+ test_data = self._data.test_data
|
|
|
+
|
|
|
self._model_optimized = self._f_get_best_model(self._df_param_optimized, epoch)
|
|
|
|
|
|
if self._ol_config.jupyter_print:
|
|
@@ -347,6 +354,14 @@ class OnlineLearningTrainerLr:
|
|
|
# 模型系数对比
|
|
|
metric_value_dict["模型系数"] = self._f_get_metric_coef()
|
|
|
|
|
|
+ # 模型分psi
|
|
|
+ model_psi = self.psi(train_data, test_data, print_sum=False)
|
|
|
+ img_path_psi = self._ol_config.f_get_save_path(f"model_psi.png")
|
|
|
+ f_df_to_image(model_psi, img_path_psi)
|
|
|
+ metric_value_dict[f"模型稳定性"] = MetricFucResultEntity(table=model_psi,
|
|
|
+ value=model_psi["psi"].sum().round(3),
|
|
|
+ image_path=img_path_psi)
|
|
|
+
|
|
|
# 分数分箱
|
|
|
metric_value_dict["分数分箱-建模数据-新模型"] = self._f_get_metric_gain("新模型")
|
|
|
metric_value_dict["分数分箱-建模数据-原模型"] = self._f_get_metric_gain("原模型")
|
|
@@ -379,6 +394,11 @@ class OnlineLearningTrainerLr:
|
|
|
f_display_title(display, "模型系数")
|
|
|
display.display(metric_value_dict["模型系数"].table)
|
|
|
|
|
|
+ # 模型psi
|
|
|
+ f_display_title(display, "模型psi")
|
|
|
+ display.display(metric_value_dict["模型稳定性"].table)
|
|
|
+ print(f"模型psi: {metric_value_dict['模型稳定性'].value}")
|
|
|
+
|
|
|
f_display_title(display, "分数分箱")
|
|
|
print(f"建模数据上分数分箱")
|
|
|
print(f"原模型")
|