|
@@ -85,10 +85,14 @@ class OnlineLearningTrainer:
|
|
|
data_woe[f"{ConstantEnum.INTERCEPT.value}_woe"] = [1] * len(data_woe)
|
|
|
return data_woe[self._columns_woe].to_numpy()
|
|
|
|
|
|
- def _f_get_best_model(self, df_param: pd.DataFrame) -> LR:
|
|
|
- df_param_sort = df_param.sort_values(by=["ks_test", "auc_test"], ascending=[False, False])
|
|
|
- print(f"最佳参数:\n{df_param_sort.iloc[0].to_dict()}")
|
|
|
- weight = list(df_param_sort.iloc[0])
|
|
|
+ def _f_get_best_model(self, df_param: pd.DataFrame, epoch: int = None) -> LR:
|
|
|
+ if epoch is None:
|
|
|
+ df_param_sort = df_param.sort_values(by=["ks_test", "auc_test"], ascending=[False, False])
|
|
|
+ print(f"选择最佳参数:\n{df_param_sort.iloc[0].to_dict()}")
|
|
|
+ weight = list(df_param_sort.iloc[0])
|
|
|
+ else:
|
|
|
+ print(f"选择epoch:【{epoch}】的参数:\n{df_param[df_param['epoch'] == epoch].iloc[0].to_dict()}")
|
|
|
+ weight = list(df_param[df_param["epoch"] == epoch].iloc[0])
|
|
|
weight = nn.Parameter(torch.tensor(np.array(weight[0:-5])))
|
|
|
return LR(weight)
|
|
|
|
|
@@ -218,6 +222,19 @@ class OnlineLearningTrainer:
|
|
|
return model_psi
|
|
|
|
|
|
def train(self, ):
|
|
|
+ def _get_param_optimized(model: LR, epoch):
|
|
|
+ model.eval()
|
|
|
+ with torch.no_grad():
|
|
|
+ y_prob = model(test_x)
|
|
|
+ loss = criterion(y_prob, torch.tensor(test_y.to_numpy(), dtype=torch.float64))
|
|
|
+ loss_test = loss.detach().item()
|
|
|
+ y_prob = y_prob.detach().numpy()
|
|
|
+ perf = sc.perf_eva(test_y, y_prob, show_plot=False)
|
|
|
+ auc = perf["AUC"]
|
|
|
+ ks = perf["KS"]
|
|
|
+ row = model.linear.weight.tolist() + [auc, ks, epoch + 1, loss_train, loss_test]
|
|
|
+ return dict(zip(df_param_columns, row))
|
|
|
+
|
|
|
epochs = self._ol_config.epochs
|
|
|
batch_size = self._ol_config.batch_size
|
|
|
train_data = self._data.train_data
|
|
@@ -232,10 +249,11 @@ class OnlineLearningTrainer:
|
|
|
|
|
|
df_param_columns = self._columns + ["auc_test", "ks_test", "epoch", "loss_train", "loss_test"]
|
|
|
df_param = pd.DataFrame(columns=df_param_columns)
|
|
|
-
|
|
|
+ # 优化前
|
|
|
+ loss_train = 0
|
|
|
+ df_param.loc[len(df_param)] = _get_param_optimized(self._model_original, -1)
|
|
|
for epoch in tqdm(range(epochs)):
|
|
|
data_len = len(train_x)
|
|
|
- loss_train = 0
|
|
|
for i in range(math.ceil(data_len / batch_size)):
|
|
|
train_x_batch = torch.tensor(train_x[i * batch_size:(i + 1) * batch_size], dtype=torch.float64)
|
|
|
train_y_batch = torch.tensor(train_y[i * batch_size:(i + 1) * batch_size], dtype=torch.float64)
|
|
@@ -247,20 +265,7 @@ class OnlineLearningTrainer:
|
|
|
optimizer.step()
|
|
|
loss_train = loss.detach().item()
|
|
|
# 测试集评估
|
|
|
- self._model_optimized.eval()
|
|
|
- with torch.no_grad():
|
|
|
- y_prob = self._model_optimized(test_x)
|
|
|
- loss = criterion(y_prob, torch.tensor(test_y.to_numpy(), dtype=torch.float64))
|
|
|
- loss_test = loss.detach().item()
|
|
|
- y_prob = y_prob.detach().numpy()
|
|
|
- perf = sc.perf_eva(test_y, y_prob, show_plot=False)
|
|
|
- auc = perf["AUC"]
|
|
|
- ks = perf["KS"]
|
|
|
- row = self._model_optimized.linear.weight.tolist() + [auc, ks, epoch + 1, loss_train, loss_test]
|
|
|
- df_param.loc[len(df_param)] = dict(zip(df_param_columns, row))
|
|
|
- # print(f"epoch:{epoch + 1} auc:{auc} ks:{ks}")
|
|
|
-
|
|
|
- self._model_optimized = self._f_get_best_model(df_param)
|
|
|
+ df_param.loc[len(df_param)] = _get_param_optimized(self._model_optimized, epoch)
|
|
|
|
|
|
context.set(ContextEnum.PARAM_OPTIMIZED, df_param)
|
|
|
|
|
@@ -290,7 +295,14 @@ class OnlineLearningTrainer:
|
|
|
ol_config._path_resources = path
|
|
|
return OnlineLearningTrainer(ol_config=ol_config)
|
|
|
|
|
|
- def report(self, ):
|
|
|
+ def report(self, epoch: int = None):
|
|
|
+ df_param = context.get(ContextEnum.PARAM_OPTIMIZED)
|
|
|
+ self._model_optimized = self._f_get_best_model(df_param, epoch)
|
|
|
+
|
|
|
+ if self._ol_config.jupyter_print:
|
|
|
+ from IPython import display
|
|
|
+ f_display_title(display, "模型系数优化过程")
|
|
|
+ display.display(df_param)
|
|
|
|
|
|
metric_value_dict = {}
|
|
|
# 样本分布
|
|
@@ -325,8 +337,6 @@ class OnlineLearningTrainer:
|
|
|
def jupyter_print(self, metric_value_dict=Dict[str, MetricFucResultEntity]):
|
|
|
from IPython import display
|
|
|
|
|
|
- df_param = context.get(ContextEnum.PARAM_OPTIMIZED)
|
|
|
-
|
|
|
f_display_title(display, "样本分布")
|
|
|
display.display(metric_value_dict["样本分布"].table)
|
|
|
|
|
@@ -356,9 +366,6 @@ class OnlineLearningTrainer:
|
|
|
f_display_title(display, "压力测试")
|
|
|
display.display(metric_value_dict["压力测试"].table)
|
|
|
|
|
|
- f_display_title(display, "系数优化过程")
|
|
|
- display.display(df_param)
|
|
|
-
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
pass
|