Răsfoiți Sursa

add: 指定优化参数

yq 1 lună în urmă
părinte
comite
0787e44c2c
1 fișier modificat, cu 33 adăugiri și 26 ștergeri
  1. 33 26
      online_learning/trainer.py

+ 33 - 26
online_learning/trainer.py

@@ -85,10 +85,14 @@ class OnlineLearningTrainer:
         data_woe[f"{ConstantEnum.INTERCEPT.value}_woe"] = [1] * len(data_woe)
         return data_woe[self._columns_woe].to_numpy()
 
-    def _f_get_best_model(self, df_param: pd.DataFrame) -> LR:
-        df_param_sort = df_param.sort_values(by=["ks_test", "auc_test"], ascending=[False, False])
-        print(f"最佳参数:\n{df_param_sort.iloc[0].to_dict()}")
-        weight = list(df_param_sort.iloc[0])
+    def _f_get_best_model(self, df_param: pd.DataFrame, epoch: int = None) -> LR:
+        if epoch is None:
+            df_param_sort = df_param.sort_values(by=["ks_test", "auc_test"], ascending=[False, False])
+            print(f"选择最佳参数:\n{df_param_sort.iloc[0].to_dict()}")
+            weight = list(df_param_sort.iloc[0])
+        else:
+            print(f"选择epoch:【{epoch}】的参数:\n{df_param[df_param['epoch'] == epoch].iloc[0].to_dict()}")
+            weight = list(df_param[df_param["epoch"] == epoch].iloc[0])
         weight = nn.Parameter(torch.tensor(np.array(weight[0:-5])))
         return LR(weight)
 
@@ -218,6 +222,19 @@ class OnlineLearningTrainer:
         return model_psi
 
     def train(self, ):
+        def _get_param_optimized(model: LR, epoch):
+            model.eval()
+            with torch.no_grad():
+                y_prob = model(test_x)
+                loss = criterion(y_prob, torch.tensor(test_y.to_numpy(), dtype=torch.float64))
+                loss_test = loss.detach().item()
+                y_prob = y_prob.detach().numpy()
+                perf = sc.perf_eva(test_y, y_prob, show_plot=False)
+                auc = perf["AUC"]
+                ks = perf["KS"]
+                row = model.linear.weight.tolist() + [auc, ks, epoch + 1, loss_train, loss_test]
+                return dict(zip(df_param_columns, row))
+
         epochs = self._ol_config.epochs
         batch_size = self._ol_config.batch_size
         train_data = self._data.train_data
@@ -232,10 +249,11 @@ class OnlineLearningTrainer:
 
         df_param_columns = self._columns + ["auc_test", "ks_test", "epoch", "loss_train", "loss_test"]
         df_param = pd.DataFrame(columns=df_param_columns)
-
+        # 优化前
+        loss_train = 0
+        df_param.loc[len(df_param)] = _get_param_optimized(self._model_original, -1)
         for epoch in tqdm(range(epochs)):
             data_len = len(train_x)
-            loss_train = 0
             for i in range(math.ceil(data_len / batch_size)):
                 train_x_batch = torch.tensor(train_x[i * batch_size:(i + 1) * batch_size], dtype=torch.float64)
                 train_y_batch = torch.tensor(train_y[i * batch_size:(i + 1) * batch_size], dtype=torch.float64)
@@ -247,20 +265,7 @@ class OnlineLearningTrainer:
                 optimizer.step()
                 loss_train = loss.detach().item()
             # 测试集评估
-            self._model_optimized.eval()
-            with torch.no_grad():
-                y_prob = self._model_optimized(test_x)
-                loss = criterion(y_prob, torch.tensor(test_y.to_numpy(), dtype=torch.float64))
-                loss_test = loss.detach().item()
-                y_prob = y_prob.detach().numpy()
-                perf = sc.perf_eva(test_y, y_prob, show_plot=False)
-                auc = perf["AUC"]
-                ks = perf["KS"]
-                row = self._model_optimized.linear.weight.tolist() + [auc, ks, epoch + 1, loss_train, loss_test]
-                df_param.loc[len(df_param)] = dict(zip(df_param_columns, row))
-                # print(f"epoch:{epoch + 1} auc:{auc} ks:{ks}")
-
-        self._model_optimized = self._f_get_best_model(df_param)
+            df_param.loc[len(df_param)] = _get_param_optimized(self._model_optimized, epoch)
 
         context.set(ContextEnum.PARAM_OPTIMIZED, df_param)
 
@@ -290,7 +295,14 @@ class OnlineLearningTrainer:
         ol_config._path_resources = path
         return OnlineLearningTrainer(ol_config=ol_config)
 
-    def report(self, ):
+    def report(self, epoch: int = None):
+        df_param = context.get(ContextEnum.PARAM_OPTIMIZED)
+        self._model_optimized = self._f_get_best_model(df_param, epoch)
+
+        if self._ol_config.jupyter_print:
+            from IPython import display
+            f_display_title(display, "模型系数优化过程")
+            display.display(df_param)
 
         metric_value_dict = {}
         # 样本分布
@@ -325,8 +337,6 @@ class OnlineLearningTrainer:
     def jupyter_print(self, metric_value_dict=Dict[str, MetricFucResultEntity]):
         from IPython import display
 
-        df_param = context.get(ContextEnum.PARAM_OPTIMIZED)
-
         f_display_title(display, "样本分布")
         display.display(metric_value_dict["样本分布"].table)
 
@@ -356,9 +366,6 @@ class OnlineLearningTrainer:
             f_display_title(display, "压力测试")
             display.display(metric_value_dict["压力测试"].table)
 
-        f_display_title(display, "系数优化过程")
-        display.display(df_param)
-
 
 if __name__ == "__main__":
     pass