Ver código fonte

modify: 代码优化

yq 1 mês atrás
pai
commit
d20ac05be3
1 arquivo alterado com 21 adições e 43 exclusões
  1. 21 43
      feature/woe/strategy_woe.py

+ 21 - 43
feature/woe/strategy_woe.py

@@ -61,6 +61,25 @@ class StrategyWoe(FeatureStrategyBase):
             imgs_path.append(path)
         return imgs_path
 
+    def _f_best_bins_print(self, display, data: DataSplitEntity, column: str, homo_bin_info: HomologousBinInfo):
+        print(f"-----【{column}】不同分箱数下变量的推荐切分点-----")
+        imgs_path_trend_train = []
+        imgs_path_trend_test = []
+        bins_info = homo_bin_info.get_best_bins()
+        for bin_info in bins_info:
+            print(json.dumps(bin_info.points, ensure_ascii=False, cls=NumpyEncoder))
+            breaks_list = [str(i) for i in bin_info.points]
+            sc_woebin_train = self._f_get_sc_woebin(data.train_data, {column: bin_info})
+            image_path = self._f_get_img_trend(sc_woebin_train, [column],
+                                               f"train_{column}_{'_'.join(breaks_list)}")
+            imgs_path_trend_train.append(image_path[0])
+            sc_woebin_test = self._f_get_sc_woebin(data.test_data, {column: bin_info})
+            image_path = self._f_get_img_trend(sc_woebin_test, [column],
+                                               f"test_{column}_{'_'.join(breaks_list)}")
+            imgs_path_trend_test.append(image_path[0])
+        f_display_images_by_side(display, imgs_path_trend_train, title=f"训练集",
+                                 image_path_list2=imgs_path_trend_test, title2="测试集")
+
     def _f_get_sc_woebin(self, data: pd.DataFrame, bin_info_dict: Dict[str, BinInfo]) -> Dict[str, pd.DataFrame]:
         y_column = self.ml_config.y_column
         special_values = self.ml_config.special_values
@@ -444,36 +463,15 @@ class StrategyWoe(FeatureStrategyBase):
         from IPython import display
 
         if is_numeric_dtype(data.train_data[column]):
-            train_data = data.train_data
-            test_data = data.test_data
             format_bin_mlcfg = self.ml_config.format_bin
             if format_bin is not None:
                 self.ml_config._format_bin = format_bin
             homo_bin_info_numeric: HomologousBinInfo = self._handle_numeric(data, column)
-
-            bins_info = homo_bin_info_numeric.get_best_bins()
-            print(f"-----【{column}】不同分箱数下变量的推荐切分点-----")
-            imgs_path_trend_train = []
-            imgs_path_trend_test = []
-            for bin_info in bins_info:
-                print(json.dumps(bin_info.points, ensure_ascii=False, cls=NumpyEncoder))
-                breaks_list = [str(i) for i in bin_info.points]
-                sc_woebin_train = self._f_get_sc_woebin(train_data, {column: bin_info})
-                image_path = self._f_get_img_trend(sc_woebin_train, [column],
-                                                   f"train_{column}_{'_'.join(breaks_list)}")
-                imgs_path_trend_train.append(image_path[0])
-                sc_woebin_test = self._f_get_sc_woebin(test_data, {column: bin_info})
-                image_path = self._f_get_img_trend(sc_woebin_test, [column],
-                                                   f"test_{column}_{'_'.join(breaks_list)}")
-                imgs_path_trend_test.append(image_path[0])
-            f_display_images_by_side(display, imgs_path_trend_train, title=f"训练集",
-                                     image_path_list2=imgs_path_trend_test, title2="测试集")
+            self._f_best_bins_print(display, data, column, homo_bin_info_numeric)
             self.ml_config._format_bin = format_bin_mlcfg
-
         else:
             print("只能针对数值型变量进行分析。")
 
-
     def feature_save(self, *args, **kwargs):
         if self.sc_woebin is None:
             GeneralException(ResultCodesEnum.NOT_FOUND, message=f"feature不存在")
@@ -564,29 +562,12 @@ class StrategyWoe(FeatureStrategyBase):
         def detail_print(detail):
             if isinstance(detail, str):
                 detail = [detail]
-
             if isinstance(detail, list):
                 for column in detail:
                     homo_bin_info_numeric = homo_bin_info_numeric_set.get(column)
                     if homo_bin_info_numeric is None:
                         continue
-                    bins_info = homo_bin_info_numeric.get_best_bins()
-                    print(f"-----【{column}】不同分箱数下变量的推荐切分点-----")
-                    imgs_path_trend_train = []
-                    imgs_path_trend_test = []
-                    for bin_info in bins_info:
-                        print(json.dumps(bin_info.points, ensure_ascii=False, cls=NumpyEncoder))
-                        breaks_list = [str(i) for i in bin_info.points]
-                        sc_woebin_train = self._f_get_sc_woebin(train_data, {column: bin_info})
-                        image_path = self._f_get_img_trend(sc_woebin_train, [column],
-                                                           f"train_{column}_{'_'.join(breaks_list)}")
-                        imgs_path_trend_train.append(image_path[0])
-                        sc_woebin_test = self._f_get_sc_woebin(test_data, {column: bin_info})
-                        image_path = self._f_get_img_trend(sc_woebin_test, [column],
-                                                           f"test_{column}_{'_'.join(breaks_list)}")
-                        imgs_path_trend_test.append(image_path[0])
-                    f_display_images_by_side(display, imgs_path_trend_train, title=f"训练集",
-                                             image_path_list2=imgs_path_trend_test, title2="测试集")
+                    self._f_best_bins_print(display, data, column, homo_bin_info_numeric)
             if isinstance(detail, dict):
                 for column, challenger_columns in detail.items():
                     print(f"-----相关性筛选保留的【{column}】-----")
@@ -601,9 +582,6 @@ class StrategyWoe(FeatureStrategyBase):
             if detail is not None and self.ml_config.bin_detail_print:
                 detail_print(detail)
 
-        train_data = data.train_data
-        test_data = data.test_data
-
         bin_info_filtered: Dict[str, BinInfo] = context.get(ContextEnum.BIN_INFO_FILTERED)
         homo_bin_info_numeric_set: Dict[str, HomologousBinInfo] = context.get(ContextEnum.HOMO_BIN_INFO_NUMERIC_SET)
         filter_fast = context.get(ContextEnum.FILTER_FAST)