Selaa lähdekoodia

modify: jupyter输出优化

yq 3 kuukautta sitten
vanhempi
sitoutus
811e7e84a0
2 muutettua tiedostoa jossa 24 lisäystä ja 10 poistoa
  1. 18 10
      feature/strategy_iv.py
  2. 6 0
      init/__init__.py

+ 18 - 10
feature/strategy_iv.py

@@ -4,7 +4,6 @@
 @time: 2024/1/2
 @desc: iv值及单调性筛选类
 """
-import time
 from itertools import combinations_with_replacement
 from typing import List, Dict
 
@@ -68,7 +67,7 @@ class StrategyIv(FilterStrategyBase):
         for column, candidate in candidate_dict.items():
             breaks_list[column] = candidate.breaks_list
         bins = sc.woebin(data[x_columns_candidate + [y_column]], y=y_column, breaks_list=breaks_list,
-                         special_values=special_values)
+                         special_values=special_values, print_info=False)
         return bins
 
     def _f_corr_filter(self, data: DataSplitEntity, candidate_dict: Dict[str, CandidateFeatureEntity]) -> List[str]:
@@ -78,7 +77,7 @@ class StrategyIv(FilterStrategyBase):
         x_columns_candidate = list(candidate_dict.keys())
 
         bins = self._f_get_bins_by_breaks(train_data, candidate_dict)
-        train_woe = sc.woebin_ply(train_data[x_columns_candidate], bins)
+        train_woe = sc.woebin_ply(train_data[x_columns_candidate], bins, print_info=False)
         corr_df = f_get_corr(train_woe)
         corr_dict = corr_df.to_dict()
         for column, corr in corr_dict.items():
@@ -114,14 +113,14 @@ class StrategyIv(FilterStrategyBase):
             x_columns_candidate.remove(y_column)
 
         bins_train = sc.woebin(train_data[x_columns_candidate + [y_column]], y=y_column, bin_num_limit=5,
-                               special_values=special_values, breaks_list=breaks_list)
+                               special_values=special_values, breaks_list=breaks_list, print_info=False)
 
         for column, bin in bins_train.items():
             breaks_list[column] = list(bin['breaks'])
         bins_test = None
         if test_data is not None and len(test_data) != 0:
             bins_test = sc.woebin(test_data[x_columns_candidate + [y_column]], y=y_column,
-                                  special_values=special_values, breaks_list=breaks_list)
+                                  special_values=special_values, breaks_list=breaks_list, print_info=False)
         bins_iv_dict = {}
         for column, bin_train in bins_train.items():
             train_iv = bin_train['total_iv'][0]
@@ -334,19 +333,19 @@ class StrategyIv(FilterStrategyBase):
         x_columns_candidate = list(candidate_dict.keys())
         bins = self._f_get_bins_by_breaks(train_data, candidate_dict)
 
-        train_woe = sc.woebin_ply(train_data[x_columns_candidate], bins)
+        train_woe = sc.woebin_ply(train_data[x_columns_candidate], bins, print_info=False)
         train_data_feature = DataFeatureEntity(pd.concat((train_woe, train_data[y_column]), axis=1),
                                                train_woe.columns.tolist(), y_column)
 
         val_data_feature = None
         if val_data is not None and len(val_data) != 0:
-            val_woe = sc.woebin_ply(val_data[x_columns_candidate], bins)
+            val_woe = sc.woebin_ply(val_data[x_columns_candidate], bins, print_info=False)
             val_data_feature = DataFeatureEntity(pd.concat((val_woe, val_data[y_column]), axis=1),
                                                  train_woe.columns.tolist(), y_column)
 
         test_data_feature = None
         if test_data is not None and len(test_data) != 0:
-            test_woe = sc.woebin_ply(test_data[x_columns_candidate], bins)
+            test_woe = sc.woebin_ply(test_data[x_columns_candidate], bins, print_info=False)
             test_data_feature = DataFeatureEntity(pd.concat((test_woe, test_data[y_column]), axis=1),
                                                   train_woe.columns.tolist(), y_column)
         return DataPreparedEntity(train_data_feature, val_data_feature, test_data_feature, bins=bins,
@@ -386,21 +385,30 @@ class StrategyIv(FilterStrategyBase):
         image_path_list = self._f_save_var_trend(train_bins, x_columns_candidate, "train")
         metric_value_dict["变量趋势-训练集"] = MetricFucEntity(image_path=image_path_list, image_size=4)
         # 变量有效性
-        train_woe = sc.woebin_ply(train_data[x_columns_candidate], train_bins)
+        train_woe = sc.woebin_ply(train_data[x_columns_candidate], train_bins, print_info=False)
         var_corr_image_path = self._f_get_var_corr_image(train_woe)
         # vif
         vif_df = f_get_ivf(train_woe)
         metric_value_dict["变量有效性"] = MetricFucEntity(image_path=var_corr_image_path, table=vif_df)
 
-        time.sleep(3)
         if jupyter:
             from IPython import display
+
             display.display(metric_value_dict["样本分布"].table)
+            # 打印变量iv
             display.display(metric_value_dict["变量iv"].table)
+            # 打印变量相关性
             f_display_images_by_side(metric_value_dict["变量有效性"].image_path, display, width=800)
+            # 打印变量趋势
             f_display_images_by_side(metric_value_dict["变量趋势-训练集"].image_path, display, title="变量趋势训练集")
             metric_test = metric_value_dict.get("变量趋势-测试集")
             if metric_test is not None:
                 f_display_images_by_side(metric_test.image_path, display, title="变量趋势测试集")
+            # 打印breaks_list
+            breaks_list = {}
+            for x_column, feature in candidate_dict.items():
+                breaks_list[x_column] = feature.breaks_list
+            print("变量切分点:")
+            print(breaks_list)
 
         return metric_value_dict

+ 6 - 0
init/__init__.py

@@ -5,11 +5,16 @@
 @desc: 一些资源初始化
 """
 
+import warnings
+
 import matplotlib
 
 matplotlib.use('Agg')
 
 import matplotlib.pyplot as plt
+from pandas.core.common import SettingWithCopyWarning
+
+warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)
 
 __all__ = ['init']
 
@@ -18,6 +23,7 @@ def init():
     plt.rcParams['font.sans-serif'] = ['SimHei']  # 设置支持中文的字体
     plt.rcParams['axes.unicode_minus'] = False  # 解决负号显示问题
     plt.rcParams['figure.figsize'] = (8, 8)
+    plt.rcParams['figure.max_open_warning'] = 1000
 
 
 if __name__ == "__main__":