yq 2 сар өмнө
parent
commit
9803e9831c

+ 7 - 1
feature/strategy_iv.py

@@ -261,11 +261,15 @@ class StrategyIv(FilterStrategyBase):
             point_percentile_list = [sum(distribute[0:idx + 1]) * interval for idx, _ in enumerate(distribute[0:-1])]
             for point_percentile in point_percentile_list:
                 point = train_data_x.iloc[int(len(train_data_x) * point_percentile)]
+                point = float(point)
                 if format_bin:
                     point = f_format_bin(train_data_x_describe, point)
+                point = round(point, 2)
+                if point == 0:
+                    continue
                 if point not in point_list_cache:
                     point_list_cache.append(point)
-            if point_list_cache not in points_list:
+            if point_list_cache not in points_list and len(point_list_cache) != 0:
                 points_list.append(point_list_cache)
         # IV与单调性过滤
         # 获取2 - 5 箱的情况下最佳分箱
@@ -477,9 +481,11 @@ class StrategyIv(FilterStrategyBase):
             print("-----不同分箱数下变量的推荐切分点-----")
             for x_column, features in numeric_candidate_dict_all.items():
                 print(f"-----【{x_column}】-----")
+                print(f"切分点:")
                 var_trend_images_train = []
                 var_trend_images_test = []
                 for feature in features:
+                    print(json.dumps(feature.breaks_list, ensure_ascii=False, cls=NumpyEncoder))
                     var_breaks_list = [str(i) for i in feature.breaks_list]
                     var_trend_bins_train = self._f_get_bins_by_breaks(train_data, {x_column: feature})
                     image_path = self._f_save_var_trend(var_trend_bins_train, [x_column],