|
@@ -261,11 +261,15 @@ class StrategyIv(FilterStrategyBase):
|
|
|
point_percentile_list = [sum(distribute[0:idx + 1]) * interval for idx, _ in enumerate(distribute[0:-1])]
|
|
|
for point_percentile in point_percentile_list:
|
|
|
point = train_data_x.iloc[int(len(train_data_x) * point_percentile)]
|
|
|
+ point = float(point)
|
|
|
if format_bin:
|
|
|
point = f_format_bin(train_data_x_describe, point)
|
|
|
+ point = round(point, 2)
|
|
|
+ if point == 0:
|
|
|
+ continue
|
|
|
if point not in point_list_cache:
|
|
|
point_list_cache.append(point)
|
|
|
- if point_list_cache not in points_list:
|
|
|
+ if point_list_cache not in points_list and len(point_list_cache) != 0:
|
|
|
points_list.append(point_list_cache)
|
|
|
# IV与单调性过滤
|
|
|
# 获取2 - 5 箱的情况下最佳分箱
|
|
@@ -477,9 +481,11 @@ class StrategyIv(FilterStrategyBase):
|
|
|
print("-----不同分箱数下变量的推荐切分点-----")
|
|
|
for x_column, features in numeric_candidate_dict_all.items():
|
|
|
print(f"-----【{x_column}】-----")
|
|
|
+ print(f"切分点:")
|
|
|
var_trend_images_train = []
|
|
|
var_trend_images_test = []
|
|
|
for feature in features:
|
|
|
+ print(json.dumps(feature.breaks_list, ensure_ascii=False, cls=NumpyEncoder))
|
|
|
var_breaks_list = [str(i) for i in feature.breaks_list]
|
|
|
var_trend_bins_train = self._f_get_bins_by_breaks(train_data, {x_column: feature})
|
|
|
image_path = self._f_save_var_trend(var_trend_bins_train, [x_column],
|