|
@@ -287,7 +287,7 @@ class StrategyWoe(FeatureStrategyBase):
|
|
|
special_values=special_values, breaks_list=breaks_list, print_info=False)
|
|
|
|
|
|
for column, bin in bins_train.items():
|
|
|
- breaks_list[column] = list(bin['breaks'])
|
|
|
+ breaks_list[column] = list(bin[bin["is_special_values"]==False]['breaks'])
|
|
|
|
|
|
bins_test = sc.woebin(test_data[x_columns + [y_column]], y=y_column,
|
|
|
special_values=special_values, breaks_list=breaks_list, print_info=False)
|
|
@@ -306,7 +306,7 @@ class StrategyWoe(FeatureStrategyBase):
|
|
|
filter_fast_overview = f"{filter_fast_overview}{column} 因为psi【{psi}】大于阈值被剔除\n"
|
|
|
continue
|
|
|
bin_info_fast[column] = BinInfo.ofConvertByDict(
|
|
|
- {"x_column": column, "iv": iv, "psi": psi, "points": breaks_list[column]}
|
|
|
+ {"x_column": column, "train_iv": train_iv, "iv": iv, "psi": psi, "points": breaks_list[column]}
|
|
|
)
|
|
|
|
|
|
context.set_filter_info(ContextEnum.FILTER_FAST,
|
|
@@ -552,8 +552,7 @@ class StrategyWoe(FeatureStrategyBase):
|
|
|
test_data = data.test_data
|
|
|
|
|
|
bin_info_filtered: Dict[str, BinInfo] = context.get(ContextEnum.BIN_INFO_FILTERED)
|
|
|
- homo_bin_info_numeric_set: Dict[str, HomologousBinInfo] = context.get(
|
|
|
- ContextEnum.HOMO_BIN_INFO_NUMERIC_SET)
|
|
|
+ homo_bin_info_numeric_set: Dict[str, HomologousBinInfo] = context.get(ContextEnum.HOMO_BIN_INFO_NUMERIC_SET)
|
|
|
filter_fast = context.get(ContextEnum.FILTER_FAST)
|
|
|
filter_numeric = context.get(ContextEnum.FILTER_NUMERIC)
|
|
|
filter_corr = context.get(ContextEnum.FILTER_CORR)
|
|
@@ -581,6 +580,7 @@ class StrategyWoe(FeatureStrategyBase):
|
|
|
print("变量切分点:")
|
|
|
print(json.dumps(breaks_list, ensure_ascii=False, indent=2, cls=NumpyEncoder))
|
|
|
print("选中变量不同分箱数下变量的推荐切分点:")
|
|
|
+ detail_print(list(bin_info_filtered.keys()))
|
|
|
|
|
|
# 打印fast_filter筛选情况
|
|
|
f_display_title(display, "快速筛选过程")
|