|
@@ -221,7 +221,7 @@ class StrategyWoe(FeatureStrategyBase):
|
|
|
is_auto_bins = 0
|
|
|
else:
|
|
|
points_list_nsv = _get_points(train_data_ascending_nsv, x_column)
|
|
|
- homo_bin_info = HomologousBinInfo(x_column, is_auto_bins)
|
|
|
+ homo_bin_info = HomologousBinInfo(x_column, is_auto_bins, self.ml_config.is_include(x_column))
|
|
|
# 计算iv psi monto_shift等
|
|
|
for points in points_list_nsv:
|
|
|
bin_info = BinInfo()
|
|
@@ -300,11 +300,11 @@ class StrategyWoe(FeatureStrategyBase):
|
|
|
continue
|
|
|
bin_test = bins_test[column]
|
|
|
test_iv = bin_test['total_iv'][0].round(3)
|
|
|
- iv = train_iv + test_iv
|
|
|
+ iv = round(train_iv + test_iv, 3)
|
|
|
psi = f_get_psi(bin_train, bin_test)
|
|
|
- if psi >= psi_threshold and not self.ml_config.is_include(column):
|
|
|
- filter_fast_overview = f"{filter_fast_overview}{column} 因为psi【{psi}】大于阈值被剔除\n"
|
|
|
- continue
|
|
|
+ # if psi >= psi_threshold and not self.ml_config.is_include(column):
|
|
|
+ # filter_fast_overview = f"{filter_fast_overview}{column} 因为psi【{psi}】大于阈值被剔除\n"
|
|
|
+ # continue
|
|
|
bin_info_fast[column] = BinInfo.ofConvertByDict(
|
|
|
{"x_column": column, "train_iv": train_iv, "iv": iv, "psi": psi, "points": breaks_list[column]}
|
|
|
)
|
|
@@ -324,7 +324,7 @@ class StrategyWoe(FeatureStrategyBase):
|
|
|
train_woe = sc.woebin_ply(train_data[x_columns], sc_woebin, print_info=False)
|
|
|
corr_df = f_get_corr(train_woe)
|
|
|
corr_dict = corr_df.to_dict()
|
|
|
- filter_corr_overview = "corr_filter\n"
|
|
|
+ filter_corr_overview = "filter_corr\n"
|
|
|
filter_corr_detail = {}
|
|
|
# 依次判断每个变量对于其它变量的相关性
|
|
|
for column, corr in corr_dict.items():
|
|
@@ -355,7 +355,7 @@ class StrategyWoe(FeatureStrategyBase):
|
|
|
for c in column_remove:
|
|
|
if c in x_columns:
|
|
|
x_columns.remove(c)
|
|
|
- if overview != "":
|
|
|
+ if len(column_remove) != 0:
|
|
|
filter_corr_overview = f"{filter_corr_overview}{overview}\n"
|
|
|
filter_corr_detail[column] = column_remove
|
|
|
for column in list(bin_info_dict.keys()):
|
|
@@ -383,7 +383,7 @@ class StrategyWoe(FeatureStrategyBase):
|
|
|
bin_info = bin_info_dict[column]
|
|
|
bin_info.vif = vif
|
|
|
bin_info_dict[column] = bin_info
|
|
|
- if vif < vif_threshold:
|
|
|
+ if vif < vif_threshold or self.ml_config.is_include(column):
|
|
|
continue
|
|
|
filter_vif_overview = f"{filter_vif_overview}{column} 因为vif【{vif}】大于阈值被剔除\n"
|
|
|
filter_vif_detail.append(column)
|
|
@@ -519,6 +519,9 @@ class StrategyWoe(FeatureStrategyBase):
|
|
|
from IPython import display
|
|
|
|
|
|
def detail_print(detail):
|
|
|
+ if isinstance(detail, str):
|
|
|
+ detail = [detail]
|
|
|
+
|
|
|
if isinstance(detail, list):
|
|
|
for column in detail:
|
|
|
homo_bin_info_numeric = homo_bin_info_numeric_set.get(column)
|
|
@@ -545,8 +548,7 @@ class StrategyWoe(FeatureStrategyBase):
|
|
|
for column, challenger_columns in detail.items():
|
|
|
print(f"-----相关性筛选保留的【{column}】-----")
|
|
|
detail_print(column)
|
|
|
- for challenger_column in challenger_columns:
|
|
|
- detail_print(challenger_column)
|
|
|
+ detail_print(challenger_columns)
|
|
|
|
|
|
train_data = data.train_data
|
|
|
test_data = data.test_data
|