|
@@ -432,6 +432,40 @@ class StrategyWoe(FeatureStrategyBase):
|
|
|
context.set(ContextEnum.HOMO_BIN_INFO_NUMERIC_SET, homo_bin_info_numeric_set)
|
|
|
context.set_filter_info(ContextEnum.FILTER_NUMERIC, filter_numeric_overview, filter_numeric_detail)
|
|
|
|
|
|
def variable_analyse(self, data: DataSplitEntity, column: str, format_bin=None, *args, **kwargs):
    """Print candidate split points for one numeric column and show its trend images.

    For every bin candidate returned by ``get_best_bins()`` on the result of
    ``self._handle_numeric(data, column)``, the split points are printed as
    JSON, a trend image is generated for the train and the test data, and
    finally all images are displayed side by side (train vs. test).

    Args:
        data: Data holder; ``data.train_data`` and ``data.test_data`` are read.
        column: Name of the column to analyse. It must be numeric in
            ``data.train_data``; otherwise only a message is printed.
        format_bin: Optional override written to ``self.ml_config._format_bin``
            for the duration of this call. ``None`` keeps the configured value.
            NOTE(review): the exact meaning of this flag is defined by the
            config class and the binning code, not visible here.
        *args: Accepted for interface compatibility; unused.
        **kwargs: Accepted for interface compatibility; unused.

    Returns:
        None. Results are printed and displayed.
    """
    # Kept as a function-local import, as in the original, so the module does
    # not require IPython at import time.
    from IPython import display

    # Guard clause: only numeric columns can be analysed.
    if not is_numeric_dtype(data.train_data[column]):
        print("只能针对数值型变量进行分析。")
        return

    train_data = data.train_data
    test_data = data.test_data

    # Remember the configured value so the temporary override can be undone.
    format_bin_mlcfg = self.ml_config.format_bin
    if format_bin is not None:
        self.ml_config._format_bin = format_bin

    try:
        homo_bin_info_numeric: HomologousBinInfo = self._handle_numeric(data, column)

        bins_info = homo_bin_info_numeric.get_best_bins()
        print(f"-----【{column}】不同分箱数下变量的推荐切分点-----")
        imgs_path_trend_train = []
        imgs_path_trend_test = []
        for bin_info in bins_info:
            print(json.dumps(bin_info.points, ensure_ascii=False, cls=NumpyEncoder))
            # The split points become part of the image name, so each
            # candidate binning gets its own pair of images.
            breaks_suffix = "_".join(str(point) for point in bin_info.points)

            sc_woebin_train = self._f_get_sc_woebin(train_data, {column: bin_info})
            image_path = self._f_get_img_trend(sc_woebin_train, [column],
                                               f"train_{column}_{breaks_suffix}")
            imgs_path_trend_train.append(image_path[0])

            sc_woebin_test = self._f_get_sc_woebin(test_data, {column: bin_info})
            image_path = self._f_get_img_trend(sc_woebin_test, [column],
                                               f"test_{column}_{breaks_suffix}")
            imgs_path_trend_test.append(image_path[0])

        f_display_images_by_side(display, imgs_path_trend_train, title="训练集",
                                 image_path_list2=imgs_path_trend_test, title2="测试集")
    finally:
        # Always restore the shared config, even when binning or plotting
        # raises; otherwise the override would leak into later calls.
        self.ml_config._format_bin = format_bin_mlcfg
|
|
|
+
|
|
|
+
|
|
|
def feature_save(self, *args, **kwargs):
|
|
|
if self.sc_woebin is None:
|
|
|
GeneralException(ResultCodesEnum.NOT_FOUND, message=f"feature不存在")
|