Преглед на файлове

modify: 代码重构及优化

yq преди 1 месец
родител
ревизия
a5a1300f7f
променени са 2 файла, в които са добавени 8 реда и са изтрити 6 реда
  1. + 4 - 4
      feature/woe/strategy_woe.py
  2. + 4 - 2
      model/model_lr.py

+ 4 - 4
feature/woe/strategy_woe.py

@@ -287,7 +287,7 @@ class StrategyWoe(FeatureStrategyBase):
                                special_values=special_values, breaks_list=breaks_list, print_info=False)
 
         for column, bin in bins_train.items():
-            breaks_list[column] = list(bin['breaks'])
+            breaks_list[column] = list(bin[bin["is_special_values"]==False]['breaks'])
 
         bins_test = sc.woebin(test_data[x_columns + [y_column]], y=y_column,
                               special_values=special_values, breaks_list=breaks_list, print_info=False)
@@ -306,7 +306,7 @@ class StrategyWoe(FeatureStrategyBase):
                 filter_fast_overview = f"{filter_fast_overview}{column} 因为psi【{psi}】大于阈值被剔除\n"
                 continue
             bin_info_fast[column] = BinInfo.ofConvertByDict(
-                {"x_column": column, "iv": iv, "psi": psi, "points": breaks_list[column]}
+                {"x_column": column, "train_iv": train_iv, "iv": iv, "psi": psi, "points": breaks_list[column]}
             )
 
         context.set_filter_info(ContextEnum.FILTER_FAST,
@@ -552,8 +552,7 @@ class StrategyWoe(FeatureStrategyBase):
         test_data = data.test_data
 
         bin_info_filtered: Dict[str, BinInfo] = context.get(ContextEnum.BIN_INFO_FILTERED)
-        homo_bin_info_numeric_set: Dict[str, HomologousBinInfo] = context.get(
-            ContextEnum.HOMO_BIN_INFO_NUMERIC_SET)
+        homo_bin_info_numeric_set: Dict[str, HomologousBinInfo] = context.get(ContextEnum.HOMO_BIN_INFO_NUMERIC_SET)
         filter_fast = context.get(ContextEnum.FILTER_FAST)
         filter_numeric = context.get(ContextEnum.FILTER_NUMERIC)
         filter_corr = context.get(ContextEnum.FILTER_CORR)
@@ -581,6 +580,7 @@ class StrategyWoe(FeatureStrategyBase):
         print("变量切分点:")
         print(json.dumps(breaks_list, ensure_ascii=False, indent=2, cls=NumpyEncoder))
         print("选中变量不同分箱数下变量的推荐切分点:")
+        detail_print(list(bin_info_filtered.keys()))
 
         # 打印fast_filter筛选情况
         f_display_title(display, "快速筛选过程")

+ 4 - 2
model/model_lr.py

@@ -67,6 +67,7 @@ class ModelLr(ModelBase):
             GeneralException(ResultCodesEnum.NOT_FOUND, message=f"card不存在")
         path = self.ml_config.f_get_save_path(f"model.pkl")
         self.lr.save(path)
+        print(f"model save to【{path}】success. ")
         df_card = pd.concat(self.card.values())
         path = self.ml_config.f_get_save_path(f"card.csv")
         df_card.to_csv(path)
@@ -194,9 +195,10 @@ class ModelLr(ModelBase):
         print(self.lr.summary().tables[0])
         display.display(metric_value_dict["变量系数"].table)
 
-        f_display_title(display, "训练集-分数分箱")
+        f_display_title(display, "分数分箱")
+        print("训练集-分数分箱")
         display.display(metric_value_dict["训练集分数分箱"].table)
-        f_display_title(display, "测试集-分数分箱")
+        print("测试集-分数分箱")
         display.display(metric_value_dict["测试集分数分箱"].table)
         # 评分卡
         f_display_title(display, "评分卡")