Jelajahi Sumber

modify: jupyter输出优化

yq 3 bulan lalu
induk
komit
109ce1d626
3 file diubah dengan 35 penambahan dan 8 penghapusan
  1. 8 1
      entitys/data_process_config_entity.py
  2. 4 2
      feature/strategy_iv.py
  3. 23 5
      model/model_lr.py

+ 8 - 1
entitys/data_process_config_entity.py

@@ -19,7 +19,10 @@ class DataProcessConfigEntity():
                  iv_threshold: float = 0.03, iv_threshold_wide: float = 0.05, corr_threshold: float = 0.4,
                  sample_rate: float = 0.1, x_candidate_num: int = 10, special_values: Union[dict, list, str] = None,
                  project_name: str = None, format_bin: str = False, breaks_list: dict = None, pos_neg_cnt=1,
-                 *args, **kwargs):
+                 jupyter=False, *args, **kwargs):
+
+        # Whether results should also be displayed inline in a Jupyter notebook
+        self._jupyter = jupyter
 
         # 单调性允许变化次数
         self._pos_neg_cnt = pos_neg_cnt
@@ -77,6 +80,10 @@ class DataProcessConfigEntity():
 
         os.makedirs(self._base_dir, exist_ok=True)
 
+    @property
+    def jupyter(self):
+        return self._jupyter
+
     @property
     def base_dir(self):
         return self._base_dir

+ 4 - 2
feature/strategy_iv.py

@@ -330,6 +330,7 @@ class StrategyIv(FilterStrategyBase):
         val_data = data.val_data
         test_data = data.test_data
         y_column = self.data_process_config.y_column
+        jupyter = self.data_process_config.jupyter
         x_columns_candidate = list(candidate_dict.keys())
         bins = self._f_get_bins_by_breaks(train_data, candidate_dict)
 
@@ -349,11 +350,12 @@ class StrategyIv(FilterStrategyBase):
             test_data_feature = DataFeatureEntity(pd.concat((test_woe, test_data[y_column]), axis=1),
                                                   train_woe.columns.tolist(), y_column)
         return DataPreparedEntity(train_data_feature, val_data_feature, test_data_feature, bins=bins,
-                                  data_split_original=data)
+                                  data_split_original=data, jupyter=jupyter)
 
-    def feature_report(self, data: DataSplitEntity, candidate_dict: Dict[str, CandidateFeatureEntity], jupyter=False,
+    def feature_report(self, data: DataSplitEntity, candidate_dict: Dict[str, CandidateFeatureEntity],
                        *args, **kwargs) -> Dict[str, MetricFucEntity]:
         y_column = self.data_process_config.y_column
+        jupyter = self.data_process_config.jupyter
         x_columns_candidate = list(candidate_dict.keys())
         train_data = data.train_data
         test_data = data.test_data

+ 23 - 5
model/model_lr.py

@@ -12,7 +12,7 @@ import pandas as pd
 import scorecardpy as sc
 from sklearn.linear_model import LogisticRegression
 
-from commom import f_df_to_image
+from commom import f_df_to_image, f_display_images_by_side
 from entitys import DataPreparedEntity, MetricFucEntity, DataSplitEntity
 from feature import f_calcu_model_ks, f_get_model_score_bin, f_calcu_model_psi
 from .model_base import ModelBase
@@ -31,6 +31,7 @@ class ModelLr(ModelBase):
     def train(self, data: DataPreparedEntity, *args, **kwargs) -> Dict[str, MetricFucEntity]:
         bins = kwargs["bins"]
         data_split_original: DataSplitEntity = kwargs["data_split_original"]
+        jupyter = kwargs["jupyter"]
 
         # woe编码之前的数据
         train_data_original = data_split_original.train_data
@@ -53,7 +54,7 @@ class ModelLr(ModelBase):
             card_df = pd.concat((card_df, v))
         card_df_path = self._train_config.f_get_save_path(f"card_df.png")
         f_df_to_image(card_df, card_df_path)
-        metric_value_dict["评分卡"] = MetricFucEntity(image_path=card_df_path)
+        metric_value_dict["评分卡"] = MetricFucEntity(table=card_df, image_path=card_df_path)
 
         # 模型系数
         coef = dict(zip(train_data.x_columns, self.lr.coef_.reshape(-1)))
@@ -97,19 +98,36 @@ class ModelLr(ModelBase):
         train_data_gain = f_calcu_model_ks(train_data_original, y_column, sort_ascending=True)
         train_data_gain_path = self._train_config.f_get_save_path(f"train_data_gain.png")
         f_df_to_image(train_data_gain, train_data_gain_path)
-        metric_value_dict["训练集分数分箱"] = MetricFucEntity(image_path=train_data_gain_path)
+        metric_value_dict["训练集分数分箱"] = MetricFucEntity(table=train_data_gain, image_path=train_data_gain_path)
         if test_data is not None:
             test_data_original, bins = f_get_model_score_bin(test_data_original, card, score_bins)
             test_data_gain = f_calcu_model_ks(test_data_original, y_column, sort_ascending=True)
             test_data_gain_path = self._train_config.f_get_save_path(f"test_data_gain.png")
             f_df_to_image(test_data_gain, test_data_gain_path)
-            metric_value_dict["测试集分数分箱"] = MetricFucEntity(image_path=test_data_gain_path)
+            metric_value_dict["测试集分数分箱"] = MetricFucEntity(table=test_data_gain, image_path=test_data_gain_path)
 
         # 模型分psi
         model_psi = f_calcu_model_psi(train_data_original, test_data_original)
         model_psi_path = self._train_config.f_get_save_path(f"model_psi.png")
         f_df_to_image(model_psi, model_psi_path)
-        metric_value_dict["模型稳定性"] = MetricFucEntity(value=model_psi["psi"].sum().round(4), image_path=model_psi_path)
+        metric_value_dict["模型稳定性"] = MetricFucEntity(table=model_psi, value=model_psi["psi"].sum().round(4),
+                                                     image_path=model_psi_path)
+
+        if jupyter:
+            from IPython import display
+            print("-----模型结果-----")
+            display.display(metric_value_dict["模型结果"].table)
+            f_display_images_by_side(metric_value_dict["模型结果"].image_path, display)
+            # 模型psi
+            display.display(metric_value_dict["模型稳定性"].table)
+            print(f"模型psi: {metric_value_dict['模型稳定性'].value}")
+            display.display(metric_value_dict["变量系数"].table)
+            print("-----训练集-分数分箱-----")
+            display.display(metric_value_dict["训练集分数分箱"].table)
+            print("-----测试集-分数分箱-----")
+            display.display(metric_value_dict["测试集分数分箱"].table)
+            # 评分卡
+            display.display(metric_value_dict["评分卡"].table)
 
         return metric_value_dict