Browse Source

add: xgb模型报告

yq 18 hours ago
parent
commit
4cf7278c3f

+ 15 - 5
feature/bin/strategy_norm.py

@@ -162,16 +162,26 @@ class StrategyNorm(FeatureStrategyBase):
     def feature_load(self, path: str, *args, **kwargs):
         pass
 
-    def feature_report(self, *args, **kwargs) -> Dict[str, MetricFucResultEntity]:
-        self.jupyter_print()
-        return {}
+    def feature_report(self, data: DataSplitEntity, *args, **kwargs) -> Dict[str, MetricFucResultEntity]:
 
-    def jupyter_print(self, *args, **kwargs):
+        y_column = self.ml_config.y_column
 
-        max_feature_num = self.ml_config.max_feature_num
+        metric_value_dict = {}
+        # 样本分布
+        metric_value_dict["样本分布"] = MetricFucResultEntity(table=data.get_distribution(y_column), table_font_size=10,
+                                                          table_cell_width=3)
+
+        self.jupyter_print(metric_value_dict)
+        return metric_value_dict
 
+    def jupyter_print(self, metric_value_dict, *args, **kwargs):
         from IPython import display
 
+        max_feature_num = self.ml_config.max_feature_num
+
+        f_display_title(display, "样本分布")
+        display.display(metric_value_dict["样本分布"].table)
+
         filter_fast = context.get(ContextEnum.FILTER_FAST)
         f_display_title(display, "快速筛选过程")
         print(f"剔除变量重要性排名{max_feature_num}以后的变量")

+ 7 - 1
model/model_xgb.py

@@ -4,6 +4,7 @@
 @time: 2024/11/1
 @desc: 
 """
+import json
 import os.path
 from os.path import dirname, realpath
 from typing import Dict
@@ -27,7 +28,7 @@ class ModelXgb(ModelBase):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         # 报告模板
-        self._template_path = os.path.join(dirname(dirname(realpath(__file__))), "./template/模型开发报告模板_lr.docx")
+        self._template_path = os.path.join(dirname(dirname(realpath(__file__))), "./template/模型开发报告模板_xgb.docx")
         self.model = None
 
     def get_report_template_path(self):
@@ -173,6 +174,8 @@ class ModelXgb(ModelBase):
         test_data = data.test_data
 
         metric_value_dict = {}
+        metric_value_dict["模型超参数"] = MetricFucResultEntity(
+            value=json.dumps(self.model.get_xgb_params(), ensure_ascii=False, indent=2))
 
         _, test_score_bin = _get_perf()
 
@@ -198,6 +201,9 @@ class ModelXgb(ModelBase):
         display.display(metric_value_dict["模型结果"].table)
         f_display_images_by_side(display, metric_value_dict["模型结果"].image_path)
 
+        f_display_title(display, "模型超参数")
+        print(metric_value_dict["模型超参数"].value)
+
         # 模型psi
         f_display_title(display, "模型psi")
         display.display(metric_value_dict["模型稳定性"].table)

+ 13 - 7
pipeline/pipeline.py

@@ -9,7 +9,7 @@ from typing import List
 import pandas as pd
 
 from entitys import DataSplitEntity, MlConfigEntity, DataFeatureEntity
-from enums import ConstantEnum
+from enums import ConstantEnum, ModelEnum
 from feature import FeatureStrategyFactory, FeatureStrategyBase
 from init import init
 from model import ModelBase, ModelFactory, f_add_rules, f_get_model_score_bin, f_calcu_model_psi
@@ -57,15 +57,21 @@ class Pipeline():
         return self._model.score_rule(data)
 
     def psi(self, x1: pd.DataFrame, x2: pd.DataFrame, points: List[float] = None) -> pd.DataFrame:
-        if len(self._ml_config.rules) != 0:
-            y1 = self.score_rule(x1)
-            y2 = self.score_rule(x2)
+        if self._ml_config.model_type == ModelEnum.XGB.value:
+            y1 = self.prob(x1)
+            y2 = self.prob(x2)
+            sort_ascending = False
         else:
-            y1 = self.score(x1)
-            y2 = self.score(x2)
+            sort_ascending = True
+            if len(self._ml_config.rules) != 0:
+                y1 = self.score_rule(x1)
+                y2 = self.score_rule(x2)
+            else:
+                y1 = self.score(x1)
+                y2 = self.score(x2)
         x1_score_bin, score_bins = f_get_model_score_bin(x1, y1, points)
         x2_score_bin, _ = f_get_model_score_bin(x2, y2, score_bins)
-        model_psi = f_calcu_model_psi(x1_score_bin, x2_score_bin)
+        model_psi = f_calcu_model_psi(x1_score_bin, x2_score_bin, sort_ascending)
         print(f"模型psi: {model_psi['psi'].sum()}")
         return model_psi
 

BIN
template/模型开发报告模板_xgb.docx