Browse Source

add: 模型结果报告

yq 4 months ago
parent
commit
faf579c8a6

+ 4 - 4
entitys/__init__.py

@@ -4,15 +4,15 @@
 @time: 2024/10/30
 @desc: 数据实体类
 """
-from .train_config_entity import TrainConfigEntity
-from .data_process_config_entity import DataProcessConfigEntity
 from .data_feaure_entity import DataFeatureEntity, DataSplitEntity, DataPreparedEntity, CandidateFeatureEntity
+from .data_process_config_entity import DataProcessConfigEntity
 from .db_config_entity import DbConfigEntity
 from .metric_config_entity import MetricConfigEntity
-from .metric_entity import MetricTrainEntity, MetricFucEntity
+from .metric_entity import MetricFucEntity
 from .monitor_metric_config_entity import MonitorMetricConfigEntity
+from .train_config_entity import TrainConfigEntity
 
-__all__ = ['DataFeatureEntity', 'DbConfigEntity', 'MetricTrainEntity', 'MonitorMetricConfigEntity', 'MetricConfigEntity',
+__all__ = ['DataFeatureEntity', 'DbConfigEntity', 'MonitorMetricConfigEntity', 'MetricConfigEntity',
            'MetricFucEntity', 'DataSplitEntity', 'DataProcessConfigEntity', 'TrainConfigEntity', 'DataPreparedEntity',
            'CandidateFeatureEntity']
 

+ 2 - 38
entitys/metric_entity.py

@@ -8,43 +8,6 @@ from typing import Union
 
 import pandas as pd
 
-from commom import f_format_float
-
-
-class MetricTrainEntity():
-    """
-    模型训练结果指标类
-    """
-
-    def __init__(self, train_auc: float, train_ks: float, test_auc: float, test_ks: float,
-                 train_perf_image_path: str = None, test_perf_image_path: str = None):
-        self._train_auc = f_format_float(train_auc)
-        self._train_ks = f_format_float(train_ks)
-        self._train_perf_image_path = train_perf_image_path
-
-        self._test_auc = f_format_float(test_auc)
-        self._test_ks = f_format_float(test_ks)
-        self._test_perf_image_path = test_perf_image_path
-
-    def __str__(self):
-        return f"train_auc:{self._train_auc} train_ks:{self._train_ks}\ntest_auc:{self._test_auc} test_ks:{self._test_ks}"
-
-    @property
-    def train_auc(self):
-        return self._train_auc
-
-    @property
-    def train_ks(self):
-        return self._train_ks
-
-    @property
-    def test_auc(self):
-        return self._test_auc
-
-    @property
-    def test_ks(self):
-        return self._test_ks
-
 
 class MetricFucEntity():
     """
@@ -55,7 +18,7 @@ class MetricFucEntity():
                  table_font_size=12, table_autofit=False, table_cell_width=None, image_size: int = 6):
         self._table = table
         self._table_font_size = table_font_size
-        self._table_cell_width= table_cell_width
+        self._table_cell_width = table_cell_width
         self._table_autofit = table_autofit
 
         self._value = value
@@ -90,5 +53,6 @@ class MetricFucEntity():
     def image_size(self):
         return self._image_size
 
+
 if __name__ == "__main__":
     pass

+ 3 - 2
model/model_base.py

@@ -5,10 +5,11 @@
 @desc: 模型基类
 """
 import abc
+from typing import Dict
 
 import pandas as pd
 
-from entitys import MetricTrainEntity, TrainConfigEntity, DataPreparedEntity
+from entitys import TrainConfigEntity, DataPreparedEntity, MetricFucEntity
 
 
 class ModelBase(metaclass=abc.ABCMeta):
@@ -17,7 +18,7 @@ class ModelBase(metaclass=abc.ABCMeta):
         self._train_config = train_config
 
     @abc.abstractmethod
-    def train(self, data: DataPreparedEntity, *args, **kwargs) -> MetricTrainEntity:
+    def train(self, data: DataPreparedEntity, *args, **kwargs) -> Dict[str, MetricFucEntity]:
         pass
 
     @abc.abstractmethod

+ 35 - 10
model/model_lr.py

@@ -4,12 +4,15 @@
 @time: 2024/11/1
 @desc: 
 """
+from typing import Dict
 
 import pandas as pd
+import scorecardpy as sc
+from matplotlib import pyplot as plt
 from sklearn.linear_model import LogisticRegression
-from toad.metrics import KS, AUC
 
-from entitys import MetricTrainEntity, TrainConfigEntity, DataPreparedEntity
+from entitys import TrainConfigEntity, DataPreparedEntity, MetricFucEntity
+from init import f_get_save_path
 from .model_base import ModelBase
 
 
@@ -18,20 +21,42 @@ class ModelLr(ModelBase):
         super().__init__(train_config)
         self.lr = LogisticRegression(penalty='l1', C=0.9, solver='saga', n_jobs=-1)
 
-    def train(self, data: DataPreparedEntity, *args, **kwargs) -> MetricTrainEntity:
+    def train(self, data: DataPreparedEntity, *args, **kwargs) -> Dict[str, MetricFucEntity]:
         train_data = data.train_data
+        train_y = train_data.get_Ydata()
         test_data = data.test_data
-        self.lr.fit(train_data.get_Xdata(), train_data.get_Ydata())
+        test_y = test_data.get_Ydata()
+        self.lr.fit(train_data.get_Xdata(), train_y)
 
         train_prob = self.lr.predict_proba(train_data.get_Xdata())[:, 1]
-        train_auc = AUC(train_prob, train_data.get_Ydata())
-        train_ks = KS(train_prob, train_data.get_Ydata())
-
         test_prob = self.lr.predict_proba(test_data.get_Xdata())[:, 1]
-        test_auc = AUC(test_prob, test_data.get_Ydata())
-        test_ks = KS(test_prob, test_data.get_Ydata())
 
-        return MetricTrainEntity(train_auc, train_ks, test_auc, test_ks)
+        image_path_list = []  # saved performance plots: train first, then test
+        train_perf = sc.perf_eva(train_y, train_prob, title="train", show_plot=True)
+        path = f_get_save_path("train_perf.png")
+        train_perf["pic"].savefig(path)
+        image_path_list.append(path)
+        # Repeat the evaluation and plot export for the test split.
+        test_perf = sc.perf_eva(test_y, test_prob, title="test", show_plot=True)
+        path = f_get_save_path("test_perf.png")
+        test_perf["pic"].savefig(path)
+        image_path_list.append(path)
+        # Read each metric from its own key so the table columns below are labelled correctly.
+        train_auc = train_perf["AUC"]
+        train_ks = train_perf["KS"]
+
+        test_auc = test_perf["AUC"]
+        test_ks = test_perf["KS"]
+
+        metric_value_dict = {}
+        df = pd.DataFrame()
+        df["样本集"] = ["训练集", "测试集"]
+        df["AUC"] = [train_auc, test_auc]
+        df["KS"] = [train_ks, test_ks]
+        # One report entry: the metric table plus both performance plots.
+        metric_value_dict["模型结果"] = MetricFucEntity(table=df, image_path=image_path_list, image_size=5)
+
+        return metric_value_dict
 
     def predict_prob(self, x: pd.DataFrame, *args, **kwargs):
         return self.lr.predict_proba(x)[:, 1]

BIN
template/模型开发报告模板_lr.docx


+ 4 - 3
train_test.py

@@ -26,9 +26,10 @@ if __name__ == "__main__":
     data_prepared = strategy.feature_generate(data, candidate_feature)
     # 训练
     train_pipeline = TrainPipeline(TrainConfigEntity.from_config('./config/train_config_template.json'))
-    train_pipeline.train(data_prepared)
+    metric_value_dict_train = train_pipeline.train(data_prepared)
     # 报告生成
-    metric_value_dict = strategy.feature_report(data, candidate_feature)
-    train_pipeline.generate_report(metric_value_dict)
+    metric_value_dict_feature = strategy.feature_report(data, candidate_feature)
+    metric_value_dict_train.update(metric_value_dict_feature)
+    train_pipeline.generate_report(metric_value_dict_train)
 
     print(time.time() - time_now)

+ 3 - 3
trainer/train.py

@@ -18,9 +18,9 @@ class TrainPipeline():
         model_clazz = f_get_model(self._train_config.model_type)
         self.model = model_clazz(self._train_config)
 
-    def train(self, data: DataPreparedEntity):
-        metric_train = self.model.train(data)
-        print(metric_train)
+    def train(self, data: DataPreparedEntity) -> Dict[str, MetricFucEntity]:
+        # Training is delegated to the configured model; its metric dict is returned unchanged.
+        return self.model.train(data)
 
     def generate_report(self, metric_value_dict: Dict[str, MetricFucEntity]):
         Report.generate_report(metric_value_dict, self._train_config.template_path, save_path=f_get_save_path("模型报告.docx"))