8 Commits eca33dc807 ... 750ba6f920

Author SHA1 Message Date
  yq 750ba6f920 modify: 打印 1 month ago
  yq c3e3ce78d8 modify: demo 1 month ago
  yq e37ceb9d01 add: 在线学习新增生成评分卡 1 month ago
  yq 73265a5049 add: 重要文件名枚举值 1 month ago
  yq 843ef7db60 modify: ignore 1 month ago
  yq 8f3d8770e7 modify: demo 1 month ago
  yq 0787e44c2c add: 指定优化参数 1 month ago
  yq 01649d9201 add: OnlineLearning 1 month ago

+ 3 - 5
__init__.py

@@ -9,14 +9,12 @@ from os.path import dirname, realpath
 
 
 sys.path.append(dirname(realpath(__file__)))
 sys.path.append(dirname(realpath(__file__)))
 
 
-from feature import FeatureStrategyFactory
-from model import ModelFactory
+from online_learning import OnlineLearningTrainer
 from pipeline import Pipeline
 from pipeline import Pipeline
-
 from data import DataLoaderMysql
 from data import DataLoaderMysql
 from entitys import DbConfigEntity, DataSplitEntity
 from entitys import DbConfigEntity, DataSplitEntity
 from monitor import MonitorMetric
 from monitor import MonitorMetric
 from metrics import MetricBase
 from metrics import MetricBase
 
 
-__all__ = ['MonitorMetric', 'DataLoaderMysql', 'DbConfigEntity', 'MetricBase', 'FeatureStrategyFactory', 'ModelFactory',
-           'Pipeline', 'DataSplitEntity']
+__all__ = ['MonitorMetric', 'MetricBase', 'DataLoaderMysql', 'DbConfigEntity',
+           'DataSplitEntity', 'Pipeline', 'OnlineLearningTrainer']

File diff suppressed because it is too large
+ 34 - 30
easy_ml_demo.ipynb


+ 2 - 1
entitys/__init__.py

@@ -9,9 +9,10 @@ from .db_config_entity import DbConfigEntity
 from .metric_entity import MetricFucResultEntity, MetricConfigEntity
 from .metric_entity import MetricFucResultEntity, MetricConfigEntity
 from .ml_config_entity import MlConfigEntity
 from .ml_config_entity import MlConfigEntity
 from .monitor_entity import MonitorConfigEntity
 from .monitor_entity import MonitorConfigEntity
+from .ol_config_entity import OnlineLearningConfigEntity
 
 
 __all__ = ['DataFeatureEntity', 'DbConfigEntity', 'MonitorConfigEntity', 'MetricConfigEntity', 'MetricFucResultEntity',
 __all__ = ['DataFeatureEntity', 'DbConfigEntity', 'MonitorConfigEntity', 'MetricConfigEntity', 'MetricFucResultEntity',
-           'DataSplitEntity', 'MlConfigEntity']
+           'DataSplitEntity', 'MlConfigEntity', 'OnlineLearningConfigEntity']
 
 
 if __name__ == "__main__":
 if __name__ == "__main__":
     pass
     pass

+ 3 - 3
entitys/ml_config_entity.py

@@ -10,7 +10,7 @@ from typing import List, Union
 
 
 from commom import GeneralException, f_get_datetime
 from commom import GeneralException, f_get_datetime
 from config import BaseConfig
 from config import BaseConfig
-from enums import ResultCodesEnum
+from enums import ResultCodesEnum, FileEnum
 from init import warning_ignore
 from init import warning_ignore
 
 
 
 
@@ -295,7 +295,7 @@ class MlConfigEntity():
         从配置文件生成实体类
         从配置文件生成实体类
         """
         """
         if os.path.isdir(config_path):
         if os.path.isdir(config_path):
-            config_path = os.path.join(config_path, "mlcfg.json")
+            config_path = os.path.join(config_path, FileEnum.MLCFG.value)
 
 
         if os.path.exists(config_path):
         if os.path.exists(config_path):
             with open(config_path, mode="r", encoding="utf-8") as f:
             with open(config_path, mode="r", encoding="utf-8") as f:
@@ -306,7 +306,7 @@ class MlConfigEntity():
         return MlConfigEntity(**j)
         return MlConfigEntity(**j)
 
 
     def config_save(self):
     def config_save(self):
-        path = self.f_get_save_path("mlcfg.json")
+        path = self.f_get_save_path(FileEnum.MLCFG.value)
         with open(path, mode="w", encoding="utf-8") as f:
         with open(path, mode="w", encoding="utf-8") as f:
             j = {k.lstrip("_"): v for k, v in self.__dict__.items()}
             j = {k.lstrip("_"): v for k, v in self.__dict__.items()}
             j = json.dumps(j, ensure_ascii=False)
             j = json.dumps(j, ensure_ascii=False)

+ 139 - 0
entitys/ol_config_entity.py

@@ -0,0 +1,139 @@
+# -*- coding: utf-8 -*-
+"""
+@author: yq
+@time: 2024/11/1
+@desc: OnlineLearning参数配置类
+"""
+import json
+import os
+from typing import List
+
+from commom import GeneralException, f_get_datetime
+from config import BaseConfig
+from enums import ResultCodesEnum, FileEnum
+from init import warning_ignore
+
+
+class OnlineLearningConfigEntity():
+    def __init__(self,
+                 path_resources: str,
+                 y_column: str,
+                 project_name: str = None,
+                 lr: float = 0.01,
+                 batch_size: int = 64,
+                 epochs: int = 50,
+                 columns_anns: dict = {},
+                 jupyter_print=False,
+                 stress_test=False,
+                 stress_sample_times=100,
+                 stress_bad_rate_list: List[float] = [],
+                 *args, **kwargs):
+
+        self._path_resources = path_resources
+        # 定义y变量
+        self._y_column = y_column
+        # 项目名称,和缓存路径有关
+        self._project_name = project_name
+        # 学习率
+        self._lr = lr
+        # 模型单次更新使用数据量
+        self._batch_size = batch_size
+        # 最大训练轮数
+        self._epochs = epochs
+
+        # 变量注释
+        self._columns_anns = columns_anns
+
+        # jupyter下输出内容
+        self._jupyter_print = jupyter_print
+
+        # 是否开启压力测试
+        self._stress_test = stress_test
+
+        # 压力测试抽样次数
+        self._stress_sample_times = stress_sample_times
+
+        # 压力测试坏样率列表
+        self._stress_bad_rate_list = stress_bad_rate_list
+
+        if self._project_name is None or len(self._project_name) == 0:
+            self._base_dir = os.path.join(BaseConfig.train_path, f"{f_get_datetime()}")
+        else:
+            self._base_dir = os.path.join(BaseConfig.train_path, self._project_name)
+        os.makedirs(self._base_dir, exist_ok=True)
+        print(f"项目路径:【{self._base_dir}】")
+
+        if self._jupyter_print:
+            warning_ignore()
+
+    @property
+    def path_resources(self):
+        return self._path_resources
+
+    @property
+    def y_column(self):
+        return self._y_column
+
+    @property
+    def lr(self):
+        return self._lr
+
+    @property
+    def batch_size(self):
+        return self._batch_size
+
+    @property
+    def epochs(self):
+        return self._epochs
+
+    @property
+    def columns_anns(self):
+        return self._columns_anns
+
+    @property
+    def jupyter_print(self):
+        return self._jupyter_print
+
+    @property
+    def stress_test(self):
+        return self._stress_test
+
+    @property
+    def stress_sample_times(self):
+        return self._stress_sample_times
+
+    @property
+    def stress_bad_rate_list(self):
+        return self._stress_bad_rate_list
+
+    @staticmethod
+    def from_config(config_path: str):
+        """
+        从配置文件生成实体类
+        """
+        if os.path.isdir(config_path):
+            config_path = os.path.join(config_path, FileEnum.OLCFG.value)
+
+        if os.path.exists(config_path):
+            with open(config_path, mode="r", encoding="utf-8") as f:
+                j = json.loads(f.read())
+        else:
+            raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"指配置文件【{config_path}】不存在")
+        print(f"olcfg load from【{config_path}】success. ")
+        return OnlineLearningConfigEntity(**j)
+
+    def config_save(self):
+        path = self.f_get_save_path(FileEnum.OLCFG.value)
+        with open(path, mode="w", encoding="utf-8") as f:
+            j = {k.lstrip("_"): v for k, v in self.__dict__.items()}
+            j = json.dumps(j, ensure_ascii=False)
+            f.write(j)
+        print(f"olcfg save to【{path}】success. ")
+
+    def f_get_save_path(self, file_name: str) -> str:
+        path = os.path.join(self._base_dir, file_name)
+        return path
+
+
+if __name__ == "__main__":
+    pass

+ 0 - 43
entitys/train_config_entity.py

@@ -1,43 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-@author: yq
-@time: 2024/11/1
-@desc: 模型训练超参数配置类
-"""
-import json
-import os
-
-from commom import GeneralException
-from enums import ResultCodesEnum
-
-
-class TrainConfigEntity():
-    def __init__(self, lr: float = None, *args, **kwargs):
-        # 学习率
-        self._lr = lr
-        # 该函数需要去继承
-        self.f_get_save_path = None
-
-    @property
-    def lr(self):
-        return self._lr
-
-    def set_save_path_func(self, f):
-        self.f_get_save_path = f
-
-    @staticmethod
-    def from_config(config_path: str):
-        """
-        从配置文件生成实体类
-        """
-        if os.path.exists(config_path):
-            with open(config_path, mode="r", encoding="utf-8") as f:
-                j = json.loads(f.read())
-        else:
-            raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"指配置文件【{config_path}】不存在")
-
-        return TrainConfigEntity(**j)
-
-
-if __name__ == "__main__":
-    pass

+ 2 - 1
enums/__init__.py

@@ -7,9 +7,10 @@
 from .constant_enum import ConstantEnum
 from .constant_enum import ConstantEnum
 from .context_enum import ContextEnum
 from .context_enum import ContextEnum
 from .feature_strategy_enum import FeatureStrategyEnum
 from .feature_strategy_enum import FeatureStrategyEnum
+from .file_enum import FileEnum
 from .model_enum import ModelEnum
 from .model_enum import ModelEnum
 from .placeholder_prefix_enum import PlaceholderPrefixEnum
 from .placeholder_prefix_enum import PlaceholderPrefixEnum
 from .result_codes_enum import ResultCodesEnum
 from .result_codes_enum import ResultCodesEnum
 
 
 __all__ = ['ResultCodesEnum', 'PlaceholderPrefixEnum', 'FeatureStrategyEnum', 'ModelEnum', 'ContextEnum',
 __all__ = ['ResultCodesEnum', 'PlaceholderPrefixEnum', 'FeatureStrategyEnum', 'ModelEnum', 'ContextEnum',
-           'ConstantEnum']
+           'ConstantEnum','FileEnum']

+ 2 - 0
enums/constant_enum.py

@@ -10,3 +10,5 @@ from enum import Enum
 class ConstantEnum(Enum):
 class ConstantEnum(Enum):
     SCORE = "SCORE"
     SCORE = "SCORE"
     SCORE_BIN = "MODEL_SCORE_BIN"
     SCORE_BIN = "MODEL_SCORE_BIN"
+    # lr模型常数项
+    INTERCEPT = "const"

+ 1 - 0
enums/context_enum.py

@@ -8,6 +8,7 @@ from enum import Enum
 
 
 
 
 class ContextEnum(Enum):
 class ContextEnum(Enum):
+    PARAM_OPTIMIZED = "param_optimized"
     BIN_INFO_FILTERED = "bin_info_filtered"
     BIN_INFO_FILTERED = "bin_info_filtered"
     HOMO_BIN_INFO_NUMERIC_SET = "homo_bin_info_numeric_set"
     HOMO_BIN_INFO_NUMERIC_SET = "homo_bin_info_numeric_set"
     WOEBIN = "woebin"
     WOEBIN = "woebin"

+ 25 - 0
enums/file_enum.py

@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+"""
+@author: yq
+@time: 2024/11/14
+@desc: 文件名枚举值
+"""
+from enum import Enum
+
+
+class FileEnum(Enum):
+    MLCFG = "mlcfg.json"
+    OLCFG = "olcfg.json"
+    FEATURE = "feature.csv"
+    CARD = "card.csv"
+    CARD_CFG = "card.cfg"
+    COEF = "coef.json"
+    MODEL = "model.pkl"
+
+
+
+
+
+
+
+

+ 2 - 1
feature/__init__.py

@@ -6,5 +6,6 @@
 """
 """
 from .feature_strategy_base import FeatureStrategyBase
 from .feature_strategy_base import FeatureStrategyBase
 from .feature_strategy_factory import FeatureStrategyFactory
 from .feature_strategy_factory import FeatureStrategyFactory
+from .woe.utils import f_woebin_load
 
 
-__all__ = ['FeatureStrategyFactory', 'FeatureStrategyBase']
+__all__ = ['FeatureStrategyFactory', 'FeatureStrategyBase', 'f_woebin_load']

+ 4 - 15
feature/woe/strategy_woe.py

@@ -5,7 +5,6 @@
 @desc: iv值及单调性筛选类
 @desc: iv值及单调性筛选类
 """
 """
 import json
 import json
-import os.path
 from itertools import combinations_with_replacement
 from itertools import combinations_with_replacement
 from typing import Dict, Optional, Union
 from typing import Dict, Optional, Union
 
 
@@ -21,11 +20,11 @@ from commom import f_display_images_by_side, NumpyEncoder, GeneralException, f_d
     f_image_crop_white_borders
     f_image_crop_white_borders
 from data import DataExplore
 from data import DataExplore
 from entitys import DataSplitEntity, MetricFucResultEntity
 from entitys import DataSplitEntity, MetricFucResultEntity
-from enums import ContextEnum, ResultCodesEnum
+from enums import ContextEnum, ResultCodesEnum, FileEnum
 from feature.feature_strategy_base import FeatureStrategyBase
 from feature.feature_strategy_base import FeatureStrategyBase
 from init import context
 from init import context
 from .entity import BinInfo, HomologousBinInfo
 from .entity import BinInfo, HomologousBinInfo
-from .utils import f_monto_shift, f_get_corr, f_get_vif, f_format_bin, f_trend_shift, f_get_psi
+from .utils import f_monto_shift, f_get_corr, f_get_vif, f_format_bin, f_trend_shift, f_get_psi, f_woebin_load
 
 
 
 
 class StrategyWoe(FeatureStrategyBase):
 class StrategyWoe(FeatureStrategyBase):
@@ -476,22 +475,12 @@ class StrategyWoe(FeatureStrategyBase):
         if self.sc_woebin is None:
         if self.sc_woebin is None:
             GeneralException(ResultCodesEnum.NOT_FOUND, message=f"feature不存在")
             GeneralException(ResultCodesEnum.NOT_FOUND, message=f"feature不存在")
         df_woebin = pd.concat(self.sc_woebin.values())
         df_woebin = pd.concat(self.sc_woebin.values())
-        path = self.ml_config.f_get_save_path(f"feature.csv")
+        path = self.ml_config.f_get_save_path(FileEnum.FEATURE.value)
         df_woebin.to_csv(path)
         df_woebin.to_csv(path)
         print(f"feature save to【{path}】success. ")
         print(f"feature save to【{path}】success. ")
 
 
     def feature_load(self, path: str, *args, **kwargs):
     def feature_load(self, path: str, *args, **kwargs):
-        if os.path.isdir(path):
-            path = os.path.join(path, "feature.csv")
-        if not os.path.isfile(path) or "feature.csv" not in path:
-            raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"特征信息【feature.csv】不存在")
-
-        df_woebin = pd.read_csv(path)
-        variables = df_woebin["variable"].unique().tolist()
-        self.sc_woebin = {}
-        for variable in variables:
-            self.sc_woebin[variable] = df_woebin[df_woebin["variable"] == variable]
-        print(f"feature load from【{path}】success.")
+        self.sc_woebin = f_woebin_load(path)
 
 
     def feature_generate(self, data: pd.DataFrame, *args, **kwargs) -> pd.DataFrame:
     def feature_generate(self, data: pd.DataFrame, *args, **kwargs) -> pd.DataFrame:
         x_columns = list(self.sc_woebin.keys())
         x_columns = list(self.sc_woebin.keys())

+ 19 - 0
feature/woe/utils.py

@@ -4,12 +4,16 @@
 @time: 2023/12/28
 @time: 2023/12/28
 @desc:  特征工具类
 @desc:  特征工具类
 """
 """
+import os
 from typing import Union
 from typing import Union
 
 
 import numpy as np
 import numpy as np
 import pandas as pd
 import pandas as pd
 from statsmodels.stats.outliers_influence import variance_inflation_factor as vif
 from statsmodels.stats.outliers_influence import variance_inflation_factor as vif
 
 
+from commom import GeneralException
+from enums import ResultCodesEnum, FileEnum
+
 FORMAT_DICT = {
 FORMAT_DICT = {
     # 比例类 -1 - 1
     # 比例类 -1 - 1
     "bin_rate1": np.arange(-1, 1 + 0.1, 0.1),
     "bin_rate1": np.arange(-1, 1 + 0.1, 0.1),
@@ -133,3 +137,18 @@ def f_get_vif(data: pd.DataFrame) -> Union[pd.DataFrame, None]:
     df_vif["变量"] = [column.replace("_woe", "") for column in data.columns]
     df_vif["变量"] = [column.replace("_woe", "") for column in data.columns]
     df_vif['vif'] = vif_v
     df_vif['vif'] = vif_v
     return df_vif
     return df_vif
+
+
+def f_woebin_load(path: str):
+    if os.path.isdir(path):
+        path = os.path.join(path, FileEnum.FEATURE.value)
+    if not os.path.isfile(path) or FileEnum.FEATURE.value not in path:
+        raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"特征信息【{FileEnum.FEATURE.value}】不存在")
+
+    df_woebin = pd.read_csv(path)
+    variables = df_woebin["variable"].unique().tolist()
+    sc_woebin = {}
+    for variable in variables:
+        sc_woebin[variable] = df_woebin[df_woebin["variable"] == variable]
+    print(f"feature load from【{path}】success.")
+    return sc_woebin

+ 3 - 2
model/__init__.py

@@ -6,6 +6,7 @@
 """
 """
 from .model_base import ModelBase
 from .model_base import ModelBase
 from .model_factory import ModelFactory
 from .model_factory import ModelFactory
-from .model_utils import f_add_rules
+from .model_utils import f_add_rules, f_get_model_score_bin, f_calcu_model_ks, f_calcu_model_psi, f_stress_test
 
 
-__all__ = ['ModelBase', 'ModelFactory', 'f_add_rules']
+__all__ = ['ModelBase', 'ModelFactory', 'f_add_rules', 'f_get_model_score_bin', 'f_calcu_model_ks', 'f_calcu_model_psi',
+           'f_stress_test']

+ 14 - 6
model/model_lr.py

@@ -18,7 +18,7 @@ import statsmodels.api as sm
 from commom import f_df_to_image, f_display_images_by_side, GeneralException, f_display_title, \
 from commom import f_df_to_image, f_display_images_by_side, GeneralException, f_display_title, \
     f_image_crop_white_borders
     f_image_crop_white_borders
 from entitys import MetricFucResultEntity, DataSplitEntity, DataFeatureEntity
 from entitys import MetricFucResultEntity, DataSplitEntity, DataFeatureEntity
-from enums import ContextEnum, ResultCodesEnum, ConstantEnum
+from enums import ContextEnum, ResultCodesEnum, ConstantEnum, FileEnum
 from init import context
 from init import context
 from .model_base import ModelBase
 from .model_base import ModelBase
 from .model_utils import f_stress_test, f_calcu_model_ks, f_get_model_score_bin, f_calcu_model_psi, f_add_rules
 from .model_utils import f_stress_test, f_calcu_model_ks, f_get_model_score_bin, f_calcu_model_psi, f_add_rules
@@ -31,6 +31,7 @@ class ModelLr(ModelBase):
         self._template_path = os.path.join(dirname(dirname(realpath(__file__))), "./template/模型开发报告模板_lr.docx")
         self._template_path = os.path.join(dirname(dirname(realpath(__file__))), "./template/模型开发报告模板_lr.docx")
         self.lr = None
         self.lr = None
         self.card = None
         self.card = None
+        self.card_cfg = None
         self.coef = None
         self.coef = None
 
 
     def get_report_template_path(self):
     def get_report_template_path(self):
@@ -52,6 +53,7 @@ class ModelLr(ModelBase):
             if len(self.lr.coef_[0]) != len(data_x.columns):
             if len(self.lr.coef_[0]) != len(data_x.columns):
                 raise GeneralException(ResultCodesEnum.SYSTEM_ERROR, message=f"lr模型coef系数长度与x_columns长度不一致。")
                 raise GeneralException(ResultCodesEnum.SYSTEM_ERROR, message=f"lr模型coef系数长度与x_columns长度不一致。")
         self.card = sc.scorecard(woebin, self.lr, data_x.columns, points0=600, pdo=50, odds0=train_data.get_odds0())
         self.card = sc.scorecard(woebin, self.lr, data_x.columns, points0=600, pdo=50, odds0=train_data.get_odds0())
+        self.card_cfg = {"points0": 600, "pdo": 50, "odds0": train_data.get_odds0()}
         coef_table = self.lr.summary2().tables[1]
         coef_table = self.lr.summary2().tables[1]
         self.coef = dict(zip(coef_table.index, coef_table['Coef.']))
         self.coef = dict(zip(coef_table.index, coef_table['Coef.']))
 
 
@@ -75,28 +77,34 @@ class ModelLr(ModelBase):
             GeneralException(ResultCodesEnum.NOT_FOUND, message=f"模型不存在")
             GeneralException(ResultCodesEnum.NOT_FOUND, message=f"模型不存在")
         if self.card is None:
         if self.card is None:
             GeneralException(ResultCodesEnum.NOT_FOUND, message=f"card不存在")
             GeneralException(ResultCodesEnum.NOT_FOUND, message=f"card不存在")
-        path = self.ml_config.f_get_save_path(f"model.pkl")
+        path = self.ml_config.f_get_save_path(FileEnum.MODEL.value)
         self.lr.save(path)
         self.lr.save(path)
         print(f"model save to【{path}】success. ")
         print(f"model save to【{path}】success. ")
 
 
-        path = self.ml_config.f_get_save_path(f"coef.dict")
+        path = self.ml_config.f_get_save_path(FileEnum.COEF.value)
         with open(path, mode="w", encoding="utf-8") as f:
         with open(path, mode="w", encoding="utf-8") as f:
             j = json.dumps(self.coef, ensure_ascii=False)
             j = json.dumps(self.coef, ensure_ascii=False)
             f.write(j)
             f.write(j)
         print(f"model save to【{path}】success. ")
         print(f"model save to【{path}】success. ")
 
 
         df_card = pd.concat(self.card.values())
         df_card = pd.concat(self.card.values())
-        path = self.ml_config.f_get_save_path(f"card.csv")
+        path = self.ml_config.f_get_save_path(FileEnum.CARD.value)
         df_card.to_csv(path)
         df_card.to_csv(path)
         print(f"model save to【{path}】success. ")
         print(f"model save to【{path}】success. ")
 
 
+        path = self.ml_config.f_get_save_path(FileEnum.CARD_CFG.value)
+        with open(path, mode="w", encoding="utf-8") as f:
+            j = json.dumps(self.card_cfg, ensure_ascii=False)
+            f.write(j)
+        print(f"model save to【{path}】success. ")
+
     def model_load(self, path: str, *args, **kwargs):
     def model_load(self, path: str, *args, **kwargs):
         if not os.path.isdir(path):
         if not os.path.isdir(path):
             raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"【{path}】不是文件夹")
             raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"【{path}】不是文件夹")
-        path_model = os.path.join(path, "model.pkl")
+        path_model = os.path.join(path, FileEnum.MODEL.value)
         if not os.path.isfile(path_model):
         if not os.path.isfile(path_model):
             raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"模型文件【{path_model}】不存在")
             raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"模型文件【{path_model}】不存在")
-        path_card = os.path.join(path, "card.csv")
+        path_card = os.path.join(path, FileEnum.CARD.value)
         if not os.path.isfile(path_card):
         if not os.path.isfile(path_card):
             raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"模型文件【{path_card}】不存在")
             raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"模型文件【{path_card}】不存在")
 
 

+ 3 - 3
model/model_utils.py

@@ -44,12 +44,12 @@ def f_get_model_score_bin(df, score, bins=None):
     return df, bins
     return df, bins
 
 
 
 
-def f_calcu_model_psi(df_train, df_test):
+def f_calcu_model_psi(df_train, df_test, sort_ascending=True):
     tmp1 = df_train.groupby(ConstantEnum.SCORE_BIN.value)[ConstantEnum.SCORE_BIN.value].agg(['count']).sort_index(
     tmp1 = df_train.groupby(ConstantEnum.SCORE_BIN.value)[ConstantEnum.SCORE_BIN.value].agg(['count']).sort_index(
-        ascending=True)
+        ascending=sort_ascending)
     tmp1['样本数比例'] = (tmp1['count'] / tmp1['count'].sum()).round(3)
     tmp1['样本数比例'] = (tmp1['count'] / tmp1['count'].sum()).round(3)
     tmp2 = df_test.groupby(ConstantEnum.SCORE_BIN.value)[ConstantEnum.SCORE_BIN.value].agg(['count']).sort_index(
     tmp2 = df_test.groupby(ConstantEnum.SCORE_BIN.value)[ConstantEnum.SCORE_BIN.value].agg(['count']).sort_index(
-        ascending=True)
+        ascending=sort_ascending)
     tmp2['样本数比例'] = (tmp2['count'] / tmp2['count'].sum()).round(3)
     tmp2['样本数比例'] = (tmp2['count'] / tmp2['count'].sum()).round(3)
     psi = ((tmp1['样本数比例'] - tmp2['样本数比例']) * np.log(tmp1['样本数比例'] / tmp2['样本数比例'])).round(3)
     psi = ((tmp1['样本数比例'] - tmp2['样本数比例']) * np.log(tmp1['样本数比例'] / tmp2['样本数比例'])).round(3)
     psi = psi.reset_index()
     psi = psi.reset_index()

+ 54 - 0
ol_test.py

@@ -0,0 +1,54 @@
+# -*- coding: utf-8 -*-
+"""
+@author: yq
+@time: 2024/11/27
+@desc:
+"""
+import time
+
+from entitys import DataSplitEntity
+from online_learning import OnlineLearningTrainer
+
+
+if __name__ == "__main__":
+    time_now = time.time()
+    import scorecardpy as sc
+
+    # 加载数据
+    dat = sc.germancredit()
+    dat_columns = dat.columns.tolist()
+    dat_columns = [c.replace(".","_") for c in dat_columns]
+    dat.columns = dat_columns
+
+    dat["creditability"] = dat["creditability"].apply(lambda x: 1 if x == "bad" else 0)
+
+    data = DataSplitEntity(train_data=dat[:709], test_data=dat[709:])
+
+    # 特征处理
+    cfg = {
+        # 模型系数,分箱信息等,请参考ol_resources_demo目录下文件
+        # 模型系数文件 coef.json(如果有常数项(截距)请用const作为key)
+        # 分箱信息文件 feature.csv(数值型的分箱信息请按升序排列)
+        "path_resources": "/root/notebook/ol_resources_demo",
+        # 项目名称,影响数据存储位置
+        "project_name": "OnlineLearningDemo",
+        "y_column": "creditability",
+        # 学习率
+        "lr": 0.01,
+        # 单次更新批大小
+        "batch_size": 64,
+        # 训练轮数
+        "epochs": 20,
+        "jupyter_print": True,
+        # 压力测试
+        "stress_test": True,
+        # 压力测试抽样次数
+        "stress_sample_times": 10,
+    }
+
+    # 训练并生成报告
+    trainer = OnlineLearningTrainer(data=data, **cfg)
+    trainer.train()
+    trainer.report()
+
+    print(time.time() - time_now)

+ 10 - 0
online_learning/__init__.py

@@ -0,0 +1,10 @@
+# -*- coding: utf-8 -*-
+"""
+@author: yq
+@time: 2025/2/27
+@desc: 
+"""
+
+from .trainer import OnlineLearningTrainer
+
+__all__ = ['OnlineLearningTrainer']

+ 412 - 0
online_learning/trainer.py

@@ -0,0 +1,412 @@
+# -*- coding: utf-8 -*-
+"""
+@author: yq
+@time: 2025/2/27
+@desc: 
+"""
+import json
+import math
+import os
+import re
+from os.path import dirname, realpath
+from typing import Dict, List
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import scorecardpy as sc
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from tqdm import tqdm
+
+from commom import GeneralException, f_image_crop_white_borders, f_df_to_image, f_display_title, \
+    f_display_images_by_side
+from entitys import DataSplitEntity, OnlineLearningConfigEntity, MetricFucResultEntity
+from enums import ResultCodesEnum, ConstantEnum, ContextEnum, FileEnum
+from feature import f_woebin_load
+from init import init, context
+from model import f_get_model_score_bin, f_calcu_model_ks, f_stress_test, f_calcu_model_psi
+from monitor import ReportWord
+from .utils import LR
+
+init()
+
+
class OnlineLearningTrainer:
    """Online-learning trainer for an existing logistic-regression scorecard.

    Loads a previously trained model (coefficients + WOE binning) from
    ``path_resources``, fine-tunes the coefficients on new data via gradient
    descent, and generates a Word report comparing the original and the
    optimized model.
    """

    def __init__(self, data: DataSplitEntity = None, ol_config: OnlineLearningConfigEntity = None, *args, **kwargs):
        # Accept either a ready-made config entity or raw kwargs from which
        # one is constructed.
        if ol_config is not None:
            self._ol_config = ol_config
        else:
            self._ol_config = OnlineLearningConfigEntity(*args, **kwargs)
        self._data = data
        self._columns = None
        # Both models are assigned in _init(); these are annotations only.
        self._model_original: LR
        self._model_optimized: LR
        self.sc_woebin = None
        self.card_cfg = None
        self.card = None
        # Word report template
        self._template_path = os.path.join(dirname(dirname(realpath(__file__))),
                                           "./template/OnlineLearning报告模板_lr.docx")
        self._init(self._ol_config.path_resources)

    def _init(self, path: str):
        """Load model coefficients, optional scorecard config and WOE bins from *path*.

        Raises:
            GeneralException: if *path* is not a directory, the coefficient
                file is missing, or a model variable has no WOE binning.
        """
        if not os.path.isdir(path):
            raise GeneralException(ResultCodesEnum.ILLEGAL_PARAMS, message=f"【{path}】不是文件夹")

        path_coef = os.path.join(path, FileEnum.COEF.value)
        if not os.path.isfile(path_coef):
            raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"模型系数文件【{path_coef}】不存在")
        with open(path_coef, mode="r", encoding="utf-8") as f:
            coef = json.loads(f.read())
            print(f"coef load from【{path_coef}】success.")

        # Scorecard-generation config is optional.
        path_card_cfg = os.path.join(path, FileEnum.CARD_CFG.value)
        if os.path.isfile(path_card_cfg):
            with open(path_card_cfg, mode="r", encoding="utf-8") as f:
                self.card_cfg = json.loads(f.read())
                print(f"{FileEnum.CARD_CFG.value} load from【{path_card_cfg}】success.")

        self._columns = list(coef.keys())
        # Sort the columns to guard against ordering-dependent bugs.
        self._columns.sort()
        weight = [coef[k] for k in self._columns]
        self._model_original = LR(nn.Parameter(torch.tensor(np.array(weight))))
        self._model_optimized = LR(nn.Parameter(torch.tensor(np.array(weight))))

        self._columns = [re.sub('_woe$', '', i) for i in self._columns]
        # Drop the intercept column: the WOE encoding has no constant term.
        self._columns_intercept_remove = self._columns.copy()
        if ConstantEnum.INTERCEPT.value in self._columns_intercept_remove:
            self._columns_intercept_remove.remove(ConstantEnum.INTERCEPT.value)
        # WOE-encoded columns carry the "_woe" suffix.
        self._columns_woe = [f"{i}_woe" for i in self._columns]

        self.sc_woebin = f_woebin_load(path)
        for k in self._columns_intercept_remove:
            if k not in self.sc_woebin.keys():
                # BUGFIX: the exception was created but never raised, so a
                # missing binning slipped through silently.
                raise GeneralException(ResultCodesEnum.ILLEGAL_PARAMS, message=f"模型变量【{k}】在woe特征里不存在")

    def _feature_generate(self, data: pd.DataFrame) -> np.ndarray:
        """WOE-encode *data* and return the model input matrix.

        One column per model variable (in ``self._columns_woe`` order); the
        intercept column is a constant 1.
        BUGFIX: the return annotation said pd.DataFrame but the method
        returns ``to_numpy()``, i.e. an ndarray.
        """
        data_woe = sc.woebin_ply(data[self._columns_intercept_remove], self.sc_woebin, print_info=False)
        # Constant-1 column acts as the intercept term.
        data_woe[f"{ConstantEnum.INTERCEPT.value}_woe"] = [1] * len(data_woe)
        return data_woe[self._columns_woe].to_numpy()

    def _f_get_best_model(self, df_param: pd.DataFrame, epoch: int = None) -> LR:
        """Build an LR model from a row of the training history.

        If *epoch* is None the row with the best (ks_test, auc_test) is
        chosen; otherwise the row recorded for that epoch is used.
        """
        if epoch is None:
            df_param_sort = df_param.sort_values(by=["ks_test", "auc_test"], ascending=[False, False])
            print(f"选择最佳参数:\n{df_param_sort.iloc[0].to_dict()}")
            weight = list(df_param_sort.iloc[0])
        else:
            print(f"选择epoch:【{epoch}】的参数:\n{df_param[df_param['epoch'] == epoch].iloc[0].to_dict()}")
            weight = list(df_param[df_param["epoch"] == epoch].iloc[0])
        # The last 5 entries are metrics (auc, ks, epoch, losses), not weights.
        weight = nn.Parameter(torch.tensor(np.array(weight[0:-5])))
        return LR(weight)

    def _f_get_scorecard(self, ):
        """Generate a scorecard from the optimized coefficients.

        ``M`` is a minimal shim mimicking a fitted sklearn model, since
        sc.scorecard only reads ``coef_`` and ``intercept_``.
        """
        class M:
            def __init__(self, ):
                pass

        m = M()
        m.coef_ = [self._model_optimized.linear.weight.tolist()]
        # The intercept is already carried as a constant column in coef_.
        m.intercept_ = [0]
        self.card = sc.scorecard(self.sc_woebin, m, self._columns_woe, **self.card_cfg)

    def _f_get_metric_auc_ks(self, model_type: str):
        """AUC/KS table and performance plots for the given model
        ("新模型" = optimized, anything else = original)."""

        def _get_auc_ks(data, title):
            y = data[self._ol_config.y_column]
            y_prob = self.prob(data, model)
            perf = sc.perf_eva(y, y_prob, title=f"{title}", show_plot=True)
            path = self._ol_config.f_get_save_path(f"perf_{title}.png")
            perf["pic"].savefig(path)
            auc = perf["AUC"]
            ks = perf["KS"]
            f_image_crop_white_borders(path, path)
            return auc, ks, path

        train_data = self._data.train_data
        test_data = self._data.test_data
        data = pd.concat((train_data, test_data))

        model = self._model_optimized
        if model_type != "新模型":
            model = self._model_original

        img_path_auc_ks = []
        auc, ks, path = _get_auc_ks(data, f"{model_type}-建模数据")
        img_path_auc_ks.append(path)
        train_auc, train_ks, path = _get_auc_ks(train_data, f"{model_type}-训练集")
        img_path_auc_ks.append(path)
        test_auc, test_ks, path = _get_auc_ks(test_data, f"{model_type}-测试集")
        img_path_auc_ks.append(path)

        df_auc_ks = pd.DataFrame()
        df_auc_ks["样本集"] = ["建模数据", "训练集", "测试集"]
        df_auc_ks["AUC"] = [auc, train_auc, test_auc]
        df_auc_ks["KS"] = [ks, train_ks, test_ks]

        return MetricFucResultEntity(table=df_auc_ks, image_path=img_path_auc_ks, image_size=5, table_font_size=10)

    def _f_get_metric_trend(self, ):
        """WOE trend plots of each model variable on the combined
        train+test data, re-binned with the loaded break points."""
        train_data = self._data.train_data
        test_data = self._data.test_data
        y_column = self._ol_config.y_column
        data = pd.concat((train_data, test_data))

        # Rebuild breaks/special values from the loaded bins so the trend
        # uses exactly the same binning as the model.
        breaks_list = {}
        special_values = {}
        for column, df_bins in self.sc_woebin.items():
            breaks_list[column] = list(df_bins[df_bins["is_special_values"] == False]['breaks'])
            sv = list(df_bins[df_bins["is_special_values"] == True]['breaks'])
            if len(sv) > 0:
                special_values[column] = sv
        woebin = sc.woebin(data[self._columns_intercept_remove + [y_column]], y=y_column, breaks_list=breaks_list,
                           special_values=special_values, print_info=False)

        imgs_path = []
        for k, df_bin in woebin.items():
            sc.woebin_plot(df_bin)
            path = self._ol_config.f_get_save_path(f"trend_{k}.png")
            plt.savefig(path)
            imgs_path.append(path)
        return MetricFucResultEntity(image_path=imgs_path, image_size=4)

    def _f_get_metric_coef(self, ):
        """Side-by-side table of original vs optimized WOE coefficients,
        with optional per-column annotations from the config."""
        columns_anns = self._ol_config.columns_anns
        df = pd.DataFrame()
        df["变量"] = self._columns
        df["原变量WOE拟合系数"] = [round(i, 4) for i in self._model_original.linear.weight.tolist()]
        df["新变量WOE拟合系数"] = [round(i, 4) for i in self._model_optimized.linear.weight.tolist()]
        anns = [columns_anns.get(column, "-") for column in self._columns]
        df["释义"] = anns
        img_path_coef = self._ol_config.f_get_save_path("coef.png")
        f_df_to_image(df, img_path_coef)
        return MetricFucResultEntity(table=df, image_path=img_path_coef)

    def _f_get_metric_gain(self, model_type: str):
        """Gain/KS table by score bin on the combined data for the given
        model ("新模型" = optimized, anything else = original)."""
        train_data = self._data.train_data
        test_data = self._data.test_data
        y_column = self._ol_config.y_column
        data = pd.concat((train_data, test_data))

        model = self._model_optimized
        if model_type != "新模型":
            model = self._model_original

        score = self.prob(data, model)
        score_bin, _ = f_get_model_score_bin(data, score)
        gain = f_calcu_model_ks(score_bin, y_column, sort_ascending=False)
        img_path_gain = self._ol_config.f_get_save_path(f"{model_type}-gain.png")
        f_df_to_image(gain, img_path_gain)

        return MetricFucResultEntity(table=gain, image_path=img_path_gain)

    def _f_get_stress_test(self, ):
        """Stress-test the optimized model by resampling at configured bad
        rates and reporting the metric table."""
        stress_sample_times = self._ol_config.stress_sample_times
        stress_bad_rate_list = self._ol_config.stress_bad_rate_list
        train_data = self._data.train_data
        test_data = self._data.test_data
        y_column = self._ol_config.y_column
        data = pd.concat((train_data, test_data))
        score = self.prob(data, self._model_optimized)
        score_bin, _ = f_get_model_score_bin(data, score)
        df_stress = f_stress_test(score_bin, sample_times=stress_sample_times, bad_rate_list=stress_bad_rate_list,
                                  target_column=y_column, score_column=ConstantEnum.SCORE.value, sort_ascending=False)

        img_path_stress = self._ol_config.f_get_save_path("stress.png")
        f_df_to_image(df_stress, img_path_stress)
        return MetricFucResultEntity(table=df_stress, image_path=img_path_stress)

    def prob(self, x: pd.DataFrame, model=None) -> np.ndarray:
        """Predicted probabilities for *x*; defaults to the optimized model."""
        if model is None:
            model = self._model_optimized
        model.eval()
        with torch.no_grad():
            x = torch.tensor(self._feature_generate(x), dtype=torch.float64)
            y_prob = model(x)
            y_prob = y_prob.detach().numpy()
            return y_prob

    def score(self, x: pd.DataFrame) -> np.array:
        """Scorecard points for *x* (requires a generated card)."""
        return np.array(sc.scorecard_ply(x, self.card, print_step=0)["score"])

    def psi(self, x1: pd.DataFrame, x2: pd.DataFrame, points: List[float] = None) -> pd.DataFrame:
        """Population stability index between *x1* and *x2* on model
        probabilities; *x1*'s bin edges (or *points*) are reused for *x2*."""
        y1 = self.prob(x1)
        y2 = self.prob(x2)
        x1_score_bin, score_bins = f_get_model_score_bin(x1, y1, points)
        x2_score_bin, _ = f_get_model_score_bin(x2, y2, score_bins)
        model_psi = f_calcu_model_psi(x1_score_bin, x2_score_bin, sort_ascending=False)
        print(f"模型psi: {model_psi['psi'].sum()}")
        return model_psi

    def train(self, ):
        """Fine-tune the optimized model with mini-batch gradient descent
        (BCE loss, Adam), recording test-set metrics per epoch into the
        context under ContextEnum.PARAM_OPTIMIZED."""

        def _get_param_optimized(model: LR, epoch):
            # Evaluate *model* on the test set and return one history row:
            # weights + [auc, ks, epoch, loss_train, loss_test].
            model.eval()
            with torch.no_grad():
                y_prob = model(test_x)
                loss = criterion(y_prob, torch.tensor(test_y.to_numpy(), dtype=torch.float64))
                loss_test = loss.detach().item()
                y_prob = y_prob.detach().numpy()
                perf = sc.perf_eva(test_y, y_prob, show_plot=False)
                auc = perf["AUC"]
                ks = perf["KS"]
                row = model.linear.weight.tolist() + [auc, ks, epoch + 1, loss_train, loss_test]
                return dict(zip(df_param_columns, row))

        epochs = self._ol_config.epochs
        batch_size = self._ol_config.batch_size
        train_data = self._data.train_data
        test_data = self._data.test_data
        train_x = self._feature_generate(train_data)
        train_y = train_data[self._ol_config.y_column].to_numpy()
        test_x = torch.tensor(self._feature_generate(test_data), dtype=torch.float64)
        test_y = test_data[self._ol_config.y_column]

        criterion = nn.BCELoss()
        optimizer = optim.Adam(self._model_optimized.parameters(), lr=self._ol_config.lr)

        df_param_columns = self._columns + ["auc_test", "ks_test", "epoch", "loss_train", "loss_test"]
        df_param = pd.DataFrame(columns=df_param_columns)
        # Baseline row before any optimization (no train loss yet).
        loss_train = 0
        df_param.loc[len(df_param)] = _get_param_optimized(self._model_original, -1)
        for epoch in tqdm(range(epochs)):
            data_len = len(train_x)
            for i in range(math.ceil(data_len / batch_size)):
                train_x_batch = torch.tensor(train_x[i * batch_size:(i + 1) * batch_size], dtype=torch.float64)
                train_y_batch = torch.tensor(train_y[i * batch_size:(i + 1) * batch_size], dtype=torch.float64)
                self._model_optimized.train()
                optimizer.zero_grad()
                y_prob = self._model_optimized(train_x_batch)
                loss = criterion(y_prob, train_y_batch)
                loss.backward()
                optimizer.step()
                loss_train = loss.detach().item()
            # Evaluate on the test set after each epoch.
            df_param.loc[len(df_param)] = _get_param_optimized(self._model_optimized, epoch)

        context.set(ContextEnum.PARAM_OPTIMIZED, df_param)

    def save(self):
        """Persist config, WOE bins, optimized coefficients and (if
        generated) the scorecard to the project save path.

        Raises:
            GeneralException: if the WOE bins or the model are missing.
        """
        self._ol_config.config_save()

        if self.sc_woebin is None:
            # BUGFIX: the exception was created but never raised.
            raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"feature不存在")
        df_woebin = pd.concat(self.sc_woebin.values())
        path = self._ol_config.f_get_save_path(FileEnum.FEATURE.value)
        df_woebin.to_csv(path)
        print(f"feature save to【{path}】success. ")

        if self._model_optimized is None:
            # BUGFIX: the exception was created but never raised.
            raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"模型不存在")
        path = self._ol_config.f_get_save_path(FileEnum.COEF.value)
        with open(path, mode="w", encoding="utf-8") as f:
            coef = dict(zip(self._columns, self._model_optimized.linear.weight.tolist()))
            j = json.dumps(coef, ensure_ascii=False)
            f.write(j)
        print(f"model save to【{path}】success. ")

        if self.card is not None:
            df_card = pd.concat(self.card.values())
            path = self._ol_config.f_get_save_path(FileEnum.CARD.value)
            df_card.to_csv(path)
            print(f"model save to【{path}】success. ")

    @staticmethod
    def load(path: str):
        """Alternate constructor: rebuild a trainer from a saved config at *path*."""
        ol_config = OnlineLearningConfigEntity.from_config(path)
        ol_config._path_resources = path
        return OnlineLearningTrainer(ol_config=ol_config)

    def report(self, epoch: int = None):
        """Pick the final model from the training history (best row, or the
        row for *epoch*), collect all metrics and render the Word report."""
        df_param = context.get(ContextEnum.PARAM_OPTIMIZED)
        self._model_optimized = self._f_get_best_model(df_param, epoch)

        if self._ol_config.jupyter_print:
            from IPython import display
            f_display_title(display, "模型系数优化过程")
            display.display(df_param)

        metric_value_dict = {}

        # Scorecard (only when a card config was loaded).
        if self.card_cfg is not None:
            self._f_get_scorecard()
            df_card = pd.concat(self.card.values())
            img_path_card = self._ol_config.f_get_save_path("card.png")
            f_df_to_image(df_card, img_path_card)
            metric_value_dict["评分卡"] = MetricFucResultEntity(table=df_card, image_path=img_path_card)

        # Sample distribution.
        metric_value_dict["样本分布"] = MetricFucResultEntity(table=self._data.get_distribution(self._ol_config.y_column),
                                                          table_font_size=10, table_cell_width=3)

        # Model results, new vs original.
        metric_value_dict[f"模型结果-新模型"] = self._f_get_metric_auc_ks("新模型")
        metric_value_dict[f"模型结果-原模型"] = self._f_get_metric_auc_ks("原模型")

        # Variable trends.
        metric_value_dict["变量趋势-建模数据"] = self._f_get_metric_trend()

        # Coefficient comparison.
        metric_value_dict["模型系数"] = self._f_get_metric_coef()

        # Score bins.
        metric_value_dict["分数分箱-建模数据-新模型"] = self._f_get_metric_gain("新模型")
        metric_value_dict["分数分箱-建模数据-原模型"] = self._f_get_metric_gain("原模型")

        # Stress test (optional).
        if self._ol_config.stress_test:
            metric_value_dict["压力测试"] = self._f_get_stress_test()

        if self._ol_config.jupyter_print:
            self.jupyter_print(metric_value_dict)

        save_path = self._ol_config.f_get_save_path("OnlineLearning报告.docx")
        ReportWord.generate_report(metric_value_dict, self._template_path, save_path=save_path)
        print(f"模型报告文件储存路径:{save_path}")

    def jupyter_print(self, metric_value_dict: Dict[str, MetricFucResultEntity]):
        """Pretty-print all collected metrics inside a Jupyter notebook.

        BUGFIX: the parameter was declared ``metric_value_dict=Dict[...]``,
        i.e. the typing object as a default value instead of a type
        annotation; it is now a required, annotated parameter.
        """
        from IPython import display

        f_display_title(display, "样本分布")
        display.display(metric_value_dict["样本分布"].table)

        f_display_title(display, "模型结果")
        print(f"原模型")
        display.display(metric_value_dict["模型结果-原模型"].table)
        f_display_images_by_side(display, metric_value_dict["模型结果-原模型"].image_path)
        print(f"新模型")
        display.display(metric_value_dict["模型结果-新模型"].table)
        f_display_images_by_side(display, metric_value_dict["模型结果-新模型"].image_path)

        f_display_title(display, "模型系数")
        display.display(metric_value_dict["模型系数"].table)

        f_display_title(display, "分数分箱")
        print(f"建模数据上分数分箱")
        print(f"原模型")
        display.display(metric_value_dict["分数分箱-建模数据-原模型"].table)
        print(f"新模型")
        display.display(metric_value_dict["分数分箱-建模数据-新模型"].table)

        f_display_title(display, "变量趋势")
        print(f"建模数据上变量趋势")
        f_display_images_by_side(display, metric_value_dict["变量趋势-建模数据"].image_path)

        if "压力测试" in metric_value_dict.keys():
            f_display_title(display, "压力测试")
            display.display(metric_value_dict["压力测试"].table)

        # Scorecard.
        if "评分卡" in metric_value_dict.keys():
            f_display_title(display, "评分卡")
            display.display(metric_value_dict["评分卡"].table)
+
+if __name__ == "__main__":
+    pass

+ 22 - 0
online_learning/utils.py

@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+"""
+@author: yq
+@time: 2025/2/27
+@desc: 
+"""
+import torch.nn as nn
+
+
class LR(nn.Module):
    """Minimal logistic-regression module.

    A bias-free linear layer whose weight is supplied by the caller,
    followed by a sigmoid. The weight is kept as a 1-D parameter, so the
    forward pass yields a 1-D probability vector per batch.
    """

    def __init__(self, weight: nn.Parameter):
        super(LR, self).__init__()
        # in_features equals the number of supplied coefficients; the
        # caller's parameter replaces the randomly initialised weight.
        self.linear = nn.Linear(weight.shape[0], 1, bias=False)
        self.linear.weight = weight
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        logits = self.linear(x)
        return self.sigmoid(logits)
+
+
+if __name__ == "__main__":
+    pass

File diff suppressed because it is too large
+ 680 - 0
online_learning_demo.ipynb


+ 18 - 2
pipeline/pipeline.py

@@ -4,12 +4,15 @@
 @time: 2024/11/1
 @time: 2024/11/1
 @desc: 模型训练管道
 @desc: 模型训练管道
 """
 """
+from typing import List
+
 import pandas as pd
 import pandas as pd
 
 
 from entitys import DataSplitEntity, MlConfigEntity, DataFeatureEntity
 from entitys import DataSplitEntity, MlConfigEntity, DataFeatureEntity
+from enums import ConstantEnum
 from feature import FeatureStrategyFactory, FeatureStrategyBase
 from feature import FeatureStrategyFactory, FeatureStrategyBase
 from init import init
 from init import init
-from model import ModelBase, ModelFactory, f_add_rules
+from model import ModelBase, ModelFactory, f_add_rules, f_get_model_score_bin, f_calcu_model_psi
 from monitor import ReportWord
 from monitor import ReportWord
 
 
 init()
 init()
@@ -53,6 +56,19 @@ class Pipeline():
     def score_rule(self, data: pd.DataFrame):
     def score_rule(self, data: pd.DataFrame):
         return self._model.score_rule(data)
         return self._model.score_rule(data)
 
 
    def psi(self, x1: pd.DataFrame, x2: pd.DataFrame, points: List[float] = None) -> pd.DataFrame:
        """Population stability index between *x1* and *x2* on model scores.

        Scores come from score_rule() when rules are configured, otherwise
        from score(). x1's bin edges (or the given *points*) are reused to
        bin x2 so both datasets share the same binning. Prints the total PSI
        and returns the per-bin PSI table.
        """
        # Rule-adjusted scores take precedence when any rules are configured.
        if len(self._ml_config.rules) != 0:
            y1 = self.score_rule(x1)
            y2 = self.score_rule(x2)
        else:
            y1 = self.score(x1)
            y2 = self.score(x2)
        # Bin x1 first, then apply the same edges to x2.
        x1_score_bin, score_bins = f_get_model_score_bin(x1, y1, points)
        x2_score_bin, _ = f_get_model_score_bin(x2, y2, score_bins)
        model_psi = f_calcu_model_psi(x1_score_bin, x2_score_bin)
        print(f"模型psi: {model_psi['psi'].sum()}")
        return model_psi
+
     def report(self, ):
     def report(self, ):
         save_path = self._ml_config.f_get_save_path("模型报告.docx")
         save_path = self._ml_config.f_get_save_path("模型报告.docx")
         ReportWord.generate_report(self.metric_value_dict, self._model.get_report_template_path(), save_path=save_path)
         ReportWord.generate_report(self.metric_value_dict, self._model.get_report_template_path(), save_path=save_path)
@@ -77,7 +93,7 @@ class Pipeline():
     def rules_test(self, ):
     def rules_test(self, ):
         rules = self._ml_config.rules
         rules = self._ml_config.rules
         df = self._data.train_data.copy()
         df = self._data.train_data.copy()
-        df["SCORE"] = [0] * len(df)
+        df[ConstantEnum.SCORE.value] = [0] * len(df)
         f_add_rules(df, rules)
         f_add_rules(df, rules)
 
 
 
 

+ 1 - 0
requirements-analysis.txt

@@ -18,3 +18,4 @@ kaleido==0.2.1
 statsmodels==0.12.2
 statsmodels==0.12.2
 beautifulsoup4==4.11.1
 beautifulsoup4==4.11.1
 openpyxl==3.0.9
 openpyxl==3.0.9
+torch==1.1.0

BIN
template/OnlineLearning报告模板_lr.docx


Some files were not shown because too many files changed in this diff