4 months ago · 73265a5049
--- a/entitys/ml_config_entity.py
+++ b/entitys/ml_config_entity.py
@@ -10,7 +10,7 @@ from typing import List, Union
 
				 
			
 
				 from commom import GeneralException, f_get_datetime
			
 
				 from config import BaseConfig
			
 
				-from enums import ResultCodesEnum
			
 
				+from enums import ResultCodesEnum, FileEnum
			
 
				 from init import warning_ignore
			
 
				 
			
 
				 
			
@@ -295,7 +295,7 @@ class MlConfigEntity():
 
				         从配置文件生成实体类
			
 
				         """
			
 
				         if os.path.isdir(config_path):
			
 
				-            config_path = os.path.join(config_path, "mlcfg.json")
			
 
				+            config_path = os.path.join(config_path, FileEnum.MLCFG.value)
			
 
				 
			
 
				         if os.path.exists(config_path):
			
 
				             with open(config_path, mode="r", encoding="utf-8") as f:
			
@@ -306,7 +306,7 @@ class MlConfigEntity():
 
				         return MlConfigEntity(**j)
			
 
				 
			
 
				     def config_save(self):
			
 
				-        path = self.f_get_save_path("mlcfg.json")
			
 
				+        path = self.f_get_save_path(FileEnum.MLCFG.value)
			
 
				         with open(path, mode="w", encoding="utf-8") as f:
			
 
				             j = {k.lstrip("_"): v for k, v in self.__dict__.items()}
			
 
				             j = json.dumps(j, ensure_ascii=False)
			
--- a/entitys/ol_config_entity.py
+++ b/entitys/ol_config_entity.py
@@ -10,7 +10,7 @@ from typing import List
 
				 
			
 
				 from commom import GeneralException, f_get_datetime
			
 
				 from config import BaseConfig
			
 
				-from enums import ResultCodesEnum
			
 
				+from enums import ResultCodesEnum, FileEnum
			
 
				 from init import warning_ignore
			
 
				 
			
 
				 
			
@@ -112,7 +112,7 @@ class OnlineLearningConfigEntity():
 
				         从配置文件生成实体类
			
 
				         """
			
 
				         if os.path.isdir(config_path):
			
 
				-            config_path = os.path.join(config_path, "olcfg.json")
			
 
				+            config_path = os.path.join(config_path, FileEnum.OLCFG.value)
			
 
				 
			
 
				         if os.path.exists(config_path):
			
 
				             with open(config_path, mode="r", encoding="utf-8") as f:
			
@@ -123,7 +123,7 @@ class OnlineLearningConfigEntity():
 
				         return OnlineLearningConfigEntity(**j)
			
 
				 
			
 
				     def config_save(self):
			
 
				-        path = self.f_get_save_path("olcfg.json")
			
 
				+        path = self.f_get_save_path(FileEnum.OLCFG.value)
			
 
				         with open(path, mode="w", encoding="utf-8") as f:
			
 
				             j = {k.lstrip("_"): v for k, v in self.__dict__.items()}
			
 
				             j = json.dumps(j, ensure_ascii=False)
			
--- a/enums/__init__.py
+++ b/enums/__init__.py
@@ -7,9 +7,10 @@
 
				 from .constant_enum import ConstantEnum
			
 
				 from .context_enum import ContextEnum
			
 
				 from .feature_strategy_enum import FeatureStrategyEnum
			
 
				+from .file_enum import FileEnum
			
 
				 from .model_enum import ModelEnum
			
 
				 from .placeholder_prefix_enum import PlaceholderPrefixEnum
			
 
				 from .result_codes_enum import ResultCodesEnum
			
 
				 
			
 
				 __all__ = ['ResultCodesEnum', 'PlaceholderPrefixEnum', 'FeatureStrategyEnum', 'ModelEnum', 'ContextEnum',
			
 
				-           'ConstantEnum']
			
 
				+           'ConstantEnum','FileEnum']
			
--- a/enums/file_enum.py
+++ b/enums/file_enum.py
@@ -0,0 +1,25 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+@author: yq
			
 
				+@time: 2024/11/14
			
 
				+@desc: 文件名枚举值
			
 
				+"""
			
 
				+from enum import Enum
			
 
				+
			
 
				+
			
 
				+class FileEnum(Enum):  # Canonical file names of the artifacts the project saves and loads; callers join `.value` onto a save directory.
			
 
				+    MLCFG = "mlcfg.json"  # MlConfigEntity configuration file (read when given a directory, written by config_save)
			
 
				+    OLCFG = "olcfg.json"  # OnlineLearningConfigEntity configuration file (read when given a directory, written by config_save)
			
 
				+    FEATURE = "feature.csv"  # WOE bin table written with DataFrame.to_csv and read back by f_woebin_load
			
 
				+    CARD = "card.csv"  # scorecard table written with DataFrame.to_csv by ModelLr
			
 
				+    CARD_CFG = "card.cfg"  # JSON dump of the scorecard parameters (points0, pdo, odds0) despite the .cfg suffix
			
 
				+    COEF = "coef.json"  # model coefficients as JSON; NOTE: call sites previously used "coef.dict", so folders saved under that name fail the isfile check on load
			
 
				+    MODEL = "model.pkl"  # target path passed to self.lr.save() in ModelLr
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
--- a/feature/woe/strategy_woe.py
+++ b/feature/woe/strategy_woe.py
@@ -20,7 +20,7 @@ from commom import f_display_images_by_side, NumpyEncoder, GeneralException, f_d
 
				     f_image_crop_white_borders
			
 
				 from data import DataExplore
			
 
				 from entitys import DataSplitEntity, MetricFucResultEntity
			
 
				-from enums import ContextEnum, ResultCodesEnum
			
 
				+from enums import ContextEnum, ResultCodesEnum, FileEnum
			
 
				 from feature.feature_strategy_base import FeatureStrategyBase
			
 
				 from init import context
			
 
				 from .entity import BinInfo, HomologousBinInfo
			
@@ -475,7 +475,7 @@ class StrategyWoe(FeatureStrategyBase):
 
				         if self.sc_woebin is None:
			
 
				             GeneralException(ResultCodesEnum.NOT_FOUND, message=f"feature不存在")
			
 
				         df_woebin = pd.concat(self.sc_woebin.values())
			
 
				-        path = self.ml_config.f_get_save_path(f"feature.csv")
			
 
				+        path = self.ml_config.f_get_save_path(FileEnum.FEATURE.value)
			
 
				         df_woebin.to_csv(path)
			
 
				         print(f"feature save to【{path}】success. ")
			
 
				 
			
--- a/feature/woe/utils.py
+++ b/feature/woe/utils.py
@@ -12,7 +12,7 @@ import pandas as pd
 
				 from statsmodels.stats.outliers_influence import variance_inflation_factor as vif
			
 
				 
			
 
				 from commom import GeneralException
			
 
				-from enums import ResultCodesEnum
			
 
				+from enums import ResultCodesEnum, FileEnum
			
 
				 
			
 
				 FORMAT_DICT = {
			
 
				     # 比例类 -1 - 1
			
@@ -141,9 +141,9 @@ def f_get_vif(data: pd.DataFrame) -> Union[pd.DataFrame, None]:
 
				 
			
 
				 def f_woebin_load(path: str):
			
 
				     if os.path.isdir(path):
			
 
				-        path = os.path.join(path, "feature.csv")
			
 
				-    if not os.path.isfile(path) or "feature.csv" not in path:
			
 
				-        raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"特征信息【feature.csv】不存在")
			
 
				+        path = os.path.join(path, FileEnum.FEATURE.value)
			
 
				+    if not os.path.isfile(path) or FileEnum.FEATURE.value not in path:
			
 
				+        raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"特征信息【{FileEnum.FEATURE.value}】不存在")
			
 
				 
			
 
				     df_woebin = pd.read_csv(path)
			
 
				     variables = df_woebin["variable"].unique().tolist()
			
--- a/model/model_lr.py
+++ b/model/model_lr.py
@@ -18,7 +18,7 @@ import statsmodels.api as sm
 
				 from commom import f_df_to_image, f_display_images_by_side, GeneralException, f_display_title, \
			
 
				     f_image_crop_white_borders
			
 
				 from entitys import MetricFucResultEntity, DataSplitEntity, DataFeatureEntity
			
 
				-from enums import ContextEnum, ResultCodesEnum, ConstantEnum
			
 
				+from enums import ContextEnum, ResultCodesEnum, ConstantEnum, FileEnum
			
 
				 from init import context
			
 
				 from .model_base import ModelBase
			
 
				 from .model_utils import f_stress_test, f_calcu_model_ks, f_get_model_score_bin, f_calcu_model_psi, f_add_rules
			
@@ -31,6 +31,7 @@ class ModelLr(ModelBase):
 
				         self._template_path = os.path.join(dirname(dirname(realpath(__file__))), "./template/模型开发报告模板_lr.docx")
			
 
				         self.lr = None
			
 
				         self.card = None
			
 
				+        self.card_cfg = None
			
 
				         self.coef = None
			
 
				 
			
 
				     def get_report_template_path(self):
			
@@ -52,6 +53,7 @@ class ModelLr(ModelBase):
 
				             if len(self.lr.coef_[0]) != len(data_x.columns):
			
 
				                 raise GeneralException(ResultCodesEnum.SYSTEM_ERROR, message=f"lr模型coef系数长度与x_columns长度不一致。")
			
 
				         self.card = sc.scorecard(woebin, self.lr, data_x.columns, points0=600, pdo=50, odds0=train_data.get_odds0())
			
 
				+        self.card_cfg = {"points0": 600, "pdo": 50, "odds0": train_data.get_odds0()}
			
 
				         coef_table = self.lr.summary2().tables[1]
			
 
				         self.coef = dict(zip(coef_table.index, coef_table['Coef.']))
			
 
				 
			
@@ -75,28 +77,34 @@ class ModelLr(ModelBase):
 
				             GeneralException(ResultCodesEnum.NOT_FOUND, message=f"模型不存在")
			
 
				         if self.card is None:
			
 
				             GeneralException(ResultCodesEnum.NOT_FOUND, message=f"card不存在")
			
 
				-        path = self.ml_config.f_get_save_path(f"model.pkl")
			
 
				+        path = self.ml_config.f_get_save_path(FileEnum.MODEL.value)
			
 
				         self.lr.save(path)
			
 
				         print(f"model save to【{path}】success. ")
			
 
				 
			
 
				-        path = self.ml_config.f_get_save_path("coef.dict")
			
 
				+        path = self.ml_config.f_get_save_path(FileEnum.COEF.value)
			
 
				         with open(path, mode="w", encoding="utf-8") as f:
			
 
				             j = json.dumps(self.coef, ensure_ascii=False)
			
 
				             f.write(j)
			
 
				         print(f"model save to【{path}】success. ")
			
 
				 
			
 
				         df_card = pd.concat(self.card.values())
			
 
				-        path = self.ml_config.f_get_save_path(f"card.csv")
			
 
				+        path = self.ml_config.f_get_save_path(FileEnum.CARD.value)
			
 
				         df_card.to_csv(path)
			
 
				         print(f"model save to【{path}】success. ")
			
 
				 
			
 
				+        path = self.ml_config.f_get_save_path(FileEnum.CARD_CFG.value)
			
 
				+        with open(path, mode="w", encoding="utf-8") as f:
			
 
				+            j = json.dumps(self.card_cfg, ensure_ascii=False)
			
 
				+            f.write(j)
			
 
				+        print(f"model save to【{path}】success. ")
			
 
				+
			
 
				     def model_load(self, path: str, *args, **kwargs):
			
 
				         if not os.path.isdir(path):
			
 
				             raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"【{path}】不是文件夹")
			
 
				-        path_model = os.path.join(path, "model.pkl")
			
 
				+        path_model = os.path.join(path, FileEnum.MODEL.value)
			
 
				         if not os.path.isfile(path_model):
			
 
				             raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"模型文件【{path_model}】不存在")
			
 
				-        path_card = os.path.join(path, "card.csv")
			
 
				+        path_card = os.path.join(path, FileEnum.CARD.value)
			
 
				         if not os.path.isfile(path_card):
			
 
				             raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"模型文件【{path_card}】不存在")
			
 
				 
			
--- a/online_learning/trainer.py
+++ b/online_learning/trainer.py
@@ -23,7 +23,7 @@ from tqdm import tqdm
 
				 from commom import GeneralException, f_image_crop_white_borders, f_df_to_image, f_display_title, \
			
 
				     f_display_images_by_side
			
 
				 from entitys import DataSplitEntity, OnlineLearningConfigEntity, MetricFucResultEntity
			
 
				-from enums import ResultCodesEnum, ConstantEnum, ContextEnum
			
 
				+from enums import ResultCodesEnum, ConstantEnum, ContextEnum, FileEnum
			
 
				 from feature import f_woebin_load
			
 
				 from init import init, context
			
 
				 from model import f_get_model_score_bin, f_calcu_model_ks, f_stress_test, f_calcu_model_psi
			
@@ -53,7 +53,7 @@ class OnlineLearningTrainer:
 
				         if not os.path.isdir(path):
			
 
				             raise GeneralException(ResultCodesEnum.ILLEGAL_PARAMS, message=f"【{path}】不是文件夹")
			
 
				 
			
 
				-        path_coef = os.path.join(path, "coef.dict")
			
 
				+        path_coef = os.path.join(path, FileEnum.COEF.value)
			
 
				         if not os.path.isfile(path_coef):
			
 
				             raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"模型系数文件【{path_coef}】不存在")
			
 
				         with open(path_coef, mode="r", encoding="utf-8") as f:
			
@@ -276,13 +276,13 @@ class OnlineLearningTrainer:
 
				         if self.sc_woebin is None:
			
 
				             GeneralException(ResultCodesEnum.NOT_FOUND, message=f"feature不存在")
			
 
				         df_woebin = pd.concat(self.sc_woebin.values())
			
 
				-        path = self._ol_config.f_get_save_path(f"feature.csv")
			
 
				+        path = self._ol_config.f_get_save_path(FileEnum.FEATURE.value)
			
 
				         df_woebin.to_csv(path)
			
 
				         print(f"feature save to【{path}】success. ")
			
 
				 
			
 
				         if self._model_optimized is None:
			
 
				             GeneralException(ResultCodesEnum.NOT_FOUND, message=f"模型不存在")
			
 
				-        path = self._ol_config.f_get_save_path("coef.dict")
			
 
				+        path = self._ol_config.f_get_save_path(FileEnum.COEF.value)
			
 
				         with open(path, mode="w", encoding="utf-8") as f:
			
 
				             coef = dict(zip(self._columns, self._model_optimized.linear.weight.tolist()))
			
 
				             j = json.dumps(coef, ensure_ascii=False)
			
--- a/pipeline/pipeline.py
+++ b/pipeline/pipeline.py
@@ -9,6 +9,7 @@ from typing import List
 
				 import pandas as pd
			
 
				 
			
 
				 from entitys import DataSplitEntity, MlConfigEntity, DataFeatureEntity
			
 
				+from enums import ConstantEnum
			
 
				 from feature import FeatureStrategyFactory, FeatureStrategyBase
			
 
				 from init import init
			
 
				 from model import ModelBase, ModelFactory, f_add_rules, f_get_model_score_bin, f_calcu_model_psi
			
@@ -92,7 +93,7 @@ class Pipeline():
 
				     def rules_test(self, ):
			
 
				         rules = self._ml_config.rules
			
 
				         df = self._data.train_data.copy()
			
 
				-        df["SCORE"] = [0] * len(df)
			
 
				+        df[ConstantEnum.SCORE.value] = [0] * len(df)
			
 
				         f_add_rules(df, rules)