Browse Source

add: 重要文件名枚举值

yq 1 month ago
parent
commit
73265a5049

+ 3 - 3
entitys/ml_config_entity.py

@@ -10,7 +10,7 @@ from typing import List, Union
 
 from commom import GeneralException, f_get_datetime
 from config import BaseConfig
-from enums import ResultCodesEnum
+from enums import ResultCodesEnum, FileEnum
 from init import warning_ignore
 
 
@@ -295,7 +295,7 @@ class MlConfigEntity():
         从配置文件生成实体类
         """
         if os.path.isdir(config_path):
-            config_path = os.path.join(config_path, "mlcfg.json")
+            config_path = os.path.join(config_path, FileEnum.MLCFG.value)
 
         if os.path.exists(config_path):
             with open(config_path, mode="r", encoding="utf-8") as f:
@@ -306,7 +306,7 @@ class MlConfigEntity():
         return MlConfigEntity(**j)
 
     def config_save(self):
-        path = self.f_get_save_path("mlcfg.json")
+        path = self.f_get_save_path(FileEnum.MLCFG.value)
         with open(path, mode="w", encoding="utf-8") as f:
             j = {k.lstrip("_"): v for k, v in self.__dict__.items()}
             j = json.dumps(j, ensure_ascii=False)

+ 3 - 3
entitys/ol_config_entity.py

@@ -10,7 +10,7 @@ from typing import List
 
 from commom import GeneralException, f_get_datetime
 from config import BaseConfig
-from enums import ResultCodesEnum
+from enums import ResultCodesEnum, FileEnum
 from init import warning_ignore
 
 
@@ -112,7 +112,7 @@ class OnlineLearningConfigEntity():
         从配置文件生成实体类
         """
         if os.path.isdir(config_path):
-            config_path = os.path.join(config_path, "olcfg.json")
+            config_path = os.path.join(config_path, FileEnum.OLCFG.value)
 
         if os.path.exists(config_path):
             with open(config_path, mode="r", encoding="utf-8") as f:
@@ -123,7 +123,7 @@ class OnlineLearningConfigEntity():
         return OnlineLearningConfigEntity(**j)
 
     def config_save(self):
-        path = self.f_get_save_path("olcfg.json")
+        path = self.f_get_save_path(FileEnum.OLCFG.value)
         with open(path, mode="w", encoding="utf-8") as f:
             j = {k.lstrip("_"): v for k, v in self.__dict__.items()}
             j = json.dumps(j, ensure_ascii=False)

+ 2 - 1
enums/__init__.py

@@ -7,9 +7,10 @@
 from .constant_enum import ConstantEnum
 from .context_enum import ContextEnum
 from .feature_strategy_enum import FeatureStrategyEnum
+from .file_enum import FileEnum
 from .model_enum import ModelEnum
 from .placeholder_prefix_enum import PlaceholderPrefixEnum
 from .result_codes_enum import ResultCodesEnum
 
 __all__ = ['ResultCodesEnum', 'PlaceholderPrefixEnum', 'FeatureStrategyEnum', 'ModelEnum', 'ContextEnum',
-           'ConstantEnum']
+           'ConstantEnum','FileEnum']

+ 25 - 0
enums/file_enum.py

@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+"""
+@author: yq
+@time: 2024/11/14
+@desc: 文件名枚举值
+"""
+from enum import Enum
+
+
+class FileEnum(Enum):
+    """Names of the artifact files written to / read from a save directory.
+
+    Each member's ``value`` is a bare file name (no directory part). Callers
+    join it with a directory via ``os.path.join(...)`` or pass it to a config
+    entity's ``f_get_save_path(...)``.
+    """
+    # Config file loaded and saved by MlConfigEntity.
+    MLCFG = "mlcfg.json"
+    # Config file loaded and saved by OnlineLearningConfigEntity.
+    OLCFG = "olcfg.json"
+    # WOE binning table (concatenated sc_woebin frames) written by
+    # StrategyWoe / OnlineLearningTrainer and read back by f_woebin_load.
+    FEATURE = "feature.csv"
+    # Scorecard table (concatenated card frames) saved and checked by ModelLr.
+    CARD = "card.csv"
+    # Scorecard parameters (points0 / pdo / odds0) saved by ModelLr.
+    # NOTE: the content is JSON even though the extension is ".cfg".
+    CARD_CFG = "card.cfg"
+    # Model coefficients serialized as JSON.
+    # NOTE(review): this file was previously named "coef.dict"; directories
+    # saved before the rename will not contain "coef.json" - confirm whether
+    # a fallback or migration is needed.
+    COEF = "coef.json"
+    # Fitted LR model persisted by ModelLr via ``self.lr.save(path)``.
+    MODEL = "model.pkl"
+
+
+
+
+
+
+
+

+ 2 - 2
feature/woe/strategy_woe.py

@@ -20,7 +20,7 @@ from commom import f_display_images_by_side, NumpyEncoder, GeneralException, f_d
     f_image_crop_white_borders
 from data import DataExplore
 from entitys import DataSplitEntity, MetricFucResultEntity
-from enums import ContextEnum, ResultCodesEnum
+from enums import ContextEnum, ResultCodesEnum, FileEnum
 from feature.feature_strategy_base import FeatureStrategyBase
 from init import context
 from .entity import BinInfo, HomologousBinInfo
@@ -475,7 +475,7 @@ class StrategyWoe(FeatureStrategyBase):
         if self.sc_woebin is None:
             GeneralException(ResultCodesEnum.NOT_FOUND, message=f"feature不存在")
         df_woebin = pd.concat(self.sc_woebin.values())
-        path = self.ml_config.f_get_save_path(f"feature.csv")
+        path = self.ml_config.f_get_save_path(FileEnum.FEATURE.value)
         df_woebin.to_csv(path)
         print(f"feature save to【{path}】success. ")
 

+ 4 - 4
feature/woe/utils.py

@@ -12,7 +12,7 @@ import pandas as pd
 from statsmodels.stats.outliers_influence import variance_inflation_factor as vif
 
 from commom import GeneralException
-from enums import ResultCodesEnum
+from enums import ResultCodesEnum, FileEnum
 
 FORMAT_DICT = {
     # 比例类 -1 - 1
@@ -141,9 +141,9 @@ def f_get_vif(data: pd.DataFrame) -> Union[pd.DataFrame, None]:
 
 def f_woebin_load(path: str):
     if os.path.isdir(path):
-        path = os.path.join(path, "feature.csv")
-    if not os.path.isfile(path) or "feature.csv" not in path:
-        raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"特征信息【feature.csv】不存在")
+        path = os.path.join(path, FileEnum.FEATURE.value)
+    if not os.path.isfile(path) or FileEnum.FEATURE.value not in path:
+        raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"特征信息【{FileEnum.FEATURE.value}】不存在")
 
     df_woebin = pd.read_csv(path)
     variables = df_woebin["variable"].unique().tolist()

+ 14 - 6
model/model_lr.py

@@ -18,7 +18,7 @@ import statsmodels.api as sm
 from commom import f_df_to_image, f_display_images_by_side, GeneralException, f_display_title, \
     f_image_crop_white_borders
 from entitys import MetricFucResultEntity, DataSplitEntity, DataFeatureEntity
-from enums import ContextEnum, ResultCodesEnum, ConstantEnum
+from enums import ContextEnum, ResultCodesEnum, ConstantEnum, FileEnum
 from init import context
 from .model_base import ModelBase
 from .model_utils import f_stress_test, f_calcu_model_ks, f_get_model_score_bin, f_calcu_model_psi, f_add_rules
@@ -31,6 +31,7 @@ class ModelLr(ModelBase):
         self._template_path = os.path.join(dirname(dirname(realpath(__file__))), "./template/模型开发报告模板_lr.docx")
         self.lr = None
         self.card = None
+        self.card_cfg = None
         self.coef = None
 
     def get_report_template_path(self):
@@ -52,6 +53,7 @@ class ModelLr(ModelBase):
             if len(self.lr.coef_[0]) != len(data_x.columns):
                 raise GeneralException(ResultCodesEnum.SYSTEM_ERROR, message=f"lr模型coef系数长度与x_columns长度不一致。")
         self.card = sc.scorecard(woebin, self.lr, data_x.columns, points0=600, pdo=50, odds0=train_data.get_odds0())
+        self.card_cfg = {"points0": 600, "pdo": 50, "odds0": train_data.get_odds0()}
         coef_table = self.lr.summary2().tables[1]
         self.coef = dict(zip(coef_table.index, coef_table['Coef.']))
 
@@ -75,28 +77,34 @@ class ModelLr(ModelBase):
             GeneralException(ResultCodesEnum.NOT_FOUND, message=f"模型不存在")
         if self.card is None:
             GeneralException(ResultCodesEnum.NOT_FOUND, message=f"card不存在")
-        path = self.ml_config.f_get_save_path(f"model.pkl")
+        path = self.ml_config.f_get_save_path(FileEnum.MODEL.value)
         self.lr.save(path)
         print(f"model save to【{path}】success. ")
 
-        path = self.ml_config.f_get_save_path("coef.dict")
+        path = self.ml_config.f_get_save_path(FileEnum.COEF.value)
         with open(path, mode="w", encoding="utf-8") as f:
             j = json.dumps(self.coef, ensure_ascii=False)
             f.write(j)
         print(f"model save to【{path}】success. ")
 
         df_card = pd.concat(self.card.values())
-        path = self.ml_config.f_get_save_path(f"card.csv")
+        path = self.ml_config.f_get_save_path(FileEnum.CARD.value)
         df_card.to_csv(path)
         print(f"model save to【{path}】success. ")
 
+        path = self.ml_config.f_get_save_path(FileEnum.CARD_CFG.value)
+        with open(path, mode="w", encoding="utf-8") as f:
+            j = json.dumps(self.card_cfg, ensure_ascii=False)
+            f.write(j)
+        print(f"model save to【{path}】success. ")
+
     def model_load(self, path: str, *args, **kwargs):
         if not os.path.isdir(path):
             raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"【{path}】不是文件夹")
-        path_model = os.path.join(path, "model.pkl")
+        path_model = os.path.join(path, FileEnum.MODEL.value)
         if not os.path.isfile(path_model):
             raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"模型文件【{path_model}】不存在")
-        path_card = os.path.join(path, "card.csv")
+        path_card = os.path.join(path, FileEnum.CARD.value)
         if not os.path.isfile(path_card):
             raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"模型文件【{path_card}】不存在")
 

+ 4 - 4
online_learning/trainer.py

@@ -23,7 +23,7 @@ from tqdm import tqdm
 from commom import GeneralException, f_image_crop_white_borders, f_df_to_image, f_display_title, \
     f_display_images_by_side
 from entitys import DataSplitEntity, OnlineLearningConfigEntity, MetricFucResultEntity
-from enums import ResultCodesEnum, ConstantEnum, ContextEnum
+from enums import ResultCodesEnum, ConstantEnum, ContextEnum, FileEnum
 from feature import f_woebin_load
 from init import init, context
 from model import f_get_model_score_bin, f_calcu_model_ks, f_stress_test, f_calcu_model_psi
@@ -53,7 +53,7 @@ class OnlineLearningTrainer:
         if not os.path.isdir(path):
             raise GeneralException(ResultCodesEnum.ILLEGAL_PARAMS, message=f"【{path}】不是文件夹")
 
-        path_coef = os.path.join(path, "coef.dict")
+        path_coef = os.path.join(path, FileEnum.COEF.value)
         if not os.path.isfile(path_coef):
             raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"模型系数文件【{path_coef}】不存在")
         with open(path_coef, mode="r", encoding="utf-8") as f:
@@ -276,13 +276,13 @@ class OnlineLearningTrainer:
         if self.sc_woebin is None:
             GeneralException(ResultCodesEnum.NOT_FOUND, message=f"feature不存在")
         df_woebin = pd.concat(self.sc_woebin.values())
-        path = self._ol_config.f_get_save_path(f"feature.csv")
+        path = self._ol_config.f_get_save_path(FileEnum.FEATURE.value)
         df_woebin.to_csv(path)
         print(f"feature save to【{path}】success. ")
 
         if self._model_optimized is None:
             GeneralException(ResultCodesEnum.NOT_FOUND, message=f"模型不存在")
-        path = self._ol_config.f_get_save_path("coef.dict")
+        path = self._ol_config.f_get_save_path(FileEnum.COEF.value)
         with open(path, mode="w", encoding="utf-8") as f:
             coef = dict(zip(self._columns, self._model_optimized.linear.weight.tolist()))
             j = json.dumps(coef, ensure_ascii=False)

+ 2 - 1
pipeline/pipeline.py

@@ -9,6 +9,7 @@ from typing import List
 import pandas as pd
 
 from entitys import DataSplitEntity, MlConfigEntity, DataFeatureEntity
+from enums import ConstantEnum
 from feature import FeatureStrategyFactory, FeatureStrategyBase
 from init import init
 from model import ModelBase, ModelFactory, f_add_rules, f_get_model_score_bin, f_calcu_model_psi
@@ -92,7 +93,7 @@ class Pipeline():
     def rules_test(self, ):
         rules = self._ml_config.rules
         df = self._data.train_data.copy()
-        df["SCORE"] = [0] * len(df)
+        df[ConstantEnum.SCORE.value] = [0] * len(df)
         f_add_rules(df, rules)