Browse Source

modify: 变量命名优化

yq 2 days ago
parent
commit
73c466c6b5

+ 2 - 2
entitys/ml_config_entity.py

@@ -307,7 +307,7 @@ class MlConfigEntity():
         从配置文件生成实体类
         """
         if os.path.isdir(config_path):
-            config_path = os.path.join(config_path, FileEnum.MLCFG.value)
+            config_path = os.path.join(config_path, FileEnum.ML_CFG.value)
 
         if os.path.exists(config_path):
             with open(config_path, mode="r", encoding="utf-8") as f:
@@ -318,7 +318,7 @@ class MlConfigEntity():
         return MlConfigEntity(**j)
 
     def config_save(self):
-        path = self.f_get_save_path(FileEnum.MLCFG.value)
+        path = self.f_get_save_path(FileEnum.ML_CFG.value)
         with open(path, mode="w", encoding="utf-8") as f:
             j = {k.lstrip("_"): v for k, v in self.__dict__.items()}
             j = json.dumps(j, ensure_ascii=False)

+ 2 - 2
entitys/ol_config_entity.py

@@ -126,7 +126,7 @@ class OnlineLearningConfigEntity():
         从配置文件生成实体类
         """
         if os.path.isdir(config_path):
-            config_path = os.path.join(config_path, FileEnum.OLCFG.value)
+            config_path = os.path.join(config_path, FileEnum.OL_CFG.value)
 
         if os.path.exists(config_path):
             with open(config_path, mode="r", encoding="utf-8") as f:
@@ -137,7 +137,7 @@ class OnlineLearningConfigEntity():
         return OnlineLearningConfigEntity(**j)
 
     def config_save(self):
-        path = self.f_get_save_path(FileEnum.OLCFG.value)
+        path = self.f_get_save_path(FileEnum.OL_CFG.value)
         with open(path, mode="w", encoding="utf-8") as f:
             j = {k.lstrip("_"): v for k, v in self.__dict__.items()}
             j = json.dumps(j, ensure_ascii=False)

+ 9 - 9
enums/file_enum.py

@@ -8,18 +8,18 @@ from enum import Enum
 
 
 class FileEnum(Enum):
-    MLCFG = "mlcfg.json"
-    OLCFG = "olcfg.json"
-    FEATURE = "feature.csv"
+    ML_CFG = "ml_cfg.json"
+    OL_CFG = "ol_cfg.json"
+    FEATURE_CSV = "feature.csv"
     FEATURE_PKL = "feature.pkl"
-    VAR_MAPPING = "var_mapping.csv"
-    CARD = "card.csv"
-    CARD_CFG = "card.cfg"
+    VAR_MAPPING_CSV = "var_mapping.csv"
+    CARD_CSV = "card.csv"
+    CARD_CFG = "card_cfg.json"
     COEF = "coef.json"
-    MODEL = "model.pkl"
+    PIPELINE_XGB = "xgb.pipeline"
     MODEL_XGB = "xgb.bin"
-    PMML = "model.pmml"
-    TEST_CASE = "test_case.csv"
+    PMML_XGB = "xgb.pmml"
+    TEST_CASE_CSV = "test_case.csv"
 
 
 

+ 1 - 1
feature/woe/strategy_woe.py

@@ -476,7 +476,7 @@ class StrategyWoe(FeatureStrategyBase):
         if self.sc_woebin is None:
             GeneralException(ResultCodesEnum.NOT_FOUND, message=f"feature不存在")
         df_woebin = pd.concat(self.sc_woebin.values())
-        path = self.ml_config.f_get_save_path(FileEnum.FEATURE.value)
+        path = self.ml_config.f_get_save_path(FileEnum.FEATURE_CSV.value)
         df_woebin.to_csv(path)
         print(f"feature save to【{path}】success. ")
 

+ 3 - 3
feature/woe/utils.py

@@ -143,9 +143,9 @@ def f_get_vif(data: pd.DataFrame) -> Union[pd.DataFrame, None]:
 
 def f_woebin_load(path: str):
     if os.path.isdir(path):
-        path = os.path.join(path, FileEnum.FEATURE.value)
-    if not os.path.isfile(path) or FileEnum.FEATURE.value not in path:
-        raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"特征信息【{FileEnum.FEATURE.value}】不存在")
+        path = os.path.join(path, FileEnum.FEATURE_CSV.value)
+    if not os.path.isfile(path) or FileEnum.FEATURE_CSV.value not in path:
+        raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"特征信息【{FileEnum.FEATURE_CSV.value}】不存在")
 
     df_woebin = pd.read_csv(path)
     variables = df_woebin["variable"].unique().tolist()

+ 6 - 6
model/model_lr.py

@@ -79,7 +79,7 @@ class ModelLr(ModelBase):
             GeneralException(ResultCodesEnum.NOT_FOUND, message=f"模型不存在")
         if self.card is None:
             GeneralException(ResultCodesEnum.NOT_FOUND, message=f"card不存在")
-        path = self.ml_config.f_get_save_path(FileEnum.MODEL.value)
+        path = self.ml_config.f_get_save_path(FileEnum.PIPELINE_XGB.value)
         self.lr.save(path)
         print(f"model save to【{path}】success. ")
 
@@ -90,7 +90,7 @@ class ModelLr(ModelBase):
         print(f"model save to【{path}】success. ")
 
         df_card = pd.concat(self.card.values())
-        path = self.ml_config.f_get_save_path(FileEnum.CARD.value)
+        path = self.ml_config.f_get_save_path(FileEnum.CARD_CSV.value)
         df_card.to_csv(path)
         print(f"model save to【{path}】success. ")
 
@@ -103,21 +103,21 @@ class ModelLr(ModelBase):
         woebin = context.get(ContextEnum.WOEBIN)
         df_woebin = pd.concat(woebin.values())
         df_var_mapping = f_get_var_mapping(df_woebin, df_card, columns_anns=self.ml_config.columns_anns)
-        path = self.ml_config.f_get_save_path(FileEnum.VAR_MAPPING.value)
+        path = self.ml_config.f_get_save_path(FileEnum.VAR_MAPPING_CSV.value)
         df_var_mapping.to_csv(path, encoding="utf-8")
         print(f"model save to【{path}】success. ")
 
-        path = self.ml_config.f_get_save_path(FileEnum.TEST_CASE.value)
+        path = self.ml_config.f_get_save_path(FileEnum.TEST_CASE_CSV.value)
         self._test_case.to_csv(path, encoding="utf-8")
         print(f"test case save to【{path}】success. ")
 
     def model_load(self, path: str, *args, **kwargs):
         if not os.path.isdir(path):
             raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"【{path}】不是文件夹")
-        path_model = os.path.join(path, FileEnum.MODEL.value)
+        path_model = os.path.join(path, FileEnum.PIPELINE_XGB.value)
         if not os.path.isfile(path_model):
             raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"模型文件【{path_model}】不存在")
-        path_card = os.path.join(path, FileEnum.CARD.value)
+        path_card = os.path.join(path, FileEnum.CARD_CSV.value)
         if not os.path.isfile(path_card):
             raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"模型文件【{path_card}】不存在")
 

+ 4 - 4
model/model_xgb.py

@@ -131,7 +131,7 @@ class ModelXgb(ModelBase):
                           )
 
         if params_xgb.get("save_pmml"):
-            path_pmml = self.ml_config.f_get_save_path(FileEnum.PMML.value)
+            path_pmml = self.ml_config.f_get_save_path(FileEnum.PMML_XGB.value)
             # pipeline = make_pmml_pipeline(self.model)
             sklearn2pmml(self.pipeline, path_pmml, with_repr=True, )
             self._f_rewrite_pmml(path_pmml)
@@ -169,19 +169,19 @@ class ModelXgb(ModelBase):
         if self.pipeline is None:
             GeneralException(ResultCodesEnum.NOT_FOUND, message=f"模型不存在")
 
-        path_model = self.ml_config.f_get_save_path(FileEnum.MODEL.value)
+        path_model = self.ml_config.f_get_save_path(FileEnum.PIPELINE_XGB.value)
         # self.model.save_model(path_model)
         joblib.dump(self.pipeline, path_model)
         print(f"model save to【{path_model}】success. ")
 
-        path = self.ml_config.f_get_save_path(FileEnum.TEST_CASE.value)
+        path = self.ml_config.f_get_save_path(FileEnum.TEST_CASE_CSV.value)
         self._test_case.to_csv(path, encoding="utf-8")
         print(f"test case save to【{path}】success. ")
 
     def model_load(self, path: str, *args, **kwargs):
         if not os.path.isdir(path):
             raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"【{path}】不是文件夹")
-        path_model = os.path.join(path, FileEnum.MODEL.value)
+        path_model = os.path.join(path, FileEnum.PIPELINE_XGB.value)
         if not os.path.isfile(path_model):
             raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"模型文件【{path_model}】不存在")
 

+ 2 - 2
online_learning/trainer_lr.py

@@ -294,7 +294,7 @@ class OnlineLearningTrainerLr:
         if self.sc_woebin is None:
             GeneralException(ResultCodesEnum.NOT_FOUND, message=f"feature不存在")
         df_woebin = pd.concat(self.sc_woebin.values())
-        path = self._ol_config.f_get_save_path(FileEnum.FEATURE.value)
+        path = self._ol_config.f_get_save_path(FileEnum.FEATURE_CSV.value)
         df_woebin.to_csv(path)
         print(f"feature save to【{path}】success. ")
 
@@ -309,7 +309,7 @@ class OnlineLearningTrainerLr:
 
         if self.card is not None:
             df_card = pd.concat(self.card.values())
-            path = self._ol_config.f_get_save_path(FileEnum.CARD.value)
+            path = self._ol_config.f_get_save_path(FileEnum.CARD_CSV.value)
             df_card.to_csv(path)
             print(f"model save to【{path}】success. ")
 

+ 3 - 3
online_learning/trainer_xgb.py

@@ -55,7 +55,7 @@ class OnlineLearningTrainerXgb:
     def _init(self, path: str):
         if not os.path.isdir(path):
             raise GeneralException(ResultCodesEnum.ILLEGAL_PARAMS, message=f"【{path}】不是文件夹")
-        path_model = os.path.join(path, FileEnum.MODEL.value)
+        path_model = os.path.join(path, FileEnum.PIPELINE_XGB.value)
         if not os.path.isfile(path_model):
             raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"模型文件【{path_model}】不存在")
 
@@ -88,7 +88,7 @@ class OnlineLearningTrainerXgb:
 
         if self._ol_config.save_pmml:
             data = self._data.data
-            path_pmml = self._ol_config.f_get_save_path(FileEnum.PMML.value)
+            path_pmml = self._ol_config.f_get_save_path(FileEnum.PMML_XGB.value)
             # pipeline = make_pmml_pipeline(self.model)
             sklearn2pmml(self._pipeline_optimized, path_pmml, with_repr=True, )
             self._f_rewrite_pmml(path_pmml)
@@ -241,7 +241,7 @@ class OnlineLearningTrainerXgb:
         if self._pipeline_optimized is None:
             GeneralException(ResultCodesEnum.NOT_FOUND, message=f"模型不存在")
 
-        path_model = self._ol_config.f_get_save_path(FileEnum.MODEL.value)
+        path_model = self._ol_config.f_get_save_path(FileEnum.PIPELINE_XGB.value)
         joblib.dump(self._pipeline_optimized, path_model)
         print(f"model save to【{path_model}】success. ")
         # 在xgb的增量学习下直接保存pipeline会出错,所以这里需要单独保存xgb model,然后进行复原