12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182 |
- # -*- coding: utf-8 -*-
- """
- @author: yq
- @time: 2024/11/1
- @desc: 数据处理配置类
- """
- import json
- import os
- from typing import List
- from commom import GeneralException
- from enums import ResultCodesEnum
class DataProcessConfigEntity():
    """Read-only container for the data-processing configuration.

    Every constructor argument is stored on a private attribute and exposed
    through a read-only property of the same name.
    """

    def __init__(self, y_column: str, x_columns_candidate: List[str], fill_method: str, split_method: str,
                 feature_search_method: str, bin_search_interval: float = 0.05, iv_threshold: float = 0.03,
                 x_candidate_num: int = 10):
        """Store the configuration values.

        Args:
            y_column: Name of the target (y) column.
            x_columns_candidate: Names of the candidate feature (x) columns.
            fill_method: Missing-value filling method.
            split_method: Data splitting method.
            feature_search_method: Optimal-feature search method.
            bin_search_interval: Data granularity used by the greedy binning
                search; should be between 0.01 and 0.1 (not enforced here).
            iv_threshold: Threshold used when filtering variables by IV.
            x_candidate_num: How many x variables are finally kept.
        """
        # Target (y) column
        self._y_column = y_column
        # Candidate x columns
        self._x_columns_candidate = x_columns_candidate
        # Missing-value filling method
        self._fill_method = fill_method
        # Data splitting method
        self._split_method = split_method
        # Optimal-feature search method
        self._feature_search_method = feature_search_method
        # Threshold used when filtering variables by IV
        self._iv_threshold = iv_threshold
        # Granularity of the greedy binning search; should be within 0.01-0.1
        self._bin_search_interval = bin_search_interval
        # Number of x variables finally kept
        self._x_candidate_num = x_candidate_num

    def __repr__(self) -> str:
        """Return a debugging representation listing every stored value."""
        return (
            f"{type(self).__name__}("
            f"y_column={self._y_column!r}, "
            f"x_columns_candidate={self._x_columns_candidate!r}, "
            f"fill_method={self._fill_method!r}, "
            f"split_method={self._split_method!r}, "
            f"feature_search_method={self._feature_search_method!r}, "
            f"bin_search_interval={self._bin_search_interval!r}, "
            f"iv_threshold={self._iv_threshold!r}, "
            f"x_candidate_num={self._x_candidate_num!r})"
        )

    @property
    def y_column(self):
        """Name of the target (y) column."""
        return self._y_column

    @property
    def x_columns_candidate(self):
        """Names of the candidate feature (x) columns."""
        return self._x_columns_candidate

    @property
    def fill_method(self):
        """Missing-value filling method."""
        return self._fill_method

    @property
    def split_method(self):
        """Data splitting method."""
        return self._split_method

    @property
    def feature_search_method(self):
        """Optimal-feature search method."""
        return self._feature_search_method

    @property
    def iv_threshold(self):
        """Threshold used when filtering variables by IV."""
        return self._iv_threshold

    @property
    def bin_search_interval(self):
        """Data granularity used by the greedy binning search."""
        return self._bin_search_interval

    @property
    def x_candidate_num(self):
        """Number of x variables finally kept."""
        return self._x_candidate_num

    @classmethod
    def from_config(cls, config_path: str) -> "DataProcessConfigEntity":
        """Build an entity from a JSON configuration file.

        The file must contain a JSON object whose keys match the constructor's
        parameter names; the object is passed to the constructor as keyword
        arguments.

        Args:
            config_path: Path of the UTF-8 encoded JSON configuration file.

        Returns:
            An instance of the class this method is called on.

        Raises:
            GeneralException: With ``ResultCodesEnum.NOT_FOUND`` when
                ``config_path`` does not exist.
        """
        if not os.path.exists(config_path):
            raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"指配置文件【{config_path}】不存在")

        with open(config_path, mode="r", encoding="utf-8") as f:
            options = json.loads(f.read())

        # Use cls (not the hard-coded class name) so subclasses get their own type.
        return cls(**options)
if __name__ == "__main__":
    # Nothing to do when executed directly.
    pass
|