data_process_config_entity.py 2.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
  1. # -*- coding: utf-8 -*-
  2. """
  3. @author: yq
  4. @time: 2024/11/1
  5. @desc: 数据处理配置类
  6. """
  7. import json
  8. import os
  9. from typing import List
  10. from commom import GeneralException
  11. from enums import ResultCodesEnum
  12. class DataProcessConfigEntity():
  13. def __init__(self, y_column: str, x_columns_candidate: List[str], fill_method: str, split_method: str,
  14. feature_search_method: str, bin_search_interval: float = 0.05, iv_threshold: float = 0.03,
  15. x_candidate_num: int = 10):
  16. # 定义y变量
  17. self._y_column = y_column
  18. # 候选x变量
  19. self._x_columns_candidate = x_columns_candidate
  20. # 缺失值填充方法
  21. self._fill_method = fill_method
  22. # 数据划分方法
  23. self._split_method = split_method
  24. # 最优特征搜索方法
  25. self._feature_search_method = feature_search_method
  26. # 使用iv筛变量时的阈值
  27. self._iv_threshold = iv_threshold
  28. # 贪婪搜索分箱时数据粒度大小,应该在0.01-0.1之间
  29. self._bin_search_interval = bin_search_interval
  30. # 最终保留多少x变量
  31. self._x_candidate_num = x_candidate_num
  32. @property
  33. def y_column(self):
  34. return self._y_column
  35. @property
  36. def fill_method(self):
  37. return self._fill_method
  38. @property
  39. def split_method(self):
  40. return self._split_method
  41. @property
  42. def feature_search_method(self):
  43. return self._feature_search_method
  44. @property
  45. def iv_threshold(self):
  46. return self._iv_threshold
  47. @property
  48. def bin_search_interval(self):
  49. return self._bin_search_interval
  50. @staticmethod
  51. def from_config(config_path: str):
  52. """
  53. 从配置文件生成实体类
  54. """
  55. if os.path.exists(config_path):
  56. with open(config_path, mode="r", encoding="utf-8") as f:
  57. j = json.loads(f.read())
  58. else:
  59. raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"指配置文件【{config_path}】不存在")
  60. return DataProcessConfigEntity(**j)
# Script entry guard: running this module directly performs no action.
if __name__ == "__main__":
    pass