DataProcessor.py 1.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849
# -*- coding: utf-8 -*-
"""
@author: zsc
@time: 2024/11/18
@desc: Data processing
"""
# Data preprocessing module
  8. class DataPreprocessor:
  9. def __init__(self, data):
  10. self.data = data
  11. def preprocess(self):
  12. # 更多的数据预处理函数
  13. processed_data = [
  14. item for item in self.data
  15. if 'user' in item and 'action' in item and 'item' in item
  16. ]
  17. processed_data = self.remove_duplicates(processed_data)
  18. processed_data = self.fill_missing_values(processed_data)
  19. processed_data = self.convert_data_types(processed_data)
  20. return processed_data
  21. def remove_duplicates(self, data):
  22. # 去除重复数据
  23. unique_data = []
  24. seen = set()
  25. for item in data:
  26. identifier = (item['user'], item['action'], item['item'])
  27. if identifier not in seen:
  28. unique_data.append(item)
  29. seen.add(identifier)
  30. return unique_data
  31. def fill_missing_values(self, data):
  32. # 填充缺失值
  33. for item in data:
  34. if 'item' not in item:
  35. item['item'] = 'unknown'
  36. return data
  37. def convert_data_types(self, data):
  38. # 转换数据类型
  39. for item in data:
  40. item['user'] = str(item['user'])
  41. item['action'] = str(item['action'])
  42. item['item'] = str(item['item'])
  43. return data