DataProcessor.py 1.2 KB

1234567891011121314151617181920212223242526272829303132333435363738
  1. # -*- coding: utf-8 -*-
  2. """
  3. @author: zsc
  4. @time: 2024/11/18
  5. @desc: 数据处理
  6. """
  7. from collections import defaultdict
  8. # 数据预处理模块
  9. class DataPreprocessor:
  10. def __init__(self, data):
  11. self.data = data
  12. def preprocess(self):
  13. # 这里可以添加更多的预处理步骤,例如数据清洗、格式化等
  14. # 目前我们只是简单地返回原始数据
  15. preprocessed_data = []
  16. for user_actions in self.data:
  17. user = user_actions['user']
  18. process = user_actions['process']
  19. actions = user_actions['actions']
  20. product = user_actions['product']
  21. channel = user_actions['channel']
  22. # 假设的预处理步骤,例如过滤掉某些行为或添加额外的信息
  23. # 这里我们只是将行为序列转换为小写
  24. preprocessed_actions = [action.lower() for action in actions]
  25. preprocessed_data.append({
  26. 'user': user,
  27. 'process': process,
  28. 'actions': preprocessed_actions,
  29. 'product': product,
  30. 'channel': channel
  31. })
  32. return preprocessed_data