# -*- coding: utf-8 -*-
"""
@author: zsc
@time: 2024/11/18
@desc: Data processing.
"""
from collections import defaultdict


# Data preprocessing module
class DataPreprocessor:
    """Normalize raw user-action records before further processing.

    The instance keeps a reference to the iterable of records passed in;
    ``preprocess`` builds and returns new records rather than modifying
    the originals.
    """

    def __init__(self, data) -> None:
        """Store the raw records.

        Args:
            data: Iterable of mappings. Each mapping is read by
                ``preprocess`` through the keys ``'user'``, ``'process'``,
                ``'actions'``, ``'product'`` and ``'channel'``.
        """
        self.data = data

    def preprocess(self) -> list:
        """Return a new list of records with lower-cased action names.

        Further steps (cleaning, formatting, filtering, enrichment) can be
        added here; at present the only transformation is lower-casing every
        entry of ``'actions'``.

        Returns:
            A list with one new dict per input record, holding exactly the
            keys ``'user'``, ``'process'``, ``'actions'``, ``'product'`` and
            ``'channel'``. Any other keys on the input record are not
            carried over.

        Raises:
            KeyError: If a record lacks one of the five keys listed above.
        """
        preprocessed_data = []
        for user_actions in self.data:
            # Read every field up front so a malformed record fails before
            # any transformation is attempted.
            user = user_actions['user']
            process = user_actions['process']
            actions = user_actions['actions']
            product = user_actions['product']
            channel = user_actions['channel']

            # Build a fresh list so the caller's action sequence is left
            # untouched.
            preprocessed_actions = [action.lower() for action in actions]

            preprocessed_data.append({
                'user': user,
                'process': process,
                'actions': preprocessed_actions,
                'product': product,
                'channel': channel,
            })
        return preprocessed_data