# -*- coding: utf-8 -*-
"""
@author: zsc
@time: 2024/11/18
@desc: Data processing
"""


# Data preprocessing module
class DataPreprocessor:
    """Clean a collection of user/action/item interaction records.

    Each record is indexed by the keys ``'user'``, ``'action'`` and
    ``'item'`` (presumably plain dicts -- confirm against the caller).
    """

    # Keys a record must carry to survive the filtering step of preprocess().
    _REQUIRED_KEYS = ('user', 'action', 'item')

    def __init__(self, data):
        """Store the raw records to be processed.

        Args:
            data: Iterable of records, or ``None`` for "no data".
        """
        self.data = data

    def preprocess(self):
        """Run the full cleaning pipeline and return the cleaned records.

        Steps: keep only records that contain every required key, fill
        missing values, convert the three fields to ``str`` and finally
        drop duplicates.

        Returns:
            A new list of new dicts; the records held in ``self.data``
            are left untouched.
        """
        if self.data is None:
            return []

        # Shallow-copy every kept record so the in-place helpers below do
        # not rewrite the caller's original dicts.
        records = [
            dict(item)
            for item in self.data
            if all(key in item for key in self._REQUIRED_KEYS)
        ]

        # NOTE: the filter above already requires 'item', so this step is
        # currently a no-op here; it is kept so the pipeline stays complete
        # if the filter is ever relaxed.
        records = self.fill_missing_values(records)

        # Convert BEFORE de-duplicating: records that differ only by value
        # type (e.g. 1 vs '1') become identical after conversion and would
        # otherwise both survive.
        records = self.convert_data_types(records)
        return self.remove_duplicates(records)

    def remove_duplicates(self, data):
        """Return the records of ``data`` with duplicates removed.

        Two records are duplicates when their ``(user, action, item)``
        values are equal; the first occurrence is kept and the original
        order is preserved.

        Raises:
            KeyError: If a record lacks one of the three keys.
            TypeError: If one of the three values is unhashable.
        """
        unique_data = []
        seen = set()
        for item in data:
            identifier = (item['user'], item['action'], item['item'])
            if identifier in seen:
                continue
            seen.add(identifier)
            unique_data.append(item)
        return unique_data

    def fill_missing_values(self, data):
        """Set ``'item'`` to ``'unknown'`` on records that lack it.

        Records are modified in place and the same ``data`` object is
        returned.
        """
        for item in data:
            if 'item' not in item:
                item['item'] = 'unknown'
        return data

    def convert_data_types(self, data):
        """Convert the ``user``, ``action`` and ``item`` values to ``str``.

        Records are modified in place and the same ``data`` object is
        returned.

        Raises:
            KeyError: If a record lacks one of the three keys.
        """
        for item in data:
            for key in self._REQUIRED_KEYS:
                item[key] = str(item[key])
        return data