|
@@ -8,80 +8,96 @@ import time
|
|
|
import random
|
|
|
from collections import defaultdict
|
|
|
import matplotlib.pyplot as plt
|
|
|
+import numpy as np
|
|
|
|
|
|
-plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签
|
|
|
-plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号
|
|
|
-
|
|
|
-# 报告生成模块(续)
|
|
|
+# 报告生成模块
|
|
|
class ReportGenerator:
|
|
|
- def __init__(self, data, anomalies, segments, action_stats, product_stats, channel_stats):
|
|
|
+    def __init__(self, data, anomalies, segments, process_stats, action_stats, product_stats, channel_stats):
|
|
|
+        """Store the inputs that generate() assembles into the report.
|
|
|
+
|
|
|
+        data: iterable of per-user records; generate() reads each record's
|
|
|
+            'user' and 'actions' keys.
|
|
|
+        anomalies, segments: stored as-is and echoed back in the report.
|
|
|
+        process_stats: mapping of process name -> {label: value} mapping;
|
|
|
+            generate() draws one horizontal bar chart per entry.
|
|
|
+        action_stats, product_stats, channel_stats: {label: value} mappings
|
|
|
+            that generate() draws as vertical bar charts.
|
|
|
+
|
|
|
+        NOTE(review): process_stats is inserted before action_stats, so any
|
|
|
+        caller passing these arguments positionally must be updated.
|
|
|
+        """
|
|
|
         self.data = data
|
|
|
         self.anomalies = anomalies
|
|
|
         self.segments = segments
|
|
|
+        self.process_stats = process_stats
|
|
|
         self.action_stats = action_stats
|
|
|
         self.product_stats = product_stats
|
|
|
         self.channel_stats = channel_stats
|
|
|
|
|
|
def generate(self):
|
|
|
# 生成用户行为报告,并展示成图表形式
|
|
|
-
|
|
|
report = {
|
|
|
- 'total_users': len(set([item['user'] for item in self.data])),
|
|
|
- 'total_actions': len(self.data),
|
|
|
+ 'total_users': len(set([user_actions['user'] for user_actions in self.data])),
|
|
|
+ 'total_actions': sum(len(user_actions['actions']) for user_actions in self.data),
|
|
|
'anomalies': self.anomalies,
|
|
|
'user_segments': self.segments,
|
|
|
+ 'process_stats': self.process_stats,
|
|
|
'action_stats': self.action_stats,
|
|
|
'product_stats': self.product_stats,
|
|
|
'channel_stats': self.channel_stats
|
|
|
}
|
|
|
- self.plot_action_stats(self.action_stats)
|
|
|
- self.plot_product_stats(self.product_stats)
|
|
|
- self.plot_channel_stats(self.channel_stats)
|
|
|
+
|
|
|
+ # 打印报告摘要
|
|
|
+ print("Report Summary:")
|
|
|
+ print(f"Total Users: {report['total_users']}")
|
|
|
+ print(f"Total Actions: {report['total_actions']}")
|
|
|
+ print(f"Anomalies: {report['anomalies']}")
|
|
|
+ print(f"User Segments: {report['user_segments']}")
|
|
|
+
|
|
|
+ # 展示每个流程的行为次数漏斗图
|
|
|
+ for process, actions in self.process_stats.items():
|
|
|
+ self.plot_bar(actions, f"{process} Funnel")
|
|
|
+
|
|
|
+ # 展示行为统计图表
|
|
|
+ self.plot_stats(self.action_stats, "Action Statistics")
|
|
|
+
|
|
|
+ # 展示产品统计图表
|
|
|
+ self.plot_stats(self.product_stats, "Product Statistics")
|
|
|
+
|
|
|
+ # 展示渠道统计图表
|
|
|
+ self.plot_stats(self.channel_stats, "Channel Statistics")
|
|
|
+
|
|
|
return report
|
|
|
|
|
|
- def plot_action_stats(self, action_stats):
|
|
|
- if not action_stats:
|
|
|
- print("No data for action stats.")
|
|
|
- return
|
|
|
- # 生成行为统计图表
|
|
|
- actions = list(action_stats.keys())
|
|
|
- counts = list(action_stats.values())
|
|
|
- plt.figure(figsize=(12, 8)) # 调整图表大小
|
|
|
- plt.bar(actions, counts, color='skyblue')
|
|
|
- plt.xlabel('行为', fontsize=12) # 调整字体大小
|
|
|
- plt.ylabel('次数', fontsize=12)
|
|
|
- plt.title('行为统计', fontsize=14)
|
|
|
- plt.xticks(rotation=45, fontsize=10) # 旋转刻度标签并调整字体大小
|
|
|
- plt.yticks(fontsize=10)
|
|
|
- plt.tight_layout() # 自动调整布局
|
|
|
- # 如果需要,可以手动调整边距
|
|
|
- # plt.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.1)
|
|
|
+    def plot_stats(self, stats, title):
|
|
|
+        """Draw `stats` as a vertical bar chart titled `title` and show it.
|
|
|
+
|
|
|
+        stats: mapping of label -> numeric value; one bar per entry.
|
|
|
+        title: chart title, also used in the "no data" message.
|
|
|
+
|
|
|
+        Prints a notice and returns without plotting when `stats` is empty.
|
|
|
+        """
|
|
|
+        # Without this guard the unpacking below raises ValueError on an
|
|
|
+        # empty mapping, because zip(*()) yields nothing to unpack.
|
|
|
+        if not stats:
|
|
|
+            print(f"No data for {title}.")
|
|
|
+            return
|
|
|
+        labels, values = zip(*stats.items())
|
|
|
+        plt.figure(figsize=(12, 6))  # wide figure so many labels fit
|
|
|
+        plt.bar(labels, values)
|
|
|
+        plt.title(title)
|
|
|
+        # Rotate and right-align the tick labels, with a smaller font.
|
|
|
+        plt.xticks(rotation=45, ha='right', fontsize=10)
|
|
|
+        plt.tight_layout()  # keep the rotated labels from being clipped
|
|
|
         plt.show()
|
|
|
|
|
|
- def plot_product_stats(self, product_stats):
|
|
|
- if not product_stats:
|
|
|
- print("No data for product stats.")
|
|
|
- return
|
|
|
- # 生成产品统计图表
|
|
|
- products = list(product_stats.keys())
|
|
|
- counts = list(product_stats.values())
|
|
|
- plt.figure(figsize=(10, 6))
|
|
|
- plt.bar(products, counts, color='lightgreen')
|
|
|
- plt.xlabel('产品')
|
|
|
- plt.ylabel('次数')
|
|
|
- plt.title('产品统计')
|
|
|
+    def plot_bar(self, actions, title):
|
|
|
+        """Draw `actions` as a horizontal bar chart sorted by value and show it.
|
|
|
+
|
|
|
+        actions: mapping of label -> numeric value.
|
|
|
+        title: chart title.
|
|
|
+
|
|
|
+        Bars are sorted ascending; barh draws the first entry at the bottom,
|
|
|
+        so the largest value ends up at the top of the chart.
|
|
|
+        """
|
|
|
+        # Sort with the built-in stable sort rather than round-tripping
|
|
|
+        # through NumPy arrays, which coerce mixed int/float values to float
|
|
|
+        # (a count of 3 would be labelled "3.0") and mixed-type labels to str.
|
|
|
+        ordered = sorted(actions.items(), key=lambda item: item[1])
|
|
|
+        labels = [label for label, _ in ordered]
|
|
|
+        values = [value for _, value in ordered]
|
|
|
+
|
|
|
+        # One y position per bar.
|
|
|
+        positions = np.arange(len(labels))
|
|
|
+
|
|
|
+        _, ax = plt.subplots(figsize=(10, 6))
|
|
|
+        ax.barh(positions, values, color='teal')
|
|
|
+
|
|
|
+        # Annotate each bar with its value, right-aligned at the bar's end.
|
|
|
+        for position, value in zip(positions, values):
|
|
|
+            ax.text(value, position, str(value),
|
|
|
+                    ha='right', va='center', color='black', fontsize=10)
|
|
|
+
|
|
|
+        # Put the labels on the y ticks and name both axes.
|
|
|
+        ax.set_yticks(positions)
|
|
|
+        ax.set_yticklabels(labels)
|
|
|
+        ax.set_xlabel('Number of Actions')
|
|
|
+        ax.set_ylabel('Behavior')
|
|
|
+
|
|
|
+        ax.set_title(title)
|
|
|
+
|
|
|
+        plt.tight_layout()  # fit tick labels and title inside the figure
|
|
|
         plt.show()
|
|
|
|
|
|
- def plot_channel_stats(self, channel_stats):
|
|
|
- if not channel_stats:
|
|
|
- print("No data for channel stats.")
|
|
|
- return
|
|
|
- # 生成渠道统计图表
|
|
|
- channels = list(channel_stats.keys())
|
|
|
- counts = list(channel_stats.values())
|
|
|
- plt.figure(figsize=(10, 6))
|
|
|
- plt.bar(channels, counts, color='orange')
|
|
|
- plt.xlabel('渠道')
|
|
|
- plt.ylabel('次数')
|
|
|
- plt.title('渠道统计')
|
|
|
- plt.show()
|
|
|
+# Configure matplotlib so Chinese text renders correctly.
|
|
|
+plt.rcParams.update({
|
|
|
+    'font.sans-serif': ['SimHei'],  # so Chinese labels display properly
|
|
|
+    'axes.unicode_minus': False,  # so the minus sign displays properly
|
|
|
+})
|