├── Data_visualization ├── Data_visualization_copy.py ├── Data_visualization_corr.py ├── Data_visualization_monthly.py ├── Data_visualization_yearly.py └── README.md ├── Q2.py ├── README.md ├── images ├── Figure_1.png ├── Figure_2.png ├── Figure_3.png ├── Figure_4.png ├── Figure_5.png ├── Figure_6.png ├── Figure_7.png ├── Figure_8.png └── README.md ├── rules.csv ├── rules_single.csv ├── total_sales.csv ├── 单品Apriori.py └── 品类FP-Growth.py /Data_visualization/Data_visualization_copy.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import seaborn as sns 3 | import matplotlib.pyplot as plt 4 | import matplotlib 5 | matplotlib.rc("font",family='YouYuan') 6 | # Script: heatmap of sold quantity per item name and sales-date prefix, limited to the first 100 rows of the sorted table. 7 | # Load the sales records from the CSV file 8 | data = pd.read_csv('data.csv') 9 | # Keep only the first two characters of the date string (presumably the two-digit year, since the x-axis below is labelled 销售年份 -- TODO confirm against data.csv) 10 | data['销售日期'] = data['销售日期'].astype(str).str[:2] 11 | 12 | # Sum the sold quantity for every (item name, date prefix) pair 13 | total_sales = data.groupby(['单品名称', '销售日期'])['销量(千克)'].sum().reset_index() 14 | 15 | # Add a 总销量 column that repeats, on each row, the item's total over all of its rows 16 | total_sales['总销量'] = total_sales.groupby('单品名称')['销量(千克)'].transform('sum') 17 | 18 | # Sort by the per-item total, largest first 19 | total_sales = total_sales.sort_values(by='总销量', ascending=False) 20 | # print(total_sales) 21 | # (disabled) sort by the per-row quantity instead of the per-item total 22 | # total_sales = total_sales.sort_values(by='销量(千克)', ascending=False) 23 | # total_sales.to_csv('total_sales.csv', index=False, encoding='utf-8-sig') 24 | # Keep only the leading rows of the sorted table; the active line keeps 100 rows (item/date pairs, not 100 items) 25 | # data = sorted_df.head(1000) 26 | total_sales = total_sales.head(100) 27 | # data['销量(千克)'] = data['销量(千克)'].head(30) 28 | 29 | # Pivot to an item-by-date-prefix table; combinations missing from the kept rows become 0 30 | pivot_table = total_sales.pivot_table(index='单品名称', columns='销售日期', values='销量(千克)', fill_value=0) 31 | 32 | print(pivot_table) 33 | # Draw the annotated heatmap 34 | plt.figure(figsize=(12, 8)) 35 | sns.heatmap(pivot_table, cmap="YlGnBu", annot=True, fmt=".2f", linewidths=.5) 36 | 37 | plt.title("251类单品销量热力图(部分)") 38 | plt.xlabel("销售年份") 39 | plt.ylabel("单品名称") 40 | plt.show() 41 | -------------------------------------------------------------------------------- 
/Data_visualization/Data_visualization_corr.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | import matplotlib 4 | matplotlib.rc("font",family='YouYuan') 5 | 6 | # 读取数据 7 | data = pd.read_csv('data.csv') 8 | 9 | # 数据预处理:筛选蔬菜品类,按日期和分类名称分组,计算每组销量总和 10 | # 定义一个字典,将原始的分类名称映射到新的数字 11 | category_mapping = { 12 | '花菜类': 1, 13 | '花叶类': 2, 14 | '水生根茎类': 3, 15 | '茄类': 4, 16 | '辣椒类': 5, 17 | '食用菌': 6 18 | } 19 | 20 | # 使用replace方法进行替换 21 | data['分类名称'] = data['分类名称'].replace(category_mapping) 22 | 23 | # 假设您的DataFrame为df,且包含“销售日期”列 24 | # data['销售日期'] = data['销售日期'].astype(str).str[:2].astype(int) 25 | data['销售日期'] = data['销售日期'].astype(str).str[:2] == '22' 26 | 27 | 28 | grouped_data = data.groupby(['销售日期', '分类名称'])['销量(千克)'].sum().reset_index() 29 | 30 | # # 创建日期和分类名称的唯一值列表 31 | # unique_dates = grouped_data['销售日期'].unique() 32 | # unique_categories = grouped_data['分类名称'].unique() 33 | 34 | # # 创建一个空的三维数组,用于存储销量数据 35 | # sales_volume = np.zeros((len(unique_dates), len(unique_categories))) 36 | 37 | # # 填充销量数据 38 | # for i, date in enumerate(unique_dates): 39 | # for j, category in enumerate(unique_categories): 40 | # volume = grouped_data.loc[(grouped_data['销售日期'] == date) & (grouped_data['分类名称'] == category), '销量(千克)'] 41 | # if not volume.empty: 42 | # sales_volume[i, j] = volume.values[0] 43 | 44 | # # df = pd.DataFrame(sales_volume) 45 | # df = pd.DataFrame(sales_volume, columns=unique_categories) 46 | 47 | # # 创建包含销售年份和分类名称的DataFrame 48 | # data_df = pd.read_csv('data.csv') # 读取包含销售年份和分类名称的数据 49 | # data_df = data_df[['销售日期', '分类名称']] # 选择需要的列 50 | 51 | # # 合并数据 52 | # merged_df = pd.concat([data_df, df], axis=1) 53 | 54 | # print(merged_df.head(5)) 55 | 56 | # 假设销售数据存储在df中,包括'品类'、'销售年份'和'销量'列 57 | pivot_table = data.pivot_table(index='销售日期', columns='分类名称', values='销量(千克)', aggfunc='sum') 58 | correlation_matrix = pivot_table.corr() 59 | 60 | import seaborn as sns 61 | 
import matplotlib.pyplot as plt 62 | # Draw the category-by-category correlation matrix as an annotated heatmap 63 | plt.figure(figsize=(10, 8)) 64 | sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f") 65 | plt.title("不同蔬菜品类在不同年份销量变化的相关性热力图") 66 | plt.xlabel("蔬菜品类") 67 | plt.ylabel("蔬菜品类") 68 | plt.show() 69 | -------------------------------------------------------------------------------- /Data_visualization/Data_visualization_monthly.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from mpl_toolkits.mplot3d import Axes3D 5 | import matplotlib 6 | matplotlib.rc("font",family='YouYuan') 7 | # Script: 3-D surface of summed sold quantity per category code and per date key, for rows whose date string starts with '22'. 8 | # Read the data 9 | data = pd.read_csv('data.csv') 10 | 11 | # Preprocessing: group by date key and category name and sum the sold quantity of each group 12 | # Map each original category name to a numeric code 13 | category_mapping = { 14 | '花菜类': 1, 15 | '花叶类': 2, 16 | '水生根茎类': 3, 17 | '茄类': 4, 18 | '辣椒类': 5, 19 | '食用菌': 6 20 | } 21 | 22 | # Replace the category names with their numeric codes 23 | data['分类名称'] = data['分类名称'].replace(category_mapping) 24 | 25 | # Select the rows whose date string starts with '22' and store characters 2-3 of their date string as the new key (presumably the month, since the y-axis is labelled 销售月份 -- TODO confirm). The assignment aligns on the index, so rows outside the selection get NaN in 销售日期. 26 | data_filtered = data[data['销售日期'].astype(str).str[:2] == '22'] 27 | data['销售日期'] = data_filtered['销售日期'].astype(str).str[2:4] 28 | 29 | # NOTE(review): pandas groupby drops NaN keys by default, so the unselected rows fall out of the aggregation here -- confirm this is intended rather than grouping data_filtered explicitly 30 | grouped_data = data.groupby(['销售日期', '分类名称'])['销量(千克)'].sum().reset_index() 31 | 32 | # Collect the unique date keys and category codes 33 | unique_dates = grouped_data['销售日期'].unique() 34 | unique_categories = grouped_data['分类名称'].unique() 35 | 36 | # Allocate a zero-filled 2-D array (date keys x category codes) for the summed quantities 37 | sales_volume = np.zeros((len(unique_dates), len(unique_categories))) 38 | 39 | # Copy each (date key, category) sum into the grid; combinations absent from grouped_data stay 0 40 | for i, date in enumerate(unique_dates): 41 | for j, category in enumerate(unique_categories): 42 | volume = grouped_data.loc[(grouped_data['销售日期'] == date) & (grouped_data['分类名称'] == category), '销量(千克)'] 43 | if not volume.empty: 44 | sales_volume[i, j] = volume.values[0] 45 | 46 | # Draw the 3-D surface plot 47 | fig = plt.figure() 48 | ax = fig.add_subplot(111, projection='3d') 49 | 50 | x, y = np.meshgrid(np.arange(len(unique_categories)), np.arange(len(unique_dates))) 51 | z = 
sales_volume 52 | 53 | ax.plot_surface(x, y, z, cmap='viridis') 54 | 55 | # Set the axis ticks and labels; with step = 2 only every second date key gets a y tick 56 | ax.set_xticks(np.arange(len(unique_categories))) 57 | ax.set_xticklabels(unique_categories, ha='right') 58 | step = 2 59 | ax.set_yticks(np.arange(len(unique_dates))[::step]) 60 | ax.set_yticklabels(unique_dates[::step]) 61 | 62 | ax.set_xlabel('分类名称') 63 | ax.set_ylabel('销售月份') 64 | ax.set_zlabel('销量(千克)') 65 | 66 | plt.show() -------------------------------------------------------------------------------- /Data_visualization/Data_visualization_yearly.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from mpl_toolkits.mplot3d import Axes3D 5 | import matplotlib 6 | matplotlib.rc("font",family='YouYuan') 7 | # Script: 3-D surface of summed sold quantity per category code and per integer date key. 8 | # Read the data 9 | data = pd.read_csv('data.csv') 10 | 11 | # Preprocessing: group by date key and category name and sum the sold quantity of each group 12 | # Map each original category name to a numeric code 13 | category_mapping = { 14 | '花菜类': 1, 15 | '花叶类': 2, 16 | '水生根茎类': 3, 17 | '茄类': 4, 18 | '辣椒类': 5, 19 | '食用菌': 6 20 | } 21 | 22 | # Replace the category names with their numeric codes 23 | data['分类名称'] = data['分类名称'].replace(category_mapping) 24 | 25 | # Reduce the date to the integer formed by its first two characters (presumably the two-digit year, since the y-axis is labelled 销售年份 -- TODO confirm) 26 | data['销售日期'] = data['销售日期'].astype(str).str[:2].astype(int) 27 | 28 | 29 | grouped_data = data.groupby(['销售日期', '分类名称'])['销量(千克)'].sum().reset_index() 30 | 31 | # Collect the unique date keys and category codes 32 | unique_dates = grouped_data['销售日期'].unique() 33 | unique_categories = grouped_data['分类名称'].unique() 34 | 35 | # Allocate a zero-filled 2-D array (date keys x category codes) for the summed quantities 36 | sales_volume = np.zeros((len(unique_dates), len(unique_categories))) 37 | 38 | # Copy each (date key, category) sum into the grid; combinations absent from grouped_data stay 0 39 | for i, date in enumerate(unique_dates): 40 | for j, category in enumerate(unique_categories): 41 | volume = grouped_data.loc[(grouped_data['销售日期'] == date) & (grouped_data['分类名称'] == category), '销量(千克)'] 42 | if not volume.empty: 43 | sales_volume[i, j] = volume.values[0] 44 | 45 | # Draw the 3-D surface plot 46 | fig = plt.figure() 47 | ax = fig.add_subplot(111, projection='3d') 48 | 49 | x, y = 
np.meshgrid(np.arange(len(unique_categories)), np.arange(len(unique_dates))) 50 | z = sales_volume 51 | 52 | ax.plot_surface(x, y, z, cmap='viridis') 53 | 54 | # Set the axis ticks and labels; every date key gets a y tick 55 | ax.set_xticks(np.arange(len(unique_categories))) 56 | ax.set_xticklabels(unique_categories, ha='right') 57 | ax.set_yticks(np.arange(len(unique_dates))) 58 | ax.set_yticklabels(unique_dates) 59 | 60 | ax.set_xlabel('分类名称') 61 | ax.set_ylabel('销售年份') 62 | ax.set_zlabel('销量(千克)') 63 | 64 | plt.show() -------------------------------------------------------------------------------- /Data_visualization/README.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Q2.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | # Script: scatter plot of profit_margin against total_quantity, coloured by 类别, with a truncated legend. 3 | # Read the data 4 | data = pd.read_csv("问题二1数据.csv") 5 | 6 | # data_filtered = data[data['类别'].astype(str).str[5:7] == '辣椒'] 7 | # data['类别'] = data_filtered['类别'] 8 | 9 | # Count missing values per column (NOTE(review): the result is never used later in this script) 10 | missing_values = data.isnull().sum() 11 | 12 | # Drop rows with missing values, if any 13 | data = data.dropna() 14 | 15 | # Summary statistics of the data (also never used later in this script) 16 | summary_stats = data.describe() 17 | # Correlation between profit_margin and total_quantity via Series.corr; computed but never printed or plotted 18 | correlation = data['profit_margin'].corr(data['total_quantity']) 19 | 20 | import matplotlib.pyplot as plt 21 | import seaborn as sns 22 | import matplotlib 23 | matplotlib.rc("font",family='YouYuan') 24 | 25 | # Draw the scatter plot, one hue per 类别 value 26 | plt.figure(figsize=(10, 6)) 27 | sns.scatterplot(x='total_quantity', y='profit_margin', data=data, hue='类别') 28 | plt.title('不同类别中profit_margin和total_quantity的相关关系') 29 | plt.xlabel('total_quantity') 30 | plt.ylabel('profit_margin') 31 | plt.legend(title='类别') 32 | 33 | # Fetch the legend handles and labels from the current axes 34 | handles, labels = plt.gca().get_legend_handles_labels() 35 | 36 | # Keep only the first 20 legend entries 37 | new_handles = handles[:20] 38 | new_labels = labels[:20] 39 | 40 | # Build a custom legend from the truncated entries 41 | custom_legend = plt.legend(new_handles, new_labels, loc='upper right') 42 | 43 | # Add the custom legend to the current axes 44 | 
plt.gca().add_artist(custom_legend) 45 | plt.show() 46 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MCM2023_C 2 | 2023年全国大学生数学建模竞赛C题的代码和论文 3 | 4 | # 五、问题一的模型建立与求解 5 | 6 | ## 5.1 用三维热力图分析各品类销量的分布规律 7 | 8 | 为了对数据有进一步的认识,探寻蔬菜各品类销量的分布规律,我们用三维热力图绘制了各品类在不同年份和月份的销量情况。这种可视化方法允许我们将销售量、品类和日期三个维度结合在一起,以更加生动地呈现销售趋势。 9 | 在图1和图2中,x轴的数字1到6表示6种不同品类商品,其对应关系为:1为花菜类,2为花叶类,3为水生根茎类,4为茄类,5为辣椒类,6为食用菌。 10 | 11 | 在图1中,y轴上显示的是销售年份,z轴的高低、颜色的深浅表示销售量的多少。这种可视化方法使我们能够一目了然地了解不同品类商品在不同日期的销售情况,有助于我们发现销售的潜在模式和趋势。从图中可以看出,花叶类蔬菜的销量相对较多,茄类的销量相对较少。2020年到2022年,随着年份的推移,蔬菜的销量逐年增加。 12 | 13 | ![图片](https://github.com/jxTse/MCM2023_C/blob/main/images/Figure_1.png) 14 | 15 | 为了更好地了解不同蔬菜品类在不同月份的销量情况,我们选择了销量最多的2022年,研究在一年12个月中的销量变化情况,绘制出了图2的三维热力图。从图中可以看出,随着月份的推移,各品类的蔬菜销量整体都呈上升趋势,其中在9月销量几乎都达到最高。 16 | 17 | 通过数据可视化,我们能够更全面地理解销售数据的特点和规律,为进一步的分析和决策提供了有力的支持。这些可视化工具帮助我们更好地掌握数据,为研究提供了坚实的基础。 18 | 19 | ![图片](https://github.com/jxTse/MCM2023_C/blob/main/images/Figure_2.png) 20 | 21 | ## 5.2 用二维热力图分析各单品销量之间的关系 22 | 23 | 为了更好地了解各蔬菜单品的销量分布规律,我们绘制了不同单品在各年份总销量数据的热力图,图3是我们截取的热力图中销量较高的一部分。从图中可以看出很多有趣的信息,例如: 24 | - 芜湖青椒(1)、净藕(1)、西兰花等单品的销量一直遥遥领先; 25 | - 云南油麦菜(份)、洪湖莲藕(粉藕)的销量一直稳步提升; 26 | - 泡泡椒(精品)在2020年销量表现亮眼,但销量逐渐减少,到2023年已经停售了; 27 | - 盒装的金针菇在2022年一经推出便广受好评,2023年销量领先,有望冲击年销量第一; 28 | - 2021年热销的商品,在2022年销量几乎都有一定程度的下滑。 29 | 30 | ![图片](https://github.com/jxTse/MCM2023_C/blob/main/images/Figure_4.png) 31 | 该图片清晰地展现了各单品的销量分布情况和变化趋势,有助于商家做出更好的进货决策。根据商家的需求,我们还可以绘制各单品在不同月份、不同时间的销量情况,以让我们对数据的整体情况有一个直观的了解。有了这个了解之后,我们就可以对数据进行更深层次的分析,例如,使用STL方法进行季节性分析,使用时间序列分析蔬菜单品销量随时间的变化规律等。 32 | 33 | ## 5.3 相关性分析 34 | 35 | 为了探讨蔬菜各品类之间的销量相关性,以及它们随时间的演变,我们通过相关性矩阵和相关性热力图的形式来可视化这些关系,以更深入地理解不同蔬菜品类之间的关联性和趋势。相关性矩阵是一个方阵,其中包含了各品类销量之间的Pearson相关系数。相关性系数的取值范围在-1到1之间,负值表示负相关,正值表示正相关。我们首先计算了不同蔬菜品类之间的销量相关性系数,并将其呈现在相关性矩阵中。为了更直观地展示不同品类销量之间的相关性,我们创建了一个相关性热力图。相关性热力图以颜色编码的方式表示了相关性矩阵中的数值,使我们可以快速识别出高相关性和低相关性的品类组合。 36 | 37 | 
![图片](https://github.com/jxTse/MCM2023_C/blob/main/images/Figure_3.png) 38 | 39 | 从相关性矩阵中,我们可以观察到不同蔬菜品类之间的销量存在一定的相关性。例如,水生根茎类和食用菌之间的销量相关性系数为0.98,表明它们之间存在很强的正相关关系。相反,茄类和辣椒类之间的销量相关性系数为0.40,表明它们之间的关系较弱。 40 | 通过时间趋势分析,我们可以观察到不同品类销量之间的年度相关性。这有助于我们理解各品类在不同年份内的销售表现是否存在一定的关联性。例如,水生根茎类和食用菌之间的高相关性系数表明它们可能受到相似的季节性影响。 41 | 42 | ## 5.4 基于Apriori算法的单品关联关系分析 43 | 44 | ### 5.4.1 方法简介 45 | 46 | Apriori算法是一种流行的算法,用于数据库中的关联规则挖掘。在Apriori算法中,获取频繁项目集的过程包括连接和修剪过程。在连接过程中,每个项目都与其他项目组合,直到不再形成组合。在修剪过程中,使用最小支持度来修剪在上一个过程中组合得到的项目集。Apriori 算法的缺点在于,在执行频繁项目集搜索时,它必须针对每个项目组合重复扫描数据库。因此,扫描数据库需要花费大量时间;此外,Apriori 过程需要大量候选生成才能根据数据库中项重复出现的频率生成关联规则。 47 | 在建模过程中,需要先寻找频繁项集,用于生成关联规则。支持度(support)是该方法的一个重要参数,用于衡量一个项集在数据集中出现的频率或支持程度,其计算公式如下: 48 | $$Support(A,B) = P(A \cap B) = \frac{\Sigma 含A和B的交易}{\Sigma 总交易}$$ 49 | 50 | 另一个重要的参数是置信度(Confidence),用于衡量关联规则 A→B 的可靠程度,即在包含A的交易中同时包含B的比例,计算公式为: 51 | $$Confidence(A \Rightarrow B) = P(B\ |\ A) = \frac{\Sigma 含A和B的交易}{\Sigma 包含A的总交易}$$ 52 | 53 | 此外,我们还需要设定合适的最小支持度阈值和最小置信度阈值,用于筛选频繁项集和关联规则,只有达到最小阈值的数据才会被保留。基于支持度算法进行剪枝操作,去除不满足最小支持度阈值的候选项集。这一步骤减少了搜索空间,提高了效率。 最后,通过组合频繁项集,APRIORI算法生成关联规则,并计算这些规则的置信度。只有置信度不低于最小置信度阈值的规则才会被保留。[2] 54 | 55 | 在本次研究中,我们将使用 Apriori 算法来分析蔬菜单品销量之间的关系,从而确定不同蔬菜单品销售之间的关联。 通过分析这种关联,我们可以深入了解客户的行为和偏好,从而制定有效的营销策略并提高销量。 56 | 57 | ### 5.4.2 实验设计与结果 58 | 建立关联规则的分析主要分为以下五个步骤: 59 | 1. 设置最低支持率:首先需要挖掘数据中的频繁项集,这些频繁项集代表了在销售数据中经常一起出现的商品组合。通过初步分析发现,蔬菜单品销售之间的关联度较弱,故我们设置了一个较小的最小支持度阈值,以筛选出频繁项集。 60 | 2. 将数据转换为适合Apriori的格式:Apriori算法通常使用二进制编码表示商品是否出现。这是为了将问题转化为一个集合理论的问题,其中每个交易可以看作是一个包含不同商品的集合。这种编码方式使得算法更高效,减少了内存占用和计算时间。为了进行频繁项集挖掘,我们需要先通过TransactionEncoder函数将数据转换为适合Apriori的格式。 61 | 3. 生成频繁项集:该算法通过迭代扫描交易数据库并剪切不符合最小支持阈值的项目集来生成频繁项目集。在本实验中,频繁项集指的是顾客经常一起购买的蔬菜单品。我们将“销售日期”与“扫码销售时间”合并,得到了“销售时间”这一变量,相同“销售时间”卖出的单品就认为是同一个顾客购买的。通过使用groupby和agg函数将相同销售时间的单品放入一个集合中,我们得到了蔬菜单品销售的频繁项集。 62 | 4. 生成关联规则:我们使用频繁项集生成关联规则,关联规则根据其支持度和置信度进行评估。得到了各个单品之间的关联性,并将结果保存在附件中。例如,在rules.csv中,规则“七彩椒(份)→云南生菜”的后项支持度约为9.21%(即约9.21%的交易包含云南生菜),而该规则自身的支持度约为0.013%、置信度为1.0,这表明数据中购买了七彩椒(份)的交易都同时包含云南生菜,但由于该规则的支持度极低,这一结论仅基于极少量交易,需谨慎解读。 63 | 5. 
绘制关联性强度热力图:为了更好地展示我们得到的结果,我们使用关联规则绘制了图5的关联性强度热力图。该图直观地展示了不同蔬菜单品之间销售的关联性,红色越深表示关联性越强,蓝色越深表示关联性越弱。从图中可以看出,平菇和大白菜与金针菇(盒)的关联性较强,红椒与上海青的关联性较弱,而红椒与油麦菜几乎没有关联性。 64 | 65 | ![图片](https://github.com/jxTse/MCM2023_C/blob/main/images/Figure_6.png) 66 | 67 | ## 5.5 基于FP-Growth算法的品类关联关系分析 68 | 69 | ### 5.5.1 方法简介 70 | 71 | FP-Growth算法(Frequent Pattern Growth算法)是一种用于挖掘频繁项集、进行关联分析的算法,是Apriori算法的发展。FP-Growth算法纠正了Apriori算法的缺点,它在频繁项集搜索中使用基于树(FP树)的结构来代替候选项集的生成,这就是FP-Growth算法比Apriori算法更快的原因。 72 | 73 | ### 5.5.2 实验设计与结果 74 | 75 | 由于使用Apriori算法分析品类之间关联度时速度过慢,而且效果不佳,故我们改为使用FP-Growth算法进行分析,步骤如下: 76 | - 数据处理:我们使用了与处理单品关联时相同的数据预处理方法,将同时销售的品类放入到一个个集合当中,然后让这些集合构成一个数据集。 77 | - 构建FP树: 首先,算法遍历数据集,统计每个项的支持度,并根据支持度构建FP树。FP树是一种前缀树结构,用于表示项集的层次关系和支持度信息。 78 | - 构建条件模式基: 在构建FP树的过程中,算法还维护了一组称为条件模式基的数据结构,用于存储每个项的条件模式。条件模式是指在给定前缀的条件下,与某项相关联的所有事务。 79 | - 挖掘频繁项集: 一旦构建了FP树和条件模式基,算法可以通过递归方式挖掘频繁项集。它从FP树的叶节点开始,回溯到根节点,生成频繁项集。 80 | 在置信度设为50%时,我们没有得到任何输出结果,由此可见各品类销售之间也无明显强关联性。在适当调低了置信度之后,程序输出了如下结果: 81 | 82 | {('辣椒类',): (('花叶类',), 0.4411945812807882), ('花叶类',): (('辣椒类',), 0.2959520859149112)} 83 | 84 | 从该结果可以看出,辣椒类和花叶类在销售时的关联度较高,购买辣椒类菜品的顾客有44.12%的可能性会购买花叶类菜品,而购买花叶类菜品的顾客有29.60%的可能性会购买辣椒类菜品。 85 | 86 | ## 5.6 综合分析与讨论 87 | 88 | 经过数据预处理和可视化,我们对蔬菜单品和品类数据的分布情况有了更深的了解。通过这些了解,商家可以对单品的选择进行更精准的决策。例如,从图3中我们可以看出,在2022年有很多新的单品上市,一开始销售就广受好评,导致很多原先热销的商品销量降低。在分析利润时,如果新品的利润没有原先热销的商品高,那就要考虑是否要提高新品的价格,促进原先产品的销售,反之则需要考虑要不要把原先的产品下架,以保持总体利润的最大化。 89 | 除了Apriori算法和FP-Growth算法,我们还将基于NSGA-II(Non-dominated Sorting Genetic Algorithm II)的多层次关联规则挖掘与已有的Apriori算法结合,来发现更高质量的不同单品之间的关联关系。NSGA-II 使用快速非支配排序算法和精英选择策略来提高求解质量,使用拥挤距离排序技术来保持群体的多样性,防止过早收敛。在关联规则挖掘方面,该方法已被用于从大量交易数据中挖掘高质量的规则。它对多层次关联规则挖掘尤其有用,因为多层次关联规则挖掘的目标是挖掘不同抽象层次的关联规则。我们将 NSGA-II 与 Apriori 算法结合使用,先用 Apriori 算法生成频繁项集,用于初始化 NSGA-II 的群体,然后使用 NSGA-II 挖掘不同抽象层次的关联规则。对在不同层次上获得的最终群体进行解码,以获得最终的关联规则。[1] 90 | 91 | 通过这些关联度分析方法,我们得到了蔬菜各品类或单品之间的相互关系,通过这些关系,我们可以进行购物篮分析(Market Basket Analysis),为了解客户行为和偏好提供有价值的信息,帮助商家制定更加科学合理的销售和展示策略。经过筛选和优化,我们绘制了图7所示的单品关联性强度热力图,帮助商家更清楚地了解不同蔬菜单品之间的关联情况。 92 | 93 | 
![图片](https://github.com/jxTse/MCM2023_C/blob/main/images/Figure_7.png) 94 | 95 | 从图中可以很明显地看出购买冰草的顾客很有可能会购买小青菜,购买赤松茸的顾客有很大可能会购买杏鲍菇,这些关联规则揭示了不同产品之间的有趣关系,商家可以将这些产品进行捆绑打折促销。例如,针对购买冰草的顾客提供小青菜的优惠,或者推出买一送一活动,从而提高客户满意度和忠诚度,最终提高经济效益。 96 | 97 | 从之前的讨论中我们还发现,花叶类蔬菜与辣椒类蔬菜之间存在一定的关联关系。这意味着购买花叶类蔬菜的顾客中,有一部分顾客可能会同时购买辣椒类蔬菜,反之亦然。根据这些关联规则,可以考虑将花叶类和辣椒类蔬菜放置在相邻的位置,以便于顾客同时购买,这样可以提高顾客的购物体验,从而提高销售额。另外,商家还可以根据这些关联规则对商品库存进行合理的调整。例如,如果发现花叶类蔬菜的销售量增加,可以适当增加辣椒类蔬菜的库存,以满足顾客的需求。 98 | 总之,通过本章的分析,我们可以帮助商家更好地了解蔬菜各品类及单品销售量的分布规律及相互关系,并据此制定相应的销售策略,以提高销售额和顾客满意度。 99 | 100 | 参考文献 101 | [1] Wang, Zhenmin. “A Multi-Level Association Rule Mining Algorithm Based on NSGA-II for Market Basket Analysis.” 2023 4th Information Communication Technologies Conference (ICTC) (2023): 294-300. 102 | 103 | [2] Satria, Christofer, Anthony Anggrawan, Mayadi. 《Recommendation System of Food Package Using Apriori and FP-Growth Data Mining Methods》. Journal of Advances in Information Technology 14, Issue 3 (2023): 454–62. https://doi.org/10.12720/jait.14.3.454-462. 
104 | -------------------------------------------------------------------------------- /images/Figure_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxtse/MCM2023_C/1860460b4dc5ba26cd2e2be9f8aa737be5fd2256/images/Figure_1.png -------------------------------------------------------------------------------- /images/Figure_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxtse/MCM2023_C/1860460b4dc5ba26cd2e2be9f8aa737be5fd2256/images/Figure_2.png -------------------------------------------------------------------------------- /images/Figure_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxtse/MCM2023_C/1860460b4dc5ba26cd2e2be9f8aa737be5fd2256/images/Figure_3.png -------------------------------------------------------------------------------- /images/Figure_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxtse/MCM2023_C/1860460b4dc5ba26cd2e2be9f8aa737be5fd2256/images/Figure_4.png -------------------------------------------------------------------------------- /images/Figure_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxtse/MCM2023_C/1860460b4dc5ba26cd2e2be9f8aa737be5fd2256/images/Figure_5.png -------------------------------------------------------------------------------- /images/Figure_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxtse/MCM2023_C/1860460b4dc5ba26cd2e2be9f8aa737be5fd2256/images/Figure_6.png -------------------------------------------------------------------------------- /images/Figure_7.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jxtse/MCM2023_C/1860460b4dc5ba26cd2e2be9f8aa737be5fd2256/images/Figure_7.png -------------------------------------------------------------------------------- /images/Figure_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jxtse/MCM2023_C/1860460b4dc5ba26cd2e2be9f8aa737be5fd2256/images/Figure_8.png -------------------------------------------------------------------------------- /images/README.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rules.csv: -------------------------------------------------------------------------------- 1 | antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric 2 | frozenset({'七彩椒(份)'}),frozenset({'云南生菜'}),0.00012851818532322323,0.09214753887675106,0.00012851818532322323,1.0,10.852161785216179,0.00011667555084478202,inf,0.9079691516709512 3 | frozenset({'红椒(份)'}),frozenset({'上海青'}),0.00012851818532322323,0.03919804652358309,0.00012851818532322323,1.0,25.511475409836066,0.00012348052351579704,inf,0.9609254498714652 4 | frozenset({'西峡花菇(2)'}),frozenset({'上海青'}),0.00012851818532322323,0.03919804652358309,0.00012851818532322323,1.0,25.511475409836066,0.00012348052351579704,inf,0.9609254498714652 5 | frozenset({'艾蒿'}),frozenset({'云南生菜'}),0.00012851818532322323,0.09214753887675106,0.00012851818532322323,1.0,10.852161785216179,0.00011667555084478202,inf,0.9079691516709512 6 | frozenset({'冰草'}),frozenset({'小青菜(份)'}),0.00012851818532322323,0.008482200231332733,0.00012851818532322323,1.0,117.8939393939394,0.00012742806834194412,inf,0.9916452442159384 7 | frozenset({'冰草(盒)'}),frozenset({'西峡香菇(1)'}),0.00012851818532322323,0.10345713918519471,0.00012851818532322323,1.0,9.66583850931677,0.00011522206153640988,inf,0.8966580976863754 8 | 
frozenset({'大白菜秧'}),frozenset({'净藕(1)'}),0.00012851818532322323,0.09266161161804395,0.00012851818532322323,1.0,10.791955617198335,0.00011660948314894692,inf,0.9074550128534705 9 | frozenset({'鲜粽叶'}),frozenset({'净藕(1)'}),0.00012851818532322323,0.09266161161804395,0.00012851818532322323,1.0,10.791955617198335,0.00011660948314894692,inf,0.9074550128534705 10 | frozenset({'和丰阳光海鲜菇(包)'}),frozenset({'青线椒'}),0.00012851818532322323,0.022105127875594398,0.00012851818532322323,1.0,45.23837209302325,0.00012567727440231403,inf,0.9780205655526992 11 | frozenset({'奶白菜苗'}),frozenset({'竹叶菜'}),0.00012851818532322323,0.04061174656213854,0.00012851818532322323,1.0,24.623417721518987,0.00012329883735225054,inf,0.9595115681233933 12 | frozenset({'田七'}),frozenset({'小米椒'}),0.00012851818532322323,0.03007325536563424,0.00012851818532322323,1.0,33.25213675213675,0.00012465322511687004,inf,0.9700514138817481 13 | frozenset({'赤松茸'}),frozenset({'杏鲍菇(2)'}),0.00012851818532322323,0.009381827528595297,0.00012851818532322323,1.0,106.58904109589041,0.0001273124498742327,inf,0.9907455012853471 14 | frozenset({'槐花'}),frozenset({'西峡香菇(1)'}),0.00012851818532322323,0.10345713918519471,0.00012851818532322323,1.0,9.66583850931677,0.00011522206153640988,inf,0.8966580976863754 15 | frozenset({'水果辣椒'}),frozenset({'金针菇(盒)'}),0.00012851818532322323,0.03341472818403804,0.00012851818532322323,1.0,29.926923076923078,0.0001242237850939419,inf,0.9667095115681233 16 | frozenset({'洪湖莲藕(脆藕)'}),frozenset({'芜湖青椒(1)'}),0.00012851818532322323,0.1582058861328878,0.00012851818532322323,1.0,6.320877335499594,0.000108185851929972,inf,0.8419023136246786 17 | frozenset({'白玉菇(1)'}),frozenset({'白玉菇(袋)'}),0.00012851818532322323,0.005140727412928929,0.00012851818532322323,1.0,194.525,0.00012785750836487224,inf,0.9949871465295629 18 | frozenset({'紫白菜(1)'}),frozenset({'竹叶菜'}),0.00012851818532322323,0.04061174656213854,0.00012851818532322323,1.0,24.623417721518987,0.00012329883735225054,inf,0.9595115681233933 19 | 
frozenset({'野藕(2)'}),frozenset({'竹叶菜'}),0.00012851818532322323,0.04061174656213854,0.00012851818532322323,1.0,24.623417721518987,0.00012329883735225054,inf,0.9595115681233933 20 | frozenset({'紫苏(份)'}),frozenset({'芜湖青椒(1)'}),0.00012851818532322323,0.1582058861328878,0.00012851818532322323,1.0,6.320877335499594,0.000108185851929972,inf,0.8419023136246786 21 | frozenset({'荠菜'}),frozenset({'紫茄子(2)'}),0.00012851818532322323,0.06104613802853104,0.00012851818532322323,1.0,16.381052631578946,0.00012067264644280541,inf,0.9390745501285347 22 | frozenset({'菌菇火锅套餐(份)'}),frozenset({'紫茄子(2)'}),0.00012851818532322323,0.06104613802853104,0.00012851818532322323,1.0,16.381052631578946,0.00012067264644280541,inf,0.9390745501285347 23 | frozenset({'紫螺丝椒'}),frozenset({'西峡香菇(1)'}),0.00012851818532322323,0.10345713918519471,0.00012851818532322323,1.0,9.66583850931677,0.00011522206153640988,inf,0.8966580976863754 24 | frozenset({'野生粉藕'}),frozenset({'红椒(1)'}),0.00012851818532322323,0.029944737180311015,0.00012851818532322323,1.0,33.39484978540772,0.00012466974204082882,inf,0.9701799485861183 25 | frozenset({'青菜苔'}),frozenset({'红椒(1)'}),0.00012851818532322323,0.029944737180311015,0.00012851818532322323,1.0,33.39484978540772,0.00012466974204082882,inf,0.9701799485861183 26 | frozenset({'鲜木耳(2)'}),frozenset({'芜湖青椒(1)'}),0.00012851818532322323,0.1582058861328878,0.00012851818532322323,1.0,6.320877335499594,0.000108185851929972,inf,0.8419023136246786 27 | frozenset({'鱼腥草'}),frozenset({'茼蒿'}),0.00012851818532322323,0.023133273358180183,0.00012851818532322323,1.0,43.227777777777774,0.00012554513901064383,inf,0.9769922879177377 28 | frozenset({'鲜粽叶(袋)(1)'}),frozenset({'菠菜(份)'}),0.00012851818532322323,0.012723300346999101,0.00012851818532322323,1.0,78.59595959595958,0.00012688300985130456,inf,0.9874035989717223 29 | frozenset({'蔡甸藜蒿(份)'}),frozenset({'西兰花'}),0.00012851818532322323,0.13083151265904125,0.00012851818532322323,1.0,7.643418467583498,0.00011170395673319095,inf,0.8692802056555271 30 | 
frozenset({'蟹味菇(盒)'}),frozenset({'青梗散花'}),0.00012851818532322323,0.03572805551985606,0.00012851818532322323,1.0,27.989208633093522,0.00012392648046268397,inf,0.9643958868894602 31 | frozenset({'黑皮鸡枞菌(盒)'}),frozenset({'西峡香菇(1)'}),0.00012851818532322323,0.10345713918519471,0.00012851818532322323,1.0,9.66583850931677,0.00011522206153640988,inf,0.8966580976863754 32 | "frozenset({'东门口小白菜', '小米椒'})",frozenset({'螺丝椒'}),0.00012851818532322323,0.04883691042282483,0.00012851818532322323,1.0,20.476315789473684,0.000122241754218889,inf,0.9512853470437019 33 | "frozenset({'姬菇(1)', '云南油麦菜(份)'})",frozenset({'云南油麦菜'}),0.00012851818532322323,0.052178383241228636,0.00012851818532322323,1.0,19.165024630541872,0.00012181231419596084,inf,0.9479434447300771 34 | "frozenset({'大白菜', '平菇'})",frozenset({'云南油麦菜'}),0.00012851818532322323,0.052178383241228636,0.00012851818532322323,1.0,19.165024630541872,0.00012181231419596084,inf,0.9479434447300771 35 | "frozenset({'大白菜', '青杭椒(2)'})",frozenset({'云南油麦菜'}),0.00012851818532322323,0.052178383241228636,0.00012851818532322323,1.0,19.165024630541872,0.00012181231419596084,inf,0.9479434447300771 36 | "frozenset({'云南油麦菜', '青杭椒(2)'})",frozenset({'大白菜'}),0.00012851818532322323,0.03251510088677548,0.00012851818532322323,1.0,30.754940711462453,0.0001243394035616533,inf,0.9676092544987146 37 | "frozenset({'平菇', '青杭椒(2)'})",frozenset({'云南油麦菜'}),0.00012851818532322323,0.052178383241228636,0.00012851818532322323,1.0,19.165024630541872,0.00012181231419596084,inf,0.9479434447300771 38 | "frozenset({'青杭椒(2)', '云南油麦菜'})",frozenset({'平菇'}),0.00012851818532322323,0.016964400462665466,0.00012851818532322323,1.0,58.9469696969697,0.00012633795136066502,inf,0.9831619537275065 39 | "frozenset({'平菇', '菠菜(份)'})",frozenset({'云南生菜'}),0.00012851818532322323,0.09214753887675106,0.00012851818532322323,1.0,10.852161785216179,0.00011667555084478202,inf,0.9079691516709512 40 | "frozenset({'云南生菜(份)', 
'青线椒(份)'})",frozenset({'净藕(1)'}),0.00012851818532322323,0.09266161161804395,0.00012851818532322323,1.0,10.791955617198335,0.00011660948314894692,inf,0.9074550128534705 41 | "frozenset({'外地茼蒿', '云南生菜(份)'})",frozenset({'金针菇(1)'}),0.00012851818532322323,0.026731782547230434,0.00012851818532322323,1.0,37.40865384615385,0.00012508266513979816,inf,0.9733933161953727 42 | "frozenset({'外地茼蒿', '金针菇(1)'})",frozenset({'云南生菜(份)'}),0.00012851818532322323,0.027245855288523326,0.00012851818532322323,1.0,36.70283018867924,0.00012501659744396307,inf,0.9728791773778921 43 | "frozenset({'红椒(1)', '奶白菜(份)'})",frozenset({'净藕(1)'}),0.00012851818532322323,0.09266161161804395,0.00012851818532322323,1.0,10.791955617198335,0.00011660948314894692,inf,0.9074550128534705 44 | "frozenset({'双孢菇(盒)', '金针菇(1)'})",frozenset({'青梗散花'}),0.00012851818532322323,0.03572805551985606,0.00012851818532322323,1.0,27.989208633093522,0.00012392648046268397,inf,0.9643958868894602 45 | "frozenset({'大白菜', '金针菇(袋)(3)'})",frozenset({'小皱皮'}),0.00012851818532322323,0.0017992545945251252,0.00012851818532322323,1.0,555.7857142857143,0.0001282869483878004,inf,0.9983290488431877 46 | "frozenset({'金针菇(袋)(3)', '小皱皮'})",frozenset({'大白菜'}),0.00012851818532322323,0.03251510088677548,0.00012851818532322323,1.0,30.754940711462453,0.0001243394035616533,inf,0.9676092544987146 47 | "frozenset({'大白菜', '平菇'})",frozenset({'青杭椒(2)'}),0.00012851818532322323,0.001670736409201902,0.00012851818532322323,1.0,598.5384615384615,0.00012830346531175917,inf,0.9984575835475579 48 | "frozenset({'大白菜', '青杭椒(2)'})",frozenset({'平菇'}),0.00012851818532322323,0.016964400462665466,0.00012851818532322323,1.0,58.9469696969697,0.00012633795136066502,inf,0.9831619537275065 49 | "frozenset({'平菇', '青杭椒(2)'})",frozenset({'大白菜'}),0.00012851818532322323,0.03251510088677548,0.00012851818532322323,1.0,30.754940711462453,0.0001243394035616533,inf,0.9676092544987146 50 | "frozenset({'青杭椒(2)', 
'杏鲍菇(2)'})",frozenset({'奶白菜'}),0.00012851818532322323,0.03534250096388639,0.00012851818532322323,1.0,28.294545454545453,0.00012397603123456027,inf,0.9647814910025706 51 | "frozenset({'青杭椒(2)', '奶白菜'})",frozenset({'杏鲍菇(2)'}),0.00012851818532322323,0.009381827528595297,0.00012851818532322323,1.0,106.58904109589041,0.0001273124498742327,inf,0.9907455012853471 52 | "frozenset({'小米椒', '黄白菜(2)'})",frozenset({'甜白菜'}),0.00012851818532322323,0.012466263976352653,0.00012851818532322323,1.0,80.21649484536083,0.0001269160436992221,inf,0.9876606683804627 53 | "frozenset({'木耳菜', '红椒(2)'})",frozenset({'杏鲍菇(1)'}),0.00012851818532322323,0.017349955018635135,0.00012851818532322323,1.0,57.63703703703704,0.00012628840058878868,inf,0.9827763496143959 54 | "frozenset({'木耳菜', '高瓜(2)'})",frozenset({'黄白菜(2)'}),0.00012851818532322323,0.03187250996015936,0.00012851818532322323,1.0,31.375,0.0001244219881814472,inf,0.9682519280205657 55 | "frozenset({'高瓜(2)', '黄白菜(2)'})",frozenset({'木耳菜'}),0.00012851818532322323,0.010924045752473976,0.00012851818532322323,1.0,91.54117647058823,0.00012711424678672742,inf,0.989203084832905 56 | "frozenset({'紫茄子(2)', '青线椒'})",frozenset({'枝江红菜苔'}),0.00012851818532322323,0.018506618686544147,0.00012851818532322323,1.0,54.03472222222222,0.00012613974827315974,inf,0.9816195372750643 57 | "frozenset({'牛首生菜', '海鲜菇(份)'})",frozenset({'螺丝椒'}),0.00012851818532322323,0.04883691042282483,0.00012851818532322323,1.0,20.476315789473684,0.000122241754218889,inf,0.9512853470437019 58 | "frozenset({'芜湖青椒(1)', '虫草花'})",frozenset({'海鲜菇(袋)(4)'}),0.00012851818532322323,0.005012209227605706,0.00012851818532322323,1.0,199.51282051282053,0.00012787402528883102,inf,0.9951156812339331 59 | "frozenset({'海鲜菇(袋)(4)', '虫草花'})",frozenset({'芜湖青椒(1)'}),0.00012851818532322323,0.1582058861328878,0.00012851818532322323,1.0,6.320877335499594,0.000108185851929972,inf,0.8419023136246786 60 | "frozenset({'随州泡泡青', 
'菠菜'})",frozenset({'西兰花'}),0.00012851818532322323,0.13083151265904125,0.00012851818532322323,1.0,7.643418467583498,0.00011170395673319095,inf,0.8692802056555271 61 | "frozenset({'金针菇(1)', '金针菇(袋)(2)'})",frozenset({'螺丝椒'}),0.00012851818532322323,0.04883691042282483,0.00012851818532322323,1.0,20.476315789473684,0.000122241754218889,inf,0.9512853470437019 62 | "frozenset({'大白菜', '平菇', '青杭椒(2)'})",frozenset({'云南油麦菜'}),0.00012851818532322323,0.052178383241228636,0.00012851818532322323,1.0,19.165024630541872,0.00012181231419596084,inf,0.9479434447300771 63 | "frozenset({'大白菜', '平菇', '云南油麦菜'})",frozenset({'青杭椒(2)'}),0.00012851818532322323,0.001670736409201902,0.00012851818532322323,1.0,598.5384615384615,0.00012830346531175917,inf,0.9984575835475579 64 | "frozenset({'大白菜', '青杭椒(2)', '云南油麦菜'})",frozenset({'平菇'}),0.00012851818532322323,0.016964400462665466,0.00012851818532322323,1.0,58.9469696969697,0.00012633795136066502,inf,0.9831619537275065 65 | "frozenset({'平菇', '青杭椒(2)', '云南油麦菜'})",frozenset({'大白菜'}),0.00012851818532322323,0.03251510088677548,0.00012851818532322323,1.0,30.754940711462453,0.0001243394035616533,inf,0.9676092544987146 66 | "frozenset({'大白菜', '平菇'})","frozenset({'青杭椒(2)', '云南油麦菜'})",0.00012851818532322323,0.00012851818532322323,0.00012851818532322323,1.0,7781.0,0.00012850166839926445,inf,1.0 67 | "frozenset({'大白菜', '青杭椒(2)'})","frozenset({'平菇', '云南油麦菜'})",0.00012851818532322323,0.00025703637064644646,0.00012851818532322323,1.0,3890.5,0.00012848515147530567,inf,0.9998714652956298 68 | "frozenset({'平菇', '青杭椒(2)'})","frozenset({'大白菜', '云南油麦菜'})",0.00012851818532322323,0.0008996272972625626,0.00012851818532322323,1.0,1111.5714285714287,0.0001284025668555118,inf,0.999228791773779 69 | "frozenset({'青杭椒(2)', '云南油麦菜'})","frozenset({'大白菜', '平菇'})",0.00012851818532322323,0.00012851818532322323,0.00012851818532322323,1.0,7781.0,0.00012850166839926445,inf,1.0 70 | -------------------------------------------------------------------------------- /rules_single.csv: 
-------------------------------------------------------------------------------- 1 | antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric 2 | frozenset({'七彩椒(份)'}),frozenset({'云南生菜'}),0.00012851818532322323,0.09214753887675106,0.00012851818532322323,1.0,10.852161785216179,0.00011667555084478202,inf,0.9079691516709512 3 | frozenset({'红椒(份)'}),frozenset({'上海青'}),0.00012851818532322323,0.03919804652358309,0.00012851818532322323,1.0,25.511475409836066,0.00012348052351579704,inf,0.9609254498714652 4 | frozenset({'西峡花菇(2)'}),frozenset({'上海青'}),0.00012851818532322323,0.03919804652358309,0.00012851818532322323,1.0,25.511475409836066,0.00012348052351579704,inf,0.9609254498714652 5 | frozenset({'艾蒿'}),frozenset({'云南生菜'}),0.00012851818532322323,0.09214753887675106,0.00012851818532322323,1.0,10.852161785216179,0.00011667555084478202,inf,0.9079691516709512 6 | frozenset({'冰草'}),frozenset({'小青菜(份)'}),0.00012851818532322323,0.008482200231332733,0.00012851818532322323,1.0,117.8939393939394,0.00012742806834194412,inf,0.9916452442159384 7 | frozenset({'冰草(盒)'}),frozenset({'西峡香菇(1)'}),0.00012851818532322323,0.10345713918519471,0.00012851818532322323,1.0,9.66583850931677,0.00011522206153640988,inf,0.8966580976863754 8 | frozenset({'大白菜秧'}),frozenset({'净藕(1)'}),0.00012851818532322323,0.09266161161804395,0.00012851818532322323,1.0,10.791955617198335,0.00011660948314894692,inf,0.9074550128534705 9 | frozenset({'鲜粽叶'}),frozenset({'净藕(1)'}),0.00012851818532322323,0.09266161161804395,0.00012851818532322323,1.0,10.791955617198335,0.00011660948314894692,inf,0.9074550128534705 10 | frozenset({'和丰阳光海鲜菇(包)'}),frozenset({'青线椒'}),0.00012851818532322323,0.022105127875594398,0.00012851818532322323,1.0,45.23837209302325,0.00012567727440231403,inf,0.9780205655526992 11 | 
frozenset({'奶白菜苗'}),frozenset({'竹叶菜'}),0.00012851818532322323,0.04061174656213854,0.00012851818532322323,1.0,24.623417721518987,0.00012329883735225054,inf,0.9595115681233933 12 | frozenset({'田七'}),frozenset({'小米椒'}),0.00012851818532322323,0.03007325536563424,0.00012851818532322323,1.0,33.25213675213675,0.00012465322511687004,inf,0.9700514138817481 13 | frozenset({'赤松茸'}),frozenset({'杏鲍菇(2)'}),0.00012851818532322323,0.009381827528595297,0.00012851818532322323,1.0,106.58904109589041,0.0001273124498742327,inf,0.9907455012853471 14 | frozenset({'槐花'}),frozenset({'西峡香菇(1)'}),0.00012851818532322323,0.10345713918519471,0.00012851818532322323,1.0,9.66583850931677,0.00011522206153640988,inf,0.8966580976863754 15 | frozenset({'水果辣椒'}),frozenset({'金针菇(盒)'}),0.00012851818532322323,0.03341472818403804,0.00012851818532322323,1.0,29.926923076923078,0.0001242237850939419,inf,0.9667095115681233 16 | frozenset({'洪湖莲藕(脆藕)'}),frozenset({'芜湖青椒(1)'}),0.00012851818532322323,0.1582058861328878,0.00012851818532322323,1.0,6.320877335499594,0.000108185851929972,inf,0.8419023136246786 17 | frozenset({'白玉菇(1)'}),frozenset({'白玉菇(袋)'}),0.00012851818532322323,0.005140727412928929,0.00012851818532322323,1.0,194.525,0.00012785750836487224,inf,0.9949871465295629 18 | frozenset({'紫白菜(1)'}),frozenset({'竹叶菜'}),0.00012851818532322323,0.04061174656213854,0.00012851818532322323,1.0,24.623417721518987,0.00012329883735225054,inf,0.9595115681233933 19 | frozenset({'野藕(2)'}),frozenset({'竹叶菜'}),0.00012851818532322323,0.04061174656213854,0.00012851818532322323,1.0,24.623417721518987,0.00012329883735225054,inf,0.9595115681233933 20 | frozenset({'紫苏(份)'}),frozenset({'芜湖青椒(1)'}),0.00012851818532322323,0.1582058861328878,0.00012851818532322323,1.0,6.320877335499594,0.000108185851929972,inf,0.8419023136246786 21 | frozenset({'荠菜'}),frozenset({'紫茄子(2)'}),0.00012851818532322323,0.06104613802853104,0.00012851818532322323,1.0,16.381052631578946,0.00012067264644280541,inf,0.9390745501285347 22 | 
frozenset({'菌菇火锅套餐(份)'}),frozenset({'紫茄子(2)'}),0.00012851818532322323,0.06104613802853104,0.00012851818532322323,1.0,16.381052631578946,0.00012067264644280541,inf,0.9390745501285347 23 | frozenset({'紫螺丝椒'}),frozenset({'西峡香菇(1)'}),0.00012851818532322323,0.10345713918519471,0.00012851818532322323,1.0,9.66583850931677,0.00011522206153640988,inf,0.8966580976863754 24 | frozenset({'野生粉藕'}),frozenset({'红椒(1)'}),0.00012851818532322323,0.029944737180311015,0.00012851818532322323,1.0,33.39484978540772,0.00012466974204082882,inf,0.9701799485861183 25 | frozenset({'青菜苔'}),frozenset({'红椒(1)'}),0.00012851818532322323,0.029944737180311015,0.00012851818532322323,1.0,33.39484978540772,0.00012466974204082882,inf,0.9701799485861183 26 | frozenset({'鲜木耳(2)'}),frozenset({'芜湖青椒(1)'}),0.00012851818532322323,0.1582058861328878,0.00012851818532322323,1.0,6.320877335499594,0.000108185851929972,inf,0.8419023136246786 27 | frozenset({'鱼腥草'}),frozenset({'茼蒿'}),0.00012851818532322323,0.023133273358180183,0.00012851818532322323,1.0,43.227777777777774,0.00012554513901064383,inf,0.9769922879177377 28 | frozenset({'鲜粽叶(袋)(1)'}),frozenset({'菠菜(份)'}),0.00012851818532322323,0.012723300346999101,0.00012851818532322323,1.0,78.59595959595958,0.00012688300985130456,inf,0.9874035989717223 29 | frozenset({'蔡甸藜蒿(份)'}),frozenset({'西兰花'}),0.00012851818532322323,0.13083151265904125,0.00012851818532322323,1.0,7.643418467583498,0.00011170395673319095,inf,0.8692802056555271 30 | frozenset({'蟹味菇(盒)'}),frozenset({'青梗散花'}),0.00012851818532322323,0.03572805551985606,0.00012851818532322323,1.0,27.989208633093522,0.00012392648046268397,inf,0.9643958868894602 31 | frozenset({'黑皮鸡枞菌(盒)'}),frozenset({'西峡香菇(1)'}),0.00012851818532322323,0.10345713918519471,0.00012851818532322323,1.0,9.66583850931677,0.00011522206153640988,inf,0.8966580976863754 32 | -------------------------------------------------------------------------------- /total_sales.csv: 
-------------------------------------------------------------------------------- 1 | 单品名称,销售日期,销量(千克) 2 | 芜湖青椒(1),21,12280.682 3 | 芜湖青椒(1),22,11256.322 4 | 西兰花,22,10294.533 5 | 净藕(1),21,9869.3 6 | 净藕(1),22,9513.204 7 | 大白菜,20,9451.288 8 | 大白菜,21,9159.122 9 | 金针菇(盒),22,8845.0 10 | 西兰花,21,8423.415 11 | 云南生菜,21,8048.176 12 | 云南生菜(份),22,7896.0 13 | 泡泡椒(精品),20,7760.776 14 | 金针菇(盒),23,6751.0 15 | 云南生菜(份),23,6384.0 16 | 小米椒(份),22,5902.0 17 | 西兰花,20,5454.586 18 | 云南油麦菜,21,5332.896 19 | 净藕(1),20,5199.875 20 | 西峡香菇(1),21,5181.374 21 | 云南油麦菜(份),22,5125.0 22 | 小米椒(份),23,4844.0 23 | 保康高山大白菜,22,4735.161 24 | 紫茄子(2),21,4688.369 25 | 芜湖青椒(1),23,4613.659 26 | 螺丝椒(份),23,4490.0 27 | 青梗散花,21,4252.805 28 | 云南生菜,20,4174.9710000000005 29 | 青梗散花,20,3976.643 30 | 紫茄子(2),22,3860.134 31 | 枝江青梗散花,22,3798.761 32 | 螺丝椒(份),22,3745.0 33 | 云南油麦菜(份),23,3655.0 34 | 奶白菜(份),22,3587.0 35 | 云南生菜,22,3534.289 36 | 娃娃菜,23,3403.0 37 | 西兰花,23,3364.694 38 | 小青菜(份),22,3356.0 39 | 奶白菜(份),23,3344.0 40 | 上海青,21,3314.792 41 | 菠菜(份),22,3258.0 42 | 螺丝椒,21,3228.8 43 | 小皱皮(份),22,3095.0 44 | 云南油麦菜,20,3088.294 45 | 菠菜(份),23,3039.0 46 | 西峡香菇(1),20,2973.247 47 | 黄白菜(2),21,2903.6440000000002 48 | 西峡香菇(1),22,2899.859 49 | 紫茄子(2),23,2815.191 50 | 黄白菜(2),20,2723.557 51 | 竹叶菜,21,2720.622 52 | 双孢菇(盒),23,2710.0 53 | 上海青(份),22,2696.0 54 | 洪湖莲藕(粉藕),22,2620.0 55 | 净藕(1),23,2567.061 56 | 甜白菜,20,2481.306 57 | 奶白菜,21,2470.483 58 | 金针菇(1),20,2415.33 59 | 金针菇(袋)(3),21,2258.0 60 | 菠菜,21,2251.926 61 | 上海青,20,2246.363 62 | 紫茄子(2),20,2238.307 63 | 竹叶菜,22,2181.57 64 | 金针菇(1),21,2169.861 65 | 螺丝椒,22,2165.456 66 | 金针菇(袋)(2),21,2111.0 67 | 金针菇(袋)(1),20,2103.0 68 | 小皱皮(份),23,2080.0 69 | 牛首油菜,20,2076.303 70 | 黄白菜(2),22,2063.761 71 | 枝江红菜苔,21,2043.723 72 | 海鲜菇(袋)(4),21,2040.0 73 | 枝江青梗散花,23,2022.81 74 | 娃娃菜,21,2018.0 75 | 苋菜,21,2002.446 76 | 莲蓬(个),22,1960.0 77 | 泡泡椒(精品),21,1942.174 78 | 菜心,20,1935.813 79 | 娃娃菜,22,1909.0 80 | 菜心,21,1896.152 81 | 红薯尖,21,1861.757 82 | 云南油麦菜,22,1837.359 83 | 洪湖莲藕(粉藕),21,1818.0 84 | 菜心(份),22,1776.0 85 | 
黄心菜(2),22,1773.265 86 | 保康高山大白菜,23,1749.575 87 | 奶白菜,22,1693.996 88 | 苋菜,22,1686.283 89 | 黄心菜(1),21,1667.993 90 | 娃娃菜,20,1652.0 91 | 甜白菜,21,1599.225 92 | 红薯尖,20,1557.73 93 | 茼蒿,21,1528.927 94 | 红椒(1),21,1528.889 95 | 双孢菇(盒),22,1519.0 96 | 螺丝椒,20,1488.967 97 | 洪湖莲藕(粉藕),23,1446.0 98 | 枝江红菜苔,23,1441.971 99 | 菠菜,22,1420.059 100 | 红薯尖,22,1388.486 101 | 小青菜(1),22,1339.019 102 | 海鲜菇(份),22,1298.0 103 | 青线椒(份),22,1296.0 104 | 平菇,21,1277.935 105 | 杏鲍菇(1),20,1261.546 106 | 本地黄心油菜,22,1256.871 107 | 青茄子(1),20,1242.309 108 | 菠菜,20,1236.475 109 | 长线茄,21,1221.1200000000001 110 | 小青菜(1),23,1220.646 111 | 海鲜菇(袋)(1),20,1213.0 112 | 竹叶菜,23,1187.513 113 | 上海青,22,1179.329 114 | 竹叶菜,20,1151.059 115 | 牛首油菜,21,1141.567 116 | 杏鲍菇(1),21,1138.647 117 | 海鲜菇(袋)(3),22,1106.0 118 | 黄心菜(1),20,1086.641 119 | 青茄子(1),21,1082.825 120 | 枝江红菜苔,22,1078.803 121 | 茼蒿,20,1078.164 122 | 竹叶菜(份),22,1076.0 123 | 金针菇(袋)(2),22,1064.0 124 | 长线茄,22,1062.557 125 | 白玉菇(袋),21,1045.0 126 | 茼蒿,22,1034.912 127 | 青线椒,21,1016.4350000000001 128 | 青线椒(份),23,1011.0 129 | 西峡花菇(1),21,967.725 130 | 姜蒜小米椒组合装(小份),23,953.0 131 | 海鲜菇(包),23,948.0 132 | 平菇,20,944.208 133 | 青尖椒(份),22,920.0 134 | 西峡花菇(1),22,917.766 135 | 姜蒜小米椒组合装(小份),22,915.0 136 | 螺丝椒,23,908.958 137 | 奶白菜,23,900.5 138 | 苋菜,23,892.307 139 | 上海青,23,866.272 140 | 西峡香菇(1),23,865.747 141 | 东门口小白菜,21,846.0360000000001 142 | 红椒(1),22,834.385 143 | 大龙茄子,20,822.68 144 | 小米椒,21,776.611 145 | 青茄子(1),22,772.187 146 | 小白菜,21,771.272 147 | 西峡花菇(1),23,759.313 148 | 奶白菜,20,751.329 149 | 白玉菇(袋),20,733.0 150 | 牛首生菜,21,732.492 151 | 红薯尖(份),22,727.0 152 | 杏鲍菇(2),22,723.732 153 | 青线椒,20,723.699 154 | 红尖椒(份),22,721.0 155 | 红椒(1),20,711.293 156 | 小青菜(1),21,708.328 157 | 小青菜(份),23,701.0 158 | 虫草花(份),23,690.0 159 | 姬菇(份),22,682.0 160 | 红杭椒(份),22,674.0 161 | 海鲜菇(袋)(3),23,669.0 162 | 青红杭椒组合装(份),23,650.0 163 | 小白菜,20,645.995 164 | 组合椒系列,21,641.0 165 | 荸荠,21,639.681 166 | 青尖椒,21,624.712 167 | 牛首油菜,22,618.696 168 | 海鲜菇(袋)(2),23,615.0 169 | 青红杭椒组合装(份),22,607.0 170 | 甜白菜,22,604.73 171 | 
白玉菇(袋),22,597.0 172 | 鲜木耳(份),22,594.0 173 | 杏鲍菇(袋),20,593.0 174 | 杏鲍菇(2),23,580.82 175 | 杏鲍菇(袋),21,575.0 176 | 本地上海青,21,570.143 177 | 木耳菜,22,566.792 178 | 大白菜,23,565.076 179 | 白玉菇(袋),23,552.0 180 | 姬菇(包),20,549.0 181 | 茼蒿(份),22,546.0 182 | 高瓜(1),21,544.049 183 | 虫草花(袋),21,534.0 184 | 红椒(2),23,519.636 185 | 苋菜,20,519.025 186 | 双孢菇,21,516.842 187 | 姬菇(包),21,483.0 188 | 金针菇(2),22,473.315 189 | 蔡甸藜蒿,21,473.03000000000003 190 | 茼蒿,23,468.159 191 | 荸荠,20,460.238 192 | 青杭椒(份),22,457.0 193 | 圆茄子(2),20,450.726 194 | 小米椒,20,445.573 195 | 虫草花(袋),20,444.0 196 | 木耳菜,21,438.802 197 | 东门口小白菜,22,436.823 198 | 菜心,22,434.087 199 | 圆茄子(2),21,426.337 200 | 白菜苔,21,426.15500000000003 201 | 苋菜(份),22,426.0 202 | 青茄子(1),23,419.442 203 | 虫草花(份),22,419.0 204 | 西峡香菇(2),22,413.841 205 | 高瓜(2),22,409.308 206 | 枝江红菜苔,20,397.957 207 | 蟹味菇(袋),21,391.0 208 | 野藕(1),23,388.975 209 | 青尖椒,20,386.912 210 | 红椒(1),23,383.308 211 | 青线椒,22,378.92 212 | 蟹味菇与白玉菇双拼(盒),23,371.0 213 | 洪湖藕带,22,370.55400000000003 214 | 海鲜菇(包),21,355.0 215 | 红杭椒,21,354.523 216 | 红薯尖,23,351.46 217 | 大龙茄子,21,344.538 218 | 外地茼蒿,21,341.065 219 | 小白菜(份),22,341.0 220 | 平菇,22,320.298 221 | 木耳菜,23,318.718 222 | 荸荠,23,317.99 223 | 上海青(份),23,317.0 224 | 菠菜,23,308.001 225 | 随州泡泡青,21,303.608 226 | 黄白菜(2),23,297.028 227 | 随州泡泡青,22,293.349 228 | 金针菇(袋)(3),20,291.0 229 | 高瓜(1),23,285.11 230 | 紫茄子(1),22,284.328 231 | 水果辣椒(份),22,283.0 232 | 海鲜菇(1),21,279.814 233 | 小白菜,22,272.901 234 | 枝江红菜苔(份),22,272.0 235 | 外地茼蒿,22,270.871 236 | 净藕(3),22,268.673 237 | 七彩椒(2),23,265.35 238 | 西峡香菇(份) ,22,262.0 239 | 东门口小白菜,20,260.619 240 | 荸荠,22,253.515 241 | 白玉菇(2),20,250.0 242 | 红椒(2),22,249.572 243 | 红尖椒,22,244.522 244 | 木耳菜,20,242.328 245 | 鱼腥草(份),22,242.0 246 | 小青菜(2),22,238.0 247 | 白菜苔,22,232.465 248 | 红尖椒(份),23,232.0 249 | 菜心,23,230.665 250 | 鱼腥草(份),23,228.0 251 | 红杭椒,20,226.398 252 | 洪湖藕带,23,223.285 253 | 菜心(份),23,223.0 254 | 海鲜菇(1),20,222.351 255 | 长线茄,23,212.736 256 | 姬菇(1),21,209.043 257 | 鲜粽叶(袋)(1),22,200.0 258 | 萝卜叶,21,195.315 259 | 高瓜(1),20,185.18 260 
| 蔡甸藜蒿,23,183.53 261 | 青茄子(2),22,183.457 262 | 红尖椒,21,177.069 263 | 青尖椒,22,172.336 264 | 白玉菇(盒),22,168.0 265 | 洪湖莲藕(粉藕),20,168.0 266 | 姬菇(1),20,166.179 267 | 西峡花菇(1),20,165.912 268 | 高瓜(1),22,165.313 269 | 红灯笼椒(2),22,163.787 270 | 银耳(朵),23,160.0 271 | 红灯笼椒(1),20,159.833 272 | 龙牙菜,22,158.576 273 | 蔡甸藜蒿,22,155.605 274 | 云南生菜,23,153.025 275 | 小米椒,22,150.012 276 | 鲜木耳(份),23,147.0 277 | 黄白菜(1),22,141.768 278 | 四川红香椿,21,137.061 279 | 萝卜叶,22,130.083 280 | 鲜粽叶(袋)(3),23,128.0 281 | 蔡甸藜蒿,20,127.41 282 | 外地茼蒿(份),22,127.0 283 | 灯笼椒(1),20,125.8 284 | 鲜木耳(1),21,125.469 285 | 本地小毛白菜,22,121.02 286 | 青杭椒(2),21,120.348 287 | 圆茄子(2),23,120.193 288 | 七彩椒(1),20,119.489 289 | 辣妹子,21,117.753 290 | 西峡香菇(份) ,23,117.0 291 | 杏鲍菇(袋),22,116.0 292 | 黄心菜(1),23,115.327 293 | 四川红香椿,22,111.894 294 | 双孢菇,20,110.876 295 | 黄心菜(2),23,109.503 296 | 和丰阳光海鲜菇(包),22,109.0 297 | 七彩椒(2),22,104.683 298 | 杏鲍菇(份),22,102.0 299 | 金针菇(1),23,101.867 300 | 青梗散花,23,101.388 301 | 洪湖藕带,21,101.176 302 | 黑油菜,22,100.748 303 | 青线椒,23,100.727 304 | 本地黄心油菜,23,99.255 305 | 海鲜菇(袋)(4),22,95.0 306 | 红椒(份),22,93.0 307 | 海鲜菇(袋)(2),22,93.0 308 | 小皱皮,21,92.17 309 | 快菜,20,91.272 310 | 萝卜叶,20,89.927 311 | 小米椒(份),21,87.0 312 | 鲜木耳(1),20,86.702 313 | 蟹味菇(盒),22,81.0 314 | 净藕(3),23,80.127 315 | 四川红香椿,23,78.092 316 | 银耳(朵),22,78.0 317 | 牛首生菜,23,77.94500000000001 318 | 七彩椒(1),21,75.944 319 | 小米椒,23,75.157 320 | 莲蓬(个),20,75.0 321 | 红杭椒,22,74.768 322 | 红莲藕带,23,71.399 323 | 红灯笼椒(1),21,69.219 324 | 云南油麦菜(份),21,68.0 325 | 蔡甸藜蒿(份),22,65.0 326 | 青梗散花,22,62.95 327 | 外地茼蒿,20,62.371 328 | 净藕(2),23,62.221 329 | 菱角,22,61.077 330 | 莲蓬(个),21,60.0 331 | 杏鲍菇(袋),23,59.0 332 | 外地茼蒿,23,58.308 333 | 小青菜(2),21,58.0 334 | 茶树菇(袋),22,58.0 335 | 上海青(份),21,57.0 336 | 牛首生菜,22,56.624 337 | 蟹味菇(1),22,56.0 338 | 菜心(份),21,54.0 339 | 海鲜菇(袋)(1),21,53.0 340 | 海鲜菇(2),22,51.895 341 | 灯笼椒(1),21,51.565 342 | 红灯笼椒(2),23,51.023 343 | 洪湖藕带,20,50.539 344 | 圆茄子(2),22,50.433 345 | 芥菜,21,49.794 346 | 云南油麦菜,23,46.815 347 | 小皱皮,23,46.761 348 | 七彩椒(1),23,46.054 349 | 蟹味菇(1),21,46.0 350 | 
金针菇(袋)(1),21,46.0 351 | 马齿苋,21,45.726 352 | 豌豆尖,20,45.578 353 | 菠菜(份),21,45.0 354 | 青红尖椒组合装(份),23,45.0 355 | 云南生菜(份),21,45.0 356 | 田七,21,44.065 357 | 野生粉藕,20,43.333 358 | 随州泡泡青,20,43.313 359 | 蟹味菇(2),20,43.0 360 | 黄心菜(1),22,41.338 361 | 灯笼椒(2),22,41.259 362 | 襄甜红菜苔(袋),22,41.0 363 | 枝江红菜苔(份),21,41.0 364 | 茼蒿(份),21,41.0 365 | 油菜苔,21,40.726 366 | 菱角,23,40.526 367 | 高瓜(2),23,40.376 368 | 紫苏(份),22,40.0 369 | 洪湖莲藕(脆藕),22,40.0 370 | 豌豆尖,22,38.252 371 | 姬菇(包),22,38.0 372 | 野藕(1),22,37.906 373 | 红灯笼椒(1),23,37.374 374 | 花茄子,22,37.219 375 | 蟹味菇(2),22,36.0 376 | 花茄子,20,35.965 377 | 灯笼椒(2),23,35.337 378 | 随州泡泡青,23,34.951 379 | 余干椒,21,33.907000000000004 380 | 红杭椒(份),23,33.0 381 | 牛首生菜,20,32.776 382 | 白菜苔,20,32.705 383 | 茶树菇(袋),23,32.0 384 | 青菜苔,21,30.275 385 | 鱼腥草,22,28.601 386 | 鲜木耳(2),22,28.535 387 | 龙牙菜,23,28.255 388 | 马齿苋,23,28.228 389 | 花茄子,21,28.195 390 | 冰草(盒),23,28.0 391 | 白菜苔,23,27.351 392 | 银耳(朵),20,27.0 393 | 木耳菜(份),23,27.0 394 | 灯笼椒(1),23,26.099 395 | 牛排菇(盒),22,26.0 396 | 紫贝菜,21,25.878 397 | 春菜,22,24.996 398 | 红莲藕带,22,24.923000000000002 399 | 小皱皮,22,24.709 400 | 白玉菇(1),21,22.0 401 | 银耳(朵),21,22.0 402 | 七彩椒(1),22,21.705000000000002 403 | 鲜藕带(袋),21,21.0 404 | 马齿苋,22,20.361 405 | 虫草花,21,19.905 406 | 野藕(1),21,19.312 407 | 本地黄心油菜,21,18.996 408 | 洪山菜苔,22,18.0 409 | 七彩椒(份),22,18.0 410 | 小白菜,23,17.135 411 | 姬菇(2),22,16.953 412 | 奶白菜苗,21,16.829 413 | 大白菜秧,23,16.538 414 | 红尖椒,20,16.434 415 | 冰草,21,16.019000000000002 416 | 蟹味菇(2),21,16.0 417 | 水果辣椒,22,15.638 418 | 本地上海青,20,15.501 419 | 红线椒,21,15.445 420 | 菱角,21,14.664 421 | 紫苏,21,14.069 422 | 虫草花,20,14.06 423 | 海鲜菇(袋)(4),20,14.0 424 | 黄白菜(1),23,13.728 425 | 芜湖青椒(1),20,13.668 426 | 南瓜尖,21,13.553 427 | 红灯笼椒(份),22,13.0 428 | 水果辣椒,23,12.933 429 | 紫茄子(1),23,12.83 430 | 红线椒,22,12.268 431 | 红灯笼椒(1),22,12.186 432 | 青尖椒(份),23,12.0 433 | 菌蔬四宝(份),22,12.0 434 | 紫白菜(1),21,11.984 435 | 大白菜,22,11.732 436 | 荠菜,20,11.457 437 | 野藕(2),22,11.398 438 | 本地上海青,22,11.053 439 | 秀珍菇,20,10.592 440 | 藕尖,21,10.432 441 | 金针菇(1),22,10.411999999999999 442 | 
菱角,20,10.155 443 | 双孢菇(份),22,10.0 444 | 荸荠(份),22,10.0 445 | 杏鲍菇(250克),21,10.0 446 | 艾蒿,23,9.876 447 | 芝麻苋菜,22,9.381 448 | 西峡花菇(2),22,9.24 449 | 丝瓜尖,21,8.639 450 | 紫苏,22,8.587 451 | 灯笼椒(1),22,8.579 452 | 菊花油菜,21,8.472 453 | 藕尖,22,8.182 454 | 青杭椒(2),20,8.052 455 | 鲜粽叶(袋)(2),22,8.0 456 | 菌菇火锅套餐(份),23,8.0 457 | 蟹味菇(袋),20,8.0 458 | 面条菜,23,7.737 459 | 大芥兰,21,7.48 460 | 青茄子(2),23,7.065 461 | 槐花,22,7.019 462 | 洪山菜苔,21,7.0 463 | 鲜粽子叶,21,6.32 464 | 四川红香椿,20,6.176 465 | 虫草花,23,6.012 466 | 鲜粽叶,22,6.009 467 | 灯笼椒(份),22,6.0 468 | 黑皮鸡枞菌,20,5.795 469 | 紫苏,23,5.686 470 | 紫螺丝椒,23,5.682 471 | 野生粉藕,21,5.043 472 | 绣球菌,20,5.0 473 | 猪肚菇(盒),22,5.0 474 | 圆茄子(1),22,4.981 475 | 南瓜尖,20,4.875 476 | 丝瓜尖,22,4.322 477 | 紫圆茄,21,4.289 478 | 牛排菇,21,4.268 479 | 红莲藕带,20,4.147 480 | 杏鲍菇(1),23,4.139 481 | 秀珍菇,21,4.116 482 | 黑牛肝菌(盒),21,4.0 483 | 黑皮鸡枞菌(盒),22,4.0 484 | 牛排菇(盒),23,4.0 485 | 牛排菇(盒),21,4.0 486 | 薄荷叶,23,3.947 487 | 马兰头,22,3.39 488 | 紫圆茄,22,3.3689999999999998 489 | 荠菜,21,3.314 490 | 鹿茸菇(盒),23,3.0 491 | 花菇(一人份),23,3.0 492 | 海鲜菇(份),23,3.0 493 | 洪山菜薹珍品手提袋,23,3.0 494 | 赤松茸(盒),21,3.0 495 | 洪山菜薹莲藕拼装礼盒,23,3.0 496 | 薄荷叶,21,2.912 497 | 双沟白菜,21,2.8859999999999997 498 | 红杭椒,23,2.871 499 | 面条菜,21,2.867 500 | 野藕(1),20,2.667 501 | 青尖椒,23,2.571 502 | 野生粉藕,23,2.439 503 | 赤松茸,23,2.388 504 | 牛排菇,20,2.334 505 | 面条菜,22,2.332 506 | 槐花,23,2.281 507 | 白蒿,23,2.224 508 | 蒲公英,22,2.212 509 | 猪肚菇(盒),21,2.0 510 | 绣球菌(袋),22,2.0 511 | 绣球菌,21,2.0 512 | 赤松茸(盒),22,2.0 513 | 蒲公英,23,1.964 514 | 芥菜,23,1.941 515 | 辣妹子,22,1.892 516 | 紫苏,20,1.83 517 | 青杭椒(1),21,1.817 518 | 紫尖椒,21,1.754 519 | 黄花菜,23,1.71 520 | 紫白菜(1),22,1.267 521 | 丝瓜尖,23,1.266 522 | 豌豆尖,23,1.218 523 | 鸡枞菌,20,1.215 524 | 田七,23,1.187 525 | 紫螺丝椒,22,1.161 526 | 绿牛油,22,1.153 527 | 荠菜,23,1.116 528 | 猴头菇,20,1.0 529 | 菌蔬四宝(份),23,1.0 530 | 活体银耳,21,1.0 531 | 金针菇(份),22,1.0 532 | 姬菇(包),23,1.0 533 | 猴头菇,21,1.0 534 | 冰草(盒),22,1.0 535 | 猴头菇,22,1.0 536 | 白玉菇(2),21,1.0 537 | 虫草花(盒)(2),21,1.0 538 | 甘蓝叶,21,0.9430000000000001 539 | 薄荷叶,22,0.7090000000000001 540 | 红珊瑚(粗叶),22,0.682 541 | 
芥兰,21,0.671 542 | 紫贝菜,23,0.667 543 | 杏鲍菇(1),22,0.645 544 | 黑牛肝菌,23,0.638 545 | 艾蒿,22,0.636 546 | 紫白菜(2),22,0.615 547 | 红尖椒,23,0.5680000000000001 548 | 平菇,23,0.518 549 | 紫贝菜,22,0.477 550 | 黑牛肝菌,22,0.446 551 | 红橡叶,22,0.419 552 | 水果辣椒(橙色),22,0.415 553 | 黑油菜,21,0.377 554 | 冰草,22,0.318 555 | 泡泡椒(精品),22,0.175 556 | 青杭椒(2),22,0.1 557 | 黄花菜,21,0.074 558 | 蒲公英,21,0.056 559 | -------------------------------------------------------------------------------- /单品Apriori.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from mlxtend.frequent_patterns import apriori, association_rules 4 | from mlxtend.preprocessing import TransactionEncoder 5 | 6 | df1 = pd.read_csv('附件1.csv') 7 | df2 = pd.read_csv('附件2.csv', dtype={'销售日期': str, '扫码销售时间':str}) 8 | df3 = pd.read_csv('附件3.csv', dtype={'日期': str}) 9 | df4 = pd.read_csv('附件4.csv') 10 | 11 | # # 简化数据 12 | # 将'销售日期'和'扫码销售时间'数据进行简化 13 | 14 | 15 | def process_text(text, indices): 16 | selected_chars = ''.join(text[i] for i in indices if 0 <= i < len(text)) 17 | return selected_chars 18 | 19 | sales_date = [3, 5, 6, 8, 9] 20 | df2['销售日期'] = df2['销售日期'].apply(lambda x: process_text(x, sales_date)) 21 | # df2['扫码销售时间'] = df2['扫码销售时间'].str[:2] 22 | 23 | # # 统一附件三格式 24 | 25 | def process_text(text, indices): 26 | selected_chars = ''.join(text[i] for i in indices if 0 <= i < len(text)) 27 | return selected_chars 28 | 29 | sales_date = [3, 5, 6, 8, 9] 30 | df3['日期'] = df3['日期'].apply(lambda x: process_text(x, sales_date)) 31 | new_column_name = '销售日期' 32 | df3.columns.values[0] = new_column_name 33 | 34 | # # 将附件一、附件三、附件四的信息加入附件二 35 | 36 | df1_key = df1['单品编码'].tolist() 37 | df1_value1 = df1['分类名称'].tolist() 38 | df1_value2 = df1['单品名称'].tolist() 39 | df4_key = df4['单品编码'].tolist() 40 | df4_value = df4['损耗率(%)'].tolist() 41 | csv_dict1_1 = dict(zip(df1_key, df1_value1)) 42 | csv_dict1_2 = dict(zip(df1_key, df1_value2)) 43 | csv_dict4 = dict(zip(df4_key, 
df4_value)) 44 | 45 | def get_value(key, csv_dict): 46 | return csv_dict.get(key, None) 47 | df2['单品名称'] = df2['单品编码'].apply(get_value, args=(csv_dict1_2,)) 48 | df2['分类名称'] = df2['单品编码'].apply(get_value, args=(csv_dict1_1,)) 49 | df2['损耗率(%)'] = df2['单品编码'].apply(get_value, args=(csv_dict4,)) 50 | merged_df = df2.merge(df3[['销售日期', '单品编码', '批发价格(元/千克)']], on=['销售日期', '单品编码'], how='left') 51 | 52 | # # 将数据改为适当的数据类型,并将'扫码销售时间'改为'存放时常(小时)' 53 | 54 | # columns_to_convert = {'扫码销售时间': int, '损耗率(%)': float} 55 | # df2 = df2.astype(columns_to_convert) 56 | # df2['扫码销售时间'] = df2['扫码销售时间'] - 4 57 | # new_column_name = '存放时常(小时)' 58 | # df2.rename(columns={'扫码销售时间': new_column_name}, inplace=True) 59 | df2.columns = df2.columns.tolist() 60 | new_columns = df2.columns.tolist() 61 | df = pd.DataFrame(df2.values, columns=new_columns) 62 | columns_to_convert = {'单品编码': int, '销量(千克)': float, 63 | '销售单价(元/千克)': float, '损耗率(%)': float 64 | } 65 | # df['存放时常(小时)'] = df['存放时常(小时)'].astype('int64') 66 | df['单品编码'] = df['单品编码'].astype('int64') 67 | 68 | # 定义一个函数来处理每一行的'扫码销售时间' 69 | def process_time(row): 70 | time_str = row['扫码销售时间'] 71 | time_int = int(time_str.replace(':', '').replace('.', '')) 72 | return time_int 73 | 74 | # 使用apply()方法将处理函数应用到'扫码销售时间'列的每一行 75 | df['扫码销售时间'] = df.apply(process_time, axis=1) 76 | 77 | df['销售日期'] = df['销售日期'].astype(int) 78 | 79 | # 将'销售日期'和'扫码销售时间'列合并成一个新列'销售时间' 80 | df['销售时间'] = df['销售日期'] + df['扫码销售时间'] 81 | 82 | # df.to_csv('data.csv', index=False, encoding='utf-8-sig') 83 | # print(df.info()) 84 | # print(df.head()) 85 | 86 | # 使用groupby和agg将相同销售时间的单品放入一个集合中 87 | result_df = df.groupby('销售时间')['单品名称'].agg(lambda x: set(x) if len(x) > 1 else x).reset_index() 88 | result_df = result_df[result_df['单品名称'].apply(lambda x: isinstance(x, set))] 89 | # print(result_df) 90 | 91 | # 将包含集合的行合并到一个列表中 92 | basket = result_df['单品名称'].tolist() 93 | 94 | # 将数据转换为适合Apriori的格式 95 | te = TransactionEncoder() 96 | te_ary = te.fit(basket).transform(basket) 97 | 98 
| # 使用pandas创建一个DataFrame 99 | df = pd.DataFrame(te_ary, columns=te.columns_) 100 | 101 | # 使用Apriori找出所有的频繁项集 102 | frequent_itemsets = apriori(df, min_support=0.0001, use_colnames=True) 103 | 104 | # 使用频繁项集生成规则 105 | rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.7) 106 | 107 | # 筛选单品类与单品类之间的关联规则 108 | rules = rules[rules['antecedents'].apply(lambda x: len(x) == 1) & 109 | rules['consequents'].apply(lambda x: len(x) == 1)] 110 | 111 | 112 | 113 | # print(basket) 114 | 115 | # 打印关联规则 116 | print(rules) 117 | 118 | rules.to_csv('rules_single.csv', index=False, encoding='utf-8-sig') 119 | 120 | import pandas as pd 121 | import matplotlib.pyplot as plt 122 | import seaborn as sns 123 | import matplotlib 124 | matplotlib.rc("font",family='YouYuan') 125 | 126 | # 读取关联规则数据 127 | rules = pd.read_csv('rules_single.csv') 128 | 129 | # 提取antecedents和consequents列中的标签 130 | rules['antecedents'] = rules['antecedents'].apply(lambda x: x.strip("frozenset()").replace("'", "")) 131 | rules['consequents'] = rules['consequents'].apply(lambda x: x.strip("frozenset()").replace("'", "")) 132 | 133 | # 绘制支持度和置信度的散点图 134 | # plt.figure(figsize=(8, 6)) 135 | # sns.scatterplot(x='support', y='confidence', data=rules) 136 | # plt.title('Support vs Confidence') 137 | # plt.xlabel('Support') 138 | # plt.ylabel('Confidence') 139 | # plt.show() 140 | 141 | # 绘制规则的热力图 142 | pivot_table = rules.pivot_table(index='antecedents', columns='consequents', values='lift') 143 | plt.figure(figsize=(12, 10)) 144 | sns.heatmap(pivot_table, annot=False, cmap='coolwarm') 145 | plt.xticks(rotation=45) 146 | plt.xticks(fontsize=8) 147 | plt.title('单品关联性强度热力图') 148 | plt.show() 149 | 150 | 151 | 152 | 153 | -------------------------------------------------------------------------------- /品类FP-Growth.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from mlxtend.frequent_patterns import 
apriori, association_rules 4 | from mlxtend.preprocessing import TransactionEncoder 5 | 6 | df1 = pd.read_csv('附件1.csv') 7 | df2 = pd.read_csv('附件2.csv', dtype={'销售日期': str, '扫码销售时间':str}) 8 | df3 = pd.read_csv('附件3.csv', dtype={'日期': str}) 9 | df4 = pd.read_csv('附件4.csv') 10 | 11 | # # 简化数据 12 | # 将'销售日期'和'扫码销售时间'数据进行简化 13 | 14 | 15 | # 定义一个函数来处理日期字符串 16 | def simplify_date(date_str): 17 | # 将日期字符串分割为年、月、日部分 18 | parts = date_str.split('-') 19 | year = parts[0][-2:] # 获取年份的后两位 20 | month = parts[1] # 月份部分保持不变 21 | day = parts[2] # 日份部分保持不变 22 | # 组合年、月、日部分并返回 23 | simplified_date = year + month + day 24 | return simplified_date 25 | 26 | # 在DataFrame中应用处理函数 27 | df2['销售日期'] = df2['销售日期'].apply(simplify_date) 28 | 29 | # df2['扫码销售时间'] = df2['扫码销售时间'].str[:2] 30 | 31 | # # 统一附件三格式 32 | 33 | def process_text(text, indices): 34 | selected_chars = ''.join(text[i] for i in indices if 0 <= i < len(text)) 35 | return selected_chars 36 | 37 | sales_date = [3, 5, 6, 8, 9] 38 | df3['日期'] = df3['日期'].apply(lambda x: process_text(x, sales_date)) 39 | new_column_name = '销售日期' 40 | df3.columns.values[0] = new_column_name 41 | 42 | # # 将附件一、附件三、附件四的信息加入附件二 43 | 44 | df1_key = df1['单品编码'].tolist() 45 | df1_value1 = df1['分类名称'].tolist() 46 | df1_value2 = df1['单品名称'].tolist() 47 | df4_key = df4['单品编码'].tolist() 48 | df4_value = df4['损耗率(%)'].tolist() 49 | csv_dict1_1 = dict(zip(df1_key, df1_value1)) 50 | csv_dict1_2 = dict(zip(df1_key, df1_value2)) 51 | csv_dict4 = dict(zip(df4_key, df4_value)) 52 | 53 | def get_value(key, csv_dict): 54 | return csv_dict.get(key, None) 55 | df2['单品名称'] = df2['单品编码'].apply(get_value, args=(csv_dict1_2,)) 56 | df2['分类名称'] = df2['单品编码'].apply(get_value, args=(csv_dict1_1,)) 57 | df2['损耗率(%)'] = df2['单品编码'].apply(get_value, args=(csv_dict4,)) 58 | merged_df = df2.merge(df3[['销售日期', '单品编码', '批发价格(元/千克)']], on=['销售日期', '单品编码'], how='left') 59 | 60 | # # 将数据改为适当的数据类型,并将'扫码销售时间'改为'存放时常(小时)' 61 | 62 | # columns_to_convert = {'扫码销售时间': int, '损耗率(%)': float} 
63 | # df2 = df2.astype(columns_to_convert) 64 | # df2['扫码销售时间'] = df2['扫码销售时间'] - 4 65 | # new_column_name = '存放时常(小时)' 66 | # df2.rename(columns={'扫码销售时间': new_column_name}, inplace=True) 67 | df2.columns = df2.columns.tolist() 68 | new_columns = df2.columns.tolist() 69 | df = pd.DataFrame(df2.values, columns=new_columns) 70 | columns_to_convert = {'单品编码': int, '销量(千克)': float, 71 | '销售单价(元/千克)': float, '损耗率(%)': float 72 | } 73 | # df['存放时常(小时)'] = df['存放时常(小时)'].astype('int64') 74 | df['单品编码'] = df['单品编码'].astype('int64') 75 | 76 | # 定义一个函数来处理每一行的'扫码销售时间' 77 | def process_time(row): 78 | time_str = row['扫码销售时间'] 79 | time_int = int(time_str.replace(':', '').replace('.', '')) 80 | return time_int 81 | 82 | # 使用apply()方法将处理函数应用到'扫码销售时间'列的每一行 83 | df['扫码销售时间'] = df.apply(process_time, axis=1) 84 | 85 | df['销售日期'] = df['销售日期'].astype(int) 86 | 87 | # 将'销售日期'和'扫码销售时间'列合并成一个新列'销售时间' 88 | df['销售时间'] = df['销售日期'] + df['扫码销售时间'] 89 | 90 | df.to_csv('data.csv', index=False, encoding='utf-8-sig') 91 | # print(df.info()) 92 | # print(df.head()) 93 | 94 | # 使用groupby和agg将相同销售时间的单品放入一个集合中 95 | result_df = df.groupby('销售时间')['分类名称'].agg(lambda x: set(x) if len(x) > 1 else x).reset_index() 96 | result_df = result_df[result_df['分类名称'].apply(lambda x: isinstance(x, set))] 97 | # print(result_df) 98 | 99 | import pyfpgrowth 100 | 101 | # 假设您的数据已经存储在一个名为data的列表中,其中每个元素是一个集合,如:[{花叶类, 水生根茎类}, {食用菌, 花菜类}, ...] 
102 | transactions = [list(item) for item in result_df['分类名称']] 103 | 104 | # 设置支持度阈值,例如0.02,表示项集在所有交易中至少出现2%的次数 105 | support_threshold = 0.002 * len(transactions) 106 | 107 | # 使用find_frequent_patterns找到频繁项集 108 | patterns = pyfpgrowth.find_frequent_patterns(transactions, support_threshold) 109 | 110 | # 设置置信度阈值,例如0.5,表示规则成立的置信度至少为50% 111 | confidence_threshold = 0.2 112 | 113 | # 使用generate_association_rules找到关联规则 114 | rules = pyfpgrowth.generate_association_rules(patterns, confidence_threshold) 115 | 116 | # 打印关联规则 117 | print(rules) 118 | 119 | # rules.to_csv('rules.csv', index=False, encoding='utf-8-sig') 120 | 121 | 122 | 123 | 124 | 125 | --------------------------------------------------------------------------------