├── .gitignore ├── Annual-Report.py ├── Get-Eat-Data.py ├── LICENSE ├── README.md ├── figs ├── example-poster.png └── example.png ├── generate-poster.py ├── html-template └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | eat-data.json 2 | 3 | **/__pycache__/ 4 | 5 | /.idea 6 | /.vscode 7 | **/.DS_Store 8 | 9 | *.json 10 | *.html -------------------------------------------------------------------------------- /Annual-Report.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import datetime as dt 3 | import matplotlib.pyplot as plt 4 | import matplotlib.cm as cm 5 | import warnings 6 | import json 7 | 8 | # 指定字体 9 | plt.rcParams['font.sans-serif'] = ['MiSans', 'SimHei', 'Hiragino Sans GB', 'Noto Sans SC', 'Noto Sans'] 10 | 11 | def convert_time(timestamp, time_zone = 8): 12 | ''' 13 | 时间转换 14 | ''' 15 | # 转换为 UTC 时间 16 | utc_time = dt.datetime.fromtimestamp(timestamp, tz = dt.timezone.utc) 17 | # 转换为东八区时间 18 | converted_time = utc_time.astimezone(dt.timezone(dt.timedelta(hours = time_zone))) 19 | return converted_time 20 | 21 | 22 | def load_eat_data(eat_data, time_zone = 8): 23 | ''' 24 | 加载消费数据 25 | ''' 26 | 27 | data = json.load(eat_data) 28 | entities = data.get("entities", []) 29 | df = pd.DataFrame(entities) 30 | 31 | # 取反并乘100取整,修复浮点精度问题 32 | df['amount'] = (df['amount'] * -100).round().astype(int) 33 | df['amount'] = df['amount'] / 100 34 | df['orderTime'] = df['orderTime'].apply(lambda x: convert_time(x, time_zone)) 35 | df['payTime'] = df['payTime'].apply(lambda x: convert_time(x, time_zone)) 36 | 37 | # 去除年份、秒和时区 38 | df['formatted_orderTime'] = df['orderTime'].dt.strftime('%m月%d日%H点%M分') 39 | df['formatted_payTime'] = df['payTime'].dt.strftime('%m月%d日%H点%M分') 40 | 41 | # 提取日期和时分秒 42 | df['date'] = df['payTime'].dt.date # 提取日期部分 43 | df['time'] = df['payTime'].dt.time # 提取时分秒部分 44 | 45 | return df 46 | 47 | def filter(df): 48 | ''' 49 | 过滤数据 50 | ''' 51 | # 非消费(支出)数据 52 | df = df[~(df['amount'] < 0)] 53 | 54 | # 非餐饮数据 55 | filter_keys = ['电瓶车', '游泳', '核减', '浴室', '教材科' ,'校医院', '充值', r'沪(?:\w){6,7}'] # 需要继续补充 56 | for k in filter_keys: 57 | df = df[~df['merchant'].str.contains(k)] 58 | return df 59 | 60 | def annual_analysis(df): 61 | ''' 62 | 年度消费分析 63 | ''' 64 | 65 | print("\n思源码年度消费报告:") 66 | 67 | # 总消费 68 | total_value = df['amount'].sum() 69 | print(f"\n 2024年,你在交大共消费了 {total_value:.2f} 元。") 70 | 71 | # 第一笔消费 72 | first_row = df.iloc[-1] 73 | print(f"\n {first_row['formatted_payTime']},你在 {first_row['merchant']} 开启了第一笔在交大的消费,花了 {first_row['amount']:.2f} 元。") 74 | print(" 在交大的每一年都要有一个美好的开始。") 75 | 76 | # 最大消费 77 | max_row = df.loc[df['amount'].idxmax()] 78 | print(f"\n 今年 {max_row['formatted_payTime']},你在交大的 {max_row['merchant']} 单笔最多消费了 {max_row['amount']:.2f} 元。") 79 | print(" 哇,真是胃口大开的一顿!") 80 | 81 | # 最常消费 82 | most_frequent_merchant = df['merchant'].mode()[0] 83 | most_frequent_merchant_count = df[df['merchant'] == most_frequent_merchant].shape[0] 84 | most_frequent_merchant_total = df[df['merchant'] == most_frequent_merchant]['amount'].sum() 85 | print(f"\n 你最常前往 {most_frequent_merchant} ,一共 {most_frequent_merchant_count} 次,总共花了 {most_frequent_merchant_total:.2f} 元。") 86 | print(" 这里的美食真是让你回味无穷。") 87 | 88 | # 最多消费 89 | most_expensive_merchant = df.groupby('merchant')['amount'].sum().idxmax() 90 | most_expensive_merchant_count = df[df['merchant'] == most_expensive_merchant].shape[0] 91 | most_expensive_merchant_total = df.groupby('merchant')['amount'].sum().max() 92 | print(f"\n 你在 {most_expensive_merchant} 消费最多,{most_expensive_merchant_count} 次消费里,一共花了 {most_expensive_merchant_total:.2f} 元。") 93 | print(" 想来这里一定有你钟爱的菜品。") 94 | 95 | # 早中晚消费 96 | df['hour'] = df['payTime'].dt.hour 97 | morning = df[(df['hour'] >= 6) & (df['hour'] < 9)]['amount'].shape[0] 98 | noon = df[(df['hour'] >= 11) & (df['hour'] < 14)]['amount'].shape[0] 99 | night = df[(df['hour'] >= 17) & (df['hour'] < 19)]['amount'].shape[0] 100 | print(f"\n 你今年一共在交大吃了 {morning} 顿早餐,{noon} 顿午餐,{night} 顿晚餐。") 101 | print(" 在交大的每一顿都要好好吃饭~") 102 | 103 | # 按日期分组,找到每一天中最早的时间 104 | try: 105 | earliest_rows_per_day = df.loc[df.groupby('date')['time'].idxmin()] 106 | overall_earliest_row = earliest_rows_per_day.loc[earliest_rows_per_day['time'].idxmin()] 107 | print(f"\n {overall_earliest_row['formatted_payTime']} 是你今年最早的一次用餐,你一早就在 {overall_earliest_row['merchant']} 吃了 {overall_earliest_row['amount']:.2f} 元。") 108 | # 错误似乎是因为pandas版本过低导致的,建议更新 109 | except Exception: 110 | print(f"\n 获取每日最早消费时出错,请更新pandas: pip install --upgrade pandas") 111 | 112 | 113 | # 月份消费金额分布 114 | df['month'] = df['payTime'].dt.month 115 | most_expensive_month = df.groupby('month')['amount'].sum().idxmax() 116 | most_expensive_month_total = df.groupby('month')['amount'].sum().max() 117 | print(f"\n 你在 {most_expensive_month} 月消费最多,一共花了 {most_expensive_month_total:.2f} 元。") 118 | print(" 来看看你的月份分布图") 119 | 120 | # 按食堂分组,统计总消费金额 121 | grouped = df.groupby('merchant')['amount'].sum().sort_values(ascending=False) 122 | # 计算总消费金额 123 | total_amount = grouped.sum() 124 | # 找到占比 >= 1% 的食堂 125 | threshold = 0.01 # 占比 1% 126 | major_merchants = grouped[grouped / total_amount >= threshold] 127 | # 将占比 < 1% 的合并为 "其他" 128 | other_sum = grouped[grouped / total_amount < threshold].sum() 129 | # 合并为新的 Series 130 | final_grouped = pd.concat([major_merchants, pd.Series({'其他': other_sum})]) 131 | 132 | 133 | # 绘图 134 | fig, axs = plt.subplots(1, 3, figsize=(20, 6)) 135 | 136 | # 食堂消费金额饼图 137 | final_grouped.plot( 138 | kind='pie', autopct='%1.1f%%', startangle=90, textprops={'fontsize': 12}, ax=axs[0] 139 | ) 140 | axs[0].set_ylabel('') # 去掉 y 轴标签 141 | axs[0].set_title('各食堂总消费金额分布', fontsize=16) 142 | 143 | # 月份消费金额分布 144 | df['month'] = df['payTime'].dt.month 145 | monthly_amount = df.groupby('month')['amount'].sum() 146 | axs[1].bar(monthly_amount.index, monthly_amount.values, color='skyblue') 147 | axs[1].set_title('月份消费金额分布', fontsize=16) 148 | axs[1].set_xlabel('月份', fontsize=12) 149 | axs[1].set_ylabel('消费金额', fontsize=12) 150 | axs[1].set_xticks(range(1, 13)) # 确保横坐标是 1 到 12 月份 151 | 152 | # 一天内消费时间分布 153 | axs[2].hist(df['hour'], bins=24, color='skyblue', edgecolor='black') 154 | axs[2].set_title('一天内消费时间分布', fontsize=16) 155 | axs[2].set_xlabel('时间 (小时)', fontsize=12) 156 | axs[2].set_ylabel('消费次数', fontsize=12) 157 | axs[2].set_xticks(range(0, 24)) # 确保横坐标是 0 到 23 小时 158 | 159 | print("\n不管怎样,吃饭要紧") 160 | print("2025年也要记得好好吃饭喔(⌒▽⌒)☆ \n") 161 | 162 | # 调整布局和显示 163 | plt.tight_layout() 164 | with warnings.catch_warnings(record=True) as warns: 165 | warnings.simplefilter("always", UserWarning) 166 | plt.show() 167 | if not any(item.category == UserWarning for item in warns): 168 | print("未知`plt.show()`错误。请更新matplotlib: pip install --upgrade matplotlib") 169 | raise 170 | print("对不起,当前无法显示图表。你可以输入图片名称保存图表,也可以回车并不保存图片。") 171 | filename = input("图片名称(不需要后缀):") 172 | if filename: 173 | plt.savefig(f"{filename}.png") 174 | 175 | input() 176 | 177 | 178 | 179 | if __name__ == "__main__": 180 | try: 181 | with open("eat-data.json", 'r', encoding='utf-8') as eat_data: 182 | eat_data_df = load_eat_data(eat_data) 183 | 184 | # 现在默认启用过滤 185 | eat_data_df = filter(eat_data_df) 186 | annual_analysis(eat_data_df) 187 | except FileNotFoundError: 188 | print("\n首次运行,请先运行 Get-Eat-Data 以获取消费数据") 189 | print("如果已经运行过 Get-Eat-Data,请查看 README 中的问题解答") 190 | input("按回车键退出...") 191 | except Exception: 192 | print("\n发生其他错误") 193 | raise 194 | -------------------------------------------------------------------------------- /Get-Eat-Data.py: -------------------------------------------------------------------------------- 1 | import json 2 | from requests import get, post 3 | from requests.auth import HTTPBasicAuth 4 | from urllib.parse import urlencode 5 | 6 | # 配置参数 7 | AUTHORIZATION_URL = "https://jaccount.sjtu.edu.cn/oauth2/authorize" 8 | API_URL = "https://api.sjtu.edu.cn/v1/unicode/transactions" 9 | TOKEN_URL = "https://jaccount.sjtu.edu.cn/oauth2/token" 10 | REDIRECT_URI = "https://net.sjtu.edu.cn" 11 | STATE = "" 12 | BEGIN_DATE = 1704038400 13 | 14 | # 设置 client_id 和 client_secret 15 | CLIENT_ID = "" 16 | CLIENT_SECRET = "" 17 | 18 | def get_authorization_code(): 19 | """ 20 | 构造授权请求 URL 21 | """ 22 | params = { 23 | "response_type": "code", 24 | "client_id": CLIENT_ID, 25 | "redirect_uri": REDIRECT_URI, 26 | "scope": "", 27 | "state": STATE 28 | } 29 | auth_url = f"{AUTHORIZATION_URL}?{urlencode(params)}" 30 | print(f"\n请在浏览器中打开以下链接并登录:\n{auth_url}\n") 31 | 32 | # 手动输入授权后的回调 URL 33 | redirect_response = input("登录完毕后,请稍等片刻至跳转到网络信息中心页面\n此时复制浏览器地址栏中的完整链接,并粘贴到这里,按回车确认: ") 34 | # 提取 code 参数 35 | from urllib.parse import urlparse, parse_qs 36 | query_params = parse_qs(urlparse(redirect_response).query) 37 | return query_params.get("code", [None])[0] 38 | 39 | def get_access_token(authorization_code): 40 | """ 41 | 使用授权码获取访问令牌 (Access Token) 42 | """ 43 | # 构造请求头,使用 Basic Auth 传递 client_id 和 client_secret 44 | headers = { 45 | "Content-Type": "application/x-www-form-urlencoded" 46 | } 47 | # 使用 HTTPBasicAuth 自动生成 Basic Authorization 头 48 | auth = HTTPBasicAuth(CLIENT_ID, CLIENT_SECRET) 49 | 50 | # 构造请求体 51 | data = { 52 | "grant_type": "authorization_code", # 固定参数 53 | "code": authorization_code, # 授权码 54 | "redirect_uri": REDIRECT_URI # 重定向 URI,必须与之前一致 55 | } 56 | 57 | # 发起 POST 请求到 TOKEN_URL 58 | response = post(TOKEN_URL, headers=headers, auth=auth, data=data) 59 | 60 | # 检查返回结果 61 | if response.status_code == 200: 62 | # 成功获取令牌,返回 JSON 响应 63 | return response.json() 64 | else: 65 | # 发生错误,打印状态码和错误信息 66 | print("获取令牌失败:") 67 | print(f"状态码: {response.status_code}") 68 | print(f"响应: {response.text}") 69 | return None 70 | 71 | def get_eat_data(access_token, begin_date = BEGIN_DATE): 72 | """ 73 | 获取消费数据 74 | """ 75 | params = { 76 | "access_token": access_token, 77 | "channel": "", 78 | "start": 0, 79 | "beginDate": begin_date, 80 | "status": "" 81 | } 82 | 83 | # 发起请求 84 | try: 85 | response = get(API_URL, params=params) 86 | 87 | # 检查请求是否成功 88 | if response.status_code == 200: 89 | # 解析响应 JSON 数据 90 | data = response.json() 91 | 92 | if data.get('errno', 0) != 0: 93 | print(data) 94 | # 防止泄露 client_id 和 client_secret,只放错误码 95 | error_code = data.get('errno', '无错误码') 96 | print(f"API 错误: {error_code})") 97 | raise Exception() 98 | else: 99 | print("消费数据获取成功") 100 | 101 | # 保存到文件 102 | with open("eat-data.json", "w", encoding="utf-8") as file: 103 | json.dump(data, file, ensure_ascii=False, indent=4) 104 | 105 | print("\n消费数据已保存") 106 | 107 | return data 108 | else: 109 | print(f"\n请求失败,状态码: {response.status_code}") 110 | print(f"错误信息: {response.text}") 111 | except Exception as e: 112 | print(f"\n请求过程中发生错误,请检查网络及代理设置,或删除目录下的 eat-data.json 文件后重试") 113 | 114 | 115 | if __name__ == "__main__": 116 | try: 117 | print("\n首次运行,请先登录并获取消费数据") 118 | # 获取授权码 119 | authorization_code = get_authorization_code() 120 | if not authorization_code: 121 | print("\n授权码获取失败,请检查你的返回 URL\n") 122 | exit() 123 | print(f"\n取得授权码: {authorization_code}") 124 | 125 | # 获取访问令牌 126 | token_response = get_access_token(authorization_code) 127 | access_token = token_response.get('access_token') 128 | if token_response: 129 | print("\n成功获取访问令牌(Access Token):") 130 | print(f"{access_token}\n") 131 | 132 | # 获取消费数据 133 | if get_eat_data(access_token): 134 | input("请前往 Annual-Report.py 以继续...") 135 | 136 | except Exception: 137 | print("Unknown Error:500") 138 | input("按回车键退出...") 139 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Milvoid 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SJTU-Annual-Eat 2 | 3 | 思源码消费年度总结 4 | 5 | 来看看你今年都在交大消费了些什么吧 6 | 7 | ## Quick Start 8 | 9 | > 同时支持 Windows 与 macOS 10 | 11 | 前往 [Release 页面](https://github.com/Milvoid/SJTU-Annual-Eat/releases/tag/1.0.1) 下载 `Release.zip` 12 | 13 | 解压后得到 `Get-Eat-Data` 与 `Annual-Report.py` 14 | 15 | 首先运行 `Get-Eat-Data` ,按照提示获取数据后得到 `eat-data.json` 16 | 17 | 之后再运行 `Annual-Report.py` 即可生成年度报告啦 18 | 19 | ## Common Issues 20 | 21 | - Annual-Report.py 闪了一下就没了 22 | 23 | 可能是所需的库没有安装,可以用文本编辑器打开文件,之后查看最上方需要导入的模块是否有缺失 24 | 25 | - Access Token 获取失败 26 | 27 | 可以确认一下自己没有使用代理,sjtu服务器好像会somehow因为代理拒绝访问 28 | 29 | - 运行到早中晚餐之后报错 30 | 31 | 应该是统计最早一餐部分的兼容性问题,可以手动注释掉该部分,或者下载新的 Release 这部分失败之后自动跳过 32 | 33 | 代码有点bug,按日期分组,找到每一天中最早的时间那边会错误,不知道是不是 pandas 版本问题 34 | 35 | 如果是 `reduction operation 'argmin' not allowed for this dtype` 问题,可以在函数load_eat_data的最后加一行 36 | 37 | ```python 38 | df['time_in_seconds'] = df['payTime'].dt.hour * 3600 + df['payTime'].dt.minute * 60 + df['payTime'].dt.second 39 | ``` 40 | 41 | 然后把“按日期分组,找到每一天中最早的时间”下面两行中的'time'都改成'time_in_seconds'就行了。 42 | 43 | - 运行 Get-Eat-Data 后仍然找不到 json 文件 44 | 45 | 可以在终端里先 cd 到 json 文件所在路径,之后从终端运行 Annual-Report.py;或者直接把 Annual-Report.py 的文件路径改成绝对路径 46 | 47 | ## Example 48 | 49 | 运行 `Annual-Report.py` 之后,你就可以看到今年的一些 Highlight 以及相关统计图,譬如: 50 | 51 | ```shell 52 | 思源码年度消费报告: 53 | 54 | 2024年,你在交大共消费了 1885.17 元。 55 | 56 | 01月01日17点43分,你在 闵行三餐外婆桥 开启了第一笔在交大的消费,花了 17.0 元。 57 | 在交大的每一年都要有一个美好的开始。 58 | 59 | 今年 02月20日11点56分,你在交大的 教材科 单笔最多消费了 41.5 元。 60 | 哇,真是胃口大开的一顿! 61 | 62 | 你在 闵行三餐学生餐厅 消费最多,38 次消费里,一共花了 493.38 元。 63 | 想来这里一定有你钟爱的菜品。 64 | 65 | 你今年一共在交大吃了 0 顿早餐,62 顿午餐,55 顿晚餐。 66 | 在交大的每一顿都要好好吃饭~ 67 | 68 | 05月08日09点57分 是你今年最早的一次用餐,你一早就在 沪FP2215 吃了 6.0 元。 69 | 70 | 你在 10 月消费最多,一共花了 308.2 元。 71 | 来看看你的月份分布图 72 | 73 | 不管怎样,吃饭要紧 74 | 2025年也要记得好好吃饭喔(⌒▽⌒)☆ 75 | ``` 76 | 77 | ![example](https://raw.githubusercontent.com/Milvoid/SJTU-Annual-Eat/main/figs/example.png) 78 | 79 | ## Poster 80 | 81 | 你也可以通过运行下面的脚本来生成一幅简单的海报,你可以用你的浏览器打开```海报.html```和截图。 82 | ``` 83 | python generate-poster.py 84 | ``` 85 | 86 | ![](./figs/example-poster.png) 87 | 88 | ## Notes 89 | 90 | `Get-Eat-Data.exe` 可直接运行;如果需要运行 `Get-Eat-Data.py`,请参考 [SJTU 开发者文档](https://developer.sjtu.edu.cn/auth/oauth.html) 填写 `client_id` 和 `client_secret` 91 | 92 | 特别感谢来自 Boar 大佬的帮助 93 | 94 | 以及感谢本仓库帮忙修代码的 Contributors 95 | 96 | ## Related Links 97 | 98 | https://github.com/wzh656/SJTU-Annual-Report 99 | 100 | 网页版本的年度报告,适合直接运行 101 | -------------------------------------------------------------------------------- /figs/example-poster.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Milvoid/SJTU-Annual-Eat/3c4fb0b9d66727e037aa1cd1961c7e90c9da37ea/figs/example-poster.png -------------------------------------------------------------------------------- /figs/example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Milvoid/SJTU-Annual-Eat/3c4fb0b9d66727e037aa1cd1961c7e90c9da37ea/figs/example.png -------------------------------------------------------------------------------- /generate-poster.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import datetime as dt 3 | import matplotlib.pyplot as plt 4 | import matplotlib.cm as cm 5 | import json 6 | from jinja2 import Template 7 | 8 | # 指定字体 9 | # plt.rcParams['font.sans-serif'] = ['MiSans', 'SimHei', 'Hiragino Sans GB', 'Noto Sans SC', 'Noto Sans'] 10 | 11 | html_data = { 12 | 'total_spending_text': '', 13 | 'first_spending_text': '', 14 | 'highest_single_spending_text': '', 15 | 'favorite_place_spending_text': '', 16 | 'meal_counts_text': '', 17 | 'earliest_meal_text': '', 18 | 'most_expensive_month_text': '', 19 | 'pie_label_1': '', 20 | 'pie_data_1': '', 21 | 'pie_data_2': '' 22 | } 23 | 24 | 25 | def convert_time(timestamp, time_zone = 8): 26 | ''' 27 | 时间转换 28 | ''' 29 | # 转换为 UTC 时间 30 | utc_time = dt.datetime.fromtimestamp(timestamp, tz = dt.timezone.utc) 31 | # 转换为东八区时间 32 | converted_time = utc_time.astimezone(dt.timezone(dt.timedelta(hours = time_zone))) 33 | return converted_time 34 | 35 | 36 | def load_eat_data(eat_data, time_zone = 8): 37 | ''' 38 | 加载消费数据 39 | ''' 40 | 41 | data = json.load(eat_data) 42 | entities = data.get("entities", []) 43 | df = pd.DataFrame(entities) 44 | 45 | # 取反并乘100取整,修复浮点精度问题 46 | df['amount'] = (df['amount'] * -100).round().astype(int) 47 | df['amount'] = df['amount'] / 100 48 | df['orderTime'] = df['orderTime'].apply(lambda x: convert_time(x, time_zone)) 49 | df['payTime'] = df['payTime'].apply(lambda x: convert_time(x, time_zone)) 50 | 51 | # 去除年份、秒和时区 52 | df['formatted_orderTime'] = df['orderTime'].dt.strftime('%m月%d日%H点%M分') 53 | df['formatted_payTime'] = df['payTime'].dt.strftime('%m月%d日%H点%M分') 54 | 55 | # 提取日期和时分秒 56 | df['date'] = df['payTime'].dt.date # 提取日期部分 57 | df['time'] = df['payTime'].dt.time # 提取时分秒部分 58 | 59 | return df 60 | 61 | def filter(df): 62 | ''' 63 | 过滤数据 64 | ''' 65 | # 非消费(支出)数据 66 | df = df[~(df['amount'] < 0)] 67 | 68 | # 非餐饮数据 69 | filter_keys = ['电瓶车', '游泳', '核减', '浴室', '教材科' ,'校医院', '充值', r'沪(?:\w){6,7}'] # 需要继续补充 70 | for k in filter_keys: 71 | df = df[~df['merchant'].str.contains(k)] 72 | return df 73 | 74 | def annual_analysis(df): 75 | ''' 76 | 年度消费分析 77 | ''' 78 | 79 | # 总消费 80 | total_value = df['amount'].sum() 81 | html_data['total_spending_text'] = f"2024年,你在交大共消费了 {total_value:.2f} 元。" 82 | 83 | # 第一笔消费 84 | first_row = df.iloc[-1] 85 | html_data['first_spending_text'] = f"{first_row['formatted_payTime']},你在 {first_row['merchant']} 开启了第一笔在交大的消费,花了 {first_row['amount']:.2f} 元。" 86 | 87 | # 最大消费 88 | max_row = df.loc[df['amount'].idxmax()] 89 | html_data['highest_single_spending_text'] = f"今年 {max_row['formatted_payTime']},你在交大的 {max_row['merchant']} 单笔最多消费了 {max_row['amount']:.2f} 元。" 90 | 91 | # 最常消费 92 | most_frequent_merchant = df['merchant'].mode()[0] 93 | most_frequent_merchant_count = df[df['merchant'] == most_frequent_merchant].shape[0] 94 | most_frequent_merchant_total = df[df['merchant'] == most_frequent_merchant]['amount'].sum() 95 | html_data['favorite_place_spending_text'] = f"你最常前往 {most_frequent_merchant} ,一共 {most_frequent_merchant_count} 次,总共花了 {most_frequent_merchant_total:.2f} 元。" 96 | 97 | # 早中晚消费 98 | df['hour'] = df['payTime'].dt.hour 99 | morning = df[(df['hour'] >= 6) & (df['hour'] < 9)]['amount'].shape[0] 100 | noon = df[(df['hour'] >= 11) & (df['hour'] < 14)]['amount'].shape[0] 101 | night = df[(df['hour'] >= 17) & (df['hour'] < 19)]['amount'].shape[0] 102 | html_data['meal_counts_text'] = f"你今年一共在交大吃了 {morning} 顿早餐, {noon} 顿午餐, {night} 顿晚餐。" 103 | 104 | # 按日期分组,找到每一天中最早的时间 105 | try: 106 | earliest_rows_per_day = df.loc[df.groupby('date')['time'].idxmin()] 107 | overall_earliest_row = earliest_rows_per_day.loc[earliest_rows_per_day['time'].idxmin()] 108 | html_data['earliest_meal_text'] = f"{overall_earliest_row['formatted_payTime']} 是你今年最早的一次用餐,你一早就在 {overall_earliest_row['merchant']} 吃了 {overall_earliest_row['amount']:.2f} 元。" 109 | # print(f"\n {html_data['earliest_meal_text']}") 110 | # 错误似乎是因为pandas版本过低导致的,建议更新 111 | except Exception: 112 | print(f"\n 获取每日最早消费时出错,请更新pandas: pip install --upgrade pandas") 113 | 114 | 115 | # 月份消费金额分布 116 | df['month'] = df['payTime'].dt.month 117 | most_expensive_month = df.groupby('month')['amount'].sum().idxmax() 118 | most_expensive_month_total = df.groupby('month')['amount'].sum().max() 119 | 120 | # 按食堂分组,统计总消费金额 121 | grouped = df.groupby('merchant')['amount'].sum().sort_values(ascending=False) 122 | # 计算总消费金额 123 | total_amount = grouped.sum() 124 | # 找到占比 >= 1% 的食堂 125 | threshold = 0.01 # 占比 1% 126 | major_merchants = grouped[grouped / total_amount >= threshold] 127 | if len(major_merchants) > 9: 128 | major_merchants = grouped[:9] 129 | other_sum = grouped[9:].sum() 130 | # 将占比 < 1% 的合并为 "其他" 131 | else: 132 | other_sum = grouped[grouped / total_amount < threshold].sum() 133 | # 合并为新的 Series 134 | final_grouped = pd.concat([major_merchants, pd.Series({'其他': other_sum})]) 135 | html_data['pie_data_1'] = list(final_grouped.values) 136 | html_data['pie_label_1'] = list(final_grouped.index) 137 | 138 | df['month'] = df['payTime'].dt.month 139 | monthly_amount = df.groupby('month')['amount'].sum() 140 | html_data['pie_data_2'] = list(monthly_amount.values) 141 | 142 | 143 | 144 | if __name__ == "__main__": 145 | try: 146 | with open("eat-data.json", 'r', encoding='utf-8') as eat_data: 147 | eat_data_df = load_eat_data(eat_data) 148 | 149 | # 现在默认启用过滤 150 | eat_data_df = filter(eat_data_df) 151 | annual_analysis(eat_data_df) 152 | 153 | with open('html-template', 'r', encoding='utf-8') as file: 154 | template_content = file.read() 155 | 156 | template = Template(template_content) 157 | rendered_html = template.render(html_data) 158 | with open('海报.html', 'w', encoding='utf-8') as file: 159 | file.write(rendered_html) 160 | 161 | except FileNotFoundError: 162 | print("\n首次运行,请先运行 Get-Eat-Data 以获取消费数据") 163 | print("如果已经运行过 Get-Eat-Data,请查看 README 中的问题解答") 164 | input("按回车键退出...") 165 | except Exception: 166 | print("\n发生其他错误") 167 | raise 168 | -------------------------------------------------------------------------------- /html-template: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 思源码年度消费报告 2024 7 | 8 | 9 | 79 | 80 | 81 | 82 |
83 |
84 | 思源码年度消费报告 2024 85 |
86 | 87 |
88 |

{{ total_spending_text }}

89 |
90 | 91 |
92 |

{{ first_spending_text }}

93 |

在交大的每一年都要有一个美好的开始。

94 |
95 | 96 |
97 |

{{ highest_single_spending_text }}

98 |

哇,真是胃口大开的一顿!

99 |
100 | 101 |
102 |

{{ favorite_place_spending_text }}

103 |

想来这里一定有你钟爱的菜品。

104 |
105 | 106 |
107 |

{{ meal_counts_text }}

108 |

在交大的每一顿都要好好吃饭~

109 |
110 | 111 |
112 |

{{ earliest_meal_text }}

113 |
114 | 115 |
116 |

{{ most_expensive_month_text }}

117 |
118 | 119 | 120 |
121 |
122 |

食堂消费分布

123 | 124 |
125 |
126 |

月份消费分布

127 | 128 |
129 |
130 | 131 | 135 |
136 | 137 | 242 | 243 | 244 | 245 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | pandas 3 | requests 4 | jinja2 --------------------------------------------------------------------------------