├── loan.csv
├── order-14.1.csv
├── order-14.3.csv
├── train-pivot.csv
├── .idea
├── encodings.xml
├── vcs.xml
├── modules.xml
├── misc.xml
├── 项目.iml
└── workspace.xml
├── order.csv
├── README.md
├── 自动化.ipynb
├── .ipynb_checkpoints
├── 自动化-checkpoint.ipynb
├── Supermarket-checkpoint.ipynb
└── Bank-checkpoint.ipynb
├── Supermarket.ipynb
└── Bank.ipynb
/loan.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Andchenn/Analysis_Item/HEAD/loan.csv
--------------------------------------------------------------------------------
/order-14.1.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Andchenn/Analysis_Item/HEAD/order-14.1.csv
--------------------------------------------------------------------------------
/order-14.3.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Andchenn/Analysis_Item/HEAD/order-14.3.csv
--------------------------------------------------------------------------------
/train-pivot.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Andchenn/Analysis_Item/HEAD/train-pivot.csv
--------------------------------------------------------------------------------
/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/order.csv:
--------------------------------------------------------------------------------
1 | ,本月累计,上月同期,去年同期,环比,同比
2 | 销售额,10412.78007,9940.97291,8596.313470000001,0.04746086366711566,0.21130762696581828
3 | 客流量,343.0,315.0,262.0,0.0888888888888888,0.30916030534351147
4 | 客单价,30.357959387755105,31.55864415873016,32.810356755725195,-0.038046145611832505,-0.0747446114721737
5 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 数据分析小项目
2 |
3 | #### 前言
4 |
5 | Hi! 这里有三个小项目,分别是利用 Python 实现报表自动化、某连锁超市数据分析、某银行数据分析,展示了问题分解、数据清洗、数据分析与可视化的过程。
6 |
7 | #### 如下:
8 |
9 | 1. 利用 Python 实现报表自动化
10 |
11 | + 为什么要进行报表自动化
12 |
13 | + 什么样的报表适合自动化
14 |
15 | + 如何实现报表自动化
16 |
17 |
17 | 2. 假如你是某连锁超市的数据分析师
19 |
20 | + 哪些类别的商品比较畅销
21 |
22 | + 哪些商品比较畅销
23 |
24 | + 不同门店的销售额占比
25 |
26 | + 哪些时间段是超市的客流高峰期
27 |
28 | 3. 假如你是某银行的数据分析师
29 |
29 | + 是不是收入越高的人坏账率越低
31 |
32 | + 年龄和坏账率有什么关系
33 |
34 | + 家庭人口数量和坏账率有什么关系
--------------------------------------------------------------------------------
/.idea/项目.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/.idea/workspace.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 | 1563609800195
85 |
86 |
87 | 1563609800195
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
--------------------------------------------------------------------------------
/自动化.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 25,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "name": "stdout",
10 | "output_type": "stream",
11 | "text": [
12 | "\n",
13 | "RangeIndex: 3744 entries, 0 to 3743\n",
14 | "Data columns (total 7 columns):\n",
15 | "商品ID 3478 non-null float64\n",
16 | "类别ID 3478 non-null float64\n",
17 | "门店编号 3478 non-null object\n",
18 | "单价 3478 non-null float64\n",
19 | "销量 3478 non-null float64\n",
20 | "成交时间 3478 non-null datetime64[ns]\n",
21 | "订单ID 3478 non-null object\n",
22 | "dtypes: datetime64[ns](1), float64(4), object(2)\n",
23 | "memory usage: 204.8+ KB\n"
24 | ]
25 | }
26 | ],
27 | "source": [
28 | "import pandas as pd\n",
29 | "from datetime import datetime\n",
30 | "\n",
31 | "data=pd.read_csv(\"order-14.1.csv\",parse_dates=[\"成交时间\"],encoding='gbk')\n",
32 | "data.head()\n",
33 | "# print(data.head(5))\n",
34 | "# 查看源数据类型\n",
35 | "data.info()"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": 17,
41 | "metadata": {},
42 | "outputs": [
43 | {
44 | "name": "stdout",
45 | "output_type": "stream",
46 | "text": [
47 | "本月销售额为:10412.78,客流量为:343,客单价为:30.36\n"
48 | ]
49 | }
50 | ],
51 | "source": [
52 | "# 计算本月的相关的指标\n",
53 | "This_month=data[(data[\"成交时间\"]>=datetime(2018,2,1))&(data[\"成交时间\"]<=datetime(2018,2,28))]\n",
54 | "# 销售额计算\n",
55 | "sales_1=(This_month[\"销量\"]*This_month['单价']).sum()\n",
56 | "# 客流量计算\n",
57 | "traffic_1=This_month[\"订单ID\"].drop_duplicates().count()\n",
58 | "# 客单价计算\n",
59 | "s_t_1=sales_1/traffic_1\n",
60 | "print(\"本月销售额为:{:.2f},客流量为:{},客单价为:{:.2f}\".format(sales_1,traffic_1,s_t_1))\n"
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": 19,
66 | "metadata": {},
67 | "outputs": [
68 | {
69 | "name": "stdout",
70 | "output_type": "stream",
71 | "text": [
72 | "本月销售额为:9940.97,客流量为:315,客单价为:31.56\n"
73 | ]
74 | }
75 | ],
76 | "source": [
77 | "# 计算上月相关指标\n",
78 | "last_month=data[(data[\"成交时间\"]>=datetime(2018,1,1))&(data[\"成交时间\"]<=datetime(2018,1,31))]\n",
79 | "\n",
80 | "# 销售额计算\n",
81 | "sales_2=(last_month[\"销量\"]*last_month['单价']).sum()\n",
82 | "# 客流量计算\n",
83 | "traffic_2=last_month[\"订单ID\"].drop_duplicates().count()\n",
84 | "# 客单价计算\n",
85 | "s_t_2=sales_2/traffic_2\n",
86 | "print(\"本月销售额为:{:.2f},客流量为:{},客单价为:{:.2f}\".format(sales_2,traffic_2,s_t_2))\n"
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": 20,
92 | "metadata": {},
93 | "outputs": [
94 | {
95 | "name": "stdout",
96 | "output_type": "stream",
97 | "text": [
98 | "本月销售额为:8596.31,客流量为:262,客单价为:32.81\n"
99 | ]
100 | }
101 | ],
102 | "source": [
103 | "# 计算去年同期相关指标\n",
104 | "same_month=data[(data[\"成交时间\"]>=datetime(2017,2,1))&(data[\"成交时间\"]<=datetime(2017,2,28))]\n",
105 | "\n",
106 | "sales_3=(same_month[\"销量\"]*same_month[\"单价\"]).sum()\n",
107 | "\n",
108 | "traffic_3=same_month[\"订单ID\"].drop_duplicates().count()\n",
109 | "s_t_3=sales_3/traffic_3\n",
110 | "print(\"本月销售额为:{:.2f},客流量为:{},客单价为:{:.2f}\".format(sales_3,traffic_3,s_t_3))\n"
111 | ]
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": 29,
116 | "metadata": {},
117 | "outputs": [
118 | {
119 | "name": "stdout",
120 | "output_type": "stream",
121 | "text": [
122 | "10412.78007 343 30.357959387755105\n",
123 | "9940.97291 315 31.55864415873016\n",
124 | "8596.313470000001 262 32.810356755725195\n"
125 | ]
126 | }
127 | ],
128 | "source": [
129 | "# 利用函数提高编码效率\n",
130 | "def get_month_data(data):\n",
131 | " sale=(data[\"销量\"]*data[\"单价\"]).sum()\n",
132 | " traffic=data[\"订单ID\"].drop_duplicates().count()\n",
133 | " s_t=sale/traffic\n",
134 | " return (sale,traffic,s_t)\n",
135 | "\n",
136 | "# 本月相关指数\n",
137 | "sales_1,traffic_1,s_t_1=get_month_data(This_month)\n",
138 | "print(sales_1,traffic_1,s_t_1)\n",
139 | "\n",
140 | "# 上月相关指数\n",
141 | "sales_2,traffic_2,s_t_2=get_month_data(last_month)\n",
142 | "print(sales_2,traffic_2,s_t_2)\n",
143 | "\n",
144 | "# 去年同期相关指数\n",
145 | "sales_3,traffic_3,s_t_3=get_month_data(same_month)\n",
146 | "print(sales_3,traffic_3,s_t_3)\n"
147 | ]
148 | },
149 | {
150 | "cell_type": "code",
151 | "execution_count": 36,
152 | "metadata": {},
153 | "outputs": [
154 | {
155 | "name": "stdout",
156 | "output_type": "stream",
157 | "text": [
158 | " 本月累计 上月同期 去年同期\n",
159 | "销售额 10412.780070 9940.972910 8596.313470\n",
160 | "客流量 343.000000 315.000000 262.000000\n",
161 | "客单价 30.357959 31.558644 32.810357\n",
162 | " 本月累计 上月同期 去年同期 环比 同比\n",
163 | "销售额 10412.780070 9940.972910 8596.313470 0.047461 0.211308\n",
164 | "客流量 343.000000 315.000000 262.000000 0.088889 0.309160\n",
165 | "客单价 30.357959 31.558644 32.810357 -0.038046 -0.074745\n"
166 | ]
167 | }
168 | ],
169 | "source": [
170 | "report=pd.DataFrame([[sales_1,sales_2,sales_3],[traffic_1,traffic_2,traffic_3],[s_t_1,s_t_2,s_t_3]],columns=[\"本月累计\",\"上月同期\",\"去年同期\"],index=[\"销售额\",\"客流量\",\"客单价\"])\n",
171 | "print(report)\n",
172 | "# 添加同比和环比字段\n",
173 | "report[\"环比\"]=report[\"本月累计\"]/report[\"上月同期\"]-1\n",
174 | "\n",
175 | "report[\"同比\"]=report[\"本月累计\"]/report[\"去年同期\"]-1\n",
176 | "\n",
177 | "print(report)"
178 | ]
179 | },
180 | {
181 | "cell_type": "code",
182 | "execution_count": 37,
183 | "metadata": {},
184 | "outputs": [],
185 | "source": [
186 | "# 将结果导出本地\n",
187 | "report.to_csv(\"order.csv\",encoding=\"utf-8-sig\")"
188 | ]
189 | }
190 | ],
191 | "metadata": {
192 | "kernelspec": {
193 | "display_name": "Python 3",
194 | "language": "python",
195 | "name": "python3"
196 | },
197 | "language_info": {
198 | "codemirror_mode": {
199 | "name": "ipython",
200 | "version": 3
201 | },
202 | "file_extension": ".py",
203 | "mimetype": "text/x-python",
204 | "name": "python",
205 | "nbconvert_exporter": "python",
206 | "pygments_lexer": "ipython3",
207 | "version": "3.7.3"
208 | },
209 | "toc": {
210 | "base_numbering": 1,
211 | "nav_menu": {},
212 | "number_sections": true,
213 | "sideBar": true,
214 | "skip_h1_title": false,
215 | "title_cell": "Table of Contents",
216 | "title_sidebar": "Contents",
217 | "toc_cell": false,
218 | "toc_position": {},
219 | "toc_section_display": true,
220 | "toc_window_display": false
221 | }
222 | },
223 | "nbformat": 4,
224 | "nbformat_minor": 2
225 | }
226 |
--------------------------------------------------------------------------------
/.ipynb_checkpoints/自动化-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 25,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "name": "stdout",
10 | "output_type": "stream",
11 | "text": [
12 | "\n",
13 | "RangeIndex: 3744 entries, 0 to 3743\n",
14 | "Data columns (total 7 columns):\n",
15 | "商品ID 3478 non-null float64\n",
16 | "类别ID 3478 non-null float64\n",
17 | "门店编号 3478 non-null object\n",
18 | "单价 3478 non-null float64\n",
19 | "销量 3478 non-null float64\n",
20 | "成交时间 3478 non-null datetime64[ns]\n",
21 | "订单ID 3478 non-null object\n",
22 | "dtypes: datetime64[ns](1), float64(4), object(2)\n",
23 | "memory usage: 204.8+ KB\n"
24 | ]
25 | }
26 | ],
27 | "source": [
28 | "import pandas as pd\n",
29 | "from datetime import datetime\n",
30 | "\n",
31 | "data=pd.read_csv(\"order-14.1.csv\",parse_dates=[\"成交时间\"],encoding='gbk')\n",
32 | "data.head()\n",
33 | "# print(data.head(5))\n",
34 | "# 查看源数据类型\n",
35 | "data.info()"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": 17,
41 | "metadata": {},
42 | "outputs": [
43 | {
44 | "name": "stdout",
45 | "output_type": "stream",
46 | "text": [
47 | "本月销售额为:10412.78,客流量为:343,客单价为:30.36\n"
48 | ]
49 | }
50 | ],
51 | "source": [
52 | "# 计算本月的相关的指标\n",
53 | "This_month=data[(data[\"成交时间\"]>=datetime(2018,2,1))&(data[\"成交时间\"]<=datetime(2018,2,28))]\n",
54 | "# 销售额计算\n",
55 | "sales_1=(This_month[\"销量\"]*This_month['单价']).sum()\n",
56 | "# 客流量计算\n",
57 | "traffic_1=This_month[\"订单ID\"].drop_duplicates().count()\n",
58 | "# 客单价计算\n",
59 | "s_t_1=sales_1/traffic_1\n",
60 | "print(\"本月销售额为:{:.2f},客流量为:{},客单价为:{:.2f}\".format(sales_1,traffic_1,s_t_1))\n"
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": 19,
66 | "metadata": {},
67 | "outputs": [
68 | {
69 | "name": "stdout",
70 | "output_type": "stream",
71 | "text": [
72 | "本月销售额为:9940.97,客流量为:315,客单价为:31.56\n"
73 | ]
74 | }
75 | ],
76 | "source": [
77 | "# 计算上月相关指标\n",
78 | "last_month=data[(data[\"成交时间\"]>=datetime(2018,1,1))&(data[\"成交时间\"]<=datetime(2018,1,31))]\n",
79 | "\n",
80 | "# 销售额计算\n",
81 | "sales_2=(last_month[\"销量\"]*last_month['单价']).sum()\n",
82 | "# 客流量计算\n",
83 | "traffic_2=last_month[\"订单ID\"].drop_duplicates().count()\n",
84 | "# 客单价计算\n",
85 | "s_t_2=sales_2/traffic_2\n",
86 | "print(\"本月销售额为:{:.2f},客流量为:{},客单价为:{:.2f}\".format(sales_2,traffic_2,s_t_2))\n"
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": 20,
92 | "metadata": {},
93 | "outputs": [
94 | {
95 | "name": "stdout",
96 | "output_type": "stream",
97 | "text": [
98 | "本月销售额为:8596.31,客流量为:262,客单价为:32.81\n"
99 | ]
100 | }
101 | ],
102 | "source": [
103 | "# 计算去年同期相关指标\n",
104 | "same_month=data[(data[\"成交时间\"]>=datetime(2017,2,1))&(data[\"成交时间\"]<=datetime(2017,2,28))]\n",
105 | "\n",
106 | "sales_3=(same_month[\"销量\"]*same_month[\"单价\"]).sum()\n",
107 | "\n",
108 | "traffic_3=same_month[\"订单ID\"].drop_duplicates().count()\n",
109 | "s_t_3=sales_3/traffic_3\n",
110 | "print(\"本月销售额为:{:.2f},客流量为:{},客单价为:{:.2f}\".format(sales_3,traffic_3,s_t_3))\n"
111 | ]
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": 29,
116 | "metadata": {},
117 | "outputs": [
118 | {
119 | "name": "stdout",
120 | "output_type": "stream",
121 | "text": [
122 | "10412.78007 343 30.357959387755105\n",
123 | "9940.97291 315 31.55864415873016\n",
124 | "8596.313470000001 262 32.810356755725195\n"
125 | ]
126 | }
127 | ],
128 | "source": [
129 | "# 利用函数提高编码效率\n",
130 | "def get_month_data(data):\n",
131 | " sale=(data[\"销量\"]*data[\"单价\"]).sum()\n",
132 | " traffic=data[\"订单ID\"].drop_duplicates().count()\n",
133 | " s_t=sale/traffic\n",
134 | " return (sale,traffic,s_t)\n",
135 | "\n",
136 | "# 本月相关指数\n",
137 | "sales_1,traffic_1,s_t_1=get_month_data(This_month)\n",
138 | "print(sales_1,traffic_1,s_t_1)\n",
139 | "\n",
140 | "# 上月相关指数\n",
141 | "sales_2,traffic_2,s_t_2=get_month_data(last_month)\n",
142 | "print(sales_2,traffic_2,s_t_2)\n",
143 | "\n",
144 | "# 去年同期相关指数\n",
145 | "sales_3,traffic_3,s_t_3=get_month_data(same_month)\n",
146 | "print(sales_3,traffic_3,s_t_3)\n"
147 | ]
148 | },
149 | {
150 | "cell_type": "code",
151 | "execution_count": 36,
152 | "metadata": {},
153 | "outputs": [
154 | {
155 | "name": "stdout",
156 | "output_type": "stream",
157 | "text": [
158 | " 本月累计 上月同期 去年同期\n",
159 | "销售额 10412.780070 9940.972910 8596.313470\n",
160 | "客流量 343.000000 315.000000 262.000000\n",
161 | "客单价 30.357959 31.558644 32.810357\n",
162 | " 本月累计 上月同期 去年同期 环比 同比\n",
163 | "销售额 10412.780070 9940.972910 8596.313470 0.047461 0.211308\n",
164 | "客流量 343.000000 315.000000 262.000000 0.088889 0.309160\n",
165 | "客单价 30.357959 31.558644 32.810357 -0.038046 -0.074745\n"
166 | ]
167 | }
168 | ],
169 | "source": [
170 | "report=pd.DataFrame([[sales_1,sales_2,sales_3],[traffic_1,traffic_2,traffic_3],[s_t_1,s_t_2,s_t_3]],columns=[\"本月累计\",\"上月同期\",\"去年同期\"],index=[\"销售额\",\"客流量\",\"客单价\"])\n",
171 | "print(report)\n",
172 | "# 添加同比和环比字段\n",
173 | "report[\"环比\"]=report[\"本月累计\"]/report[\"上月同期\"]-1\n",
174 | "\n",
175 | "report[\"同比\"]=report[\"本月累计\"]/report[\"去年同期\"]-1\n",
176 | "\n",
177 | "print(report)"
178 | ]
179 | },
180 | {
181 | "cell_type": "code",
182 | "execution_count": 37,
183 | "metadata": {},
184 | "outputs": [],
185 | "source": [
186 | "# 将结果导出本地\n",
187 | "report.to_csv(\"order.csv\",encoding=\"utf-8-sig\")"
188 | ]
189 | }
190 | ],
191 | "metadata": {
192 | "kernelspec": {
193 | "display_name": "Python 3",
194 | "language": "python",
195 | "name": "python3"
196 | },
197 | "language_info": {
198 | "codemirror_mode": {
199 | "name": "ipython",
200 | "version": 3
201 | },
202 | "file_extension": ".py",
203 | "mimetype": "text/x-python",
204 | "name": "python",
205 | "nbconvert_exporter": "python",
206 | "pygments_lexer": "ipython3",
207 | "version": "3.7.3"
208 | },
209 | "toc": {
210 | "base_numbering": 1,
211 | "nav_menu": {},
212 | "number_sections": true,
213 | "sideBar": true,
214 | "skip_h1_title": false,
215 | "title_cell": "Table of Contents",
216 | "title_sidebar": "Contents",
217 | "toc_cell": false,
218 | "toc_position": {},
219 | "toc_section_display": true,
220 | "toc_window_display": false
221 | }
222 | },
223 | "nbformat": 4,
224 | "nbformat_minor": 2
225 | }
226 |
--------------------------------------------------------------------------------
/Supermarket.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 14,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "data": {
10 | "text/html": [
11 | "\n",
12 | "\n",
25 | "
\n",
26 | " \n",
27 | " \n",
28 | " | \n",
29 | " 商品ID | \n",
30 | " 类别ID | \n",
31 | " 门店编号 | \n",
32 | " 单价 | \n",
33 | " 销量 | \n",
34 | " 成交时间 | \n",
35 | " 订单ID | \n",
36 | "
\n",
37 | " \n",
38 | " \n",
39 | " \n",
40 | " | 0 | \n",
41 | " 30006206 | \n",
42 | " 915000003 | \n",
43 | " CDNL | \n",
44 | " 25.23 | \n",
45 | " 0.328 | \n",
46 | " 2017-01-03 09:56:00 | \n",
47 | " 20170103CDLG000210052759 | \n",
48 | "
\n",
49 | " \n",
50 | " | 1 | \n",
51 | " 30163281 | \n",
52 | " 914010000 | \n",
53 | " CDNL | \n",
54 | " 2.00 | \n",
55 | " 2.000 | \n",
56 | " 2017-01-03 09:56:00 | \n",
57 | " 20170103CDLG000210052759 | \n",
58 | "
\n",
59 | " \n",
60 | " | 2 | \n",
61 | " 30200518 | \n",
62 | " 922000000 | \n",
63 | " CDNL | \n",
64 | " 19.62 | \n",
65 | " 0.230 | \n",
66 | " 2017-01-03 09:56:00 | \n",
67 | " 20170103CDLG000210052759 | \n",
68 | "
\n",
69 | " \n",
70 | " | 3 | \n",
71 | " 29989105 | \n",
72 | " 922000000 | \n",
73 | " CDNL | \n",
74 | " 2.80 | \n",
75 | " 2.044 | \n",
76 | " 2017-01-03 09:56:00 | \n",
77 | " 20170103CDLG000210052759 | \n",
78 | "
\n",
79 | " \n",
80 | " | 4 | \n",
81 | " 30179558 | \n",
82 | " 915000100 | \n",
83 | " CDNL | \n",
84 | " 47.41 | \n",
85 | " 0.226 | \n",
86 | " 2017-01-03 09:56:00 | \n",
87 | " 20170103CDLG000210052759 | \n",
88 | "
\n",
89 | " \n",
90 | "
\n",
91 | "
"
92 | ],
93 | "text/plain": [
94 | " 商品ID 类别ID 门店编号 单价 销量 成交时间 \\\n",
95 | "0 30006206 915000003 CDNL 25.23 0.328 2017-01-03 09:56:00 \n",
96 | "1 30163281 914010000 CDNL 2.00 2.000 2017-01-03 09:56:00 \n",
97 | "2 30200518 922000000 CDNL 19.62 0.230 2017-01-03 09:56:00 \n",
98 | "3 29989105 922000000 CDNL 2.80 2.044 2017-01-03 09:56:00 \n",
99 | "4 30179558 915000100 CDNL 47.41 0.226 2017-01-03 09:56:00 \n",
100 | "\n",
101 | " 订单ID \n",
102 | "0 20170103CDLG000210052759 \n",
103 | "1 20170103CDLG000210052759 \n",
104 | "2 20170103CDLG000210052759 \n",
105 | "3 20170103CDLG000210052759 \n",
106 | "4 20170103CDLG000210052759 "
107 | ]
108 | },
109 | "execution_count": 14,
110 | "metadata": {},
111 | "output_type": "execute_result"
112 | }
113 | ],
114 | "source": [
115 | "import pandas as pd\n",
116 | "from datetime import datetime\n",
117 | "# 导入数据源\n",
118 | "data=pd.read_csv(\"order-14.3.csv\",parse_dates=[\"成交时间\"],encoding='gbk')\n",
119 | "data.head()\n",
120 | "# data.shape"
121 | ]
122 | },
123 | {
124 | "cell_type": "code",
125 | "execution_count": 20,
126 | "metadata": {},
127 | "outputs": [
128 | {
129 | "data": {
130 | "text/html": [
131 | "\n",
132 | "\n",
145 | "
\n",
146 | " \n",
147 | " \n",
148 | " | \n",
149 | " 类别ID | \n",
150 | " 销量 | \n",
151 | "
\n",
152 | " \n",
153 | " \n",
154 | " \n",
155 | " | 240 | \n",
156 | " 922000003 | \n",
157 | " 425.328 | \n",
158 | "
\n",
159 | " \n",
160 | " | 239 | \n",
161 | " 922000002 | \n",
162 | " 206.424 | \n",
163 | "
\n",
164 | " \n",
165 | " | 251 | \n",
166 | " 923000006 | \n",
167 | " 190.294 | \n",
168 | "
\n",
169 | " \n",
170 | " | 216 | \n",
171 | " 915030104 | \n",
172 | " 175.059 | \n",
173 | "
\n",
174 | " \n",
175 | " | 238 | \n",
176 | " 922000001 | \n",
177 | " 121.355 | \n",
178 | "
\n",
179 | " \n",
180 | " | 367 | \n",
181 | " 960000000 | \n",
182 | " 121.000 | \n",
183 | "
\n",
184 | " \n",
185 | " | 234 | \n",
186 | " 920090000 | \n",
187 | " 111.565 | \n",
188 | "
\n",
189 | " \n",
190 | " | 249 | \n",
191 | " 923000002 | \n",
192 | " 91.847 | \n",
193 | "
\n",
194 | " \n",
195 | " | 237 | \n",
196 | " 922000000 | \n",
197 | " 86.395 | \n",
198 | "
\n",
199 | " \n",
200 | " | 247 | \n",
201 | " 923000000 | \n",
202 | " 85.845 | \n",
203 | "
\n",
204 | " \n",
205 | "
\n",
206 | "
"
207 | ],
208 | "text/plain": [
209 | " 类别ID 销量\n",
210 | "240 922000003 425.328\n",
211 | "239 922000002 206.424\n",
212 | "251 923000006 190.294\n",
213 | "216 915030104 175.059\n",
214 | "238 922000001 121.355\n",
215 | "367 960000000 121.000\n",
216 | "234 920090000 111.565\n",
217 | "249 923000002 91.847\n",
218 | "237 922000000 86.395\n",
219 | "247 923000000 85.845"
220 | ]
221 | },
222 | "execution_count": 20,
223 | "metadata": {},
224 | "output_type": "execute_result"
225 | }
226 | ],
227 | "source": [
228 | "# 哪些类别的商品比较畅销\n",
229 | "# ascending=False 降序\n",
230 | "data.groupby(\"类别ID\")[\"销量\"].sum().reset_index().sort_values(by=\"销量\",ascending=False).head(10)"
231 | ]
232 | },
233 | {
234 | "cell_type": "code",
235 | "execution_count": 22,
236 | "metadata": {},
237 | "outputs": [
238 | {
239 | "data": {
240 | "text/html": [
241 | "\n",
242 | "\n",
255 | "
\n",
256 | " \n",
257 | " \n",
258 | " | \n",
259 | " 商品ID | \n",
260 | " 销量 | \n",
261 | "
\n",
262 | " \n",
263 | " \n",
264 | " \n",
265 | " | 8 | \n",
266 | " 29989059 | \n",
267 | " 391.549 | \n",
268 | "
\n",
269 | " \n",
270 | " | 18 | \n",
271 | " 29989072 | \n",
272 | " 102.876 | \n",
273 | "
\n",
274 | " \n",
275 | " | 469 | \n",
276 | " 30022232 | \n",
277 | " 101.000 | \n",
278 | "
\n",
279 | " \n",
280 | " | 523 | \n",
281 | " 30031960 | \n",
282 | " 99.998 | \n",
283 | "
\n",
284 | " \n",
285 | " | 57 | \n",
286 | " 29989157 | \n",
287 | " 72.453 | \n",
288 | "
\n",
289 | " \n",
290 | " | 476 | \n",
291 | " 30023041 | \n",
292 | " 64.416 | \n",
293 | "
\n",
294 | " \n",
295 | " | 505 | \n",
296 | " 30026255 | \n",
297 | " 62.375 | \n",
298 | "
\n",
299 | " \n",
300 | " | 7 | \n",
301 | " 29989058 | \n",
302 | " 56.052 | \n",
303 | "
\n",
304 | " \n",
305 | " | 510 | \n",
306 | " 30027007 | \n",
307 | " 48.757 | \n",
308 | "
\n",
309 | " \n",
310 | " | 903 | \n",
311 | " 30171264 | \n",
312 | " 45.000 | \n",
313 | "
\n",
314 | " \n",
315 | "
\n",
316 | "
"
317 | ],
318 | "text/plain": [
319 | " 商品ID 销量\n",
320 | "8 29989059 391.549\n",
321 | "18 29989072 102.876\n",
322 | "469 30022232 101.000\n",
323 | "523 30031960 99.998\n",
324 | "57 29989157 72.453\n",
325 | "476 30023041 64.416\n",
326 | "505 30026255 62.375\n",
327 | "7 29989058 56.052\n",
328 | "510 30027007 48.757\n",
329 | "903 30171264 45.000"
330 | ]
331 | },
332 | "execution_count": 22,
333 | "metadata": {},
334 | "output_type": "execute_result"
335 | }
336 | ],
337 | "source": [
338 | "# 哪些商品比较畅销\n",
339 | "pd.pivot_table(data,index=\"商品ID\",values=\"销量\",aggfunc=\"sum\").reset_index().sort_values(by=\"销量\",ascending=False).head(10)"
340 | ]
341 | },
342 | {
343 | "cell_type": "code",
344 | "execution_count": 25,
345 | "metadata": {},
346 | "outputs": [
347 | {
348 | "data": {
349 | "text/plain": [
350 | "门店编号\n",
351 | "CDLG 10908.82612\n",
352 | "CDNL 8059.47867\n",
353 | "CDXL 9981.76166\n",
354 | "Name: 销售额, dtype: float64"
355 | ]
356 | },
357 | "execution_count": 25,
358 | "metadata": {},
359 | "output_type": "execute_result"
360 | }
361 | ],
362 | "source": [
363 | "# 不同门店的销售额占比\n",
364 | "data[\"销售额\"]=data[\"销量\"]*data[\"单价\"]\n",
365 | "data.groupby(\"门店编号\")[\"销售额\"].sum()\n"
366 | ]
367 | },
368 | {
369 | "cell_type": "code",
370 | "execution_count": 29,
371 | "metadata": {},
372 | "outputs": [
373 | {
374 | "data": {
375 | "text/plain": [
376 | "门店编号\n",
377 | "CDLG 0.376815\n",
378 | "CDNL 0.278392\n",
379 | "CDXL 0.344792\n",
380 | "Name: 销售额, dtype: float64"
381 | ]
382 | },
383 | "execution_count": 29,
384 | "metadata": {},
385 | "output_type": "execute_result"
386 | }
387 | ],
388 | "source": [
389 | "data.groupby(\"门店编号\")[\"销售额\"].sum()/data[\"销售额\"].sum()"
390 | ]
391 | },
392 | {
393 | "cell_type": "code",
394 | "execution_count": 30,
395 | "metadata": {},
396 | "outputs": [
397 | {
398 | "data": {
399 | "text/plain": [
400 | ""
401 | ]
402 | },
403 | "execution_count": 30,
404 | "metadata": {},
405 | "output_type": "execute_result"
406 | },
407 | {
408 | "data": {
409 | "image/png": "\n",
410 | "text/plain": [
411 | ""
412 | ]
413 | },
414 | "metadata": {},
415 | "output_type": "display_data"
416 | }
417 | ],
418 | "source": [
419 | "import matplotlib as mpl\n",
420 | "\n",
421 | "mpl.rcParams[\"font.family\"]=\"SimHei\"\n",
422 | "mpl.rcParams[\"axes.unicode_minus\"]=False\n",
423 | "(data.groupby(\"门店编号\")[\"销售额\"].sum()/data[\"销售额\"].sum()).plot.pie()"
424 | ]
425 | },
426 | {
427 | "cell_type": "code",
428 | "execution_count": 37,
429 | "metadata": {},
430 | "outputs": [
431 | {
432 | "data": {
433 | "text/plain": [
434 | ""
435 | ]
436 | },
437 | "execution_count": 37,
438 | "metadata": {},
439 | "output_type": "execute_result"
440 | },
441 | {
442 | "data": {
443 | "image/png": "\n",
444 | "text/plain": [
445 | ""
446 | ]
447 | },
448 | "metadata": {
449 | "needs_background": "light"
450 | },
451 | "output_type": "display_data"
452 | }
453 | ],
454 | "source": [
454 | "# 哪个时间段是超市的客流高峰期\n",
456 | "# 利用自定义时间格式函数strftime提取小时数\n",
457 | "data[\"小时\"]=data[\"成交时间\"].map(lambda x:int(x.strftime(\"%H\")))\n",
458 | "# 对小时和订单去重\n",
459 | "traffic=data[[\"小时\",\"订单ID\"]].drop_duplicates()\n",
460 | "# 求每小时的客流量\n",
461 | "traffic.groupby(\"小时\")[\"订单ID\"].count().plot()"
462 | ]
463 | },
464 | {
465 | "cell_type": "code",
466 | "execution_count": null,
467 | "metadata": {},
468 | "outputs": [],
469 | "source": []
470 | }
471 | ],
472 | "metadata": {
473 | "kernelspec": {
474 | "display_name": "Python 3",
475 | "language": "python",
476 | "name": "python3"
477 | },
478 | "language_info": {
479 | "codemirror_mode": {
480 | "name": "ipython",
481 | "version": 3
482 | },
483 | "file_extension": ".py",
484 | "mimetype": "text/x-python",
485 | "name": "python",
486 | "nbconvert_exporter": "python",
487 | "pygments_lexer": "ipython3",
488 | "version": "3.7.3"
489 | },
490 | "toc": {
491 | "base_numbering": 1,
492 | "nav_menu": {},
493 | "number_sections": true,
494 | "sideBar": true,
495 | "skip_h1_title": false,
496 | "title_cell": "Table of Contents",
497 | "title_sidebar": "Contents",
498 | "toc_cell": false,
499 | "toc_position": {},
500 | "toc_section_display": true,
501 | "toc_window_display": false
502 | }
503 | },
504 | "nbformat": 4,
505 | "nbformat_minor": 2
506 | }
507 |
--------------------------------------------------------------------------------
/.ipynb_checkpoints/Supermarket-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 14,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "data": {
10 | "text/html": [
11 | "\n",
12 | "\n",
25 | "
\n",
26 | " \n",
27 | " \n",
28 | " | \n",
29 | " 商品ID | \n",
30 | " 类别ID | \n",
31 | " 门店编号 | \n",
32 | " 单价 | \n",
33 | " 销量 | \n",
34 | " 成交时间 | \n",
35 | " 订单ID | \n",
36 | "
\n",
37 | " \n",
38 | " \n",
39 | " \n",
40 | " | 0 | \n",
41 | " 30006206 | \n",
42 | " 915000003 | \n",
43 | " CDNL | \n",
44 | " 25.23 | \n",
45 | " 0.328 | \n",
46 | " 2017-01-03 09:56:00 | \n",
47 | " 20170103CDLG000210052759 | \n",
48 | "
\n",
49 | " \n",
50 | " | 1 | \n",
51 | " 30163281 | \n",
52 | " 914010000 | \n",
53 | " CDNL | \n",
54 | " 2.00 | \n",
55 | " 2.000 | \n",
56 | " 2017-01-03 09:56:00 | \n",
57 | " 20170103CDLG000210052759 | \n",
58 | "
\n",
59 | " \n",
60 | " | 2 | \n",
61 | " 30200518 | \n",
62 | " 922000000 | \n",
63 | " CDNL | \n",
64 | " 19.62 | \n",
65 | " 0.230 | \n",
66 | " 2017-01-03 09:56:00 | \n",
67 | " 20170103CDLG000210052759 | \n",
68 | "
\n",
69 | " \n",
70 | " | 3 | \n",
71 | " 29989105 | \n",
72 | " 922000000 | \n",
73 | " CDNL | \n",
74 | " 2.80 | \n",
75 | " 2.044 | \n",
76 | " 2017-01-03 09:56:00 | \n",
77 | " 20170103CDLG000210052759 | \n",
78 | "
\n",
79 | " \n",
80 | " | 4 | \n",
81 | " 30179558 | \n",
82 | " 915000100 | \n",
83 | " CDNL | \n",
84 | " 47.41 | \n",
85 | " 0.226 | \n",
86 | " 2017-01-03 09:56:00 | \n",
87 | " 20170103CDLG000210052759 | \n",
88 | "
\n",
89 | " \n",
90 | "
\n",
91 | "
"
92 | ],
93 | "text/plain": [
94 | " 商品ID 类别ID 门店编号 单价 销量 成交时间 \\\n",
95 | "0 30006206 915000003 CDNL 25.23 0.328 2017-01-03 09:56:00 \n",
96 | "1 30163281 914010000 CDNL 2.00 2.000 2017-01-03 09:56:00 \n",
97 | "2 30200518 922000000 CDNL 19.62 0.230 2017-01-03 09:56:00 \n",
98 | "3 29989105 922000000 CDNL 2.80 2.044 2017-01-03 09:56:00 \n",
99 | "4 30179558 915000100 CDNL 47.41 0.226 2017-01-03 09:56:00 \n",
100 | "\n",
101 | " 订单ID \n",
102 | "0 20170103CDLG000210052759 \n",
103 | "1 20170103CDLG000210052759 \n",
104 | "2 20170103CDLG000210052759 \n",
105 | "3 20170103CDLG000210052759 \n",
106 | "4 20170103CDLG000210052759 "
107 | ]
108 | },
109 | "execution_count": 14,
110 | "metadata": {},
111 | "output_type": "execute_result"
112 | }
113 | ],
114 | "source": [
115 | "import pandas as pd\n",
116 | "from datetime import datetime\n",
117 | "# 导入数据源\n",
118 | "data=pd.read_csv(\"order-14.3.csv\",parse_dates=[\"成交时间\"],encoding='gbk')\n",
119 | "data.head()\n",
120 | "# data.shape"
121 | ]
122 | },
123 | {
124 | "cell_type": "code",
125 | "execution_count": 20,
126 | "metadata": {},
127 | "outputs": [
128 | {
129 | "data": {
130 | "text/html": [
131 | "\n",
132 | "\n",
145 | "
\n",
146 | " \n",
147 | " \n",
148 | " | \n",
149 | " 类别ID | \n",
150 | " 销量 | \n",
151 | "
\n",
152 | " \n",
153 | " \n",
154 | " \n",
155 | " | 240 | \n",
156 | " 922000003 | \n",
157 | " 425.328 | \n",
158 | "
\n",
159 | " \n",
160 | " | 239 | \n",
161 | " 922000002 | \n",
162 | " 206.424 | \n",
163 | "
\n",
164 | " \n",
165 | " | 251 | \n",
166 | " 923000006 | \n",
167 | " 190.294 | \n",
168 | "
\n",
169 | " \n",
170 | " | 216 | \n",
171 | " 915030104 | \n",
172 | " 175.059 | \n",
173 | "
\n",
174 | " \n",
175 | " | 238 | \n",
176 | " 922000001 | \n",
177 | " 121.355 | \n",
178 | "
\n",
179 | " \n",
180 | " | 367 | \n",
181 | " 960000000 | \n",
182 | " 121.000 | \n",
183 | "
\n",
184 | " \n",
185 | " | 234 | \n",
186 | " 920090000 | \n",
187 | " 111.565 | \n",
188 | "
\n",
189 | " \n",
190 | " | 249 | \n",
191 | " 923000002 | \n",
192 | " 91.847 | \n",
193 | "
\n",
194 | " \n",
195 | " | 237 | \n",
196 | " 922000000 | \n",
197 | " 86.395 | \n",
198 | "
\n",
199 | " \n",
200 | " | 247 | \n",
201 | " 923000000 | \n",
202 | " 85.845 | \n",
203 | "
\n",
204 | " \n",
205 | "
\n",
206 | "
"
207 | ],
208 | "text/plain": [
209 | " 类别ID 销量\n",
210 | "240 922000003 425.328\n",
211 | "239 922000002 206.424\n",
212 | "251 923000006 190.294\n",
213 | "216 915030104 175.059\n",
214 | "238 922000001 121.355\n",
215 | "367 960000000 121.000\n",
216 | "234 920090000 111.565\n",
217 | "249 923000002 91.847\n",
218 | "237 922000000 86.395\n",
219 | "247 923000000 85.845"
220 | ]
221 | },
222 | "execution_count": 20,
223 | "metadata": {},
224 | "output_type": "execute_result"
225 | }
226 | ],
227 | "source": [
228 | "# 哪些类别的商品比较畅销\n",
229 | "# ascending=False 降序\n",
230 | "data.groupby(\"类别ID\")[\"销量\"].sum().reset_index().sort_values(by=\"销量\",ascending=False).head(10)"
231 | ]
232 | },
233 | {
234 | "cell_type": "code",
235 | "execution_count": 22,
236 | "metadata": {},
237 | "outputs": [
238 | {
239 | "data": {
240 | "text/html": [
241 | "\n",
242 | "\n",
255 | "
\n",
256 | " \n",
257 | " \n",
258 | " | \n",
259 | " 商品ID | \n",
260 | " 销量 | \n",
261 | "
\n",
262 | " \n",
263 | " \n",
264 | " \n",
265 | " | 8 | \n",
266 | " 29989059 | \n",
267 | " 391.549 | \n",
268 | "
\n",
269 | " \n",
270 | " | 18 | \n",
271 | " 29989072 | \n",
272 | " 102.876 | \n",
273 | "
\n",
274 | " \n",
275 | " | 469 | \n",
276 | " 30022232 | \n",
277 | " 101.000 | \n",
278 | "
\n",
279 | " \n",
280 | " | 523 | \n",
281 | " 30031960 | \n",
282 | " 99.998 | \n",
283 | "
\n",
284 | " \n",
285 | " | 57 | \n",
286 | " 29989157 | \n",
287 | " 72.453 | \n",
288 | "
\n",
289 | " \n",
290 | " | 476 | \n",
291 | " 30023041 | \n",
292 | " 64.416 | \n",
293 | "
\n",
294 | " \n",
295 | " | 505 | \n",
296 | " 30026255 | \n",
297 | " 62.375 | \n",
298 | "
\n",
299 | " \n",
300 | " | 7 | \n",
301 | " 29989058 | \n",
302 | " 56.052 | \n",
303 | "
\n",
304 | " \n",
305 | " | 510 | \n",
306 | " 30027007 | \n",
307 | " 48.757 | \n",
308 | "
\n",
309 | " \n",
310 | " | 903 | \n",
311 | " 30171264 | \n",
312 | " 45.000 | \n",
313 | "
\n",
314 | " \n",
315 | "
\n",
316 | "
"
317 | ],
318 | "text/plain": [
319 | " 商品ID 销量\n",
320 | "8 29989059 391.549\n",
321 | "18 29989072 102.876\n",
322 | "469 30022232 101.000\n",
323 | "523 30031960 99.998\n",
324 | "57 29989157 72.453\n",
325 | "476 30023041 64.416\n",
326 | "505 30026255 62.375\n",
327 | "7 29989058 56.052\n",
328 | "510 30027007 48.757\n",
329 | "903 30171264 45.000"
330 | ]
331 | },
332 | "execution_count": 22,
333 | "metadata": {},
334 | "output_type": "execute_result"
335 | }
336 | ],
337 | "source": [
338 | "# 哪些商品比较畅销\n",
339 | "pd.pivot_table(data,index=\"商品ID\",values=\"销量\",aggfunc=\"sum\").reset_index().sort_values(by=\"销量\",ascending=False).head(10)"
340 | ]
341 | },
342 | {
343 | "cell_type": "code",
344 | "execution_count": 25,
345 | "metadata": {},
346 | "outputs": [
347 | {
348 | "data": {
349 | "text/plain": [
350 | "门店编号\n",
351 | "CDLG 10908.82612\n",
352 | "CDNL 8059.47867\n",
353 | "CDXL 9981.76166\n",
354 | "Name: 销售额, dtype: float64"
355 | ]
356 | },
357 | "execution_count": 25,
358 | "metadata": {},
359 | "output_type": "execute_result"
360 | }
361 | ],
362 | "source": [
363 | "# 不同门店的销售额占比\n",
364 | "data[\"销售额\"]=data[\"销量\"]*data[\"单价\"]\n",
365 | "data.groupby(\"门店编号\")[\"销售额\"].sum()\n"
366 | ]
367 | },
368 | {
369 | "cell_type": "code",
370 | "execution_count": 29,
371 | "metadata": {},
372 | "outputs": [
373 | {
374 | "data": {
375 | "text/plain": [
376 | "门店编号\n",
377 | "CDLG 0.376815\n",
378 | "CDNL 0.278392\n",
379 | "CDXL 0.344792\n",
380 | "Name: 销售额, dtype: float64"
381 | ]
382 | },
383 | "execution_count": 29,
384 | "metadata": {},
385 | "output_type": "execute_result"
386 | }
387 | ],
388 | "source": [
389 | "data.groupby(\"门店编号\")[\"销售额\"].sum()/data[\"销售额\"].sum()"
390 | ]
391 | },
392 | {
393 | "cell_type": "code",
394 | "execution_count": 30,
395 | "metadata": {},
396 | "outputs": [
397 | {
398 | "data": {
399 | "text/plain": [
400 | ""
401 | ]
402 | },
403 | "execution_count": 30,
404 | "metadata": {},
405 | "output_type": "execute_result"
406 | },
407 | {
408 | "data": {
409 | "image/png": "\n",
410 | "text/plain": [
411 | ""
412 | ]
413 | },
414 | "metadata": {},
415 | "output_type": "display_data"
416 | }
417 | ],
418 | "source": [
419 | "import matplotlib as mpl\n",
420 | "\n",
421 | "mpl.rcParams[\"font.family\"]=\"SimHei\"\n",
422 | "mpl.rcParams[\"axes.unicode_minus\"]=False\n",
423 | "(data.groupby(\"门店编号\")[\"销售额\"].sum()/data[\"销售额\"].sum()).plot.pie()"
424 | ]
425 | },
426 | {
427 | "cell_type": "code",
428 | "execution_count": 37,
429 | "metadata": {},
430 | "outputs": [
431 | {
432 | "data": {
433 | "text/plain": [
434 | ""
435 | ]
436 | },
437 | "execution_count": 37,
438 | "metadata": {},
439 | "output_type": "execute_result"
440 | },
441 | {
442 | "data": {
443 | "image/png": "\n",
444 | "text/plain": [
445 | ""
446 | ]
447 | },
448 | "metadata": {
449 | "needs_background": "light"
450 | },
451 | "output_type": "display_data"
452 | }
453 | ],
454 | "source": [
455 | "# 哪个时间段是超市的客流高峰期\n",
456 | "# 利用自定义时间格式函数strftime提取小时数\n",
457 | "data[\"小时\"]=data[\"成交时间\"].map(lambda x:int(x.strftime(\"%H\")))\n",
458 | "# 对小时和订单去重\n",
459 | "traffic=data[[\"小时\",\"订单ID\"]].drop_duplicates()\n",
460 | "# 求每小时的客流量\n",
461 | "traffic.groupby(\"小时\")[\"订单ID\"].count().plot()"
462 | ]
463 | },
464 | {
465 | "cell_type": "code",
466 | "execution_count": null,
467 | "metadata": {},
468 | "outputs": [],
469 | "source": []
470 | }
471 | ],
472 | "metadata": {
473 | "kernelspec": {
474 | "display_name": "Python 3",
475 | "language": "python",
476 | "name": "python3"
477 | },
478 | "language_info": {
479 | "codemirror_mode": {
480 | "name": "ipython",
481 | "version": 3
482 | },
483 | "file_extension": ".py",
484 | "mimetype": "text/x-python",
485 | "name": "python",
486 | "nbconvert_exporter": "python",
487 | "pygments_lexer": "ipython3",
488 | "version": "3.7.3"
489 | },
490 | "toc": {
491 | "base_numbering": 1,
492 | "nav_menu": {},
493 | "number_sections": true,
494 | "sideBar": true,
495 | "skip_h1_title": false,
496 | "title_cell": "Table of Contents",
497 | "title_sidebar": "Contents",
498 | "toc_cell": false,
499 | "toc_position": {},
500 | "toc_section_display": true,
501 | "toc_window_display": false
502 | }
503 | },
504 | "nbformat": 4,
505 | "nbformat_minor": 2
506 | }
507 |
--------------------------------------------------------------------------------
/Bank.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 8,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "name": "stdout",
10 | "output_type": "stream",
11 | "text": [
12 | "\n",
13 | "RangeIndex: 150000 entries, 0 to 149999\n",
14 | "Data columns (total 6 columns):\n",
15 | "用户ID 150000 non-null int64\n",
16 | "好坏客户 150000 non-null int64\n",
17 | "年龄 150000 non-null int64\n",
18 | "负债率 150000 non-null float64\n",
19 | "月收入 120269 non-null float64\n",
20 | "家属数量 146076 non-null float64\n",
21 | "dtypes: float64(3), int64(3)\n",
22 | "memory usage: 6.9 MB\n"
23 | ]
24 | }
25 | ],
26 | "source": [
27 | "import pandas as pd\n",
28 | "from datetime import datetime\n",
29 | "\n",
30 | "data=pd.read_csv(\"loan.csv\",encoding=\"gbk\")\n",
31 | "data.info()\n",
32 | "# print(data)"
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": 12,
38 | "metadata": {},
39 | "outputs": [
40 | {
41 | "name": "stdout",
42 | "output_type": "stream",
43 | "text": [
44 | "\n",
45 | "RangeIndex: 150000 entries, 0 to 149999\n",
46 | "Data columns (total 6 columns):\n",
47 | "用户ID 150000 non-null int64\n",
48 | "好坏客户 150000 non-null int64\n",
49 | "年龄 150000 non-null int64\n",
50 | "负债率 150000 non-null float64\n",
51 | "月收入 150000 non-null float64\n",
52 | "家属数量 146076 non-null float64\n",
53 | "dtypes: float64(3), int64(3)\n",
54 | "memory usage: 6.9 MB\n",
55 | "None\n",
56 | " 用户ID 好坏客户 年龄 负债率 月收入 家属数量\n",
57 | "0 1 1 45 0.802982 9120.000000 2.0\n",
58 | "1 2 0 40 0.121876 2600.000000 1.0\n",
59 | "2 3 0 38 0.085113 3042.000000 0.0\n",
60 | "3 4 0 30 0.036050 3300.000000 0.0\n",
61 | "4 5 0 49 0.024926 63588.000000 0.0\n",
62 | "5 6 0 74 0.375607 3500.000000 1.0\n",
63 | "6 7 0 57 5710.000000 6670.221237 0.0\n",
64 | "7 8 0 39 0.209940 3500.000000 0.0\n",
65 | "8 9 0 27 46.000000 6670.221237 NaN\n",
66 | "9 10 0 57 0.606291 23684.000000 2.0\n",
67 | "10 11 0 30 0.309476 2500.000000 0.0\n",
68 | "11 12 0 51 0.531529 6501.000000 2.0\n",
69 | "12 13 0 46 0.298354 12454.000000 2.0\n",
70 | "13 14 1 40 0.382965 13700.000000 2.0\n",
71 | "14 15 0 76 477.000000 0.000000 0.0\n",
72 | "15 16 0 64 0.209892 11362.000000 2.0\n",
73 | "16 17 0 78 2058.000000 6670.221237 0.0\n",
74 | "17 18 0 53 0.188274 8800.000000 0.0\n",
75 | "18 19 0 43 0.527888 3280.000000 2.0\n",
76 | "19 20 0 25 0.065868 333.000000 0.0\n",
77 | "20 21 0 43 0.430046 12300.000000 0.0\n",
78 | "21 22 1 38 0.475841 3000.000000 2.0\n",
79 | "22 23 0 39 0.241104 2500.000000 0.0\n",
80 | "23 24 0 32 0.085512 7916.000000 0.0\n",
81 | "24 25 0 58 0.241622 2416.000000 0.0\n",
82 | "25 26 1 50 1.595253 4676.000000 1.0\n",
83 | "26 27 0 58 0.097672 8333.000000 0.0\n",
84 | "27 28 0 69 0.042383 2500.000000 1.0\n",
85 | "28 29 0 24 0.011761 3400.000000 0.0\n",
86 | "29 30 0 58 0.436103 5500.000000 0.0\n",
87 | "... ... ... .. ... ... ...\n",
88 | "149970 149971 0 58 0.253855 15500.000000 2.0\n",
89 | "149971 149972 0 83 0.013997 5000.000000 0.0\n",
90 | "149972 149973 0 42 0.008638 6945.000000 1.0\n",
91 | "149973 149974 0 44 0.494819 5500.000000 1.0\n",
92 | "149974 149975 0 61 0.603479 5000.000000 0.0\n",
93 | "149975 149976 0 58 2716.000000 6670.221237 0.0\n",
94 | "149976 149977 0 76 60.000000 6670.221237 0.0\n",
95 | "149977 149978 0 29 349.000000 6670.221237 0.0\n",
96 | "149978 149979 0 52 0.259496 2500.000000 0.0\n",
97 | "149979 149980 1 55 0.057235 8700.000000 0.0\n",
98 | "149980 149981 0 64 0.254976 5525.000000 0.0\n",
99 | "149981 149982 0 43 0.121752 6849.000000 4.0\n",
100 | "149982 149983 0 37 0.250272 2760.000000 3.0\n",
101 | "149983 149984 0 82 0.000800 5000.000000 0.0\n",
102 | "149984 149985 0 84 25.000000 6670.221237 0.0\n",
103 | "149985 149986 0 26 0.324962 1950.000000 0.0\n",
104 | "149986 149987 0 49 0.080384 5000.000000 1.0\n",
105 | "149987 149988 0 28 0.055692 3249.000000 0.0\n",
106 | "149988 149989 0 31 0.347924 7515.000000 0.0\n",
107 | "149989 149990 0 62 0.001408 9233.000000 3.0\n",
108 | "149990 149991 0 46 0.609779 4335.000000 2.0\n",
109 | "149991 149992 0 59 0.477658 10316.000000 0.0\n",
110 | "149992 149993 0 50 4132.000000 6670.221237 3.0\n",
111 | "149993 149994 0 22 0.000000 820.000000 0.0\n",
112 | "149994 149995 0 50 0.404293 3400.000000 0.0\n",
113 | "149995 149996 0 74 0.225131 2100.000000 0.0\n",
114 | "149996 149997 0 44 0.716562 5584.000000 2.0\n",
115 | "149997 149998 0 58 3870.000000 6670.221237 0.0\n",
116 | "149998 149999 0 30 0.000000 5716.000000 0.0\n",
117 | "149999 150000 0 64 0.249908 8158.000000 0.0\n",
118 | "\n",
119 | "[150000 rows x 6 columns]\n"
120 | ]
121 | }
122 | ],
123 | "source": [
124 | "# 是不是收入越高的人坏账率越低\n",
125 | "data=data.fillna({\"月收入\":data[\"月收入\"].mean()})\n",
126 | "print(data.info())\n",
127 | "print(data)"
128 | ]
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": null,
133 | "metadata": {},
134 | "outputs": [],
135 | "source": []
136 | },
137 | {
138 | "cell_type": "code",
139 | "execution_count": 11,
140 | "metadata": {},
141 | "outputs": [
142 | {
143 | "name": "stdout",
144 | "output_type": "stream",
145 | "text": [
146 | "0 (5000.0, 10000.0]\n",
147 | "1 (0.0, 5000.0]\n",
148 | "2 (0.0, 5000.0]\n",
149 | "3 (0.0, 5000.0]\n",
150 | "4 (20000.0, 100000.0]\n",
151 | "5 (0.0, 5000.0]\n",
152 | "6 (5000.0, 10000.0]\n",
153 | "7 (0.0, 5000.0]\n",
154 | "8 (5000.0, 10000.0]\n",
155 | "9 (20000.0, 100000.0]\n",
156 | "10 (0.0, 5000.0]\n",
157 | "11 (5000.0, 10000.0]\n",
158 | "12 (10000.0, 15000.0]\n",
159 | "13 (10000.0, 15000.0]\n",
160 | "14 NaN\n",
161 | "15 (10000.0, 15000.0]\n",
162 | "16 (5000.0, 10000.0]\n",
163 | "17 (5000.0, 10000.0]\n",
164 | "18 (0.0, 5000.0]\n",
165 | "19 (0.0, 5000.0]\n",
166 | "20 (10000.0, 15000.0]\n",
167 | "21 (0.0, 5000.0]\n",
168 | "22 (0.0, 5000.0]\n",
169 | "23 (5000.0, 10000.0]\n",
170 | "24 (0.0, 5000.0]\n",
171 | "25 (0.0, 5000.0]\n",
172 | "26 (5000.0, 10000.0]\n",
173 | "27 (0.0, 5000.0]\n",
174 | "28 (0.0, 5000.0]\n",
175 | "29 (5000.0, 10000.0]\n",
176 | " ... \n",
177 | "149970 (15000.0, 20000.0]\n",
178 | "149971 (0.0, 5000.0]\n",
179 | "149972 (5000.0, 10000.0]\n",
180 | "149973 (5000.0, 10000.0]\n",
181 | "149974 (0.0, 5000.0]\n",
182 | "149975 (5000.0, 10000.0]\n",
183 | "149976 (5000.0, 10000.0]\n",
184 | "149977 (5000.0, 10000.0]\n",
185 | "149978 (0.0, 5000.0]\n",
186 | "149979 (5000.0, 10000.0]\n",
187 | "149980 (5000.0, 10000.0]\n",
188 | "149981 (5000.0, 10000.0]\n",
189 | "149982 (0.0, 5000.0]\n",
190 | "149983 (0.0, 5000.0]\n",
191 | "149984 (5000.0, 10000.0]\n",
192 | "149985 (0.0, 5000.0]\n",
193 | "149986 (0.0, 5000.0]\n",
194 | "149987 (0.0, 5000.0]\n",
195 | "149988 (5000.0, 10000.0]\n",
196 | "149989 (5000.0, 10000.0]\n",
197 | "149990 (0.0, 5000.0]\n",
198 | "149991 (10000.0, 15000.0]\n",
199 | "149992 (5000.0, 10000.0]\n",
200 | "149993 (0.0, 5000.0]\n",
201 | "149994 (0.0, 5000.0]\n",
202 | "149995 (0.0, 5000.0]\n",
203 | "149996 (5000.0, 10000.0]\n",
204 | "149997 (5000.0, 10000.0]\n",
205 | "149998 (5000.0, 10000.0]\n",
206 | "149999 (5000.0, 10000.0]\n",
207 | "Name: 月收入, Length: 150000, dtype: category\n",
208 | "Categories (5, interval[int64]): [(0, 5000] < (5000, 10000] < (10000, 15000] < (15000, 20000] < (20000, 100000]]\n"
209 | ]
210 | }
211 | ],
212 | "source": [
213 | "cut_bins=[0,5000,10000,15000,20000,100000]\n",
214 | "income_cut=pd.cut(data[\"月收入\"],cut_bins)\n",
215 | "print(income_cut)"
216 | ]
217 | },
218 | {
219 | "cell_type": "code",
220 | "execution_count": 15,
221 | "metadata": {},
222 | "outputs": [
223 | {
224 | "name": "stdout",
225 | "output_type": "stream",
226 | "text": [
227 | "月收入\n",
228 | "(0, 5000] 0.087543\n",
229 | "(5000, 10000] 0.058308\n",
230 | "(10000, 15000] 0.041964\n",
231 | "(15000, 20000] 0.041811\n",
232 | "(20000, 100000] 0.053615\n",
233 | "Name: 好坏客户, dtype: float64\n"
234 | ]
235 | },
236 | {
237 | "data": {
238 | "text/plain": [
239 | ""
240 | ]
241 | },
242 | "execution_count": 15,
243 | "metadata": {},
244 | "output_type": "execute_result"
245 | },
246 | {
247 | "data": {
248 | "image/png": "\n",
249 | "text/plain": [
250 | ""
251 | ]
252 | },
253 | "metadata": {
254 | "needs_background": "light"
255 | },
256 | "output_type": "display_data"
257 | }
258 | ],
259 | "source": [
260 | "import matplotlib.pyplot as plt\n",
261 | "\n",
262 | "all_income_user=data[\"好坏客户\"].groupby(income_cut).count()\n",
263 | "bad_income_user=data[\"好坏客户\"].groupby(income_cut).sum()\n",
264 | "bad_rate=bad_income_user/all_income_user\n",
265 | "print(bad_rate)\n",
266 | "\n",
267 | "# 绘制月收入与坏账率关系图\n",
268 | "bad_rate.plot.bar()"
269 | ]
270 | },
271 | {
272 | "cell_type": "code",
273 | "execution_count": 21,
274 | "metadata": {},
275 | "outputs": [
276 | {
277 | "name": "stdout",
278 | "output_type": "stream",
279 | "text": [
280 | "年龄\n",
281 | "(-0.109, 18.167] 0.000000\n",
282 | "(18.167, 36.333] 0.110124\n",
283 | "(36.333, 54.5] 0.081645\n",
284 | "(54.5, 72.667] 0.041719\n",
285 | "(72.667, 90.833] 0.021585\n",
286 | "(90.833, 109.0] 0.022495\n",
287 | "Name: 好坏客户, dtype: float64\n"
288 | ]
289 | },
290 | {
291 | "data": {
292 | "text/plain": [
293 | ""
294 | ]
295 | },
296 | "execution_count": 21,
297 | "metadata": {},
298 | "output_type": "execute_result"
299 | },
300 | {
301 | "data": {
302 | "image/png": "\n",
303 | "text/plain": [
304 | ""
305 | ]
306 | },
307 | "metadata": {
308 | "needs_background": "light"
309 | },
310 | "output_type": "display_data"
311 | }
312 | ],
313 | "source": [
314 | "# 年龄和坏账率有什么关系\n",
315 | "age_cut=pd.cut(data[\"年龄\"],6)\n",
316 | "all_age_user=data[\"好坏客户\"].groupby(age_cut).count()\n",
317 | "bad_age_user=data[\"好坏客户\"].groupby(age_cut).sum()\n",
318 | "bad_rate=bad_age_user/all_age_user\n",
319 | "print(bad_rate)\n",
320 | "\n",
321 | "bad_rate.plot.bar()"
322 | ]
323 | },
324 | {
325 | "cell_type": "code",
326 | "execution_count": 23,
327 | "metadata": {},
328 | "outputs": [
329 | {
330 | "name": "stdout",
331 | "output_type": "stream",
332 | "text": [
333 | "家属数量\n",
334 | "0.0 0.058629\n",
335 | "1.0 0.073529\n",
336 | "2.0 0.081139\n",
337 | "3.0 0.088263\n",
338 | "4.0 0.103774\n",
339 | "5.0 0.091153\n",
340 | "6.0 0.151899\n",
341 | "7.0 0.098039\n",
342 | "8.0 0.083333\n",
343 | "9.0 0.000000\n",
344 | "10.0 0.000000\n",
345 | "13.0 0.000000\n",
346 | "20.0 0.000000\n",
347 | "Name: 好坏客户, dtype: float64\n"
348 | ]
349 | }
350 | ],
351 | "source": [
352 | "# 家庭人口数量和坏账率有什么关系\n",
353 | "all_age_user=data.groupby(\"家属数量\")[\"好坏客户\"].count()\n",
354 | "bad_age_user=data.groupby(\"家属数量\")[\"好坏客户\"].sum()\n",
355 | "bad_rate=bad_age_user/all_age_user\n",
356 | "print(bad_rate)"
357 | ]
358 | },
359 | {
360 | "cell_type": "code",
361 | "execution_count": 25,
362 | "metadata": {},
363 | "outputs": [
364 | {
365 | "data": {
366 | "text/plain": [
367 | ""
368 | ]
369 | },
370 | "execution_count": 25,
371 | "metadata": {},
372 | "output_type": "execute_result"
373 | },
374 | {
375 | "data": {
376 | "image/png": "\n",
377 | "text/plain": [
378 | ""
379 | ]
380 | },
381 | "metadata": {
382 | "needs_background": "light"
383 | },
384 | "output_type": "display_data"
385 | }
386 | ],
387 | "source": [
388 | "bad_rate.plot()"
389 | ]
390 | },
391 | {
392 | "cell_type": "code",
393 | "execution_count": null,
394 | "metadata": {},
395 | "outputs": [],
396 | "source": []
397 | }
398 | ],
399 | "metadata": {
400 | "kernelspec": {
401 | "display_name": "Python 3",
402 | "language": "python",
403 | "name": "python3"
404 | },
405 | "language_info": {
406 | "codemirror_mode": {
407 | "name": "ipython",
408 | "version": 3
409 | },
410 | "file_extension": ".py",
411 | "mimetype": "text/x-python",
412 | "name": "python",
413 | "nbconvert_exporter": "python",
414 | "pygments_lexer": "ipython3",
415 | "version": "3.7.3"
416 | },
417 | "toc": {
418 | "base_numbering": 1,
419 | "nav_menu": {},
420 | "number_sections": true,
421 | "sideBar": true,
422 | "skip_h1_title": false,
423 | "title_cell": "Table of Contents",
424 | "title_sidebar": "Contents",
425 | "toc_cell": false,
426 | "toc_position": {},
427 | "toc_section_display": true,
428 | "toc_window_display": false
429 | }
430 | },
431 | "nbformat": 4,
432 | "nbformat_minor": 2
433 | }
434 |
--------------------------------------------------------------------------------
/.ipynb_checkpoints/Bank-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 8,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "name": "stdout",
10 | "output_type": "stream",
11 | "text": [
12 | "\n",
13 | "RangeIndex: 150000 entries, 0 to 149999\n",
14 | "Data columns (total 6 columns):\n",
15 | "用户ID 150000 non-null int64\n",
16 | "好坏客户 150000 non-null int64\n",
17 | "年龄 150000 non-null int64\n",
18 | "负债率 150000 non-null float64\n",
19 | "月收入 120269 non-null float64\n",
20 | "家属数量 146076 non-null float64\n",
21 | "dtypes: float64(3), int64(3)\n",
22 | "memory usage: 6.9 MB\n"
23 | ]
24 | }
25 | ],
26 | "source": [
27 | "import pandas as pd\n",
28 | "from datetime import datetime\n",
29 | "\n",
30 | "data=pd.read_csv(\"loan.csv\",encoding=\"gbk\")\n",
31 | "data.info()\n",
32 | "# print(data)"
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": 12,
38 | "metadata": {},
39 | "outputs": [
40 | {
41 | "name": "stdout",
42 | "output_type": "stream",
43 | "text": [
44 | "\n",
45 | "RangeIndex: 150000 entries, 0 to 149999\n",
46 | "Data columns (total 6 columns):\n",
47 | "用户ID 150000 non-null int64\n",
48 | "好坏客户 150000 non-null int64\n",
49 | "年龄 150000 non-null int64\n",
50 | "负债率 150000 non-null float64\n",
51 | "月收入 150000 non-null float64\n",
52 | "家属数量 146076 non-null float64\n",
53 | "dtypes: float64(3), int64(3)\n",
54 | "memory usage: 6.9 MB\n",
55 | "None\n",
56 | " 用户ID 好坏客户 年龄 负债率 月收入 家属数量\n",
57 | "0 1 1 45 0.802982 9120.000000 2.0\n",
58 | "1 2 0 40 0.121876 2600.000000 1.0\n",
59 | "2 3 0 38 0.085113 3042.000000 0.0\n",
60 | "3 4 0 30 0.036050 3300.000000 0.0\n",
61 | "4 5 0 49 0.024926 63588.000000 0.0\n",
62 | "5 6 0 74 0.375607 3500.000000 1.0\n",
63 | "6 7 0 57 5710.000000 6670.221237 0.0\n",
64 | "7 8 0 39 0.209940 3500.000000 0.0\n",
65 | "8 9 0 27 46.000000 6670.221237 NaN\n",
66 | "9 10 0 57 0.606291 23684.000000 2.0\n",
67 | "10 11 0 30 0.309476 2500.000000 0.0\n",
68 | "11 12 0 51 0.531529 6501.000000 2.0\n",
69 | "12 13 0 46 0.298354 12454.000000 2.0\n",
70 | "13 14 1 40 0.382965 13700.000000 2.0\n",
71 | "14 15 0 76 477.000000 0.000000 0.0\n",
72 | "15 16 0 64 0.209892 11362.000000 2.0\n",
73 | "16 17 0 78 2058.000000 6670.221237 0.0\n",
74 | "17 18 0 53 0.188274 8800.000000 0.0\n",
75 | "18 19 0 43 0.527888 3280.000000 2.0\n",
76 | "19 20 0 25 0.065868 333.000000 0.0\n",
77 | "20 21 0 43 0.430046 12300.000000 0.0\n",
78 | "21 22 1 38 0.475841 3000.000000 2.0\n",
79 | "22 23 0 39 0.241104 2500.000000 0.0\n",
80 | "23 24 0 32 0.085512 7916.000000 0.0\n",
81 | "24 25 0 58 0.241622 2416.000000 0.0\n",
82 | "25 26 1 50 1.595253 4676.000000 1.0\n",
83 | "26 27 0 58 0.097672 8333.000000 0.0\n",
84 | "27 28 0 69 0.042383 2500.000000 1.0\n",
85 | "28 29 0 24 0.011761 3400.000000 0.0\n",
86 | "29 30 0 58 0.436103 5500.000000 0.0\n",
87 | "... ... ... .. ... ... ...\n",
88 | "149970 149971 0 58 0.253855 15500.000000 2.0\n",
89 | "149971 149972 0 83 0.013997 5000.000000 0.0\n",
90 | "149972 149973 0 42 0.008638 6945.000000 1.0\n",
91 | "149973 149974 0 44 0.494819 5500.000000 1.0\n",
92 | "149974 149975 0 61 0.603479 5000.000000 0.0\n",
93 | "149975 149976 0 58 2716.000000 6670.221237 0.0\n",
94 | "149976 149977 0 76 60.000000 6670.221237 0.0\n",
95 | "149977 149978 0 29 349.000000 6670.221237 0.0\n",
96 | "149978 149979 0 52 0.259496 2500.000000 0.0\n",
97 | "149979 149980 1 55 0.057235 8700.000000 0.0\n",
98 | "149980 149981 0 64 0.254976 5525.000000 0.0\n",
99 | "149981 149982 0 43 0.121752 6849.000000 4.0\n",
100 | "149982 149983 0 37 0.250272 2760.000000 3.0\n",
101 | "149983 149984 0 82 0.000800 5000.000000 0.0\n",
102 | "149984 149985 0 84 25.000000 6670.221237 0.0\n",
103 | "149985 149986 0 26 0.324962 1950.000000 0.0\n",
104 | "149986 149987 0 49 0.080384 5000.000000 1.0\n",
105 | "149987 149988 0 28 0.055692 3249.000000 0.0\n",
106 | "149988 149989 0 31 0.347924 7515.000000 0.0\n",
107 | "149989 149990 0 62 0.001408 9233.000000 3.0\n",
108 | "149990 149991 0 46 0.609779 4335.000000 2.0\n",
109 | "149991 149992 0 59 0.477658 10316.000000 0.0\n",
110 | "149992 149993 0 50 4132.000000 6670.221237 3.0\n",
111 | "149993 149994 0 22 0.000000 820.000000 0.0\n",
112 | "149994 149995 0 50 0.404293 3400.000000 0.0\n",
113 | "149995 149996 0 74 0.225131 2100.000000 0.0\n",
114 | "149996 149997 0 44 0.716562 5584.000000 2.0\n",
115 | "149997 149998 0 58 3870.000000 6670.221237 0.0\n",
116 | "149998 149999 0 30 0.000000 5716.000000 0.0\n",
117 | "149999 150000 0 64 0.249908 8158.000000 0.0\n",
118 | "\n",
119 | "[150000 rows x 6 columns]\n"
120 | ]
121 | }
122 | ],
123 | "source": [
124 | "# 是不是收入越高的人坏账率越低\n",
125 | "data=data.fillna({\"月收入\":data[\"月收入\"].mean()})\n",
126 | "print(data.info())\n",
127 | "print(data)"
128 | ]
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": null,
133 | "metadata": {},
134 | "outputs": [],
135 | "source": []
136 | },
137 | {
138 | "cell_type": "code",
139 | "execution_count": 11,
140 | "metadata": {},
141 | "outputs": [
142 | {
143 | "name": "stdout",
144 | "output_type": "stream",
145 | "text": [
146 | "0 (5000.0, 10000.0]\n",
147 | "1 (0.0, 5000.0]\n",
148 | "2 (0.0, 5000.0]\n",
149 | "3 (0.0, 5000.0]\n",
150 | "4 (20000.0, 100000.0]\n",
151 | "5 (0.0, 5000.0]\n",
152 | "6 (5000.0, 10000.0]\n",
153 | "7 (0.0, 5000.0]\n",
154 | "8 (5000.0, 10000.0]\n",
155 | "9 (20000.0, 100000.0]\n",
156 | "10 (0.0, 5000.0]\n",
157 | "11 (5000.0, 10000.0]\n",
158 | "12 (10000.0, 15000.0]\n",
159 | "13 (10000.0, 15000.0]\n",
160 | "14 NaN\n",
161 | "15 (10000.0, 15000.0]\n",
162 | "16 (5000.0, 10000.0]\n",
163 | "17 (5000.0, 10000.0]\n",
164 | "18 (0.0, 5000.0]\n",
165 | "19 (0.0, 5000.0]\n",
166 | "20 (10000.0, 15000.0]\n",
167 | "21 (0.0, 5000.0]\n",
168 | "22 (0.0, 5000.0]\n",
169 | "23 (5000.0, 10000.0]\n",
170 | "24 (0.0, 5000.0]\n",
171 | "25 (0.0, 5000.0]\n",
172 | "26 (5000.0, 10000.0]\n",
173 | "27 (0.0, 5000.0]\n",
174 | "28 (0.0, 5000.0]\n",
175 | "29 (5000.0, 10000.0]\n",
176 | " ... \n",
177 | "149970 (15000.0, 20000.0]\n",
178 | "149971 (0.0, 5000.0]\n",
179 | "149972 (5000.0, 10000.0]\n",
180 | "149973 (5000.0, 10000.0]\n",
181 | "149974 (0.0, 5000.0]\n",
182 | "149975 (5000.0, 10000.0]\n",
183 | "149976 (5000.0, 10000.0]\n",
184 | "149977 (5000.0, 10000.0]\n",
185 | "149978 (0.0, 5000.0]\n",
186 | "149979 (5000.0, 10000.0]\n",
187 | "149980 (5000.0, 10000.0]\n",
188 | "149981 (5000.0, 10000.0]\n",
189 | "149982 (0.0, 5000.0]\n",
190 | "149983 (0.0, 5000.0]\n",
191 | "149984 (5000.0, 10000.0]\n",
192 | "149985 (0.0, 5000.0]\n",
193 | "149986 (0.0, 5000.0]\n",
194 | "149987 (0.0, 5000.0]\n",
195 | "149988 (5000.0, 10000.0]\n",
196 | "149989 (5000.0, 10000.0]\n",
197 | "149990 (0.0, 5000.0]\n",
198 | "149991 (10000.0, 15000.0]\n",
199 | "149992 (5000.0, 10000.0]\n",
200 | "149993 (0.0, 5000.0]\n",
201 | "149994 (0.0, 5000.0]\n",
202 | "149995 (0.0, 5000.0]\n",
203 | "149996 (5000.0, 10000.0]\n",
204 | "149997 (5000.0, 10000.0]\n",
205 | "149998 (5000.0, 10000.0]\n",
206 | "149999 (5000.0, 10000.0]\n",
207 | "Name: 月收入, Length: 150000, dtype: category\n",
208 | "Categories (5, interval[int64]): [(0, 5000] < (5000, 10000] < (10000, 15000] < (15000, 20000] < (20000, 100000]]\n"
209 | ]
210 | }
211 | ],
212 | "source": [
213 | "cut_bins=[0,5000,10000,15000,20000,100000]\n",
214 | "income_cut=pd.cut(data[\"月收入\"],cut_bins)\n",
215 | "print(income_cut)"
216 | ]
217 | },
218 | {
219 | "cell_type": "code",
220 | "execution_count": 15,
221 | "metadata": {},
222 | "outputs": [
223 | {
224 | "name": "stdout",
225 | "output_type": "stream",
226 | "text": [
227 | "月收入\n",
228 | "(0, 5000] 0.087543\n",
229 | "(5000, 10000] 0.058308\n",
230 | "(10000, 15000] 0.041964\n",
231 | "(15000, 20000] 0.041811\n",
232 | "(20000, 100000] 0.053615\n",
233 | "Name: 好坏客户, dtype: float64\n"
234 | ]
235 | },
236 | {
237 | "data": {
238 | "text/plain": [
239 | ""
240 | ]
241 | },
242 | "execution_count": 15,
243 | "metadata": {},
244 | "output_type": "execute_result"
245 | },
246 | {
247 | "data": {
248 | "image/png": "\n",
249 | "text/plain": [
250 | ""
251 | ]
252 | },
253 | "metadata": {
254 | "needs_background": "light"
255 | },
256 | "output_type": "display_data"
257 | }
258 | ],
259 | "source": [
260 | "import matplotlib.pyplot as plt\n",
261 | "\n",
262 | "all_income_user=data[\"好坏客户\"].groupby(income_cut).count()\n",
263 | "bad_income_user=data[\"好坏客户\"].groupby(income_cut).sum()\n",
264 | "bad_rate=bad_income_user/all_income_user\n",
265 | "print(bad_rate)\n",
266 | "\n",
267 | "# 绘制月收入与坏账率关系图\n",
268 | "bad_rate.plot.bar()"
269 | ]
270 | },
271 | {
272 | "cell_type": "code",
273 | "execution_count": 21,
274 | "metadata": {},
275 | "outputs": [
276 | {
277 | "name": "stdout",
278 | "output_type": "stream",
279 | "text": [
280 | "年龄\n",
281 | "(-0.109, 18.167] 0.000000\n",
282 | "(18.167, 36.333] 0.110124\n",
283 | "(36.333, 54.5] 0.081645\n",
284 | "(54.5, 72.667] 0.041719\n",
285 | "(72.667, 90.833] 0.021585\n",
286 | "(90.833, 109.0] 0.022495\n",
287 | "Name: 好坏客户, dtype: float64\n"
288 | ]
289 | },
290 | {
291 | "data": {
292 | "text/plain": [
293 | ""
294 | ]
295 | },
296 | "execution_count": 21,
297 | "metadata": {},
298 | "output_type": "execute_result"
299 | },
300 | {
301 | "data": {
302 | "image/png": "\n",
303 | "text/plain": [
304 | ""
305 | ]
306 | },
307 | "metadata": {
308 | "needs_background": "light"
309 | },
310 | "output_type": "display_data"
311 | }
312 | ],
313 | "source": [
314 | "# 年龄和坏账率有什么关系\n",
315 | "age_cut=pd.cut(data[\"年龄\"],6)\n",
316 | "all_age_user=data[\"好坏客户\"].groupby(age_cut).count()\n",
317 | "bad_age_user=data[\"好坏客户\"].groupby(age_cut).sum()\n",
318 | "bad_rate=bad_age_user/all_age_user\n",
319 | "print(bad_rate)\n",
320 | "\n",
321 | "bad_rate.plot.bar()"
322 | ]
323 | },
324 | {
325 | "cell_type": "code",
326 | "execution_count": 23,
327 | "metadata": {},
328 | "outputs": [
329 | {
330 | "name": "stdout",
331 | "output_type": "stream",
332 | "text": [
333 | "家属数量\n",
334 | "0.0 0.058629\n",
335 | "1.0 0.073529\n",
336 | "2.0 0.081139\n",
337 | "3.0 0.088263\n",
338 | "4.0 0.103774\n",
339 | "5.0 0.091153\n",
340 | "6.0 0.151899\n",
341 | "7.0 0.098039\n",
342 | "8.0 0.083333\n",
343 | "9.0 0.000000\n",
344 | "10.0 0.000000\n",
345 | "13.0 0.000000\n",
346 | "20.0 0.000000\n",
347 | "Name: 好坏客户, dtype: float64\n"
348 | ]
349 | }
350 | ],
351 | "source": [
352 | "# 家庭人口数量和坏账率有什么关系\n",
353 | "all_age_user=data.groupby(\"家属数量\")[\"好坏客户\"].count()\n",
354 | "bad_age_user=data.groupby(\"家属数量\")[\"好坏客户\"].sum()\n",
355 | "bad_rate=bad_age_user/all_age_user\n",
356 | "print(bad_rate)"
357 | ]
358 | },
359 | {
360 | "cell_type": "code",
361 | "execution_count": 25,
362 | "metadata": {},
363 | "outputs": [
364 | {
365 | "data": {
366 | "text/plain": [
367 | ""
368 | ]
369 | },
370 | "execution_count": 25,
371 | "metadata": {},
372 | "output_type": "execute_result"
373 | },
374 | {
375 | "data": {
376 | "image/png": "\n",
377 | "text/plain": [
378 | ""
379 | ]
380 | },
381 | "metadata": {
382 | "needs_background": "light"
383 | },
384 | "output_type": "display_data"
385 | }
386 | ],
387 | "source": [
388 | "bad_rate.plot()"
389 | ]
390 | },
391 | {
392 | "cell_type": "code",
393 | "execution_count": null,
394 | "metadata": {},
395 | "outputs": [],
396 | "source": []
397 | }
398 | ],
399 | "metadata": {
400 | "kernelspec": {
401 | "display_name": "Python 3",
402 | "language": "python",
403 | "name": "python3"
404 | },
405 | "language_info": {
406 | "codemirror_mode": {
407 | "name": "ipython",
408 | "version": 3
409 | },
410 | "file_extension": ".py",
411 | "mimetype": "text/x-python",
412 | "name": "python",
413 | "nbconvert_exporter": "python",
414 | "pygments_lexer": "ipython3",
415 | "version": "3.7.3"
416 | },
417 | "toc": {
418 | "base_numbering": 1,
419 | "nav_menu": {},
420 | "number_sections": true,
421 | "sideBar": true,
422 | "skip_h1_title": false,
423 | "title_cell": "Table of Contents",
424 | "title_sidebar": "Contents",
425 | "toc_cell": false,
426 | "toc_position": {},
427 | "toc_section_display": true,
428 | "toc_window_display": false
429 | }
430 | },
431 | "nbformat": 4,
432 | "nbformat_minor": 2
433 | }
434 |
--------------------------------------------------------------------------------