# K-Means clustering of the standardised customer data.
#
# Reads the z-scored table, fits a K-Means model with k clusters, and writes
# two spreadsheets: one with each cluster centre plus its member count, and
# one with every input row plus the cluster label it was assigned.

import pandas as pd
from sklearn.cluster import KMeans

data = pd.read_excel('./result/zscoreddata.xls')  # standardised input table
k = 5  # number of clusters

# The original call also passed n_jobs=1.  That keyword was deprecated in
# scikit-learn 0.23 and removed in 1.0, where it raises TypeError; it is
# dropped here because single-job execution was already the default.
model = KMeans(n_clusters=k)
model.fit(data)

# model.cluster_centers_ holds the cluster centres;
# model.labels_ holds the cluster label assigned to each input row.
r1 = pd.Series(model.labels_).value_counts()  # members per cluster, indexed by label
r2 = pd.DataFrame(model.cluster_centers_)  # one row per cluster centre
# concat aligns on the index, so each count lands next to its own centre.
r = pd.concat([r2, r1], axis=1)
r.columns = list(data.columns) + [u'类别数目']
# NOTE(review): writing the legacy .xls format needs an Excel writer that
# recent pandas releases may no longer ship -- confirm for the target version.
r.to_excel('./result/KMeansNum.xls')

# Per-row output: the original features followed by the assigned cluster.
# The label series reuses data's index so rows line up one-to-one.
r = pd.concat([data, pd.Series(model.labels_, index=data.index)], axis=1)
r.columns = list(data.columns) + [u'聚类类别']
r.to_excel('./result/KMeans.xls')
# Attribute construction: build the L, R, F, M, C indicators.

from datetime import datetime

import pandas as pd

# Date layouts accepted by diffMonth.  The four-digit-year layout is tried
# first; the two-digit-year layout is the one the script originally used.
_DATE_FORMATS = ("%Y/%m/%d", "%y/%m/%d")


def _parse_date(text):
    """Parse a 'year/month/day' string with the first layout that matches."""
    for layout in _DATE_FORMATS:
        try:
            return datetime.strptime(text, layout).date()
        except ValueError:
            continue
    raise ValueError("unrecognised date: %r" % (text,))


def diffMonth(startDate, endDate):
    """Return the number of calendar months from startDate to endDate.

    Both arguments are 'year/month/day' strings with either a four-digit or
    a two-digit year.  Only the year and month take part in the count, and
    both end months are included, so two dates in the same month give 1.

    Returns:
        The month count as an int, or None (after printing a message) when
        endDate lies in an earlier month than startDate.

    Raises:
        ValueError: if either string matches none of the accepted layouts.
    """
    start = _parse_date(startDate)
    end = _parse_date(endDate)

    months_apart = (end.year - start.year) * 12 + (end.month - start.month)
    if months_apart < 0:
        # The end date precedes the start date: report it and give no result.
        print('结束日期必须大于起始日期')
        return None
    return months_apart + 1


def _build_lrfmc(data):
    """Return a new frame holding the L, R, F, M, C columns built from data."""
    lrfmc = pd.DataFrame(index=data.index)
    # L: months between FFP_DATE and LOAD_TIME, computed row by row.
    lrfmc['L'] = [
        diffMonth(start, end)
        for start, end in zip(data['FFP_DATE'], data['LOAD_TIME'])
    ]
    # R: LAST_TO_END divided by 31.0, as in the original expression.
    lrfmc['R'] = data['LAST_TO_END'] / 31.0
    lrfmc['F'] = data['FLIGHT_COUNT']
    lrfmc['M'] = data['SEG_KM_SUM']
    # NOTE(review): the casing of this column must match data_selected.csv;
    # confirm whether the file uses 'AVG_DISCOUNT' or 'avg_discount'.
    lrfmc['C'] = data['AVG_DISCOUNT']
    return lrfmc


def main():
    """Read the selected attributes, build L/R/F/M/C, and write them to CSV."""
    data = pd.read_csv('./result/data_selected.csv', encoding='utf-8')
    result = './result/data_changed.csv'
    _build_lrfmc(data).to_csv(result, sep=",", encoding='utf-8')


if __name__ == "__main__":
    main()
# Standard-deviation (z-score) standardisation of the input table.

import pandas as pd

result = './result/zscoreddata.xls'
data = pd.read_excel('./data/zscoredata.xls')

# Centre every column on its own mean, then scale by its own standard
# deviation; both statistics are taken down the rows (axis=0).
column_means = data.mean(axis=0)
column_stds = data.std(axis=0)
data = (data - column_means) / column_stds

# Mark the standardised columns by prefixing each name with 'Z'.
data = data.rename(columns=lambda name: 'Z' + name)

data.to_excel(result, index=False)
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bugdragon/customer_value_recognition/3d20588cd44341182f1564eb5ff6d03f8bcbd159/图表/客户价值识别思维导图.png -------------------------------------------------------------------------------- /图表/客户群体数目.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bugdragon/customer_value_recognition/3d20588cd44341182f1564eb5ff6d03f8bcbd159/图表/客户群体数目.png -------------------------------------------------------------------------------- /图表/客户群体特征.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bugdragon/customer_value_recognition/3d20588cd44341182f1564eb5ff6d03f8bcbd159/图表/客户群体特征.png -------------------------------------------------------------------------------- /图表/客户群体特征描述.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bugdragon/customer_value_recognition/3d20588cd44341182f1564eb5ff6d03f8bcbd159/图表/客户群体特征描述.xlsx --------------------------------------------------------------------------------