├── Main.py
├── README.md
├── baseline.py
├── baseline
    ├── Baseline.py
    ├── Baseline_topk.py
    ├── README.md
    ├── big_old_baseline.py
    ├── data_to_ffm_format.py
    ├── input
    │   ├── data_0420.csv
    │   └── testData.csv
    └── userFeature_to_DataFrame.py
├── feature_ad.py
├── feature_cross.py
├── feature_kmeans.py
├── feature_nlp.py
├── feature_select.py
├── feature_user.py
├── one_hot_baseline.py
├── util_base.py
├── util_convert_ffm.py
├── util_feature_selcet.py
├── util_models.py
├── util_vectorTokmeans.py
└── util_word2vec.py


/Main.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | @author: Infaraway
 4 | @time: 2018/4/17 17:39
 5 | @Function:
 6 | """
 7 | 
 8 | import pandas as pd
 9 | import numpy as np
10 | from Tencent_AD2018.util_models import Model
11 | 
12 | select_cross2_feature = ['aid', 'uid', 'adCategoryId_LBS_prob', 'aid_age_prob', 'advertiserId_ct_prob', 'adCategoryId_marriageStatus_prob',
13 |                   'aid_marriageStatus_prob', 'productId_LBS_prob',
14 |  'aid_gender_prob', 'aid_ct_prob', 'productId_age_prob', 'advertiserId_gender_prob', 'creativeId_consumptionAbility_prob', 'creativeId_gender_prob',
15 |   'aid_carrier_prob', 'productId_ct_prob', 'adCategoryId_consumptionAbility_prob', 'aid_education_prob', 'adCategoryId_age_prob',
16 |   'adCategoryId_ct_prob', 'productId_carrier_prob', 'adCategoryId_education_prob', 'adCategoryId_gender_prob',
17 |   'creativeId_marriageStatus_prob', 'advertiserId_age_prob', 'creativeId_age_prob', 'advertiserId_carrier_prob',
18 |   'advertiserId_consumptionAbility_prob', 'creativeId_carrier_prob']
19 | 
20 | 
21 | def do_exp():
22 |     print('------------------------read data :')
23 |     df_train = pd.read_csv('data/raw_data/train.csv')
24 |     df_test = pd.read_csv('data/raw_data/test2.csv')
25 |     df_train['label'] = df_train['label'].apply(lambda x: 0 if x == -1 else x)
26 |     user_feature = pd.read_csv('data/feature_data/clean_user_feature.csv')
27 |     ad_feature = pd.read_csv('data/feature_data/clean_ad_feature.csv')
28 |     data = pd.concat([df_train, df_test])
29 |     data = pd.merge(data, user_feature, on=['uid'], how='left')
30 |     data = pd.merge(data, ad_feature, on=['aid'], how='left')
31 |     print('user_feature.shape:', user_feature.shape)
32 |     print('ad_feature.shape:', ad_feature.shape)
33 | 
34 |     cross_feature = pd.read_csv('data/feature_data/cross_feature_probe.csv')
35 |     cross_feature = cross_feature[select_cross2_feature]
36 |     data = pd.merge(data, cross_feature, on=['aid', 'uid'], how='left')
37 |     print('cross_feature.shape:', cross_feature.shape)
38 | 
39 |     # cross_feature3 = pd.read_csv('data/feature_data/cross_feature3_probe.csv')
40 |     # data = pd.merge(data, cross_feature3, on=['aid', 'uid'], how='left')
41 |     # print('cross_feature3.shape:', cross_feature3.shape
42 |     # nlp_feature = pd.read_csv('data/feature_data/nlp_feature.csv')
43 |     # data = pd.merge(data, nlp_feature, on=['uid'], how='left')
44 |     # print('nlp_feature.shape:', nlp_feature.shape)
45 | 
46 |     # kmeans_feature = pd.read_csv('data/feature_data/kmeans_feature.csv')
47 |     # data = pd.merge(data, kmeans_feature, on=['uid'], how='left')
48 |     # print('kmeans_feature.shape:', kmeans_feature.shape)
49 | 
50 |     features = ['interest1', 'interest2', 'interest5', 'kw1', 'kw2', 'topic1', 'topic2']
51 |     for feat in features:
52 |         kmeans_feature = pd.read_csv('data/w2v_feature/w2v_all_20' + feat + '.csv')
53 |         data = pd.merge(data, kmeans_feature, on=['uid'], how='left')
54 |         print('kmeans_feature.shape:', kmeans_feature.shape)
55 | 
56 |     train = data[data.label.notnull()]
57 |     test = data[data.label.isnull()]
58 |     res = test[['aid', 'uid']]
59 |     print('train.shape:', train.shape)
60 |     print('test.shape:', test.shape)
61 |     remove_cols = ['uid', 'label']
62 |     cols = [col for col in train if col not in remove_cols]
63 |     train_y = train['label'].values
64 |     train_x = train[cols].values
65 |     test_x = test[cols].values
66 | 
67 |     print('train.shape:', np.shape(train_x))
68 |     print('test.shape:', np.shape(test_x))
69 |     print('------------------------train model:')
70 |     model = Model(train_x, train_y, cols, test_x, res)
71 |     model.kfold_model()
72 |     print('-----------end...')
73 | 
74 | 
75 | if __name__ == '__main__':
76 |     do_exp()
77 |     # data = pd.read_csv('result_cv0512.csv')
78 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # tencent_ad
 2 | 
 3 | ### 腾讯社交广告算法大赛 Baseline
 4 | 
 5 | - baseline :
 6 |     - baseline_topk: 选择在interest kw topic等特征中出现频率topk的值，删除剩余的低频值
 7 | 
 8 | - 由于kw topic等id类特征繁多，并且发现很多kw或者topic成对出现：
 9 |     - 因此首先使用word2vec进行词向量构造，
10 |     - 然后使用k-mean对词向量进行降维，对相似度极高的kw或者topic进行合并处理；再使用
11 | 
12 | - 缺陷：
13 |     - 由于机器原因 词向量的维度太小，而id类特征太多，导致词向量无法对id进行很好的区分；
14 |     - 数据量较大，kmean需要极大的内存开销，并且聚类时间较长；
15 | 
16 | 
17 | 
18 | 
19 | 
20 | 


--------------------------------------------------------------------------------
/baseline.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | @author: Infaraway
  4 | @time: 2018/4/17 17:39
  5 | @Function:
  6 | """
  7 | 
  8 | import pandas as pd
  9 | import numpy as np
 10 | from sklearn.cross_validation import train_test_split
 11 | 
 12 | from Tencent_AD2018 import feature_ad
 13 | from Tencent_AD2018 import util_models
 14 | 
 15 | 
 16 | def do_exp_with_w2vFeature():
 17 |     print('--------------------read data---------------------------------------')
 18 |     df_train = pd.read_csv('data/raw_data/train.csv')
 19 |     df_test = pd.read_csv('data/raw_data/test1.csv')
 20 |     df_userFeature = pd.read_csv('data/clean_userFeature.csv')
 21 |     df_adFeature = pd.read_csv('data/raw_data/adFeature.csv')
 22 |     df_train['label'] = df_train['label'].apply(lambda x: 0 if x == -1 else x)
 23 | 
 24 |     # print('--------------------concat w2v features ----------------------------')
 25 |     # w2v_featrure = ['interest1', 'interest2', 'interest5', 'kw1', 'kw2', 'topic1', 'topic2']
 26 |     # # w2v_featrure = ['kw2']
 27 |     # # w2v_featrure = []
 28 |     # for feat in w2v_featrure:
 29 |     #     print("------this is feature: ", feat)
 30 |     #     df_w2vfeat = pd.read_csv('data/topk_w2v_feat/w2v_' + feat + '.csv')
 31 |     #     if df_w2vfeat.shape[0] == df_userFeature.shape[0]:
 32 |     #         df_w2vfeat = df_w2vfeat.round(2)
 33 |     #         df_userFeature = pd.concat([df_userFeature, df_w2vfeat], axis=1)
 34 |     #     else:
 35 |     #         print("************ " + feat + " Shape Error...  **********")
 36 | 
 37 |     # print('--------------------kmeans_feature    --------------------------------')
 38 |     # kmeans_feature = pd.read_csv('data/word2vec_feat/kmeans20.csv')
 39 |     # if kmeans_feature.shape[0] == df_userFeature.shape[0]:
 40 |     #     # kmeans_feature = kmeans_feature[['kw1', 'kw2', 'topic1', 'topic2']]
 41 |     #     df_userFeature = pd.concat([df_userFeature, kmeans_feature], axis=1)
 42 |     #
 43 |     # print('--------------------len_feature    --------------------------------')
 44 |     # len_feature = pd.read_csv('data/topk_w2v_feat/len_feature.csv')
 45 |     # if len_feature.shape[0] == df_userFeature.shape[0]:
 46 |     #     df_userFeature = pd.concat([df_userFeature, len_feature], axis=1)
 47 |     # else:
 48 |     #     print("************ len_feature Shape Error...  *************")
 49 | 
 50 |     print('--------------------merge data-------------------------------------')
 51 |     data = pd.concat([df_train, df_test])
 52 |     data = pd.merge(data, df_userFeature, on=['uid'], how='left')
 53 |     data = pd.merge(data, df_adFeature, on=['aid'], how='left')
 54 | 
 55 |     # features = ['aid', 'label', 'uid', 'age', 'gender', 'marriageStatus', 'education'
 56 |     #             , 'consumptionAbility', 'LBS', 'ct', 'os', 'carrier', 'house', 'kmeans_appIdAction', 'kmeans_appIdInstall', 'kmeans_interest1'
 57 |     #             , 'kmeans_interest2', 'kmeans_interest3', 'kmeans_interest4'
 58 |     #             , 'kmeans_interest5', 'kmeans_kw1', 'kmeans_kw2', 'kmeans_kw3', 'kmeans_topic1'
 59 |     #             , 'kmeans_topic2', 'kmeans_topic3', 'advertiserId', 'campaignId', 'creativeId', 'creativeSize'
 60 |     #             , 'adCategoryId', 'productId', 'productType']
 61 |     # for col in features:
 62 |     #     data[col] = data[col].astype(str)
 63 | 
 64 |     print('--------------------cross feature----------------------------------')
 65 | 
 66 |     data = feature_ad.ad_base_process(data)
 67 |     # data = user.user_ad_feature(data)
 68 | 
 69 |     print('--------------------train model -----------------------------------')
 70 |     print(data.label.unique())
 71 |     print(data.columns.values)
 72 |     print(data.shape)
 73 |     train = data[data.label.notnull()]
 74 |     test = data[data.label.isnull()]
 75 | 
 76 |     # del train['uid']
 77 |     # train_y = train.pop('label')
 78 |     # train_x, test_x, traint_y, test_y = train_test_split(train, train_y, test_size=0.2, random_state=2018)
 79 |     # print('-------------------- train model ------------------------------------')
 80 |     # best_iter = models.lgbCV(train_x, test_x, traint_y, test_y)
 81 | 
 82 |     util_models.base_model(train, test, best_iter=1500)
 83 | 
 84 | 
 85 | def do_exp2_with_adFeature():
 86 |     print('-------------------- read data  -------------------------------------')
 87 |     data = pd.read_csv('data/data_user_aid.csv')
 88 | 
 89 |     print('-------------------- train and test data ----------------------------')
 90 |     print(data.label.unique())
 91 |     train = data[data.label.notnull()]
 92 |     del train['uid']
 93 |     train_y = train.pop('label')
 94 |     train_x, test_x, traint_y, test_y = train_test_split(train, train_y, test_size=0.2, random_state=2018)
 95 |     print('-------------------- train model ------------------------------------')
 96 |     best_iter = util_models.lgbCV(train_x, test_x, traint_y, test_y)
 97 |     # test2 = data[data.label.isnull()]
 98 |     # models.base_model(train, test2, best_iter=1000)
 99 | 
100 | 
101 | if __name__ == '__main__':
102 |     # do_exp_with_w2vFeature()
103 |     # do_exp2_with_adFeature()
104 |     data = pd.read_csv('data/feature_data/clean_user_feature2.csv')
105 |     vector_feature = ['appIdAction', 'appIdInstall', 'interest1', 'interest2', 'interest3', 'interest4', 'interest5',
106 |                       'kw1', 'kw2', 'kw3', 'topic1', 'topic2', 'topic3']
107 |     features = ['kmeans_20_' + feat for feat in vector_feature]
108 |     cols = ['uid'] + features
109 |     data = data[cols]
110 |     data.to_csv('data/feature_data/kmeans_feature.csv', index=False)
111 | 
112 |     print('end....')


--------------------------------------------------------------------------------
/baseline/Baseline.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | @author: Infaraway
  4 | @time: 2018/4/18 17:49
  5 | @Function:
  6 | """
  7 | 
  8 | from sklearn.preprocessing import OneHotEncoder, LabelEncoder
  9 | import numpy as np
 10 | import pandas as pd
 11 | import lightgbm as lgb
 12 | from gensim.models.word2vec import Word2Vec
 13 | 
 14 | 
 15 | def base_word2vec(x, model, size):
 16 |     vec = np.zeros(size)
 17 |     x = [item for item in x if model.wv.__contains__(item)]
 18 | 
 19 |     for item in x:
 20 |         vec += model.wv[item]
 21 |     if len(x) == 0:
 22 |         return vec
 23 |     else:
 24 |         return vec / len(x)
 25 | 
 26 | 
 27 | def base_process(data):
 28 |     one_hot_feature = ['LBS', 'age', 'carrier', 'consumptionAbility', 'education', 'gender', 'house', 'os', 'ct',
 29 |                        'marriageStatus', 'advertiserId', 'campaignId', 'creativeId',
 30 |                        'adCategoryId', 'productId', 'productType']
 31 | 
 32 |     vector_feature = ['appIdAction', 'appIdInstall', 'interest1', 'interest2', 'interest3', 'interest4', 'interest5',
 33 |                       'kw1', 'kw2', 'kw3', 'topic1', 'topic2', 'topic3']
 34 | 
 35 |     lbc = LabelEncoder()
 36 |     for feature in one_hot_feature:
 37 |         print("this is feature:", feature)
 38 |         try:
 39 |             data[feature] = lbc.fit_transform(data[feature].apply(int))
 40 |         except:
 41 |             data[feature] = lbc.fit_transform(data[feature])
 42 | 
 43 |     for feature in vector_feature:
 44 |         print("this is feature:", feature)
 45 |         data[feature] = data[feature].apply(lambda x: str(x).split(' '))
 46 |         model = Word2Vec(data[feature], size=10, min_count=1, iter=5, window=2)
 47 |         data_vec = []
 48 |         for row in data[feature]:
 49 |             data_vec.append(base_word2vec(row, model, size=10))
 50 |         column_names = []
 51 |         for i in range(10):
 52 |             column_names.append(feature + str(i))
 53 |         data_vec = pd.DataFrame(data_vec, columns=column_names)
 54 |         data = pd.concat([data, data_vec], axis=1)
 55 |         del data[feature]
 56 |     return data
 57 | 
 58 | 
 59 | def base_model(train, test, best_iter=100):
 60 |     col = [c for c in train if c not in ['uid', 'label']]
 61 |     X = train[col]
 62 |     y = train['label'].values
 63 |     print('------------------Training LGBM model--------------------------')
 64 |     lgb0 = lgb.LGBMClassifier(
 65 |         objective='binary',
 66 |         # metric='binary_error',
 67 |         num_leaves=40,
 68 |         max_depth=6,
 69 |         learning_rate=0.1,
 70 |         seed=2018,
 71 |         colsample_bytree=0.8,
 72 |         # min_child_samples=8,
 73 |         subsample=0.9,
 74 |         n_estimators=best_iter)
 75 |     lgb_model = lgb0.fit(X, y)
 76 | 
 77 |     print('----------------------predict result --------------------------')
 78 |     pred = lgb_model.predict_proba(test[col])[:, 1]
 79 |     test['score'] = pred
 80 |     test['score'] = test['score'].apply(lambda x: round(x, 7))
 81 | 
 82 |     result = test[['aid', 'uid', 'score']]
 83 |     result.to_csv('submission.csv', index=False)
 84 | 
 85 | 
 86 | def do_exp():
 87 |     print('-------------------- read data  -------------------------------------')
 88 |     # merge之后的data
 89 |     data = pd.read_csv('input/testData.csv')
 90 |     data = base_process(data)
 91 |     data.to_csv('input/data_0420.csv', index=False)
 92 |     print('-------------------- train and test data ----------------------------')
 93 |     train = data[data.label.notnull()]
 94 |     test = data[data.label.isnull()]
 95 |     base_model(train, test, best_iter=1000)
 96 | 
 97 | 
 98 | if __name__ == '__main__':
 99 |     do_exp()
100 |     print('end...')


--------------------------------------------------------------------------------
/baseline/Baseline_topk.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | @author: Infaraway
  4 | @time: 2018/4/20 17:49
  5 | @Function:
  6 | """
  7 | 
  8 | from sklearn.preprocessing import OneHotEncoder, LabelEncoder
  9 | import numpy as np
 10 | import pandas as pd
 11 | import lightgbm as lgb
 12 | from gensim.models.word2vec import Word2Vec
 13 | import collections
 14 | 
 15 | 
 16 | def base_word2vec(x, model, size):
 17 |     vec = np.zeros(size)
 18 |     x = [item for item in x if model.wv.__contains__(item)]
 19 | 
 20 |     for item in x:
 21 |         vec += model.wv[item]
 22 |     if len(x) == 0:
 23 |         return vec
 24 |     else:
 25 |         return vec / len(x)
 26 | 
 27 | 
 28 | def select_topk(data):
 29 |     """
 30 |     选择频率最高的 k 个word 此处为前20%
 31 |     :param data:
 32 |     :return:
 33 |     """
 34 |     word_list = []
 35 |     for words in data:
 36 |         word_list += words
 37 |     result = collections.Counter(word_list)
 38 |     size = len(result)
 39 |     result = result.most_common(int(size * 0.2))
 40 | 
 41 |     word_dict = {}
 42 |     for re in result:
 43 |         word_dict[re[0]] = 1
 44 |     print('word_vec: ', size, len(result))
 45 |     return word_dict
 46 | 
 47 | 
 48 | def base_process(data):
 49 |     one_hot_feature = ['LBS', 'age', 'carrier', 'consumptionAbility', 'education', 'gender', 'house', 'os', 'ct',
 50 |                        'marriageStatus', 'advertiserId', 'campaignId', 'creativeId',
 51 |                        'adCategoryId', 'productId', 'productType']
 52 | 
 53 |     vector_feature = ['appIdAction', 'appIdInstall', 'interest1', 'interest2', 'interest3', 'interest4', 'interest5',
 54 |                       'kw1', 'kw2', 'kw3', 'topic1', 'topic2', 'topic3']
 55 | 
 56 |     lbc = LabelEncoder()
 57 |     for feature in one_hot_feature:
 58 |         print("this is feature:", feature)
 59 |         try:
 60 |             data[feature] = lbc.fit_transform(data[feature].apply(int))
 61 |         except:
 62 |             data[feature] = lbc.fit_transform(data[feature])
 63 | 
 64 |     for feature in vector_feature:
 65 |         print("this is feature:", feature)
 66 | 
 67 |         data[feature] = data[feature].apply(lambda x: str(x).split(' '))
 68 |         word_dict = select_topk(data[feature])
 69 |         data[feature] = data[feature].apply(lambda x: ' '.join(
 70 |             [word for word in x if word_dict.__contains__(word)]))
 71 | 
 72 |         model = Word2Vec(data[feature], size=10, min_count=1, iter=5, window=2)
 73 |         data_vec = []
 74 |         for row in data[feature]:
 75 |             data_vec.append(base_word2vec(row, model, size=10))
 76 |         column_names = []
 77 |         for i in range(10):
 78 |             column_names.append(feature + str(i))
 79 |         data_vec = pd.DataFrame(data_vec, columns=column_names)
 80 |         data = pd.concat([data, data_vec], axis=1)
 81 |         del data[feature]
 82 |     return data
 83 | 
 84 | 
 85 | def base_model(train, test, best_iter=100):
 86 |     col = [c for c in train if c not in ['uid', 'label']]
 87 |     X = train[col]
 88 |     y = train['label'].values
 89 |     print('------------------Training LGBM model--------------------------')
 90 |     lgb0 = lgb.LGBMClassifier(
 91 |         objective='binary',
 92 |         # metric='binary_error',
 93 |         num_leaves=40,
 94 |         max_depth=6,
 95 |         learning_rate=0.1,
 96 |         seed=2018,
 97 |         colsample_bytree=0.8,
 98 |         # min_child_samples=8,
 99 |         subsample=0.9,
100 |         n_estimators=best_iter)
101 |     lgb_model = lgb0.fit(X, y)
102 | 
103 |     print('----------------------predict result --------------------------')
104 |     pred = lgb_model.predict_proba(test[col])[:, 1]
105 |     test['score'] = pred
106 |     test['score'] = test['score'].apply(lambda x: round(x, 7))
107 | 
108 |     result = test[['aid', 'uid', 'score']]
109 |     result.to_csv('submission.csv', index=False)
110 | 
111 | 
112 | def do_exp():
113 |     print('-------------------- read data  -------------------------------------')
114 |     # merge之后的data
115 |     data = pd.read_csv('input/testData.csv')
116 |     data = base_process(data)
117 |     data.to_csv('input/data_0420.csv', index=False)
118 |     print('-------------------- train and test data ----------------------------')
119 |     train = data[data.label.notnull()]
120 |     test = data[data.label.isnull()]
121 |     base_model(train, test, best_iter=1000)
122 | 
123 | if __name__ == '__main__':
124 |     do_exp()
125 |     print('end...')


--------------------------------------------------------------------------------
/baseline/README.md:
--------------------------------------------------------------------------------
  1 | # tencent_ad
  2 | 
  3 | ### 腾讯社交广告算法大赛 Baseline
  4 | 
  5 | 大佬已经做出来了高Baseline的代码：https://github.com/YouChouNoBB/2018-tencent-ad-competition-baseline
  6 | 
  7 | 考虑interest kw topic此类特征太多，one-hot直接维数爆掉，所以采用了word2vec方法降维；没有调参的情况下基本可以达到one-hot的得分
  8 | 
  9 | - baseline :
 10 | - baseline_topk: 选择在interest kw topic等特征中出现频率topk的值，删除剩余的低频值
 11 | 
 12 | #### 另附interest kw topic各个维度的长度，以便选择word2vec的size
 13 | 
 14 | #### 用户特征
 15 | 
 16 | 
 17 |                     len_appIdAction len_appIdInstall len_interest1  len_interest2
 18 |             count     1.106480e+07      1.106480e+07   1.106480e+07   1.106480e+07   
 19 |             mean      1.137803e+00      3.306016e+00   1.294338e+01   4.164523e+00   
 20 |             std       1.732410e+00      2.864749e+01   8.972224e+00   4.244111e+00   
 21 |             min       1.000000e+00      1.000000e+00   1.000000e+00   1.000000e+00   
 22 |             25%       1.000000e+00      1.000000e+00   6.000000e+00   1.000000e+00   
 23 |             50%       1.000000e+00      1.000000e+00   1.200000e+01   2.000000e+00   
 24 |             75%       1.000000e+00      1.000000e+00   1.900000e+01   6.000000e+00   
 25 |             max       5.370000e+02      9.200000e+02   3.800000e+01   3.200000e+01   
 26 | 
 27 |  
 28 |                     len_interest3  len_interest4  len_interest5       len_kw1
 29 |             count   1.106480e+07   1.106480e+07   1.106480e+07  1.106480e+07   
 30 |             mean    1.168589e+00   1.050987e+00   1.515969e+01  4.392344e+00   
 31 |             std     1.136084e+00   4.851396e-01   1.185373e+01  1.350022e+00   
 32 |             min     1.000000e+00   1.000000e+00   1.000000e+00  1.000000e+00   
 33 |             25%     1.000000e+00   1.000000e+00   1.000000e+00  5.000000e+00   
 34 |             50%     1.000000e+00   1.000000e+00   1.500000e+01  5.000000e+00   
 35 |             75%     1.000000e+00   1.000000e+00   2.300000e+01  5.000000e+00   
 36 |             max     1.000000e+01   1.000000e+01   8.600000e+01  5.000000e+00   
 37 | 
 38 |                        len_kw2       len_kw3    len_topic1    len_topic2    len_topic3  
 39 |             count   1.106480e+07  1.106480e+07  1.106480e+07  1.106480e+07  1.106480e+07  
 40 |             mean    4.792818e+00  1.181388e+00  4.657463e+00  4.855681e+00  1.183553e+00  
 41 |             std     8.417202e-01  8.301784e-01  1.117917e+00  7.452962e-01  8.366755e-01  
 42 |             min     1.000000e+00  1.000000e+00  1.000000e+00  1.000000e+00  1.000000e+00  
 43 |             25%     5.000000e+00  1.000000e+00  5.000000e+00  5.000000e+00  1.000000e+00  
 44 |             50%     5.000000e+00  1.000000e+00  5.000000e+00  5.000000e+00  1.000000e+00  
 45 |             75%     5.000000e+00  1.000000e+00  5.000000e+00  5.000000e+00  1.000000e+00  
 46 |             max     5.000000e+00  5.000000e+00  5.000000e+00  5.000000e+00  5.000000e+00  
 47 | 
 48 | #### 多值特征的统计：
 49 | 
 50 |     word_vec:  count top_20%
 51 | 
 52 |     this is feature: appIdAction
 53 |     word_vec:  6215 1243
 54 |     this is feature: appIdInstall
 55 |     word_vec:  64856 12971
 56 |     this is feature: interest1
 57 |     word_vec:  123 24
 58 |     this is feature: interest2
 59 |     word_vec:  81 16
 60 |     this is feature: interest3
 61 |     word_vec:  11 2
 62 |     this is feature: interest4
 63 |     word_vec:  11 2
 64 |     this is feature: interest5
 65 |     word_vec:  137 27
 66 |     this is feature: kw1
 67 |     word_vec:  259909 51981
 68 |     this is feature: kw2
 69 |     word_vec:  49197 9839
 70 |     this is feature: kw3
 71 |     word_vec:  11922 2384
 72 |     this is feature: topic1
 73 |     word_vec:  10001 2000
 74 |     this is feature: topic2
 75 |     word_vec:  9980 1996
 76 |     this is feature: topic3
 77 |     word_vec:  5873 1174
 78 | 
 79 | #### 选择top20%以后的统计描述信息
 80 | 
 81 |     this is feature: interest1
 82 |     word_vec:  123 24
 83 |     count    1.142004e+07
 84 |     mean     8.807826e+00
 85 |     std      5.239412e+00
 86 |     min      1.000000e+00
 87 |     25%      4.000000e+00
 88 |     50%      9.000000e+00
 89 |     75%      1.300000e+01
 90 |     max      2.400000e+01
 91 |     Name: interest1, dtype: float64
 92 |     this is feature: interest2
 93 |     word_vec:  81 16
 94 |     count    1.142004e+07
 95 |     mean     2.676273e+00
 96 |     std      2.391842e+00
 97 |     min      1.000000e+00
 98 |     25%      1.000000e+00
 99 |     50%      2.000000e+00
100 |     75%      4.000000e+00
101 |     max      1.500000e+01
102 |     Name: interest2, dtype: float64
103 |     this is feature: interest5
104 |     word_vec:  137 27
105 |     count    1.142004e+07
106 |     mean     9.560934e+00
107 |     std      6.553343e+00
108 |     min      1.000000e+00
109 |     25%      1.000000e+00
110 |     50%      1.000000e+01
111 |     75%      1.500000e+01
112 |     max      2.600000e+01
113 |     Name: interest5, dtype: float64
114 |     this is feature: kw1
115 |     word_vec:  263311 52662
116 |     count    1.142004e+07
117 |     mean     4.227658e+00
118 |     std      1.328209e+00
119 |     min      1.000000e+00
120 |     25%      4.000000e+00
121 |     50%      5.000000e+00
122 |     75%      5.000000e+00
123 |     max      5.000000e+00
124 |     Name: kw1, dtype: float64
125 |     this is feature: kw2
126 |     word_vec:  49779 9955
127 |     count    1.142004e+07
128 |     mean     4.680158e+00
129 |     std      9.268412e-01
130 |     min      1.000000e+00
131 |     25%      5.000000e+00
132 |     50%      5.000000e+00
133 |     75%      5.000000e+00
134 |     max      5.000000e+00
135 |     Name: kw2, dtype: float64
136 |     this is feature: topic1
137 |     word_vec:  10001 2000
138 |     count    1.142004e+07
139 |     mean     3.636819e+00
140 |     std      1.328687e+00
141 |     min      1.000000e+00
142 |     25%      3.000000e+00
143 |     50%      4.000000e+00
144 |     75%      5.000000e+00
145 |     max      5.000000e+00
146 |     Name: topic1, dtype: float64
147 |     this is feature: topic2
148 |     word_vec:  9983 1996
149 |     count    1.142004e+07
150 |     mean     3.840774e+00
151 |     std      1.245283e+00
152 |     min      1.000000e+00
153 |     25%      3.000000e+00
154 |     50%      4.000000e+00
155 |     75%      5.000000e+00
156 |     max      5.000000e+00
157 |     Name: topic2, dtype: float64
158 | 


--------------------------------------------------------------------------------
/baseline/big_old_baseline.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | # @author:bryan
  3 | # blog: https://blog.csdn.net/bryan__
  4 | # github: https://github.com/YouChouNoBB/2018-tencent-ad-competition-baseline
  5 | import pandas as pd
  6 | import lightgbm as lgb
  7 | from sklearn.model_selection import train_test_split
  8 | from sklearn.feature_extraction.text import CountVectorizer
  9 | from sklearn.preprocessing import OneHotEncoder, LabelEncoder
 10 | from scipy import sparse
 11 | import os
 12 | 
 13 | ad_feature = pd.read_csv('../data/adFeature.csv')
 14 | if os.path.exists('../data/userFeature.csv'):
 15 |     user_feature = pd.read_csv('../data/userFeature.csv')
 16 | else:
 17 |     userFeature_data = []
 18 |     with open('../data/userFeature.data', 'r') as f:
 19 |         for i, line in enumerate(f):
 20 |             line = line.strip().split('|')
 21 |             userFeature_dict = {}
 22 |             for each in line:
 23 |                 each_list = each.split(' ')
 24 |                 userFeature_dict[each_list[0]] = ' '.join(each_list[1:])
 25 |             userFeature_data.append(userFeature_dict)
 26 |             if i % 100000 == 0:
 27 |                 print(i)
 28 |         user_feature = pd.DataFrame(userFeature_data)
 29 |         user_feature.to_csv('../data/userFeature.csv', index=False)
 30 | train = pd.read_csv('../data/train.csv')
 31 | predict = pd.read_csv('../data/test1.csv')
 32 | train.loc[train['label'] == -1, 'label'] = 0
 33 | predict['label'] = -1
 34 | data = pd.concat([train, predict])
 35 | data = pd.merge(data, ad_feature, on='aid', how='left')
 36 | data = pd.merge(data, user_feature, on='uid', how='left')
 37 | data = data.fillna('-1')
 38 | one_hot_feature = ['LBS', 'age', 'carrier', 'consumptionAbility', 'education', 'gender', 'house', 'os', 'ct',
 39 |                    'marriageStatus', 'advertiserId', 'campaignId', 'creativeId',
 40 |                    'adCategoryId', 'productId', 'productType']
 41 | vector_feature = ['appIdAction', 'appIdInstall', 'interest1', 'interest2', 'interest3', 'interest4', 'interest5', 'kw1',
 42 |                   'kw2', 'kw3', 'topic1', 'topic2', 'topic3']
 43 | for feature in one_hot_feature:
 44 |     try:
 45 |         data[feature] = LabelEncoder().fit_transform(data[feature].apply(int))
 46 |     except:
 47 |         data[feature] = LabelEncoder().fit_transform(data[feature])
 48 | 
 49 | train = data[data.label != -1]
 50 | train_y = train.pop('label')
 51 | # train, test, train_y, test_y = train_test_split(train,train_y,test_size=0.2, random_state=2018)
 52 | test = data[data.label == -1]
 53 | res = test[['aid', 'uid']]
 54 | test = test.drop('label', axis=1)
 55 | enc = OneHotEncoder()
 56 | train_x = train[['creativeSize']]
 57 | test_x = test[['creativeSize']]
 58 | 
 59 | for feature in one_hot_feature:
 60 |     enc.fit(data[feature].values.reshape(-1, 1))
 61 |     train_a = enc.transform(train[feature].values.reshape(-1, 1))
 62 |     test_a = enc.transform(test[feature].values.reshape(-1, 1))
 63 |     train_x = sparse.hstack((train_x, train_a))
 64 |     test_x = sparse.hstack((test_x, test_a))
 65 | print('one-hot prepared !')
 66 | 
 67 | cv = CountVectorizer()
 68 | for feature in vector_feature:
 69 |     cv.fit(data[feature])
 70 |     train_a = cv.transform(train[feature])
 71 |     test_a = cv.transform(test[feature])
 72 |     train_x = sparse.hstack((train_x, train_a))
 73 |     test_x = sparse.hstack((test_x, test_a))
 74 | print('cv prepared !')
 75 | 
 76 | 
 77 | def LGB_test(train_x, train_y, test_x, test_y):
 78 |     print("LGB test")
 79 |     clf = lgb.LGBMClassifier(
 80 |         boosting_type='gbdt', num_leaves=31, reg_alpha=0.0, reg_lambda=1,
 81 |         max_depth=-1, n_estimators=1000, objective='binary',
 82 |         subsample=0.7, colsample_bytree=0.7, subsample_freq=1,
 83 |         learning_rate=0.05, min_child_weight=50, random_state=2018, n_jobs=-1
 84 |     )
 85 |     clf.fit(train_x, train_y, eval_set=[(train_x, train_y), (test_x, test_y)], eval_metric='auc',
 86 |             early_stopping_rounds=100)
 87 |     # print(clf.feature_importances_)
 88 |     return clf, clf.best_score_['valid_1']['auc']
 89 | 
 90 | 
 91 | def LGB_predict(train_x, train_y, test_x, res):
 92 |     print("LGB test")
 93 |     clf = lgb.LGBMClassifier(
 94 |         boosting_type='gbdt', num_leaves=31, reg_alpha=0.0, reg_lambda=1,
 95 |         max_depth=-1, n_estimators=1500, objective='binary',
 96 |         subsample=0.7, colsample_bytree=0.7, subsample_freq=1,
 97 |         learning_rate=0.05, min_child_weight=50, random_state=2018, n_jobs=-1
 98 |     )
 99 |     clf.fit(train_x, train_y, eval_set=[(train_x, train_y)], eval_metric='auc', early_stopping_rounds=100)
100 |     res['score'] = clf.predict_proba(test_x)[:, 1]
101 |     res['score'] = res['score'].apply(lambda x: float('%.6f' % x))
102 |     res.to_csv('../data/submission.csv', index=False)
103 |     os.system('zip ../data/baseline.zip ../data/submission.csv')
104 |     return clf
105 | 
106 | 
107 | model = LGB_predict(train_x, train_y, test_x, res)
108 | 


--------------------------------------------------------------------------------
/baseline/data_to_ffm_format.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | @author: Infaraway
 4 | @time: 2018/5/7 20:09
 5 | @Function: transform data to ffm format
 6 | """
 7 | 
 8 | import hashlib
 9 | 
10 | 
11 | def hashstr(str, nr_bins):
12 |     return int(hashlib.md5(str.encode('utf8')).hexdigest(), 16) % (nr_bins - 1) + 1
13 | 
14 | 
15 | def gen_hashed_fm_feats(feats, nr_bins=int(1e+6)):
16 |     feats = ['{0}:{1}:1'.format(field - 1, hashstr(feat, nr_bins)) for (field, feat) in feats]
17 |     return feats
18 | 
19 | 
20 | def get_data():
21 |     one_hot_feature = ['LBS', 'age', 'carrier', 'consumptionAbility', 'education', 'gender', 'house', 'os', 'ct',
22 |                        'marriageStatus', 'aid', 'advertiserId', 'campaignId', 'creativeId',
23 |                        'adCategoryId', 'productId', 'productType']
24 |     vector_feature = ['appIdAction', 'appIdInstall', 'interest1', 'interest2', 'interest3', 'interest4',
25 |                       'interest5', 'kw1', 'kw2', 'kw3', 'topic1', 'topic2', 'topic3']
26 |     drop_feature = ['uid', 'label']
27 |     print("reading data")
28 |     f = open('testData.csv.csv', 'r')
29 |     line = f.readline().strip()
30 |     features = line.split(',')
31 | 
32 |     print(features)
33 |     dict = {}
34 |     num = 0
35 |     for line in f:
36 |         datas = line.strip().split(',')
37 |         for i, d in enumerate(datas):
38 |             if not dict.__contains__(features[i]):
39 |                 dict[features[i]] = []
40 |             dict[features[i]].append(d)
41 |         num += 1
42 | 
43 |     f.close()
44 | 
45 |     print("transforming data")
46 |     ftrain = open('data/testtest.ffm', 'w')
47 | 
48 |     for i in range(num):
49 |         feats = []
50 |         for j, f in enumerate(one_hot_feature, 1):
51 |             field = j
52 |             print('-----------dict[f][i]:', field, dict[f][i])
53 |             feats.append((field, f + '_' + dict[f][i]))
54 | 
55 |         for j, f in enumerate(vector_feature, 1):
56 |             field = j + len(one_hot_feature)
57 |             xs = dict[f][i].split(' ')
58 |             print('-----------xs:', xs)
59 |             for x in xs:
60 |                 feats.append((field, f + '_' + x))
61 | 
62 |         feats = gen_hashed_fm_feats(feats)
63 |         ftrain.write(dict['label'][i] + ' ' + ' '.join(feats) + '\n')
64 |         # print(dict['label'][i] + ' ' + ' '.join(feats) + '\n')
65 | 
66 |     ftrain.close()
67 | 
68 | 
69 | if __name__ == '__main__':
70 |     get_data()


--------------------------------------------------------------------------------
/baseline/input/testData.csv:
--------------------------------------------------------------------------------
  1 | aid,label,uid,age,gender,marriageStatus,education,consumptionAbility,LBS,interest1,interest2,interest3,interest4,interest5,kw1,kw2,kw3,topic1,topic2,topic3,appIdInstall,appIdAction,ct,os,carrier,house,advertiserId,campaignId,creativeId,creativeSize,adCategoryId,productId,productType
  2 | 699,0.0,78508957,1,1,10,1,1,576.0,70 100 48 49 23 36 11,,,,,124532 762727 771871 8561 610048,,,4908 9745 7070 4203 3292,,,,,0,0,0,,1082,295940,731679,59,13,0,6
  3 | 1991,0.0,3637295,3,2,13,7,0,353.0,93 109 75 29 59 49 6 5 73 46 22 36 11 3,,,,100 131 37 8 46,531499 542834 303322 503318 68211,89232 30101 117462 117548 85039,,8445 6762 7608 523 7688,4518 862 3643 702 5247,,,,3,1,1,,702,42104,1441131,53,10,4669,11
  4 | 1119,0.0,19229018,4,1,13 10,1,0,94.0,93 37 47 75 29 46 17 9 56 36 11,,,,,255920 190937 576486 460368 135689,,,941 4674 9735 8953 5352,,,,,0,0,0,,3993,63752,798752,59,10,19256,11
  5 | 2013,0.0,79277120,2,1,10,2,1,48.0,49 18,42 9 1 46 70 14 29 10 35,,,92 116 128 42 5 121 64,101914 294138,57664 32928 40670 11395 79112,,4538 6790 8850 3422 1940,2979 1877 358 666 4166,,,,3 1,1,1,1.0,6937,186348,1427984,35,89,3791,9
  6 | 692,0.0,41528441,1,1,10,7,1,333.0,70 109 12 75 76 106 29 59 49 41 111 85 17 56 112 36 34 11,47 42 73 9 31 46 32 70 21 33 19 60 13 74 16 55 7 37,,,100 72 37 11 4 79 113 6 46 59,155585 562294 112872 273650 371405,67951 40801 56571 119723 16159,,4535 5590 47 5570 2078,856 4587 1877 392 9103,,,,1 3 4,1,1,,6946,296367,455396,59,24,3794,11
  7 | 1119,0.0,62381478,4,1,10,1,0,668.0,93 46 17 56 36 11,24 4 30 48,,,100 131 37 8 6 46 129,131879 691885 588835 544089 540263,37802 58596 90034 77707 105942,,7163 7604 8778 9411 6293,7807 2445 2093 6411 5441,,,,4 1 3,1,3,,3993,63752,798752,59,10,19256,11
  8 | 117,0.0,36832847,4,1,11,1,1,333.0,93 118 12 75 4 69 8 29 59 49 27 6 42 92 46 71 35 36 34 11 20,24 52 21 30,,,127 52 100 72 131 37 13 47 4 109 29 8 6 46 74 59 14 10 83,589299 172344 497092 472718 107181,117546 114920 84508 63003 53212,,9826 4848 2987 523 9016,9853 8067 6132 9118 6945,,,,3 1,1,1,,702,18552,619519,53,24,5615,11
  9 | 389,0.0,22023604,1,1,10,1,1,85.0,75 29 36 11,73,,,72 92 116 13 47 71 109 8 28 6 43 76 20 57 56 38 114 52 107 79 69 121 59 10 110 120 93,671109 271833 338851 428098 706940,4343 25019 41793 44101,,105 8402 5178 3734 602,812 4746 1965 9837 2115,,,,3 1 2,2,2,,9106,662422,1354071,79,21,0,4
 10 | 432,0.0,39527165,5,1,10,7,2,809.0,93 70 86 76 28 59 49 15 72 95 46 85 36 11,22 73 79 9 1 46 70 6 74 29 35,,,100 72 92 131 37 116 79 71 109 29 8 41 42 76 27 75 46 121 64 21 40 93 15,761845 542962 670609 476729 250048,55816 1628 67579 766 80747,,7989 4486 3343 3752 9418,6755 898 5641 8985 5927,,,,3 1,1,3,,11437,18237,1081485,100,21,0,4
 11 | 1468,0.0,47080817,4,1,13 9,6,1,791.0,93 70 109 12 98 47 75 45 29 49 6 46 81 56 36 11 76 59 114 91 111 57 17 9,47 9 20 33 45,,,100 131 37 116 101 13 109 8 6 42 9 46 121 57 118 58 129 103,519363 573044 669604 133833 329854,61483 17460 11395 111807 79112,,6710 254 523 1450 8525,9817 3921 425 5944 9932,,,,3 1,1,1,1.0,915,994,1610899,60,51,0,4
 12 | 1468,0.0,56085174,1,1,13 9,7,1,246.0,70 47 76 9 36 11,,,,52 72 131 116 47 79 71 8 73 6 42 121 59 49 129 103 21,131835 462134 162804 605930 625029,88002 34525 92783 90599 66915,,369 3252 260 5828 1528,7648 7507 8250 9785 9767,,,,1 3,1,3,,915,994,1610899,60,51,0,4
 13 | 404,0.0,22117015,1,1,10,1,1,585.0,42 35 36 11 96,22 73 9 1 46 70 6 21 33 76 29 10 35,,,127 52 131 116 101 109 34 8 30 50 6 132 42 89 46 59 118 60 58 64 129,264294 346914 171619 152397 588177,113183 43582 8546 36764 93916,,2787 6535 5267 3596 7620,1352 982 2386 6041 8263,,,,1 3,1,1,1.0,821,888,1353465,59,10,439,11
 14 | 369,0.0,20159477,5,1,10,6,1,514.0,70 12 76 69 59 49 15 18 88 55 54,1 70,,,52 92 131 37 116 13 109 8 32 6 9 46 121 74 59 129 15,745188 220981 643238 405681,116369 9371 100125 34154 8412,,7066 758 9528 1609 1460,1066 5689 3771 5669 1886,,,,1 3,2,1,1.0,66025,170445,1229175,109,94,0,4
 15 | 411,0.0,49029134,2,2,10,2,1,809.0,109 47 59 49 119 113 9,31 44,,,52 72 131 116 78 71 8 17 113 30 50 28 73 6 132 99 76 121 60 129 93 44,67109 14694 4830 2723 359273,56438 66915 106194 116540 72676,,8901 9758 2277 8013 2654,1024 9204 4239 1632 3786,,,,0,0,3,,9106,163120,220179,79,21,0,4
 16 | 1940,0.0,31132182,2,2,10,7,0,662.0,70 76 36 11,70 54,,,,8561 731496 605930 591952 566440,81246 66915 45800 65891 40605,,6366 4535 7 5891 3657,8562 1825 1181 9806 7037,,,,0,0,0,,5552,30399,530889,35,27,113,9
 17 | 846,0.0,4039629,1,1,10,7,2,94.0,86 69 16 59 49 83 6 42 18 114 72 85 35 17 56 116 36 11 20,47 22 70 19 4 56 13 39,,,52 92 131 116 13 8 50 28 6 42 39 55 9 75 121 59 60 122 64 129,750790 315270 369678 499618 241171,81981 49132 5698 51429 81357,,1971 9826 5156 7651 123,8935 792 4480 9668 2801,,,,3 1,1,1,,8864,310473,1073411,22,43,25730,9
 18 | 543,0.0,20601591,1,1,10,7,1,458.0,109 47 59 49 119 95 17 9 56 36 65 11,,,,72 37 116 13 71 109 8 50 6 42 76 9 46 36 64 103 38 52 131 79 73 132 5 121 59 60 51 54,46340 367967 127250 118425 711071,80263 105115 38442 69077 118492,,2277 8053 9829 9826 881,5257 4019 7724 4107 6783,,,,3,1,0,,1082,295940,1391569,59,22,0,6
 19 | 1415,0.0,22744430,3,1,11,2,2,458.0,93 70 77 118 37 47 69 8 41 46 9 36 11 44,47 32 70 54 10,,,52 100 72 80 131 37 11 78 79 71 109 8 30 6 42 89 46 74 59 64 91 114 120,250648 13477 183412 490237 497092,105175 78448 15571 94809 45950,,1297 1614 537 506 9158,5584 7035 7552 4578 977,,,,3 4 1 2,1,0,,133292,464828,1334609,22,74,0,4
 20 | 136,0.0,9828105,4,1,11,6,2,812.0,93 70 12 75 76 4 69 29 49 95 46 36 34 11 38 44,63 24 73 30,,,127 100 131 37 116 4 8 6 76 39 27 46 121 59 129 40 93,338851 113419 497092 410690 210767,73237 95254 81476 120122 35408,,2566 9826 3582 9188 2723,5641 7621 6552 5884 816,,,,3 2 4 1,1,1,,452,50305,1187573,35,10,7992,11
 21 | 1918,0.0,10336143,4,1,11,5,1,585.0,93 70 37 109 47 4 76 28 49 15 6 119 46 9 116 36 11,,,,52 100 72 37 116 1 4 109 29 8 69 6 46 121 129,318446 237920 384825 481727 376552,31920 56906 52844 21942 45375,,8912 278 506 5568 1095,8577 4014 7610 6547 3390,,,,1 3,2,1,,158679,643438,1690612,60,4,0,4
 22 | 561,0.0,75233751,4,1,0,0,0,0.0,93 75 69 29 49 27 6 42 73 114 19 46 57 36 11 117 96 44,,,,,315951 10299 458661 130932 106345,,,8005 957 4959 6939 9826,,,,,3,0,0,,44008,100565,808799,79,21,0,4
 23 | 1468,0.0,72903924,5,1,10,2,0,258.0,70 12 106 88 36 11,,,,72 37 116 113 46 121,518300 100274 405681 31232 405351,55426 57123 92642 87912 82339,,9826 523 4421 4325 7914,2082 3723 1040 85 5242,,,,0,0,1,,915,994,1610899,60,51,0,4
 24 | 692,0.0,81670516,2,1,0,2,0,364.0,70 76 36 11,,,,72 11,374880 8561 177291 645705 289152,112877 95597 18185 17339 56986,,2935 6956 6661 2011 6254,2685 6559 1285 3313 8116,,,,3,1,1,,6946,296367,455396,59,24,3794,11
 25 | 1215,0.0,9947318,5,2,10,5,1,348.0,70 86 109 100 47 76 48 106 67 83 6 119 42 57 9 17 56 36 11 96,46 72 29,,,72 92 116 134 1 71 109 128 8 6 76 27 75 58 114 40 52 31 131 11 107 79 121 59 118 25 129 93,652765 487978 279516 315848 562294,36849 8546 119687 53212 111275,,9264 9826 5845 2686 6811,1586 9118 3664 6782 8054,,,,3 1,2,1,1.0,5459,172796,1360276,59,142,0,6
 26 | 846,0.0,35278604,1,1,12 13,7,1,464.0,70 86 109 76 49 122 6 119 72 17 56 116 36 11,22 73 24 9 1 52 46 70 21 4 80 30 74 39 23 14 29 10 35,,,72 92 37 116 13 79 128 50 73 6 132 19 111 9 75 46 121 94 59 129 35,714504 88709 646536 50273 764860,49404 117548 90248 120295 52864,,7410 6611 9826 4367 3203,6417 2277 8515 4818 5188,,,,3 1 4,1,3,1.0,8864,310473,1073411,22,43,25730,9
 27 | 174,0.0,64116325,1,1,10,7,1,184.0,93 109 47 6 119 46 9 116 36 11,52 21,,,104 100 72 116 13 47 78 1 71 109 8 30 50 28 6 76 20 46 35 38 114 21 52 131 11 29 16 73 121 59 10 22 93,618131 100680 459055 321918 158229,72528 63243 114920 1225 69067,,8792 9826 523 7684 14,8688 6045 2504 4226 4513,,,,3 1,1,3,,11487,668182,1512679,22,21,0,4
 28 | 1749,0.0,33838753,5,1,10,7,0,275.0,70 76 28 67 6 116 36 11,28 43,,,,617658 435330 371848 468212 583202,36865 83318 101193 50725 93916,,7113 4224 8901 7445 8732,810 7015 7881 5297 6796,,,,3 1,2,0,,21359,361928,585909,100,21,0,4
 29 | 1507,0.0,1320930,2,2,10,1,0,94.0,36 11,,,,131 37 71 8 6 42 76 27 46 59 25 64 130 90 44,,117462 55952 60959 88924 76242,,8110 7780 5119 5958 3111,5421 2482 163 8193 6945,,,,0,0,3,,327,358536,745048,35,67,113,9
 30 | 1254,0.0,28769142,5,2,10,7,1,886.0,109 47 75 29 59 49 6 39 116 36 11 65,22 79 73 70 6,,,72 92 116 53 47 1 71 109 8 30 50 86 6 76 75 58 114 52 31 131 11 79 61 89 121 59 118 60 10 18 129 96,373554 218365 410565 761845,90034 9679 93916 10358 18185,,46 9826 7684 4305 9158,6459 981 6311 5115 105,,,,3 1,1 2,3,,8350,244601,1383456,35,59,0,4
 31 | 1119,1.0,48708770,4,1,0,2,0,430.0,93 70 77 109 121 47 75 1 102 29 49 6 42 73 46 56 36 11 96 76 48 59 39 114 60 71 17 9 116 3,24 6 19 13 30,,,100 80 37 116 71 73 6 46 62 59 118 58 129 114 65,338851 463925 691885 80873 350047,74061 67582 44005 14660 104708,,6939 8366 1338 1450 5955,3274 3865 4159 7924 8465,,,,1 3,1,1,,3993,63752,798752,59,10,19256,11
 32 | 846,0.0,18634306,1,1,0,7,0,988.0,,,,,72 92 116 13 78 71 8 30 50 28 6 42 76 55 27 75 94 58 64 21 40 52 31 131 11 107 69 73 132 89 62 121 60 18 93,,,,,,,,,3 1,1,1,1.0,8864,310473,1073411,22,43,25730,9
 33 | 1119,0.0,40672131,4,1,13,0,1,687.0,93 46 36 11,,,,,490902 53354 422546 561451 461000,109762 116065 16082 109084,,5768 162 2617 1066 6939,6389 9875 7509 3664 1988,,,,0,0,0,,3993,63752,798752,59,10,19256,11
 34 | 916,0.0,39650505,3,2,11,1,1,612.0,93 70 86 109 100 76 75 106 29 67 49 6 119 18 73 95 72 46 57 36 11 87,24 46 70 30 14 54 29,,,100 72 37 11 17 50 46,18022 88633 293068 547858 10251,72453 89228 79280 16156 94601,,6661 9934 8992 6374 3036,8291 6024 8609 8712 6662,,,,3 1,2 1,0,,17597,51385,838056,35,25,0,6
 35 | 1415,0.0,60864809,5,1,11,7,1,27.0,70 86 109 75 29 49 6 2 42 73 35 36 11 76 48 28 106 67 59 122 119 114 39 111 113 23 57 116 87,24 73 30,,,,107700 249603 108440 279516 65237,90656 8546 18185 117548 32219,,9826 8826 7684 8383 9205,3394 2737 598 6459 2943,,,,1 3,1,0,,133292,464828,1334609,22,74,0,4
 36 | 1335,1.0,44072024,3,1,11,2,0,0.0,93 70 77 76 48 46 36 11,,,,,727009 338851 68812 8561 391535,87384 81211 97866 85039 56336,,7318 257 5828 8048 8402,1052 1825 8112 1188 3755,,,,3,1,0,,9106,645468,1769240,79,21,0,4
 37 | 1781,0.0,44285573,4,1,11,6,1,209.0,70 86 75 69 48 29 59 67 49 83 41 122 2 113 36 11 38,73 79 31 1 70 6 4 18 41 48 44 27,,,,338851 515009 687036 139628 550675,85580 9088 56438 47308 40921,,8445 2747 3162 7375 644,5115 598 5641 2965 8822,,,,1 3,1,0,,25420,135565,1606251,53,4,0,4
 38 | 2216,0.0,61237244,4,1,11,6,2,48.0,93 70 77 86 37 109 98 75 69 8 105 29 49 108 42 73 46 35 56 62 36 11 54 96 44 118 100 76 114 39 17 87,24 30,,,100 80 92 37 116 47 71 128 8 115 6 42 75 46 36 56 114 52 131 132 5 89 121 59 129 81,16860 761845 493555 348518 551823,18185 27844 76242 5827 83271,,1644 7684 644 3162 7688,8577 7949 8709 6586 1976,,,,1 3,2,3,,5459,172796,1321733,91,142,0,6
 39 | 117,0.0,47475273,4,1,0,2,2,222.0,93 70 77 86 37 109 75 4 1 102 33 8 29 27 108 6 42 53 72 46 35 56 36 11 96 30 118 76 39 71 57 17,,,,100 80 37 4 6 46 59 129,654027 532453 761845 691885 189810,34410 109981 11395 79112 28206,36983 30214 8820 56152 20006,6939 8525 9826 523 9324,7593 8830 7465 9482 4192,6048 5826 2334 6969 4345,,,3,1,0,,702,18552,619519,53,24,5615,11
 40 | 1119,1.0,64954925,4,1,11,7,1,687.0,93 70 86 37 75 4 69 29 83 6 42 104 46 36 11 96 44 100 76 48 106 41 84 71 116,24 30,,,,172567 10251 634625 388504 527776,32481 6918 44999 28033 47192,,2123 5828 4771 3014 4388,6269 3632 1570 3540 7555,,,,3 1,2,0,,3993,63752,798752,59,10,19256,11
 41 | 436,0.0,68093353,1,1,10,2,0,246.0,77 109 39 46 36 11,,,,,259878 796282 102556 226464 154710,11395 79112 61483 17460 4343,,2353 6966 7805 2459 6282,5566 7947 185 4050 5489,,,,3 1,1,0,,7926,378648,621766,77,8,0,4
 42 | 411,0.0,5256406,1,1,10,2,2,958.0,70 76,47 22 73 9 1 46 32 70 28 74 43 14 54 10 35,,,52 72 92 131 11 13 8 50 6 19 76 9 75 59 129 96 93,,8646 8546 90748 41548 93916,36983 51317 41633 51818 36499,6572 8984 6143 5703 1124,7205 2690 625 9927 8430,7883 5609 7150 1140 3809,,,3 1,1,1,,9106,163120,220179,79,21,0,4
 43 | 1379,0.0,69938159,2,1,12 13,7,2,114.0,70 76 75 29 6 116 36 11,46 29,,,72 116 50 41 6 121 59 60 129,602416 162804 47332 784470 425508,65128 974 17355 52864 87160,,7896 1950 3931 1778 6179,5689 3771 5669 5272 5854,,,,3 1 2,2,3,,8864,90700,469197,22,27,113,9
 44 | 432,0.0,64938005,4,1,11,7,1,296.0,93 70 12 45 106 29 6 73 46 60 17 56 116 112 36 11 87,24 30,,,100 72 80 37 4 113 69 6 46 129,645850 210767 412811 564214 122144,94407 8125 91012 85950 90275,,1584 9335 3052 3179 5473,3368 7962 9360 5772 973,,,,3 1,1,1,,11437,18237,1081485,100,21,0,4
 45 | 1605,0.0,34025684,2,0,15,2,1,0.0,,,,,,,109288 35373 18185 112877 58678,,,8704 7265 3810 3313 6168,,,,0,0,0,,11195,19215,1755470,53,140,0,4
 46 | 1566,0.0,54876557,2,1,10,2,0,116.0,49 23,,,,,659383 189237 546308 722409,,,8383 2734 8516 331 5566,,,,,0,0,0,,6946,296367,520004,59,24,3794,11
 47 | 432,0.0,38454452,4,2,11,1,1,809.0,70 118 76 75 4 8 106 29 67 73 46 36 11 3,,,,52 131 116 4 8 28 6 99 46 121 59 118 58,8561 568780 605930 606954 298533,87384 846 33055 120667 92155,,506 2949 164 8456 1450,9353 7180 364 9784 6378,,,,4 1 3,1,1,,11437,18237,1081485,100,21,0,4
 48 | 1918,0.0,60491542,5,1,11,7,1,585.0,93 70 86 118 109 76 4 75 28 8 29 108 6 119 46 71 116 36 11,63 24 30,,,100 72 92 37 116 47 78 4 71 8 30 6 42 27 75 46 58 64 21 40 52 131 11 132 89 121 59 118 129 22,291028 606795 572280 707862 531249,106673 105941 60043 2042 28681,,5271 1633 3393 3255 9779,8986 8361 102 8173 7407,,,,3 4 1,1,1,,158679,643438,1690612,60,4,0,4
 49 | 1415,1.0,51156977,4,1,11,1,1,85.0,70 30 77 75 76 8 29 67 27 42 92 46 35 17 56 36 11 96,47 24 73 32 33 19 13 30 74 45,,,127 52 72 80 31 92 131 37 116 47 12 8 69 28 6 75 20 46 121 59 129 81,688291 67406 122910 75722 18022,51399 104182 64220 45950 49193,,5338 7608 5935 7604 7456,7366 1094 2902 5188 1892,,,,1 4 3,1,1,,133292,464828,1334609,22,74,0,4
 50 | 1728,0.0,64335974,4,1,11,7,1,879.0,93 70 77 37 12 47 4 69 8 49 6 42 46 35 112 36 11 38 96 118 100 76 48 106 16 41 15 71 9,24 33 30 45,,,127 100 72 80 131 37 4 71 48 8 113 6 42 89 46 59 36 103 38,761845 210767 60245 250260 363747,99827 83281 5262 70903 120959,,644 4367 7043 9826 9324,1761 2044 4441 5838 105,,,,3 1,2,1,,8203,42625,884105,59,218,0,6
 51 | 1904,0.0,40936344,1,1,13 10,7,0,346.0,,,,,119 72 116 71 8 69 50 6 42 19 5 76 59 60 64 129 130 21 93,,66009 100552 100125 76242 13947,,,2905 8197 8357 6160 2239,,,,3 1,1,2,1.0,8203,37818,414738,109,142,0,6
 52 | 1407,0.0,58000262,1,1,10,2,1,112.0,49 6 89 18 73 116 36 11 87,24 30,,,52 72 31 92 131 37 116 13 8 41 6 76 111 89 46 121 59 14 129 130 93,422643 787308 446434 639553 709965,51872 92596 39525 81211 92155,,298 2153 5512 1803 7089,7977 9838 2353 2372 9008,,,,3 1 4,1,1,1.0,702,12724,962151,105,10,4669,11
 53 | 302,0.0,81352665,2,2,10,1,1,792.0,70 76 28 42 62 96,22 70 54,,,104 72 13 78 109 8 30 50 28 6 42 76 27 20 58 64 35 85 40 131 11 79 89 59 24 118 129 130 51 120 93 83,361151 258463 318593 204792 746484,1373 51931 14528 41409 66079,,286 7620 7143 2396 6666,5169 8402 966 3051 1765,,,,3 1,1,1,,18621,745599,1628574,91,21,0,4
 54 | 1291,0.0,38358319,5,2,11,5,1,348.0,70 76 69 106 59 67 49 6 116 36 11 20,58 24 46 70 4 30 41 29,,,52 72 131 116 107 134 78 71 109 8 30 28 55 27 20 121 114 90 21 26,162804 649472 69826 92320 246587,105115 56438 119223 102944 40921,,793 328 6011 4701 8525,2351 8883 4123 8040 9487,,,,3 1,2,2,,1082,40405,1434096,53,13,0,6
 55 | 692,0.0,52145834,3,1,11,2,2,210.0,93 70 76 46 36 11,,,,,488669 605930 208810 317494 562294,103447 5623 12933 62728 117462,,4305 6699 2711 397 3476,9347 5331 125 5830 8525,,,,0,0,0,,6946,296367,455396,59,24,3794,11
 56 | 903,0.0,2217066,5,1,11,7,1,346.0,93 70 118 37 109 47 76 8 106 59 49 41 6 119 46 60 23 57 9 116 36 11 65,46 70 4 39 14 10 29,,,52 100 72 92 131 37 47 8 50 41 6 42 19 76 75 46 74 59 10 64 129 103 96,233867 453249 172534 162804 265174,33201 37224 46553 90287 24409,,4449 7856 9826 6627 3941,6307 1806 9491 5768 1471,,,,3 1 2,1,1,,285,59293,210057,42,30,0,6
 57 | 1254,0.0,52265874,1,2,10,7,2,440.0,70 109 47 76 45 16 106 29 59 49 41 6 5 73 68 9 17 56 116 36 11 90,9 1 70 33 40 54,,,72 33 92 116 13 78 71 109 8 30 50 86 28 6 42 99 76 55 27 9 20 58 64 56 21 40 52 31 131 11 107 79 113 69 41 89 102 121 59 118 60 18 129 93 83,700362 457366 225420 61266 545668,52864 461 12933 846 35373,41633 44157 36983 25613 34599,7628 9673 976 166 2868,7003 2776 780 9784 7136,5918 4297 899 447 7242,,,1 3,1,1,1.0,8350,244601,1383456,35,59,0,4
 58 | 1496,0.0,22862228,4,2,15,6,1,718.0,70 12 75 69 28 29 59 67 49 115 88 36 34 11 54,52 21 19 13,,,92 116 13 134 78 71 109 8 32 30 50 28 6 42 76 55 9 36 64 56 131 132 121 23 59 60 129 51,10518 591351 232760 522888 466095,846 20656 105115 114125 107309,,3925 1431 46 5595 7684,4883 428 5142 7286 3495,,,,3 1 2,1,2,,702,52258,1673644,59,10,4669,11
 59 | 916,0.0,21818509,5,2,13 15,3,2,514.0,70 100 47 76 9 36 11,63 47 73 79 1 70 20 6 30,,,52 72 92 131 11 8 69 50 6 42 75 89 123 64 129 103,580549 307587 447798 100680 423580,62527 102474 104182 45950 14470,,803 4061 6537 2696 4301,1023 9110 5655 9541 4922,,,,3,1,1,,17597,51385,838056,35,25,0,6
 60 | 1605,0.0,2258262,1,1,10,7,1,66.0,70 100 75 69 29 49 15 73 36 11 54 3,,,,100 13 78 71 30 46 94 14 84 56,796636 672143 36095 423580 236021,43585 32356 51859 97701 41740,,9264 8525 9464 6762 2291,4929 5884 9300 131 1808,,,,1 3,1,1,,11195,19215,1755470,53,140,0,4
 61 | 70,0.0,31511040,1,2,10,3,0,94.0,70 86 100 47 76 28 108 9 36 11,66 47 70 20 10,,,72 116 13 134 47 109 8 6 42 76 9 64 52 31 131 11 107 79 69 5 89 121 74 59 10 130 96 93 83,120259 279516 499813 95059 61287,18185 117462 1718 23924 113315,,325 1661 6989 6939 4495,4642 3253 6945 3601 6237,,,,3 1,1,1,1.0,327,5616,5977,22,27,113,9
 62 | 302,0.0,9685368,5,2,11,7,1,428.0,70 86 28 59 49 108 6 18 73 114 113 57 116 36 11 87,73 70 4 74 48 10,,,72 131 116 109 8 113 50 66 6 42 121 59 60 64 129 83,13321 64588 47358 54928 133606,36764 93916 8546 120667 16186,,9578 9826 1977 9292 9022,9708 6700 9826 9047 414,,,,3 1,1,1,,18621,745599,1628574,91,21,0,4
 63 | 2031,0.0,56023654,1,2,10,7,1,972.0,70 109 28 119,,,,72 92 116 134 78 71 109 8 30 6 2 42 76 75 64 133 56 103 114 52 31 131 79 89 121 59 120 93 83,353815 64588 515008 223841 89154,102474 84817 56906 66915 85039,,7684 1443 4876 9682 1426,5371 2823 5008 5302 2902,,,,1,1,3,,83042,280832,425385,22,70,0,4
 64 | 916,0.0,24552689,2,2,10,7,2,86.0,70 69 28 106 59 49 6 58 116 36 11 38,66 22 73 70 19 36 74 54,,,127 104 72 116 47 78 71 109 8 30 50 28 98 6 7 76 20 46 58 91 21 131 11 61 73 19 89 102 121 59 118 60 18 129 81 130 120 93 83,47358 115103 571034 406593 640848,4201 55108 11547 5685 85177,,9826 7706 7959 4351 4742,7068 1553 8184 296 598,,,,1 3 4,1,1,,17597,51385,838056,35,25,0,6
 65 | 1119,0.0,72702682,4,1,11,3,0,203.0,93 77 109 12 75 4 8 29 49 27 6 42 73 19 46 35 56 112 36 11 118 59 114 39 111 85 71 23 17 87,4 41,,,100 80 116 13 4 8 50 28 6 42 76 27 20 46 64 122 40 52 131 17 61 121 74 59 60 129 96 93,764860 277671 283399 271833 691885,117462 56819 51859 88143 60276,,3866 654 7318 3162 3450,244 8193 2783 3953 3142,,,,1 3 2 4,1,1,,3993,63752,798752,59,10,19256,11
 66 | 1950,0.0,6190305,2,1,10,2,0,333.0,,,,,,,,,,,,,,0,0,0,,41806,233191,1016027,35,13,27855,9
 67 | 765,0.0,77351073,5,2,13 10,7,1,333.0,70 76 28 36 11,79 46 6 37,,,52 72 80 131 116 71 8 113 28 6 76 20 46 121 23 59 114 93 44,737447 796062 76132 773477 258788,93916 117462 74065 74249 121235,,8399 7262 1338 1947 8901,981 9565 3865 4381 4310,,,,3 1,1,1,,388,134068,1271219,35,27,113,9
 68 | 2044,0.0,7984138,5,2,12 13,7,2,87.0,70 100 47 76 75 106 29 41 9 36 11,,,,104 72 33 116 107 79 109 69 6 87 42 27 89 121 59 64 40,289152 627369 258177 597905 484088,94520 37224 85177 74065 10267,,9163 4017 3110 2459 7684,5108 6552 597 6522 4148,,,,1 3,1,3,,49772,487541,869346,35,9,0,9
 69 | 1566,0.0,66815603,1,1,13 10,6,1,514.0,47 75 29 59 49 85 23 9 36 11,,,,52 131 8 6 59 129,399416 289609 572280 771323 589299,109660 974 47288 3465 38842,,2011 3385 9095 644 7098,8726 2177 8822 4323 4819,,,,1 3,1,1,,6946,296367,520004,59,24,3794,11
 70 | 1379,0.0,70317685,5,1,13 9,7,1,687.0,93 70 86 109 12 47 75 8 29 49 83 6 2 42 46 88 35 56 112 36 11 118 28 59 119 89 114 91 58 17 116 21 65,63 22 79 73 9 46 70 6 21 33 4 80 28 30 41 75 48 37 65 29,,,52 100 72 131 116 101 1 71 109 29 124 8 69 50 6 132 42 46 121 60 129 103 21,468512 445518 471038 790848 218639,121876 101276 63051 113228 56823,,9826 7684 8820 1450 6939,3747 3858 3124 7799 1656,,,,3 1 4 2,1,1,,8864,90700,469197,22,27,113,9
 71 | 699,0.0,32650128,1,1,10,7,1,275.0,93 70 47 28 46 17 9 56 36 11,22 73 79 24 31 70 6 21 4 30 41 48 44,,,100 72 92 37 116 134 4 109 128 8 50 115 6 42 46 64 52 131 17 69 89 121 23 59 54,738790 181856 131520 277173 100680,34410 109981 43040 14449 77308,,3052 8212 7766 5754 1196,9385 3629 5976 5532 8922,,,,3 1,1,3,,1082,295940,731679,59,13,0,6
 72 | 369,0.0,16417505,2,1,10,2,0,464.0,109 59 49 15 119 17 56 36 11,,,,92 116 71 128 50 28 6 132 99 75 59 60 21,126393 343050 407851 87586 398226,34410 67015 28692 80220 79200,,4264 3422 2968 140 9631,881 2654 9708 7767 7828,,,,3 1 2,1,3,,66025,170445,1229175,109,94,0,4
 73 | 191,0.0,34971131,2,1,10,2,1,300.0,42 62,,,,78 30 6 42 76 23 64 130,480013 53354 231370 40878,117462 100125 34154 40971 56819,,7468 8501 4209 1514 9824,3023 5234 922 3489 6160,,,,3,1,1,,25485,50138,58465,35,51,15454,11
 74 | 1415,0.0,57819574,3,1,11,6,2,958.0,93 70 86 76 4 106 67 49 42 73 72 46 23 35 36 11 87,,,,100 131 78 4 71 8 30 6 42 46 23 103 114,302611 717819 678510 365269 51739,17392 88936 4443 23417 82374,,6762 3863 523 8088 8557,1957 3795 3995 4745 7937,,,,1 3,2,2,,133292,464828,1334609,22,74,0,4
 75 | 916,0.0,28622289,5,2,10,6,2,502.0,93 86 69 49 83 6 46 60 71 23 36 54 11 38 44,66 22 70 54,,,,780194 480775 172873 76762 420904,15941 98059 85039 1227 14946,,3194 1923 1187 180 3182,2478 982 5800 2076 5810,,,,1 3 2,1,0,,17597,51385,838056,35,25,0,6
 76 | 1566,0.0,7994109,5,1,6 13,7,1,946.0,70 86 37 118 12 75 76 28 8 29 83 43 46 115 88 112 36 34 11,,,,131 37 116 13 71 8 6 5 9 46 121 94 74 122 129 35 21,468512 72241 298850 13228 369678,28206 11395 79112 92902 17392,,9826 9205 6939 3459 9845,523 2955 9213 5146 3107,,,,1 3,1,3,,6946,296367,520004,59,24,3794,11
 77 | 966,0.0,53608383,3,1,11,1,1,437.0,93 70 77 109 98 47 69 45 102 8 29 49 26 46 56 36 11 44 25 118 76 48 106 67 41 119 95 17 65 80,63 30,,,,497092 323029 220178 319506 89154,110506 55238 29350 20274 67582,,8618 9826 6836 6661 644,5169 8387 4518 1758 4401,,,,3 1,1,0,,915,31020,666210,22,51,0,4
 78 | 561,0.0,35347584,1,1,13 10,7,1,192.0,93 4 69 59 49 15 122 6 46 23 57 116 36 54 11,,,,100 72 37 116 13 4 71 8 6 42 9 46 94 85 56 103 114 21 65 52 131 41 121 59,347508 32976 172567 738145 681561,18185 56819 38306 93916 58846,,3658 9290 9022 2317 468,7816 3560 4323 5732 2631,,,,1 3,2,3,,44008,100565,808799,79,21,0,4
 79 | 1201,0.0,53886088,2,2,13 10,7,1,296.0,70 76 69 28 106 41 42 17 56 62 36 11 20,,,,33 78 71 109 30 28 6 42 76 55 89 59 18 58 64 129 56 130 103 93 83,319706 561405 574847 557440 467108,41565 66915 51859 69786 119226,,4080 6634 9402 5382 5269,2548 6792 6985 989 7342,,,,3 1,1,3,,5552,68476,1172593,35,27,113,9
 80 | 692,0.0,73996764,1,1,10,7,1,544.0,69 38,,,4 5,72 37 116 11 71 69 6 87 46 121 59 129 21,,5623 18185 58598 22630 76242,,2887 8034 1803 1398 7407,9270 7654 2957 105 6583,,,2183 4807 2911,1 3,1,3,,6946,296367,455396,59,24,3794,11
 81 | 369,1.0,32300456,1,1,6 13,7,1,585.0,70 109 76 16 59 49 122 6 73 111 60 113 57 116 36 11 90 87,24 46 30 72,,,52 131 37 13 71 8 50 6 76 27 46 126 59 60 56 114 96 40 93,91104 614872 420403 778320 612659,95439 3372 117660 79604 18185,,5840 3814 5997 4940 117,8917 41 4793 3298 1063,,,,1 3,1,1,,66025,170445,1229175,109,94,0,4
 82 | 136,0.0,40196103,4,1,11,6,2,210.0,93 70 77 37 109 12 47 4 75 8 29 49 6 46 112 36 11 51 118 76 28 106 59 64 119 18 57 9,63 47 24 73 9 31 32 21 4 30 48 7,,,52 100 72 80 131 37 78 4 8 113 30 69 41 6 46 59 129,338851 235002 561451 162804 650033,106910 82323 7794 89401 7827,,8048 7165 9397 4723 36,1094 5791 1877 9708 3184,,,,1 3 2,1,2,,452,50305,1187573,35,10,7992,11
 83 | 404,0.0,3690099,2,1,10,5,1,94.0,70 86 109 12 47 49 83 6 73 115 88 56 36 34 11 100 76 48 28 67 64 119 84 68 23 9 17 116 87,73 80,,,72 80 92 101 13 71 109 8 86 115 6 2 42 46 57 35 133 21 52 131 79 29 89 59 129,277172 267586 750628 118425 279516,67595 67294 37666 1225 44005,,9826 584 5309 7684 7612,6170 8075 2872 1380 6311,,,,3 1,1,2,1.0,821,888,1353465,59,10,439,11
 84 | 369,0.0,24131128,1,1,10,2,1,112.0,49 18,,,,52 92 131 116 13 78 71 124 8 30 50 28 132 2 42 5 55 24 35 133 114,44867 278255 378006 449721 111144,90599 56819 51859 76242 97765,,2224 972 9826 9016 4367,7612 9211 3222 8823 6311,,,,3 1 2,1,3,1.0,66025,170445,1229175,109,94,0,4
 85 | 1468,0.0,47910804,5,1,10,1,1,332.0,109 59 49 6 119 89 73 91 111 116 36 11 87,,,,72 131 116 29 8 73 6 132 42 97 121 59 36,463633 182986 580633 46340 625983,69067 63773 81592 113431 64956,,2654 4025 46 881 1469,8688 6045 2504 784 5370,,,,1 3 4,1,3,,915,994,1610899,60,51,0,4
 86 | 1379,0.0,13140929,5,1,11,7,2,195.0,36 11,22 24 73 46 70 30 74 14,,,6 42 74 64,298606 487978 32129 562294 397136,11395 57049 34802 23634 81493,8820 58327 33417 35940 54643,6762 3836 4971 9649 7131,4159 8384 6586 6994 8402,3418 6413 309 8788 1614,,,1 3,1,1,,8864,90700,469197,22,27,113,9
 87 | 894,1.0,3596334,4,2,11,7,1,188.0,93 70 77 37 109 98 75 33 28 29 42 46 111 35 36 11,58 79 24 73 1 46 70 6 19 28 13 30 74 43 29,,,127 52 100 80 131 37 78 8 30 6 46 74 59 129,338851 497092 224284 410690 100680,104708 105941 106673 60043 51247,,6939 2036 6816 9826 1379,1944 3279 6116 7403 4295,,,,3 1 4,2,3,,452,38391,43862,35,10,12193,11
 88 | 7,0.0,66429852,4,1,11,7,1,623.0,93 70 86 98 75 8 29 49 6 42 73 72 46 35 56 36 11 51 118 100 76 48 28 106 59 67 89 60 57 17 87,24 30,,,104 100 72 33 37 116 101 109 8 6 42 76 46 36 131 11 48 113 69 132 89 59 129 93,171741 249044 297041 637957 172972,32301 6918 44999 16522 34154,,2267 6586 9826 8672 7281,1633 7095 6250 6224 2635,,,,3 4 1,1 2,1,,3387,163957,221432,42,137,3826,11
 89 | 1379,0.0,60696969,2,1,13 10,1,1,360.0,59 49 89 18 36 11,42 1 52 70 21 10,,,116 13 78 71 30 61 50 6 5 76 9 89 94 35 21 93,601816 297041 442904 658713 642888,63196 77529 14306 73934 52489,,4620 4316 2028 8488 7322,803 8828 9968 1881 358,,,,1 3,1,1,,8864,90700,469197,22,27,113,9
 90 | 1468,0.0,35825134,2,1,10,2,0,585.0,,70 10 35,,,,,43582 8546 5623 6786 10695,,,1352 982 2386 1869 9261,,,,0,0,0,,915,994,1610899,60,51,0,4
 91 | 1819,0.0,77714697,3,2,0,2,0,839.0,70 76 28 6 71 36 11,,,,,162804 345784 529821 460690 8561,29946 87912 18185 5685 76242,,3414 2430 7382 9826 449,1365 4329 4009 4178 6945,,,,3 1,1,0,,10055,169332,1562482,53,192,0,6
 92 | 846,0.0,38348970,1,1,10,6,0,192.0,36 11,24 73 21 33 28 3 12 30,,,,329102 649409 23026 712002 8561,6365 26480 94407 8125 37890,,9675 4704 9457 7277 6463,2446 8306 4905 8715 9239,,,,3 4 1,1,0,,8864,310473,1073411,22,43,25730,9
 93 | 1918,0.0,82156772,4,1,11,6,0,131.0,93 70 86 47 76 75 48 28 106 29 59 49 64 2 42 46 17 9 56 62 36 11,58 24 52 21 49 30,,,127 52 100 72 37 48 8 113 28 6 20 46 59,562294 703270 88372 331116 367216,56195 20745 104708 53146 16465,,5382 5276 5229 9826 1634,5005 6058 8149 7242 8265,,,,1 3,1,1,,158679,643438,1690612,60,4,0,4
 94 | 561,0.0,50511083,4,2,11,7,1,451.0,70 118 76 4 48 28 8 106 59 49 114 46 36 11,9 33 45,,,77 100 72 37 116 13 78 4 109 8 30 50 28 6 76 27 9 20 46 58 40 131 29 113 61 41 16 66 121 59 118 129 96,367174 517924 468012 77820 667520,11395 79112 87384 81211 38163,,5103 8525 2873 9600 5828,6337 7805 3430 9358 5916,,,,3 1,1,1,,44008,100565,808799,79,21,0,4
 95 | 173,0.0,10178454,2,1,13 10,2,1,373.0,70 109 12 28 49 41 6 119 42 114 68 81 116 36 11 96,,,,80 37 116 78 109 8 30 50 6 42 76 46 58 103 119 52 131 61 73 132 89 121 59 118 60 129 51 120,735633 335634 118425 303439 751721,19721 41716 37848 96549 95597,,7519 4943 9826 7613 5384,5191 691 4686 4034 8449,,,,3 1,1 2,1,1.0,6937,186348,267290,35,89,3791,9
 96 | 1379,0.0,61856062,2,2,10,2,1,683.0,6 116,22 58 24 79 9 70 6 21 33 4 49 30 41,,,52 72 131 79 8 6 129,191763 8561,27676 32550 13119 70625 98049,,1823 8775 7667 3883 1596,5274 3629 7374 5524 8782,,,,3 1 2,1,1,,8864,90700,469197,22,27,113,9
 97 | 191,0.0,55861836,2,1,13 10,6,1,346.0,70 118 76 1 75 69 102 33 28 8 29 59 49 6 42 114 35 116 36 11 44,79 6,,,92 116 13 1 71 109 34 8 30 50 6 76 27 75 9 126 94 57 84 114 40 52 73 132 125 89 121 74 59 60 93,627157 605930 40430 746587 408789,55843 24307 77678 18185 121237,,4399 9826 449 5869 164,5658 4574 8881 9250 4381,,,,3 1 2,1,3,1.0,25485,50138,58465,35,51,15454,11
 98 | 2048,0.0,19662838,2,2,13 10,1,0,687.0,109 47 119 9 36 11,,,,,777889 339725 123790 27876 529583,,,6308 7089 9826 2296 1874,,,,,0,0,0,,8203,37818,240336,59,142,0,6
 99 | 1291,0.0,34552591,5,1,2 13,6,1,280.0,93 70 86 118 12 47 75 76 8 106 29 59 49 72 46 115 17 9 56 112 36 34 11,24 73 4 30 48,,,52 100 131 37 13 45 8 30 50 6 46 59 24 14,694136 334768 289672 271817 424303,102474 56400 84023 26546 37224,,5053 6718 5382 8003 5610,8577 2277 8752 450 6959,,,,3 1,2,1,,1082,40405,1434096,53,13,0,6
100 | 1107,0.0,42695592,2,2,10,2,2,945.0,,70 54,,,52 31 92 116 78 109 8 30 6 42 43 76 121 59 18 58 64 120 93,,4008 53143 75218 67579 115712,,,1725 882 9298 8683 8402,,,,3,1,1,,5552,68476,476885,35,27,113,9
101 | 1350,0.0,41497358,5,1,0,2,1,585.0,70 109 28 106 59 49 6 119 57 36 11,47 9 31 32 21 7,,,,575681 726923 164253 251617 279516,11395 79112 1225 16090 31287,,296 506 5563 1478 9357,441 134 8187 565 7914,,,,3 1,1,0,,7565,353610,1554384,109,94,0,4
102 | 2031,0.0,27195866,2,2,10,7,1,576.0,86 69 108 73 36 11 38 87,1 70 54,,,77 72 131 63 11 78 71 109 8 113 30 6 42 76 89 59 118 58 103 114 21 93,414079 739307 507912 640492 368434,34390 974 66915 120667 85930,,4022 9418 5731 5571 8351,3938 2584 8977 3701 4625,,,,3 1,1,3,,83042,280832,425385,22,70,0,4
103 | 688,0.0,16162119,2,1,10,6,1,432.0,70 75 69 28 29 27 6 7 19 60 36 11 20,47 58 42 73 24 9 31 52 1 46 70 6 21 33 4 71 30 23 37 29 53 82 74 48 39 44 10,,,127 52 72 92 131 37 116 47 79 71 8 50 6 111 75 46 121 60 10 129 21,773220 11404 365269 630327 793589,11395 79112 97754 43225 2095,,6427 3229 4608 2374 8710,7062 9430 6945 7457 7770,,,,4 1 3,1,1,,915,994,1753448,60,51,0,4
104 | 1201,0.0,68010161,2,2,13 10,2,2,437.0,,70 54 10,,,,,67579 29946 66915 85039 28023,,,9018 8763 3773 263 6286,,,,1 3,1,3,,5552,68476,1172593,35,27,113,9
105 | 191,0.0,29352320,2,1,10,7,1,432.0,47 6 22 9 36 11,22 42 46 70 28 43 54 29 10,,,92 37 116 78 71 109 128 8 30 6 42 43 76 46 58 64 56 52 131 121 23 59 18 129 93 15,758674 333303 517534 397243 605677,33961 30855 80452 55867 37979,,6179 5344 385 7465 9481,8005 1386 7772 2669 5200,,,,3,2,2,,25485,50138,58465,35,51,15454,11
106 | 191,0.0,68506212,1,1,10,2,1,434.0,93 70 75 76 29 73 46 36 11 90,,,,,453006 504018 395787 162804 131879,117462 34128 72933 51859 63352,,1937 2013 1885 6851 3102,7736 9047 3023 5230 5234,,,,0,0,0,,25485,50138,58465,35,51,15454,11
107 | 914,0.0,14271524,4,1,11,1,2,116.0,93 70 77 37 109 98 75 69 8 29 49 27 19 46 56 36 11 44 118 76 59 41 91 17,,,,100 80 37 4 76 46 93,322345 337178 497092 777052 621989,17861 63924 20861 89027 76242,,7165 1953 523 9826 6070,2406 13 8645 4883 507,,,,1 3,1,1,,47823,111645,141973,100,21,0,4
108 | 1284,0.0,53470326,4,1,11,6,1,209.0,93 70 77 86 37 100 98 76 75 28 29 59 49 6 72 46 116 36 11,47 22 73 24 9 69 70 21 30 74 23,,,100 80 37 116 28 98 6 46 121 59,524306 453518 711140 238846 529100,44571 86574 19669 89401 110671,,6254 9826 8726 8829 1172,9027 6945 8704 4128 3460,,,,3 1 4,2,3,,6841,159118,1758880,42,10,3733,11
109 | 1107,0.0,75761304,5,2,10,7,0,325.0,,52 70 21 54,,,,,106194 77689 38500 29033 67313,,,7892 1176 2216 2187 5815,,,,0,0,0,,5552,68476,476885,35,27,113,9
110 | 191,0.0,50045382,1,1,13 10,2,1,112.0,75 29 59 49 122 89 18 36 11,73 9 4 26 39 23,,,52 92 131 63 78 109 8 30 6 42 76 75 89 62 59 64 129 93,338851 559130 53354 742497 237738,10358 63898 4770 13956 69033,,2011 6762 1820 7684 7598,2400 7780 7318 3308 1489,,,,1 4 3,1,1,1.0,25485,50138,58465,35,51,15454,11
111 | 1468,0.0,58950186,5,1,13,7,1,678.0,70 109 12 47 75 69 33 29 49 6 115 88 81 56 36 34 11 20 28 59 122 114 111 23 17 9 116,47 69,,,100 72 92 37 116 13 78 8 30 50 6 42 111 9 46 94 57 35 103 52 31 131 124 29 61 121 23 59 129,768010 547462 394079 241887 74461,3490 85930 118492 66760 80082,,1835 3568 9826 3118 9256,6710 9427 7433 9438 3018,,,,3 1,1,1,1.0,915,994,1610899,60,51,0,4
112 | 70,0.0,73222626,2,2,10,2,2,769.0,86 75 29 6 2 71 36 11,70 4 39 54,,,77 104 131 78 71 109 8 30 28 6 7 99 55 20 59 120 21,411962 338851 773027 221215 775431,15571 95597 94789 12937 117462,,644 3162 9205 8822 7468,1881 9108 803 8886 7101,,,,3,1,1,,327,5616,5977,22,27,113,9
113 | 1566,0.0,52203658,2,1,10,2,2,972.0,70 28 49 73 17 58 56 36 11 87,,,,6 76 27 59 51 40,500054 764353 89154 371241 6618,53851 18185 57844 30191 95822,,9026 46 166 2272 5402,8890 5835 1633 4797 4248,,,,3 1,2,1,,6946,296367,520004,59,24,3794,11
114 | 286,0.0,30810158,2,1,10,7,0,951.0,70 109 47 48 106 119 9 36 11,24 9 30,,,92 37 116 78 30 86 73 6 42 76 89 46 18 58 64 129 93,338891 407303 515478 8561 717647,90248 6918 44999 53818 60238,,3036 8522 9462 3231 7171,1712 8577 9438 2376 7909,,,,3 2 4,2,1,,25485,104406,131847,42,51,15454,11
115 | 529,0.0,35466183,3,1,0,3,1,492.0,93 70 118 76 75 48 28 8 29 6 42 73 26 46 57 62 36 11 3 25,79 24 73 52 70 6 21 4 30 74 10,6 8 10 4 9 1 7 3 5 2,,100 131 37 116 78 8 30 6 42 5 76 46 74 59 129 68 96,378633 384549 734925 8561 775554,61483 17460 87384 81211 65838,36983 25452 37558 35051 55758,3851 1338 1031 6157 9264,1079 2685 9854 4557 9283,1038 43 3651 8439 7393,60806 8899 7764 50585 19763 45857 50704 12226 45134 47751 11938 19039 13158 47826 59290,,1 3,1,1,,10122,163352,220558,35,10,3733,11
116 | 1605,0.0,38026129,2,1,10,7,0,809.0,70 109 47 75 28 29 49 15 119 9 36 11,22 70 10,,,71 109 50 16 6 59 129 56 38 83,351497 27382 570208 347740 435043,102474 82720 48733 31076 85264,,9826 4816 8088 2287 1796,5764 4073 7708 9751 3600,,,,3 1,2,1,,11195,19215,1755470,53,140,0,4
117 | 561,0.0,52873995,1,1,11,2,1,431.0,93 70 86 109 12 98 75 69 29 49 83 6 72 46 81 36 11 20 28 59 41 119 95 91 111 60 22 71 57 116,,,,100 72 92 116 13 47 1 78 71 109 8 30 50 86 6 42 27 9 46 122 85 103 21 40 65 52 131 11 79 29 132 19 97 89 121 23 59 129 22,517534 171066 405889 462178 120065,8546 37224 73507 104156 50832,,4225 9826 2654 523 2941,5115 7903 4797 2979 8116,,,,3 1,1,1,,44008,100565,808799,79,21,0,4
118 | 173,0.0,53468309,1,1,10,7,1,502.0,70 37 12 100 76 49 18 46 36 34 11,9,,,80 131 37 13 78 71 8 30 61 50 6 42 76 27 9 46 59 129 68 38 21 40 93,573044 330347 289152 640481 495006,65668 113002 43585 305 95597,,9826 7457 8322 4848 6762,4975 6423 296 6311 5615,,,,1 3,1,1,,6937,186348,267290,35,89,3791,9
119 | 1566,0.0,14974603,2,1,10,1,1,115.0,109 49 91 111 17 58 56 36 11,22 70 21,,,131 8 50 60 18 58,140919 633715 160924 384951 562294,718 286 31506 112877 95597,,88 6517 8538 5115 2737,2372 6468 9708 2353 4484,,,,1 3 2,1,0,,6946,296367,520004,59,24,3794,11
120 | 1950,0.0,54704299,1,1,13 10,7,1,432.0,70 12 47 76 106 49 15 42 9 58 36 34 11 96,73 9 46 74 29,,,,697262 43935 21321 181856 306902,11395 79112 70387 111423 305,,2994 4647 5088 9826 5895,5142 3568 9705 8525 9567,,,,3 1,2 1,0,,41806,233191,1016027,35,13,27855,9
121 | 411,0.0,764565,4,1,10,7,1,638.0,49 18,,,,,92104,95444 81436 21577 109180 43391,,2570 6563 7684 727 9826,1272 3198 4159 6145 1911,,,,0,0,0,,9106,163120,220179,79,21,0,4
122 | 432,0.0,2255587,4,1,11,7,2,402.0,93 70 77 37 12 98 47 75 8 29 49 42 46 88 35 56 36 11 118 76 28 106 59 41 18 114 23 9 17 21,47 24 79 73 9 53 46 20 6 4 30 74 41 39 23 72,,,77 100 72 80 33 37 116 13 108 47 1 109 8 50 6 42 46 122 103 52 113 61 41 89 121 74 59 10 54,568428 555051 36095 28984 582341,43350 29523 90931 3381 46769,,644 6978 3929 523 6939,7211 9413 7306 2599 8582,,,,3 1 4,1 2,1,1.0,11437,18237,1081485,100,21,0,4
123 | 1468,0.0,78517296,1,1,10,1,1,364.0,49 18 36 11,63 47 73 9 69 46 19 4 18 36 30 74 41 29,,,100 131 116 13 78 109 124 8 30 50 6 42 9 89 46 121 59 64 35 120,644068,102474 9279 60463 75000 104375,,9547 5400 5155 6252 2937,2913 3394 2737 598 6459,,,,1 3 4,1,1,,915,994,1610899,60,51,0,4
124 | 1119,0.0,49549070,5,1,15,1,1,910.0,70 75 28 29 36 11,,,,127 52 100 92 131 116 109 128 8 6 42 76 46 121 59 129 103 96 15,196312 8561 605930 567871 620347,70672 80808 23163 116065 38500,,2905 2452 6532 9826 9508,1066 4426 4342 6417 3298,,,,3 1,2,1,,3993,63752,798752,59,10,19256,11
125 | 1918,0.0,54468510,5,1,11,7,1,910.0,93 70 86 109 47 75 48 33 29 83 27 119 73 39 19 46 9 17 56 36 11 117,24 73 30 74,,,100 31 92 37 47 128 8 17 50 6 27 46 59 10 40,345784 93317 204257 578241 53354,11395 43134 86574 79112 13723,,2013 8133 7684 6939 7017,6547 5879 2774 9529 7120,,,,3 1 4,1,1,,158679,643438,1690612,60,4,0,4
126 | 1254,0.0,61321991,1,2,10,7,1,544.0,6 26 71 36 11 40,73 46 70 18 14 27 29,,,127 131 37 116 47 109 8 6 132 76 27 46 121 59 10 129 120 40 93,562294 542834 334799 677556 772742,44452 56906 74860 8546 98369,,7608 4980 8048 5935 5272,6459 6311 5115 5641 5377,,,,3 1,2,1,,8350,244601,1383456,35,59,0,4
127 | 1950,0.0,18919872,2,1,13 10,2,1,265.0,93 70 86 109 75 76 29 83 6 2 119 46 71 17 56 36 11,,,,52 100 72 31 131 116 71 29 8 41 73 6 132 87 46 121 59 129 21,522888 521866 53354 721882 473296,22154 91085 43585 88109 70539,,9826 6370 7816 2961 1123,5013 3145 1921 6100 7903,,,,1 3 4,1,1,,41806,233191,1016027,35,13,27855,9
128 | 692,0.0,60585954,5,1,10,7,1,112.0,70 76 49 41 73 23 17 56 36 11 90,22 79 73 46 70 6 19 18 13 74 29,,,72 92 116 13 71 8 50 28 98 6 76 111 9 38 52 131 107 79 16 132 19 70 121 59 60 96 93,131622 697129 715820 248375 117916,8546 16186 12933 13189 94457,,6519 976 4720 7989 3099,5641 6311 4260 672 7895,,,,4 1 3,2,1,,6946,296367,455396,59,24,3794,11
129 | 1468,1.0,63545149,5,1,10,6,1,246.0,93 46 36 11,,,,100 6 46 129,26682,46123,,8176 1525 4197 1646 7704,525 6374 1188 9163 9775,,,,3 1,2,1,,915,994,1610899,60,51,0,4
130 | 1254,0.0,50134234,1,2,13,3,1,642.0,70 77 118 98 75 76 69 8 29 59 49 114 46 36 54 11,73 46 4 74 48 29,,,72 80 92 116 13 134 71 109 8 50 28 6 42 76 27 9 75 20 46 64 103 114 21 40 106 52 131 11 79 113 61 121 59 129 120 51 93 54,121880 22550 125566 229272 600876,43977 87384 81211 120298 8546,,6976 6931 5898 46 7246,1649 5115 2816 8225 3466,,,,3 1,1,1,,8350,244601,1383456,35,59,0,4
131 | 1291,0.0,80262002,1,2,10,7,1,275.0,70 86 101 47 76 105 106 29 83 46 9 36 11,9 46 37,,,72 92 116 108 71 128 8 50 28 115 6 7 42 76 111 75 20 46 36 64 21 52 131 29 19 121 59 93,711231 522888 222101 107700 646842,51049 79166 66737 85195 117548,,6876 7463 9805 8976 9826,756 9934 3923 7621 9089,,,,3 1,2,1,,1082,40405,1434096,53,13,0,6
132 | 1044,0.0,60089880,5,2,15,7,1,431.0,93 86 118 109 8 2 26 72 91 46 36 11 25,,,,100 72 92 116 13 78 71 8 30 86 28 6 42 99 76 111 75 9 46 64 122 56 38 52 131 69 89 121 23 59 129 93,223421 41734 422740 542834 487978,53212 105942 77689 38500 118648,,9621 7688 9826 8778 6661,3600 5566 105 6945 8726,,,,1 3,2,1,,18630,33813,38299,35,4,0,4
133 | 2031,0.0,69561396,1,2,13 10,6,1,464.0,70 86 12 76 75 29 42 72 88 36 82 11 96,73 31 44,,,78 71 30 6 42 76 59 130 103 21,345784 424336 593100 93199 447841,56438 81716 31370 105942 87963,,8244 4527 3434 5009 4260,5566 2351 6447 3640 1818,,,,3 1,2,1,1.0,83042,280832,425385,22,70,0,4
134 | 1940,0.0,11004128,1,2,13 10,7,2,839.0,77 86 2 46,22 1 70 54,,,,262926 517534 497092 580549 466036,11547 5685 85177 79353 117394,,7593 5669 4388 9131 348,5987 8592 7260 8019 5018,,,,0,0,0,,5552,30399,530889,35,27,113,9
135 | 1918,0.0,42721219,1,1,11,7,2,333.0,70 109 12 47 69 49 6 73 115 88 56 36 34 11 54 28 59 41 119 71 17 9 116 90,51 22 73 79 9 1 46 70 6 33 29 10 35,,,92 63 116 13 47 1 71 109 128 8 50 86 6 42 76 27 9 94 14 35 103 21 40 26 52 131 19 121 88 74 59 60 129 110 120 93,730949 656641 169282 152857 76448,89701 65128 974 18185 93916,,5344 512 1002 2242 9369,2774 8183 3820 3706 597,,,,1 3,1,2,1.0,158679,643438,1690612,60,4,0,4
136 | 173,0.0,46965109,1,1,10,2,0,532.0,70 76 6 17 56 116 36 11,,,,72 131 37 13 79 8 6 89 46 94 118 58 129 35,340531 37367 20889 236021 162804,18678 28305 25174 42879 66413,,809 2263 1395 9520 6551,8675 7658 9047 4046 2955,,,,1 3,1,1,,6937,186348,267290,35,89,3791,9
137 | 1379,0.0,18308214,5,1,6 13,7,1,100.0,70 86 12 47 75 69 8 29 49 83 6 42 35 112 36 11 54 96 118 76 28 59 41 15 114 9 58 116,22 9 70 4 48,,,100 72 33 116 47 4 71 109 8 50 6 42 76 46 36 64 38 65 52 131 79 41 121 10 60 129 130 93,468512 623073 295648 778442 287136,37213 27247 86941 77930 88919,,7134 8501 3467 3031 7113,8048 2620 5031 3313 8562,,,,1 3 4,1,3,,8864,90700,469197,22,27,113,9
138 | 966,0.0,5481239,4,1,10,7,0,576.0,,,,,,,120032 27629 60959 13801 50959,,,4107 1060 3664 7153 5432,,,,0,0,0,,915,31020,666210,22,51,0,4
139 | 1622,0.0,63304831,2,2,10,6,2,544.0,109 5 73 36 11 117,46 29,,,92 116 13 109 86 28 7 42 121 14 64 83,127901 482924 481181 8561 9162,23107 37224 29946 1675 43977,,9839 3518 4071 4950 2794,7813 1342 1078 2774 7099,,,,3,1,1,,17597,51385,838455,35,27,0,6
140 | 1596,0.0,61563581,2,2,13 10,1,2,317.0,118 8 36 11,63 30,,,,46751 585460 360782,36138 235 65112 100593 9202,,3287 3562 6432 9957 7767,4338 7315 8747 8083 2287,,,,3,1,0,,24704,48236,181137,35,27,113,9
141 | 136,0.0,25031802,4,1,11,6,0,219.0,93 70 77 86 118 109 100 76 69 28 8 119 72 46 17 56 36 11 54,24 30,,,100 80 33 131 37 116 109 8 6 5 46 121 59 129,669594 26682 677556 209024 723339,32301 38865 45236 29848 10819,,7281 7465 1902 9826 1183,2013 318 5383 278 744,,,,3 2 4,2,1,,452,50305,1187573,35,10,7992,11
142 | 1254,0.0,48513746,5,2,15,7,1,880.0,70 86 109 76 69 28 59 49 83 6 119 111 116 36 11 54,,,,72 92 116 53 13 47 78 71 109 8 30 28 6 42 76 27 46 14 58 64 68 52 31 131 73 132 89 121 59 118 25 129 120 127 100 33 134 12 128 86 115 99 55 20 75 90 40 65 107 69 87 10 130 93,542834 175711 297079 359554 630314,89066 56906 99882 38500 93127,,9826 6520 1874 8525 3162,6477 1924 849 5540 2209,,,,2 3 1,2,2,1.0,8350,244601,1383456,35,59,0,4
143 | 792,0.0,9988157,5,2,11,7,1,753.0,93 70 67 46 36 11,,,,52 100 92 131 71 8 61 50 115 6 75 46 129 114,4569 659836 172382 522239 64588,53212 119890 7014 88720 77448,,7993 5415 6604 1485 3802,4071 8165 3701 7734 5879,,,,0,0,0,,8350,331396,1564743,22,59,0,4
144 | 561,0.0,81260936,1,1,11,7,1,275.0,93 46,,,,,28185,114609 55816 8546 8646 103897,,5728 165 5128 2199 1066,499 5927 9401 1135 8890,,,,1 3,2,0,,44008,100565,808799,79,21,0,4
145 | 389,0.0,279902,5,2,13 10,1,1,458.0,70 109 47 69 28 59 67 49 114 26 91 9 36 65 11 38 25,63 42 73 31 46 70 21 33 4 28 3 30 7 37 44 65 14 10 29,6 4 10 9 1 7 5 2,,127 72 80 92 37 116 78 8 30 28 115 6 76 55 27 46 40 131 69 5 121 59 129 93,552046 218501 231442 603826 701150,64342 98330 99414 74860 16156,36983 51356 55758 9888 6164,609 8726 353 2392 7264,5524 3104 7173 6945 8116,2937 43 4445 7481 7730,10876 45028 6071 29193 55984 26035 37826 42554 58506 23206 64678 40044 25207 4980 12004 61664 45505 1123 3237 57061 47366 62385 24522 56616 34691 22759 63809 30699 60283 52013 21970 54768 9174 47169 49587 50237 18814 46808 37518 59311 42747 14237 46809 28827 59290 19020 57849 13132 14380 39261 19039 22164 11245 144 40038 58031 64692 42892 41664 35015 57514 12035 16267,,3 1,1,2,1.0,9106,662422,1354071,79,21,0,4
146 | 1714,0.0,54613804,4,2,11,6,1,353.0,93 70 86 76 75 48 28 106 29 6 2 42 72 46 116 62 36 11,52 21,,,52 100 72 92 131 116 29 8 28 115 6 132 42 20 46 121 59 64 129,8215 8561 109768 42864 781803,87795 93971 70619 7675 76242,,9826 9158 1450 7463 537,6640 7974 4706 2117 8577,,,,1 3,2,2,,79,25739,1608684,59,10,1455,11
147 | 191,0.0,54129227,2,1,10,1,1,115.0,70 109 47 76 28 49 119 95 17 9 56 36 11,,,,78 109 30 118 58 120,681558 107389 559980 429629 307740,10358 29946 69448 104155 10420,,2532 7596 4008 4486 9358,1422 6974 755 9019 9132,,,,3,1,1,,25485,50138,58465,35,51,15454,11
148 | 336,0.0,52250353,4,2,11,6,0,539.0,93 70 77 86 37 109 98 4 75 69 8 29 49 6 2 73 72 46 36 11 20 30 118 76 48 39 23 116 3 117 87,,,,,131085 338851 538596 131879 349924,103790 62888 82366 117462 105314,,638 6939 9826 393 6661,9542 3994 4107 1726 7433,,,,0,0,0,,370,4833,119845,22,67,113,9
149 | 2031,1.0,10068646,1,2,10,2,2,532.0,73 36 11 87,,,,72 33 131 11 109 8 66 6 76 89 59 129 93,636275 675598 530883 210500 85622,37879 51886 79676 92902 4721,18779 28250 31925 55196 24190,1015 5302 7661 9826 8402,8048 1195 3726 926 7388,2336 1888 8454 1084 6935,39799,,1 3,1,1,,83042,280832,425385,22,70,0,4
150 | 1119,0.0,36527173,5,1,0,0,2,27.0,,,,,,,,,,,,,,3,1,0,,3993,63752,798752,59,10,19256,11
151 | 231,0.0,57978528,1,2,13 10,6,1,540.0,70 86 37 98 121 47 102 33 10 8 29 49 83 27 6 7 73 19 46 56 36 11 118 76 48 28 106 114 57 17 9 80 87,47 24 69 30,,,100 72 92 37 116 134 78 71 109 128 12 8 112 30 28 98 115 6 7 42 76 27 20 46 64 103 114 40 52 31 131 79 41 89 121 23 59 129 130 120 93 83,141504 195965 204257 301906 752145,75000 90656 40765 41409 28206,,9826 8133 7017 5230 4746,7608 8369 7740 5674 5791,,,,3 1,1,3,,11487,159012,991964,20,1,0,4
152 | 1918,0.0,30692162,4,1,11,6,2,514.0,93 70 77 86 109 12 47 4 69 49 83 6 42 73 46 112 36 66 82 11 63 59 41 119 57 23 9 116 117,24 30,,,52 100 72 80 92 37 4 8 50 41 28 6 27 75 20 46 24 129 26,624687 500814 589299 132088 573047,3381 2042 88002 90383 80172,,9826 7684 2353 1971 4409,6116 2391 1944 6000 7403,,,,4 3 1,2,1,,158679,643438,1690612,60,4,0,4
153 | 1596,0.0,6586514,2,2,10,2,1,502.0,93 75 69 29 46 36 11 54,24 73 9 1 70 19 28 13 30 74,,,100 31 131 109 8 6 42 76 46 74 59 64 130 120,664543 63742 446696 220981 723267,94407 8125 73295 93916 17355,,904 2927 2359 696 2770,1425 6144 1097 6424 8905,,,,3 1,1,1,,24704,48236,181137,35,27,113,9
154 | 1918,0.0,54663149,5,1,11,6,1,210.0,70 76 36 11,24 30,,,127 52 100 131 116 13 8 50 6 76 46 126 121 59 24 129 130 93,365427,11395 79112 95597 105115 34525,,1106 9546 5631 1398 2562,9751 1635 1267 9687 9769,,,,3,1,1,1.0,158679,643438,1690612,60,4,0,4
155 | 846,0.0,52592275,5,1,11,6,1,687.0,70 86 12 47 75 69 29 49 2 42 43 72 115 88 56 112 36 34 11 54 96 48 28 59 15 18 23 9 17 65,70 4 48 10,,,131 37 116 71 8 6 132 42 46 62 103 114 21,268355 468512 100680 96100 764860,31076 85264 11395 79112 82720,,9826 1682 4760 644 7566,7938 523 1180 6543 9283,,,,1 3,1,1,1.0,8864,310473,1073411,22,43,25730,9
156 | 420,1.0,21547984,4,2,0,7,1,131.0,70 86 76 75 4 28 106 29 59 49 41 27 64 6 2 92 46 17 56 116 36 11,47 24 46 32 30 37 29,,,52 72 131 11 4 48 8 113 28 66 6 42 76 20 46 59 103 93,330721 627369 259755 314149 529100,28206 102474 88214 78971 36903,,4812 4801 2583 7375 9249,2970 509 7509 5389 3706,,,,1 3 4,1,1,,2676,692763,1451219,77,8,0,4
157 | 302,0.0,66762714,2,2,10,1,1,951.0,70 76 28 59 49 36 11,47 73 24 9 70 20 6 4 30 10,,,52 72 131 37 101 78 79 109 8 30 42 46 64 120,627369 43329 561319 709037 78993,87384 81211 117062 93674 66536,,164 2519 1834 2830 2397,9708 2531 139 6442 7658,,,,3 4,1,1,,18621,745599,1628574,91,21,0,4
158 | 1566,0.0,49247015,1,1,10,2,0,280.0,98 47 75 69 29 49 6 95 71 23 9 36 11 44,,,,131 116 101 109 8 6 121 129,118076 94912 442133 547154 272709,32981 95979 45119 108417 48892,,8598 310 5007 3356 644,7573 8116 5555 3082 9705,,,,3,1,3,,6946,296367,520004,59,24,3794,11
159 | 369,0.0,75796170,2,1,10,2,2,705.0,93 77 86 109 47 75 29 83 6 119 39 46 57 17 9 56 36 11,,,,,338851 253554 453551 139628 493181,40821 18185 61936 74083 87078,,3052 6939 644 1598 6762,4075 5675 7816 6477 3865,,,,0,0,0,,66025,170445,1229175,109,94,0,4
160 | 1468,0.0,14354520,2,1,10,1,1,13.0,70 86 109 47 76 75 28 29 59 49 27 6 2 119 42 92 91 111 22 9 62 36 11,70,,,127 72 37 116 8 6 42 76 46 58 103 52 131 107 79 69 132 39 121 23 59 118 129 93,524579 446227 367931 211443 112618,107504 100124 44265 105942 110109,,3803 9205 3207 9232 7684,1765 7856 4358 9251 5431,,,,3 1 4,1,1,,915,994,1610899,60,51,0,4
161 | 916,0.0,4976076,2,2,12 13,6,1,100.0,86 83 42 17 56 62 36 11,58 31 70 49 44 54,,,119 31 63 47 78 109 8 30 6 42 76 59 64 129 81 130 120 93,365075 319706 725039 755294 183412,105115 111921 100786 71378 41409,,7684 7827 1982 7512 1135,7052 2872 8058 2422 6447,,,,3 1,1,1,,17597,51385,838056,35,25,0,6
162 | 1468,0.0,35084182,5,1,11,7,2,950.0,93 70 77 86 37 109 75 69 29 49 83 6 72 46 56 36 11 44 100 59 119 18 57 17 116,46 37,,,100 80 92 37 116 13 71 8 28 6 2 42 20 46 94 122 133 56 114 21 131 132 121 59,338851 612024 572479 701150 210767,96549 32356 114283 53851 54980,,5297 5822 7608 9546 7457,644 5482 2391 6520 8823,,,,1 3,1,3,,915,994,1610899,60,51,0,4
163 | 939,0.0,75365386,1,1,10,1,1,85.0,93 70 77 118 47 76 8 46 9 36 11,22 73 9 70 33 4 74 41 45,,,100 80 131 37 4 8 6 76 46 59 129 93,497092 633122 437174 610640 628198,11395 34802 82852 90747 19826,50583 38454 36342 23076 14161,7165 6695 8575 2557 6095,2913 1094 2902 5188 218,9274 360 3729 2031 7388,24692,,3,1,1,,3993,84178,104188,59,100,24947,11
164 | 136,0.0,9476214,1,1,10,2,1,441.0,26 25,22 42 9 70,,,72 92 116 13 1 71 109 34 8 30 50 28 6 76 111 20 58 35 21 52 131 113 121 59 60 18 129 130,373554,11395 79112 23929 37979 30286,,4239 7433 9077 7053 1929,783 8658 9131 2154 7433,,,,1 3,1,1,,452,50305,1187573,35,10,7992,11
165 | 1338,0.0,19193584,5,1,10,2,0,585.0,,,,,,,,,,,,,,0,0,0,,702,12724,1147463,105,10,4669,11
166 | 302,0.0,38465760,2,2,10,1,1,415.0,70 28 59 49 6 57 17 56 116 36 11,47 73 32 21,,,,709037 244352 687054 113431 790907,105115 8546 82643 55545 78366,,9046 9396 9819 3830 9437,4909 3301 9154 5005 8955,,,,0,0,0,,18621,745599,1628574,91,21,0,4
167 | 2047,0.0,32701898,2,1,13 10,7,1,431.0,17 56 36 11,46 29,,,52 78 71 8 30 6 42 76 59 118 18 58 64 129 21 93,636011 162804 251612 780946,66915 55514 22154 66338 94229,,631 9235 8048 9700 6370,9708 5204 9968 4776 8357,,,,1 3,2,3,,11459,18296,20199,91,94,0,4
168 | 1746,0.0,44752953,4,2,13,7,2,809.0,93 70 77 75 69 29 49 6 42 46 56 36 82 11 44 100 76 48 67 59 15 41 18 114 68 113 71 17 116,4 48,,,100 72 80 63 37 116 71 109 8 50 28 6 42 20 46 103 21 52 131 79 17 121 59 129,338851 368262 213191 172567 562294,30101 73505 72453 28206 11395,,6939 8427 4392 2302 9786,2880 5441 8544 667 41,,,,3 1 4,1,2,,75748,204378,287080,22,121,0,4
169 | 411,0.0,42078922,4,2,11,6,1,705.0,70 118 76 48 28 8 59 67 49 6 18 23 116 36 11,73 19 13,,,72 92 63 116 53 13 78 71 109 8 32 30 50 28 115 6 42 76 55 27 9 20 64 56 40 26 52 31 131 87 89 121 59 60 129 130 93,529100 64588 446617 521474 298533,46205 30551 86586 120741 8546,,9826 283 6118 3450 4080,5641 6311 4260 672 7895,,,,3 1,2,1,,9106,163120,220179,79,21,0,4
170 | 311,0.0,1620969,3,2,11,2,1,892.0,70 37 76 75 106 29 46 36 11,,,,,627369 8561 542834 105884 31080,84348 98976 34004 118151 1088,,7684 6762 408 3445 3058,7444 1647 644 4753 4039,,,,3,1,0,,915,994,27461,60,51,0,4
171 | 1291,0.0,52411168,5,1,11,6,1,275.0,118 47 75 8 29 49 27 42 92 73 113 9 35 36 11 117,79 9 46 6 21 33 4 28 76 41 48 39 43 37 14 29,,,52 33 131 116 13 71 109 8 61 50 6 5 121 59 129 35 56 21,393837 411962 59569 598449 2268,1225 117548 14906 25687 120705,,644 8822 1370 1091 8329,412 9110 5655 9541 2039,,,,3 1 4,1,3,1.0,1082,40405,1434096,53,13,0,6
172 | 914,0.0,16053777,1,1,13 10,6,2,115.0,93 70 32 77 37 109 102 28 49 41 27 6 119 7 95 46 57 23 116 36 11,,,,100 80 131 63 37 109 8 6 46 129,197267 745923 154475 74772 491118,18780 974 18185 76657 66915,8820 36983 27421 56849 43748,7603 523 6939 1314 506,2101 1988 2097 4793 8867,5432 6492 4921 840 5949,,,3,1,0,,47823,111645,141973,100,21,0,4
173 | 1468,0.0,6938995,1,1,10,7,1,72.0,70 77 86 98 47 76 69 28 29 59 49 83 27 6 42 19 46 9 35 116 36 11 20,,,,52 72 80 131 116 13 47 8 113 61 50 73 6 132 9 46 121 18 58 129 22,790848 437005 498020 297041 162804,16683 59581 1118 104517,,1751 7684 4410 8503 9253,330 3992 8234 300 2234,,,,3 1,1,1,1.0,915,994,1610899,60,51,0,4
174 | 70,0.0,38063767,2,2,13 10,2,2,85.0,70 118 109 48 28 8 106 67 59 49 6 119 73 91 17 56 116 36 11 117,73 70 28 74 65 54,,,72 92 116 78 71 109 128 8 30 50 6 2 42 58 133 21 52 131 79 69 73 132 89 70 59 118 60 129 120,435043 64588 679913 118709 209756,79676 73349 78366 80747 85177,,3168 2132 63 3965 6525,8556 9298 4985 1594 10,,,,3 1,1,2,1.0,327,5616,5977,22,27,113,9
175 | 692,0.0,75503662,5,1,10,1,0,45.0,17 56 36 11,,,,,691885 271833 696607 495006,80263 19826 116112,,5099 8889 230 2002 8062,2990 1060 7373 3551 3771,,,,3,1,0,,6946,296367,455396,59,24,3794,11
176 | 1781,0.0,24488149,1,2,10,7,1,958.0,93 70 77 86 100 76 28 83 27 6 7 2 42 46 60 22 71 116 36 11 96,63 18 30 68,,,100 72 80 92 116 4 71 109 128 8 112 30 50 86 28 6 42 27 75 20 46 64 103 114 40 26 52 31 131 11 79 17 19 62 121 59 129 83,573044 684639 555710 526578 341120,101564 4837 38532 34154 100125,,8525 2519 3764 9826 7143,4779 5927 6923 9224 9195,,,,3 1,2,1,1.0,25420,135565,1606251,53,4,0,4
177 | 916,0.0,67312009,2,2,10,6,2,72.0,93 70 109 76 28 59 49 122 6 119 5 46 60 116 36 11,66 22 79 1 70 6 54,,,,107700 64588 366962 68091 119206,80263 11547 5685 85177 52864,,6278 8462 4437 6787 9826,8161 4346 3360 5658 263,,,,3,2,0,,17597,51385,838056,35,25,0,6
178 | 1716,0.0,16054191,1,2,11,7,1,783.0,70 86 109 76 48 28 106 67 119 2 42 5 111 62 36 11,47 42 73 31 1 70 20 44 54,,,131 63 116 101 1 71 109 8 50 6 132 42 76 59 60 64 129 21 93 83,583314 601348 568750 289975 183412,16373 90580 100648 42542 41340,,3843 9221 8823 1502 6666,7580 8016 358 3701 1553,,,,1 3,1 2,1,,5552,158101,1080850,35,27,113,9
179 | 692,0.0,27042690,2,1,10,3,1,458.0,70 109 76 119 36 11,,,,92 116 47 128 8 50 28 115 6 42 76 55 36 64 103 31 73 132 89 121 59 10 60 129 93,508680 754683 555051 268945 454738,91187 117462 55952 57013 44630,,8815 6989 9580 3513 3401,5169 4745 7788 598 9817,,,,1 3,1,3,,6946,296367,455396,59,24,3794,11
180 | 1379,1.0,27974696,1,1,13 10,6,2,13.0,70 109 75 76 28 29 59 49 41 64 6 119 114 17 56 116 36 11,22 9 70,,,72 13 79 6 42 27 9 89 74 59 84 64 129 35 40,666079 394919 182185 89154 272264,79250 77181 117548 18185 27802,,5915 3207 607 7800 6096,585 1732 7612 1014 9968,,,,1 4 3,1,1,,8864,90700,469197,22,27,113,9
181 | 411,0.0,17828902,4,1,10,6,1,1.0,70 76,,,,92 116 132 42 75 64,37509 36095 8561 605930,48826 85177 97626 52864 106259,,9264 9992 2380 6816 5531,4818 4393 895 2376 2324,,,,3 1 4,1,3,,9106,163120,220179,79,21,0,4
182 | 1622,0.0,20967479,4,2,12 13,7,1,170.0,93 70 118 76 28 33 8 29 6 46 60 17 56 36 11,47 58 73 24 32 19 4 13 49 30 41 48,,,52 100 72 131 63 37 53 78 71 109 8 30 61 50 41 66 6 46 59 56 114 120,417848 665425 35889 41290 543455,11395 79112 305 52173 4783,,8648 6816 9534 571 4071,7457 588 6732 336 6586,,,,3 1,1,1,,17597,51385,838455,35,27,0,6
183 | 692,0.0,4357412,1,1,10,2,1,502.0,49 122 17 56 36 11,31 44,,,,57006 111225 127865 134533 10518,10130 105942 81716 56438 37224,,1749 5709 4751 7180 7813,2485 1699 9603 6945 9571,,,,1,2,0,,6946,296367,455396,59,24,3794,11
184 | 1254,0.0,20623764,1,2,13 10,7,2,943.0,93 70 109 76 28 16 59 49 119 46 36 11,73 24 9 1 70 30 74,,,100 72 131 37 78 79 71 8 30 69 6 42 76 46 59 18 58 64 129 21 93,238510 485930 214599 247634 64588,846 49681 38842 73295 42808,,8569 9978 353 182 8919,9047 4325 4739 5791 6457,,,,3 1,1,1,,8350,244601,1383456,35,59,0,4
185 | 1284,0.0,78202974,4,1,11,6,1,458.0,93 77 86 37 109 12 98 75 29 49 83 108 6 73 46 88 112 36 34 11 59 39 71 23 116 3,24 73 52 21 19 13 30 74,,,100 72 80 92 37 116 47 1 78 71 109 8 30 50 6 42 76 111 46 64 103 21 52 131 79 61 66 89 121 123 59 129 81 96 93,338851 542834 538596 53354 468512,40141 12426 85368 94382 104708,,6563 3414 8778 6939 7684,4739 5791 6457 9917 4338,,,,3 1 2,1,1,,6841,159118,1758880,42,10,3733,11
186 | 960,0.0,42208088,2,2,13 10,6,1,395.0,70 47 76 67 41 9 36 11,58 21 71,,,52 72 131 116 107 78 71 109 8 17 30 69 50 6 42 121 59 64 129 120 21,298533 435667 409591 53498 245830,69298 74030 95597 82643 105115,,644 350 4071 5645 607,5792 3368 75 2372 6256,,,,3 1,1 2,1,,702,52258,1655556,59,10,4669,11
187 | 1350,0.0,71886464,5,1,10,1,0,18.0,73 90,63 30,,,,275623 726802 117916 261912 562294,107661 18185 89620 8125 10695,,1978 1837 6586 9826 1213,3816 9860 566 7866 4324,,,,0,0,0,,7565,353610,1554384,109,94,0,4
188 | 1044,0.0,47074301,2,2,10,2,2,849.0,,79 6,,,,,103447 117462 35010 55952 58787,,,8525 2097 7457 1078 5316,,,,0,0,0,,18630,33813,38299,35,4,0,4
189 | 432,0.0,24477523,5,1,10,2,0,567.0,93 77 109 69 59 49 6 119 46 71 57 24 116 36 11 38,,,,127 100 80 116 71 8 50 28 98 6 42 46 64 56 103 21 52 131 121 23 74 59 60 129,104068 527814 279670 10251 460690,103645 6690 974 7014 52156,,8048 1172 5935 9826 8402,5169 6978 2990 2748 6287,,,,3 1,2,3,,11437,18237,1081485,100,21,0,4
190 | 1407,0.0,8852328,5,1,0,6,0,270.0,93 46,24 9 21 30 55,,,100 6 89 46 129,,10662 61840 313 52864 461,36983 8820 42579 15493 7540,6939 8088 3162 8048 7892,,43 5918 7114 3470 2313,,,3 1,1 2,2,,702,12724,962151,105,10,4669,11
191 | 1503,0.0,77951745,1,1,13 10,7,0,737.0,,24 30,,,,,305 52173 34154 116611 87384,,,5692 2474 1776 7958 1627,,,,3 1 2,1,0,,79,2295,1230210,59,10,38,11
192 | 1379,0.0,15095533,2,1,6 13,7,1,809.0,93 70 12 76 28 59 67 49 15 41 6 46 22 17 56 36 34 11,22 79 9 46 70 6 33 55 23 29,,,100 72 37 116 11 13 47 79 69 41 6 9 46 121 59 81,89154 490686 761493 577673 562294,44452 56906 98971 11395 79112,,9984 9020 5915 3656 6895,7062 5732 9435 9125 9310,,,,3 1,1,3,,8864,90700,469197,22,27,113,9
193 | 1904,0.0,30866123,5,2,10,7,1,809.0,70 109 47 75 76 28 29 6 119 42 9 116 36 11 96,58 73 1 70 21 19 4 71 13 74 48,,,104 72 92 116 13 47 1 109 8 50 6 42 76 27 9 75 64 40 52 131 41 121 59 60 129 81 22 120 93 83,666240 794030 576727 649587 47507,105115 11395 79112 93916 8546,,384 9906 5096 8442 3188,5641 3074 1101 1487 981,,,,3 1,1,1,1.0,8203,37818,414738,109,142,0,6
194 | 302,0.0,20336345,2,2,10,6,0,774.0,70 76 67,73 1 46 70 21 74 37,6 8 10 4 9 1 7 3 5 2,,37 6 46 129,279516 723709 446340 497578 605930,11395 79112 52864 461 12933,30214 14542 36983 8820 14981,4818 2267 3479 139 7430,1921 5060 6269 8914 1547,5015 3651 1038 7202 7467,,,3,1,1,,18621,745599,1628574,91,21,0,4
195 | 846,0.0,35156445,2,1,10,1,1,662.0,36 11,22 24 46 70 30 14,,,78 30 76 93,522888 8561 562294,9202 100593 81476 63970 15760,,286 1481 46 2036 7753,783 8658 9171 9131 2154,,,,0,0,1,,8864,310473,1073411,22,43,25730,9
196 | 1017,0.0,58534311,4,1,11,7,2,270.0,93 70 98 69 29 49 6 42 73 46 56 36 11 96 107 76 48 28 106 67 59 41 114 71 57 23 17 58 116 50,52 21,,,,315951 204795 89154 666066 487978,72686 73250 87384 11538 77778,,360 9826 7281 5382 1931,5322 8317 7433 4441 5189,,,,3 1 2,1,0,,11487,741453,1614385,22,21,0,4
197 | 1566,0.0,19634292,4,1,11,2,1,312.0,75 29 17 56 36 11,,,,,262127 773220 223415 411962 318081,14090,,644 3162 7684 6939 358,5764 5389 4571 1950 7185,,,,3,1,0,,6946,296367,520004,59,24,3794,11
198 | 1605,0.0,35534038,1,2,10,6,2,174.0,70 76 69 67 36 11 20,22 31 52 1 70 21 44,,,72 131 107 8 69 28 6 76 27 20 59 130 40,486707 418990 582977 279516 298533,103645 6690 105115 70625 37077,,3468 3040 3401 6011 4330,7922 364 1807 4098 3606,,,,3 1,1,3,,11195,19215,1755470,53,140,0,4
199 | 692,0.0,74616878,5,1,11,7,1,687.0,70 76 28 106 59 49 41 6 17 56 116 36 11,,,,52 92 131 116 8 50 5 75 60,87038 453249 260366 435043 85893,82910 22381 66797 83926 84717,,1584 7264 9826 8559 9427,163 1802 295 7952 6144,,,,1 3,1,1,,6946,296367,455396,59,24,3794,11
200 | 70,0.0,24696694,2,2,10,2,1,275.0,36 11,73 70 54,,,,8561 403083 780946 758223,35944 85195 15868 8207 11702,,2030 8497 1482 9428 388,9862 5307 9570 9132 5239,,,,3 1,2,0,,327,5616,5977,22,27,113,9
201 | 1918,0.0,14193492,5,1,11,7,1,87.0,93 70 77 86 109 98 75 76 29 59 49 15 6 119 73 72 46 60 57 116 36 11 3,73,,,52 100 80 131 47 8 6 42 76 46 62 59 64 129 81 93,707913 761845 188229 65240 745621,117548 70935 76242 62591 119740,,3756 6866 6939 3162 644,2561 2746 2084 2066 8952,,,,1 3,2,1,,158679,643438,1690612,60,4,0,4
202 | 1044,0.0,72779900,1,2,10,2,1,821.0,70 76 28 73 36 11 3,31 44,,,52 72 33 131 107 79 109 8 28 6 76 27 20 59 40 93,571228 64588 26682 506575 116972,53464 99799 63117 87795 93971,,2519 3775 9264 3202 4796,5566 9272 469 8883 847,,,,3 1,2,1,,18630,33813,38299,35,4,0,4
203 | 1291,0.0,64953808,5,1,11,7,1,348.0,86 37 118 12 98 75 69 8 29 59 49 83 6 2 46 60 115 71 36 34 11 20,,,,72 92 37 116 13 47 71 128 8 50 86 28 6 42 9 75 20 46 94 56 103 52 131 41 132 121 74 59 60 10 129,92320 231370 118076 721375 53354,6855 87084 33270 114456 2095,,8858 8510 9826 5828 2545,4357 2596 8673 9049 1057,,,,1 3,2,1,1.0,1082,40405,1434096,53,13,0,6
204 | 2066,0.0,20008733,4,2,13 10,2,1,353.0,109 59 49 6 119 111 116 36 11,,,,,384132 195616 94895 665289 502695,94093 81476 36301 27783 19826,,7684 8901 9865 6666 1778,3014 1895 1932 9488 751,,,,0,0,0,,14818,286065,1075635,109,27,113,9
205 | 692,0.0,8051803,3,1,13,7,2,99.0,93 70 77 37 98 101 47 75 4 8 29 49 27 6 42 73 53 26 46 56 36 82 11 25 118 100 76 113 71 9 17 116 87,24 1 70 19 13 30,,,52 100 80 37 108 78 4 8 30 6 42 46 18 58 129 103,610185 538596 310728 761845 469171,75000 17581 58902 56688 41409,,3936 523 8778 6939 7964,4724 212 9160 4431 258,,,,1 3,1,1,,6946,296367,455396,59,24,3794,11
206 | 1027,0.0,66620114,3,1,11,6,1,428.0,93 70 77 109 98 47 75 69 29 49 27 6 92 73 46 56 36 11 44 76 106 18 114 39 60 22 23 57 9 17 3 87,63 24 30,,,100 80 131 37 116 8 6 46 121 59 129,396602 493555 145050 79725 773027,63970 86469 109613 53818 29089,,9826 417 6661 506 9235,8193 3678 7998 7291 1802,,,,3 1,1,1,,915,31020,1266652,22,51,0,4
207 | 692,0.0,36157305,2,1,10,3,0,115.0,59 49 95 36 11,,,,72 131 116 11 8 6 42 27 89 121 64 40,784463 46751 442581 81119 162804,11395 79112 119291 51859 16567,,8013 9484 3781 8918 8597,6978 1195 1227 9721 8833,,,,3 1,1,2,,6946,296367,455396,59,24,3794,11
208 | 725,0.0,43228422,2,2,10,6,2,554.0,93 70 28 46 36 11,,,,,191560 516518 365427 415855 627369,12198 89860 1673 29950 9634,,5488 6975 5415 8839 46,1170 7444 8402 3104 152,,,,0,2,0,,370,170485,1485462,22,67,113,9
209 | 302,0.0,19856371,3,2,11,2,2,341.0,93 70 77 30 86 37 76 75 4 69 33 8 29 83 27 92 73 46 36 11 50 20 87,24 70 4 28 30 48 54,,,52 100 72 80 131 37 11 4 79 71 8 28 6 7 46 59 21,235296 616985 497092 457717 408698,90656 30101 17355 85039 63343,41633 13035 8820 36983 51621,506 8778 4442 7330 2328,4846 7922 1471 9643 7580,4417 4231 7478 2951 8643,,,1 3,1,3,,18621,745599,1628574,91,21,0,4
210 | 191,0.0,67249695,4,1,11,1,0,47.0,75 4 29 46 36 11,52 21,,,,53354 711955 158229 338851 568814,80452 55867 50570 53851 51938,,4789 7816 7684 3162 8957,445 6337 6218 5105 7519,,,,0,0,0,,25485,50138,58465,35,51,15454,11
211 | 1468,1.0,3202763,2,1,11,2,1,112.0,93 70 86 109 76 28 106 59 49 83 119 46 111 17 56,9 52 46 21 72,,,100 72 92 116 13 47 1 109 8 6 42 76 111 75 9 46 64 35 52 131 79 69 132 89 121 59 10 129 110 96 54,111690 750790 170026 460828 505995,55125 42150 120784 110188 4837,,6666 2342 2940 5233 6651,2480 2889 3269 8725 7688,,,,3 1,2,2,1.0,915,994,1610899,60,51,0,4
212 | 12,0.0,31703979,1,2,13 10,7,2,232.0,70 86 109 12 33 8 29 49 27 6 2 92 73 26 19 112 36 11 25 118 100 48 28 59 18 39 114 111 113 116 3 117,73 46 74 14,,,52 104 100 33 116 78 1 71 109 8 17 30 50 28 132 20 46 21,668572 338851 383144 172567 207553,846 114283 34525 120667 8546,,6955 9826 3682 9499 6666,3573 5445 556 2817 9953,,,,3 1,1,3,,388,420987,1612095,35,67,113,9
213 | 1605,0.0,7502512,4,1,11,6,1,246.0,93 86 37 109 47 4 75 8 29 49 83 6 46 36 11 51 30 118 16 59 15 122 91 111 71 113 57 9 116,79 73 46 70 6 19 4 36 18 13 41 29,,,100 72 92 37 116 13 47 78 4 71 45 109 128 34 8 30 50 28 6 42 76 55 27 75 46 58 64 122 103 21 40 52 131 41 132 19 39 89 121 23 59 118 60 129 22 93 15,289706 767326 18724 479826 497092,8546 37224 74065 104780 94093,,6724 523 2593 9826 6710,5641 6311 5115 8387 7204,,,,3 2 1 4,2,3,,11195,19215,1755470,53,140,0,4
214 | 1415,0.0,46502704,4,1,11,4,1,486.0,93 70 77 86 109 75 69 45 8 29 49 83 6 46 56 36 11 44 51 118 76 28 106 59 114 111 71 57 23 17 116,,,,100 72 80 33 92 116 13 47 1 109 128 8 50 86 28 6 20 46 122 52 131 113 69 41 121 59 24 129 110,26682 704724 171741 159810 529100,95007 5001 18185 56819 106720,,3450 9826 644 3090 8525,598 857 1424 4998 8059,,,,3 1 4,2 1,1,,133292,464828,1334609,22,74,0,4
215 | 411,0.0,71812125,2,1,10,7,0,803.0,59 49 36 11,,,,,306150 198167 401060 50273 193810,,,506 4891 3701 9390 3938,616 446 2400 9220 387,,,,0,0,0,,9106,163120,220179,79,21,0,4
216 | 1918,0.0,16419335,5,1,11,2,0,192.0,93 70 77 75 29 41 46 36 11,63 73 1 70 19 4 13 30 74 48,,,100 80 131 37 13 8 61 50 6 9 46 59 14 122 129,157203 497092 69666 10251 690200,90747 90718 36764 19826 93916,,8525 4695 7165 9710 2459,4166 6145 9048 9642 1913,,,,3 1,1,1,,158679,643438,1690612,60,4,0,4
217 | 1918,0.0,16028961,5,1,11,7,1,346.0,93 77 37 109 98 29 49 119 73 46 113 36 11 87,,,,52 100 80 131 37 116 53 47 78 8 30 6 132 42 46 121 64 129 22 67,368434 128266 265398 725039 555710,18185 7467 95597 118492 35722,,5400 9170 9826 5427 6939,4165 3752 415 2118 9421,,,,3,1,1,,158679,643438,1690612,60,4,0,4
218 | 70,0.0,44400969,5,2,10,7,1,335.0,70 86 28 83 36 11,,,,,123755 644438 89154 309419 204795,84497 114314 17355 56438 718,,247 7108 2011 9252 5703,1953 4962 3480 1564 4828,,,,3,1,0,,327,5616,5977,22,27,113,9
219 | 302,0.0,33774716,2,2,10,7,1,808.0,74 93 97 70 77 109 76 1 69 102 33 29 6 42 39 46 111 57 62 116 36 11 20,,,,100 72 80 131 63 107 79 109 8 28 6 99 27 20 46 74 59 40 83,616597 527604 581802 761845 438702,16785 56986 94457 99348 88002,,8306 9826 9474 7827 5252,1281 4518 6945 6640 3664,,,,3,1,1,,18621,745599,1628574,91,21,0,4
220 | 1746,0.0,44836980,3,1,13,2,1,647.0,93 70 109 12 75 29 41 42 46 111 115 88 81 17 56 62 36 34 11,9 52 46 21 19 4 13 39 14,,,100 46,2268 743380 729853 649615 230873,66348 117586 97793 37224 52864,,9826 7709 7684 3579 9264,3786 8854 8016 9784 5242,,,,1 3 2 4,1,2,,75748,204378,287080,22,121,0,4
221 | 1605,0.0,41899198,1,2,13 10,2,1,437.0,36 11,73 79 1 46 70 6 80 74 54 29,,,33 131 116 47 78 109 8 30 6 132 76 121 59 10 129 81 130 93,297041 590770,107549 64039 85368 60053 87249,,4203 927 7964 7124 3836,8525 1130 4610 9777 5098,,,,3 1,1,3,,11195,19215,1755470,53,140,0,4
222 | 1468,0.0,70704609,5,1,10,7,1,189.0,,,,,92 71 115 38,,51938 93176 98066 7014 85039,,,1331 2064 8014 263 4158,,,,0,0,1,,915,994,1610899,60,51,0,4
223 | 838,0.0,49014980,2,2,10,2,0,275.0,,24 30,,,,,21675 76242 46275 56545 30947,,,3104 9721 3883 8701 2097,,,,0,0,0,,452,131853,1640724,93,10,12193,11
224 | 519,0.0,71870985,3,2,0,7,1,921.0,93 70 77 86 37 98 121 75 4 102 8 29 49 6 73 72 46 56 36 11 118 100 76 48 28 23 17 116 3 117 87,24 21 4 30 39,,,100 80 131 37 4 8 46,590981 678945 731217 542834 711140,44360 59543 64804 106503 42752,,6939 644 9826 6865 523,5985 2578 1850 5912 5851,,,,3 1,2,0,,370,358059,577602,22,67,113,9
225 | 1566,0.0,81025267,4,1,10,7,0,576.0,93 37 6 46 24 36 11,,,,,100680 497092 118076 52250 530883,105274,,6939 9108 4426 1170 2847,2119 137 809 7102 762,,,,1,1,0,,6946,296367,520004,59,24,3794,11
226 | 1291,0.0,59572881,1,2,13 10,7,1,27.0,93 70 77 86 118 109 47 76 28 8 67 2 5 43 46 9 36 11,47 73 79 46 70 32 6 21 18 27 29 68,,,100 72 80 92 116 78 71 109 8 30 50 86 6 42 43 46 64 21 52 131 79 132 19 121 59 60 129 120,120517 64588 571034 156479 527187,39525 25158 11249 26624 47966,,8052 2050 8525 9826 644,5115 5641 296 8704 8763,,,,1 3,2,2,1.0,1082,40405,1434096,53,13,0,6
227 | 529,0.0,13656913,3,1,11,3,2,192.0,93 70 77 86 37 109 47 75 1 102 29 49 6 2 42 46 35 56 36 11 100 76 28 39 71 23 57 9 17 116,9 1 70,,,100 80 37 101 109 28 6 7 46 129,792721 691885 71940 290900 433422,79250 77181 117548 1225 76242,,3162 523 9331 2905 5400,9925 1957 3444 9619 107,,,,2 3 1,1 2,3,,10122,163352,220558,35,10,3733,11
228 | 1566,0.0,14893891,2,1,10,7,2,809.0,70 109 76 28 106 59 49 6 119 42 73 22 57 35 36 11 96 87,,,,,354338 392248 196312 610805 389318,63832 85039 63773 18185 21577,,4519 8741 2121 9232 2557,6045 3835 9938 2783 263,,,,0,0,0,,6946,296367,520004,59,24,3794,11
229 | 


--------------------------------------------------------------------------------
/baseline/userFeature_to_DataFrame.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | @author: Infaraway
 4 | @time: 2018/4/17 15:44
 5 | @Function:
 6 | """
 7 | 
 8 | from csv import DictWriter
 9 | 
10 | with open('input/userFeature.csv', 'w') as out_f:
11 |     headers = ['uid', 'age', 'gender', 'marriageStatus', 'education', 'consumptionAbility', 'LBS', 'interest1', 'interest2',
12 |                'interest3', 'interest4', 'interest5', 'kw1', 'kw2', 'kw3',  'topic1', 'topic2', 'topic3', 'appIdInstall',
13 |                'appIdAction', 'ct', 'os', 'carrier', 'house']
14 |     writer = DictWriter(out_f, fieldnames=headers, lineterminator='\n')
15 |     writer.writeheader()
16 | 
17 |     in_f = open('input/userFeature.data', 'r')
18 |     for t, line in enumerate(in_f, start=1):
19 |         line = line.replace('\n', '').split('|')
20 |         userFeature_dict = {}
21 |         for each in line:
22 |             each_list = each.split(' ')
23 |             userFeature_dict[each_list[0]] = ' '.join(each_list[1:])
24 |         writer.writerow(userFeature_dict)
25 |         if t % 100000 == 0:
26 |             print(t)
27 |     in_f.close()
28 | 
29 | 


--------------------------------------------------------------------------------
/feature_ad.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | @author: Infaraway
 4 | @time: 2018/4/17 17:39
 5 | @Function:
 6 | """
 7 | import gc
 8 | from sklearn import preprocessing
 9 | 
10 | import pandas as pd
11 | 
12 | from sklearn.preprocessing import LabelEncoder
13 | 
14 | 
15 | class Ad:
16 | 
17 |     def __init__(self, ad_data):
18 |         self.ad_raw = ad_data.fillna('-1')
19 |         self.ad_feature = pd.DataFrame()
20 |         self.ad_feature['aid'] = self.ad_raw['aid']
21 | 
22 |         self.base_feature = ['advertiserId', 'campaignId', 'creativeId', 'creativeSize', 'adCategoryId', 'productId', 'productType']
23 | 
24 |         for feat in self.base_feature:
25 |             try:
26 |                 self.ad_feature[feat] = LabelEncoder().fit_transform(self.ad_raw[feat].apply(int))
27 |             except:
28 |                 self.ad_feature[feat] = LabelEncoder().fit_transform(self.ad_raw[feat])
29 | 
30 |         print('------------ad base feature process over...')
31 |         print()
32 | 
33 | 
34 | if __name__ == '__main__':
35 | 
36 |     data = pd.read_csv('data/raw_data/adFeature.csv')
37 |     ad = Ad(data)
38 | 
39 |     ad.ad_feature.to_csv('data/feature_data/clean_ad_feature.csv', index=False)
40 | 


--------------------------------------------------------------------------------
/feature_cross.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | @author: Infaraway
  4 | @time: 2018/4/17 17:39
  5 | @Function:
  6 | """
  7 | import gc
  8 | from sklearn import preprocessing
  9 | 
 10 | import pandas as pd
 11 | from sklearn.model_selection import train_test_split
 12 | from sklearn.neighbors import NearestNeighbors
 13 | from sklearn.preprocessing import LabelEncoder
 14 | 
 15 | 
 16 | class Cross_feature:
 17 | 
 18 |     def __init__(self, base_data, cross_feature):
 19 |         self.base_data = base_data
 20 |         self.cross_feature = cross_feature
 21 | 
 22 |     def cross_2feature(self, feat1, feat2):
 23 |         item = self.base_data.groupby(feat1, as_index=False)['uid'].agg({feat1 + '_count': 'count'})
 24 |         self.base_data = pd.merge(self.base_data, item, on=[feat1], how='left')
 25 | 
 26 |         itemcnt = self.base_data.groupby([feat1, feat2], as_index=False)['uid'].agg({feat1 + feat2: 'count'})
 27 |         self.base_data = pd.merge(self.base_data, itemcnt, on=[feat1, feat2], how='left')
 28 |         self.cross_feature[feat1 + '_' + feat2 + '_prob'] = self.base_data[feat1 + feat2] / self.base_data[feat1 + '_count']
 29 |         self.cross_feature[feat1 + '_' + feat2 + '_prob'] = pd.qcut(self.cross_feature[feat1 + '_' + feat2 + '_prob'], 10, duplicates='drop')
 30 |         # self.cross_feature[feat1 + '_' + feat2 + '_prob'] = self.cross_feature[feat1 + '_' + feat2 + '_prob'].round(7)
 31 | 
 32 |         del self.base_data[feat1 + '_count']
 33 |         del self.base_data[feat1 + feat2]
 34 |         gc.collect()
 35 |         print('    ----cross feature: %s  and %s' % (feat1, feat2))
 36 | 
 37 | 
 38 |     def cross_3feature(self, feat1, feat2, feat3):
 39 |         item = self.base_data.groupby([feat1, feat2], as_index=False)['uid'].agg({feat1 + feat2 + '_count': 'count'})
 40 |         self.base_data = pd.merge(self.base_data, item, on=[feat1, feat2], how='left')
 41 | 
 42 |         itemcnt = self.base_data.groupby([feat1, feat2, feat3], as_index=False)['uid'].agg({feat1 + feat2 + feat3: 'count'})
 43 |         self.base_data = pd.merge(self.base_data, itemcnt, on=[feat1, feat2, feat3], how='left')
 44 |         self.cross_feature[feat1 + '_' + feat2 + '_' + feat3 + '_prob'] = \
 45 |             self.base_data[feat1 + feat2 + feat3] / self.base_data[feat1 + feat2 + '_count']
 46 |         # self.cross_feature[feat1 + '_' + feat2 + '_' + feat3 + '_prob'] = \
 47 |         # pd.cut(self.cross_feature[feat1 + '_' + feat2 + '_' + feat3 + '_prob'], 10, labels=range(10))
 48 | 
 49 |         self.cross_feature[feat1 + '_' + feat2 + '_' + feat3 + '_prob'] = \
 50 |             self.cross_feature[feat1 + '_' + feat2 + '_' + feat3 + '_prob'].round(7)
 51 | 
 52 |         del self.base_data[feat1 + feat2 + '_count']
 53 |         del self.base_data[feat1 + feat2 + feat3]
 54 |         gc.collect()
 55 |         print('    ----cross feature: %s  %s  and %s' % (feat1, feat2, feat3))
 56 | 
 57 | 
 58 |     def base_cross(self):
 59 |         print('-------------------------------cross features----------------------------------')
 60 | 
 61 |         feature1 = ['aid', 'advertiserId', 'adCategoryId',  'creativeId', 'productId']
 62 |         feature2 = ['LBS', 'age', 'carrier', 'consumptionAbility', 'education', 'gender', 'ct', 'marriageStatus']
 63 | 
 64 |         for feat1 in feature1:
 65 |             for feat2 in feature2:
 66 |                 self.cross_2feature(feat1, feat2)
 67 | 
 68 |     def base_cross3(self):
 69 |         print('-------------------------------cross features----------------------------------')
 70 | 
 71 |         feature1 = ['aid', 'advertiserId', 'adCategoryId']
 72 |         feature2 = ['LBS', 'age', 'carrier', 'consumptionAbility']
 73 |         feature3 = ['education', 'gender', 'ct', 'marriageStatus']
 74 | 
 75 |         for feat1 in feature1:
 76 |             for feat2 in feature2:
 77 |                 for feat3 in feature3:
 78 |                     self.cross_3feature(feat1, feat2, feat3)
 79 | 
 80 |     def combine_feature(self):
 81 |         """用户直接拼接特征"""
 82 |         print('---------------combine_feature--------------------')
 83 |         features = ['aid', 'advertiserId', 'age', 'gender']
 84 |         for col in features:
 85 |             self.base_data[col] = self.base_data[col].astype(str)
 86 | 
 87 |         self._combine('aid', 'age')
 88 |         self._combine('advertiserId', 'age')
 89 |         self._combine('aid', 'gender')
 90 |         self._combine('advertiserId', 'gender')
 91 | 
 92 |     def _combine(self, feat1, feat2):
 93 |         print('    ----cross feature: %s and %s' % (feat1, feat2))
 94 |         self.base_data[feat1 + '_' + feat2] = self.base_data[feat1] + '_' + self.base_data[feat2]
 95 |         self.cross_feature[feat1 + '_' + feat2] = LabelEncoder().fit_transform(self.base_data[feat1 + '_' + feat2])
 96 |         self.cross_feature[feat1 + '_' + feat2] = self.cross_feature[feat1 + '_' + feat2].apply(int)
 97 | 
 98 | 
 99 | if __name__ == '__main__':
100 |     print('--------------------cross feature----------------------')
101 |     # train = pd.read_csv('data/raw_data/train.csv')
102 |     # test = pd.read_csv('data/raw_data/test1.csv')
103 |     # user_feature = pd.read_csv('data/feature_data/clean_user_feature.csv')
104 |     # ad_feature = pd.read_csv('data/feature_data/clean_ad_feature.csv')
105 |     #
106 |     # base_data = pd.concat([train, test])
107 |     # base_data = pd.merge(base_data, user_feature, on=['uid'], how='left')
108 |     # base_data = pd.merge(base_data, ad_feature, on=['aid'], how='left')
109 |     #
110 |     # # base_data = pd.read_csv('data/data_5%_fillna.csv')
111 |     #
112 |     # cross_feature = pd.DataFrame()
113 |     # cross_feature[['aid', 'uid']] = base_data[['aid', 'uid']]
114 |     # cross = Cross_feature(base_data, cross_feature)
115 |     # cross.base_cross()
116 |     # cross.base_cross3()
117 |     # cross.cross_feature.to_csv('data/feature_data/cross_feature_qcut.csv', index=False)
118 | 
119 | 
120 | 
121 |     user_feature = pd.read_csv('data/feature_data/cross_feature_qcut.csv')
122 |     cols = user_feature.columns.values
123 |     cols.remove('aid')
124 |     cols.remove('uid')
125 | 
126 |     for feat in cols:
127 |         user_feature[feat] = pd.qcut(user_feature[feat], 5, duplicates='drop')
128 |         user_feature[feat] = LabelEncoder().fit_transform(user_feature[feat])
129 | 
130 |         user_feature.to_csv('data/feature_data/cross_feature_qcut.csv', index=False)
131 |     print('end....')
132 | 


--------------------------------------------------------------------------------
/feature_kmeans.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | @author: Infaraway
 4 | @time: 2018/4/17 17:39
 5 | @Function:
 6 | """
 7 | 
 8 | import pandas as pd
 9 | from sklearn.cluster import KMeans
10 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder
11 | 
12 | 
13 | class User:
14 |     """用户的基本特征"""
15 |     def __init__(self, user_feature, user_data=None):
16 |         self.user_feature = user_feature
17 |         # self.vector_feature = ['interest1', 'interest2', 'interest5', 'kw1', 'kw2', 'topic1', 'topic2']
18 |         self.vector_feature = ['interest1']
19 | 
20 |     def get_vector_kmeans_feature(self, n_clusters=2, file_path='data/w2v_feature/w2v_all_20'):
21 |         """使用词向量对用户进行聚类操作，作为用户特征"""
22 |         print('------------get_vector_kmeans_feature start...')
23 |         for i, feat in enumerate(self.vector_feature):
24 |             print("    ---get_kmeans_feature: ", feat)
25 |             df_w2vfeat = pd.read_csv(file_path + feat + '.csv')
26 |             temp = df_w2vfeat['uid']
27 |             del df_w2vfeat['uid']
28 |             df_w2vfeat = df_w2vfeat.values
29 |             k_means = KMeans(n_clusters=n_clusters, n_jobs=-1).fit(df_w2vfeat)
30 |             label = k_means.labels_
31 |             # print(np.shape(label))
32 |             if len(label) == self.user_feature.shape[0]:
33 |                 temp['kmeans_' + str(n_clusters) + "_" + feat] = label
34 |                 self.user_feature = pd.merge(self.user_feature, temp, on=['uid'], how='left')
35 |             else:
36 |                 print('*****************kmeans shape error!!!*****************')
37 | 
38 |         print('------------get_vector_kmeans_feature end...')
39 |         print()
40 | 
41 | 
42 | 
43 | if __name__ == '__main__':
44 |     user_feature = pd.read_csv('data/feature_data/clean_user_feature.csv')
45 |     # vector_feature = ['appIdAction', 'appIdInstall', 'interest1', 'interest2', 'interest3', 'interest4',
46 |     #                   'interest5', 'kw1', 'kw2', 'kw3', 'topic1', 'topic2', 'topic3']
47 | 
48 |     user_feature = user_feature['uid']
49 |     user = User(user_feature)
50 |     user.get_vector_kmeans_feature()
51 |     user.user_feature.to_csv('data/feature_data/w2v_kmeans_feature.csv', index=False)
52 | 


--------------------------------------------------------------------------------
/feature_nlp.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | @author: Infaraway
  4 | @time: 2018/4/17 17:49
  5 | @Function:
  6 | """
  7 | import time
  8 | 
  9 | import numpy
 10 | import random
 11 | import pandas as pd
 12 | import scipy.special as special
 13 | 
 14 | 
 15 | class Nlp_feature:
 16 | 
 17 |     def __init__(self, nlp_feature, base_data=None, user_data=None, feature=None):
 18 |         """
 19 | 
 20 |         :param nlp_feature: 处理后生成的模型 每个用户一个
 21 |         :param base_data:  用户和广告的merge数据， 用于计算item的转化率
 22 |         :param user_data: 用户数据 用户根据 feature 生成对应的nlp_feature
 23 |         :param feature: 需要处理的特征列表
 24 |         """
 25 |         self.nlp_feature = nlp_feature
 26 |         self.base_data = base_data
 27 |         self.user_data = user_data
 28 |         self.feature = feature
 29 | 
 30 |     class HyperParam(object):
 31 |         """贡献平滑参数"""
 32 | 
 33 |         def __init__(self, alpha, beta):
 34 |             self.alpha = alpha
 35 |             self.beta = beta
 36 | 
 37 |         def sample_from_beta(self, alpha, beta, num, imp_upperbound):
 38 |             # 产生样例数据
 39 |             sample = numpy.random.beta(alpha, beta, num)
 40 |             I = []
 41 |             C = []
 42 |             for click_ratio in sample:
 43 |                 imp = random.random() * imp_upperbound
 44 |                 # imp = imp_upperbound
 45 |                 click = imp * click_ratio
 46 |                 I.append(imp)
 47 |                 C.append(click)
 48 |             return pd.Series(I), pd.Series(C)
 49 | 
 50 |         def update_from_data_by_FPI(self, tries, success, iter_num, epsilon):
 51 |             # 更新策略
 52 |             for i in range(iter_num):
 53 |                 new_alpha, new_beta = self.__fixed_point_iteration(tries, success, self.alpha, self.beta)
 54 |                 if abs(new_alpha - self.alpha) < epsilon and abs(new_beta - self.beta) < epsilon:
 55 |                     break
 56 |                 self.alpha = new_alpha
 57 |                 self.beta = new_beta
 58 | 
 59 |         def __fixed_point_iteration(self, tries, success, alpha, beta):
 60 |             # 迭代函数
 61 |             sumfenzialpha = 0.0
 62 |             sumfenzibeta = 0.0
 63 |             sumfenmu = 0.0
 64 |             sumfenzialpha = (special.digamma(success + alpha) - special.digamma(alpha)).sum()
 65 |             sumfenzibeta = (special.digamma(tries - success + beta) - special.digamma(beta)).sum()
 66 |             sumfenmu = (special.digamma(tries + alpha + beta) - special.digamma(alpha + beta)).sum()
 67 | 
 68 |             return alpha * (sumfenzialpha / sumfenmu), beta * (sumfenzibeta / sumfenmu)
 69 | 
 70 |     def _nlp_feature_score(self, feat):
 71 |         item_count = {}  # 记录 item 出现的次数
 72 |         item_true_count = {}  # 记录item的转化率信息
 73 | 
 74 |         data_list = self.base_data[feat].values.tolist()
 75 |         # label_list = self.base_data['label'].values.tolist()
 76 |         for i, list in enumerate(data_list):
 77 |             items = list.split(' ')
 78 |             for item in items:
 79 |                 if item_count.__contains__(item):
 80 |                     item_count[item] += 1
 81 |                 else:
 82 |                     item_count[item] = 1
 83 |                 if not item_true_count.__contains__(item):
 84 |                     item_true_count[item] = 1
 85 |                 elif item_true_count.__contains__(item):
 86 |                     item_true_count[item] += 1
 87 |         dianji = []
 88 |         zhuanhua = []
 89 |         for item in item_true_count.keys():
 90 |             dianji.append(item_count[item])
 91 |             zhuanhua.append(item_true_count[item])
 92 |         # 获取平滑参数
 93 |         info = pd.DataFrame({'dianji': dianji, 'zhuanhua': zhuanhua})
 94 |         hyper = self.HyperParam(1, 1)
 95 |         hyper.update_from_data_by_FPI(info['dianji'], info['zhuanhua'], 1000, 0.00000001)
 96 | 
 97 |         data_socre = []
 98 |         # 使用平滑参数 平滑数据 （类似拉普拉斯平滑）
 99 |         data_list = self.user_data[feat].values.tolist()
100 |         for i, list in enumerate(data_list):
101 |             score = 1
102 |             for item in list.split(' '):
103 |                 if not item_true_count.__contains__(item):
104 |                     score += 0
105 |                 else:
106 |                     score = score * (1 - (item_true_count[item] + hyper.alpha) / (
107 |                             item_count[item] + hyper.alpha + hyper.beta))
108 |             data_socre.append(score)
109 | 
110 |         self.nlp_feature[feat + '_score'] = data_socre
111 |         self.nlp_feature[feat + '_score'] = self.nlp_feature[feat + '_score'].round(7)
112 |         # self.nlp_feature[feat + '_score'] = pd.cut(self.nlp_feature[feat + '_score'], 10, labels=range(10))
113 | 
114 |     def get_nlp_data(self):
115 |         print('-----------------nlp feature:')
116 |         for feat in self.feature:
117 |             print('   ----this is feature: ', feat)
118 |             self._nlp_feature_score(feat)
119 | 
120 |     def get_nlp_discr(self):
121 |         print('-----------------nlp get_nlp_discr:')
122 |         for feat in self.feature:
123 |             print('   ----this is feature: ', feat)
124 |             self.nlp_feature[feat + '_score'] = pd.cut(self.nlp_feature[feat + '_score'], 10, labels=range(10))
125 |             # self.nlp_feature[feat + '_score'] = pd.qcut(self.nlp_feature[feat + '_score'], 10, labels=range(10))
126 | 
127 | 
128 | if __name__ == '__main__':
129 |     df_train = pd.read_csv('data/raw_data/train.csv')
130 |     df_test = pd.read_csv('data/raw_data/test1.csv')
131 |     df_train['label'] = df_train['label'].apply(lambda x: 0 if x == -1 else x)
132 |     df_userFeature = pd.read_csv('data/raw_data/userFeature.csv')
133 |     df_adFeature = pd.read_csv('data/raw_data/adFeature.csv')
134 |     data = pd.concat([df_train, df_test])
135 |     data = pd.merge(data, df_userFeature, on=['uid'], how='left')
136 |     data = pd.merge(data, df_adFeature, on=['aid'], how='left')
137 | 
138 |     data = data.fillna('-1')
139 |     df_userFeature = df_userFeature.fillna('-1')
140 | 
141 |     nlp_feature = pd.DataFrame()
142 |     nlp_feature['uid'] = df_userFeature['uid']
143 | 
144 |     # nlp_feature = pd.read_csv('data/feature_data/nlp_feature.csv')
145 | 
146 |     vector_feature = ['appIdAction', 'appIdInstall', 'interest1', 'interest2', 'interest3', 'interest4', 'interest5',
147 |                       'kw1', 'kw2', 'kw3', 'topic1', 'topic2', 'topic3']
148 |     # vector_feature = ['kw1', 'kw2', 'topic1', 'topic2']
149 |     # vector_feature = ['kw2']
150 |     start = time.time()
151 |     # nlp = Nlp_feature(nlp_feature, feature=vector_feature)
152 |     nlp = Nlp_feature(nlp_feature, data, df_userFeature, vector_feature)
153 |     nlp.get_nlp_data()
154 |     # nlp.get_nlp_discr()
155 |     nlp.nlp_feature.to_csv('data/feature_data/nlp_feature.csv', index=False)
156 | 
157 |     # nlp.nlp_feature.to_csv('data/feature_data/nlp_feature_cut.csv', index=False)
158 |     print('cost time :', (time.time() - start) / 60)
159 | 
160 | 


--------------------------------------------------------------------------------
/feature_select.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | @author: Infaraway
 4 | @time: 2018/4/17 17:49
 5 | @Function:
 6 | """
 7 | 
 8 | from MLFeatureSelection import FeatureSelection as FS
 9 | from sklearn.metrics import log_loss
10 | import lightgbm as lgbm
11 | import pandas as pd
12 | import numpy as np
13 | 
14 | def select_user_feature():
15 |     data = pd.read_csv('clean_user_feature.csv')
16 | 
17 | def select_cross_feature():
18 |     data = pd.read_csv('data/feature_data/cross_feature_probe.csv')
19 |     cols = ['aid', 'uid', 'aid_LBS_prob', 'aid_age_prob',
20 |             'adCategoryId_education_prob', 'creativeId_consumptionAbility_prob']
21 |     data = data[cols]
22 | 
23 |     data.to_csv('data/feature_data/selected_cross2_feat.csv', index=False)
24 | 
25 | 
26 | def select_cross3_feature():
27 |     data = pd.read_csv('data/feature_data/cross_feature3_probe.csv')
28 |     cols = ['aid', 'uid', 'aid_LBS_education_prob', 'aid_age_gender_prob',
29 |             'aid_age_ct_prob', 'aid_consumptionAbility_gender_prob', 'advertiserId_LBS_gender_prob']
30 | 
31 |     data = data[cols]
32 | 
33 |     data.to_csv('data/feature_data/selected_cross3_feat.csv', index=False)
34 | 
35 | 
36 | if __name__ == '__main__':
37 |     select_cross_feature()
38 |     select_cross3_feature()
39 | 
40 | 


--------------------------------------------------------------------------------
/feature_user.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | @author: Infaraway
 4 | @time: 2018/4/17 17:39
 5 | @Function:
 6 | """
 7 | 
 8 | import pandas as pd
 9 | from sklearn.cluster import KMeans
10 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder
11 | 
12 | 
13 | class User:
14 |     """用户的基本特征"""
15 |     def __init__(self, user_feature, user_data=None):
16 | 
17 |         self.user_feature = user_feature
18 |         self.base_feature = ['LBS', 'age', 'carrier', 'consumptionAbility', 'education', 'gender',
19 |                              'house', 'os', 'ct', 'marriageStatus']
20 | 
21 |         self.vector_feature = ['appIdAction', 'appIdInstall', 'interest1', 'interest2', 'interest3', 'interest4',
22 |                                'interest5', 'kw1', 'kw2', 'kw3', 'topic1', 'topic2', 'topic3']
23 |         if user_data != None:
24 |             print('   -----do base user feature')
25 |             for feat in self.base_feature:
26 |                 try:
27 |                     self.user_feature[feat] = LabelEncoder().fit_transform(user_data[feat].apply(int))
28 |                 except:
29 |                     self.user_feature[feat] = LabelEncoder().fit_transform(user_data[feat])
30 | 
31 |             for feat in self.vector_feature:
32 |                 self.user_feature['len_' + feat] = user_data[feat].apply(lambda x: len(str(x).split(' ')))
33 |                 self.user_feature['len_' + feat] = pd.cut(self.user_feature['len_' + feat], 5, labels=range(5))
34 |         print('------------user base feature process over...')
35 |         print('user feature shape: ', user_feature.shape)
36 | 
37 |     def get_vector_kmeans_feature(self, n_clusters=20, file_path='data/w2v_feature/w2v_all_15'):
38 |         """使用词向量对用户进行聚类操作，作为用户特征"""
39 |         print('------------get_vector_kmeans_feature start...')
40 |         for i, feat in enumerate(self.vector_feature):
41 |             print("    ---get_kmeans_feature: ", feat)
42 |             df_w2vfeat = pd.read_csv(file_path + feat + '.csv')
43 |             k_means = KMeans(n_clusters=n_clusters, n_jobs=-1).fit(df_w2vfeat)
44 |             label = k_means.labels_
45 |             # print(np.shape(label))
46 |             if len(label) == self.user_feature.shape[0]:
47 |                 self.user_feature['kmeans_' + str(n_clusters) + "_" + feat] = label
48 |             else:
49 |                 print('*****************kmeans shape error!!!*****************')
50 |         print('------------get_vector_kmeans_feature end...')
51 |         print()
52 | 
53 |     def get_base_kmeans_feature(self, n_clusters=30):
54 |         """对base的特征进行one-hot之后再做用户聚类，找到聚类特征"""
55 | 
56 |         print('------------get_base_kmeans_feature start...')
57 |         one_hot_feature = pd.DataFrame()
58 |         for feat in self.base_feature:
59 |             one_hot_feature[feat] = OneHotEncoder().fit_transform(self.user_feature[feat])
60 |         k_means = KMeans(n_clusters=n_clusters, n_jobs=10).fit(one_hot_feature)
61 |         label = k_means.labels_
62 | 
63 |         self.user_feature['kmeans_base'] = label
64 | 
65 |         print('------------get_base_kmeans_feature end...')
66 |         print()
67 | 
68 | 
69 | if __name__ == '__main__':
70 |     # userFeature = pd.read_csv('data/raw_data/userFeature.csv')
71 |     # userFeature = userFeature.fillna('-1')
72 |     # user_feature = pd.DataFrame()
73 |     # user_feature['uid'] = userFeature['uid']
74 | 
75 |     user_feature = pd.read_csv('data/feature_data/clean_user_feature.csv')
76 |     # vector_feature = ['appIdAction', 'appIdInstall', 'interest1', 'interest2', 'interest3', 'interest4',
77 |     #                   'interest5', 'kw1', 'kw2', 'kw3', 'topic1', 'topic2', 'topic3']
78 | 
79 |     # for feat in vector_feature:
80 |     #
81 |     #     user_feature['len_' + feat] = pd.cut(user_feature['len_' + feat], 5, labels=range(5))
82 |     user_feature = user_feature['aid']
83 |     user = User(user_feature)
84 |     user.get_vector_kmeans_feature()
85 |     # user.get_base_kmeans_feature()
86 |     # print(user_feature.shape)
87 |     # len_feature = ['len_interest1', 'len_interest2', 'len_interest5', 'len_kw3']
88 |     # for feat in len_feature:
89 |     #     print('this is ', feat)
90 |     #     user_feature[feat] = pd.cut(user_feature[feat], 5, labels=range(5))
91 |     #     # user_feature[feat] = LabelEncoder().fit_transform(user_feature[feat])
92 |     # print('write..')
93 |     # user_feature.to_csv('data/feature_data/clean_user_feature2.csv', index=False)
94 |     # print('end')
95 | 
96 | 
97 | 
98 | 


--------------------------------------------------------------------------------
/one_hot_baseline.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | @author: Infaraway
  4 | @time: 2018/4/17 17:39
  5 | @Function:
  6 | """
  7 | 
  8 | from scipy import sparse
  9 | 
 10 | import pandas as pd
 11 | from sklearn.feature_extraction.text import CountVectorizer
 12 | from sklearn.model_selection import train_test_split
 13 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder
 14 | import lightgbm as lgb
 15 | 
 16 | import numpy as np
 17 | 
 18 | import warnings
 19 | 
 20 | from Tencent_AD2018.tencent.models import base_model, lgbCV
 21 | 
 22 | warnings.filterwarnings("ignore")
 23 | 
 24 | 
 25 | def LGB_predict(train_x, train_y, test_x, res):
 26 |     print("LGB test")
 27 |     clf = lgb.LGBMClassifier(
 28 |         boosting_type='gbdt', num_leaves=31, reg_alpha=0.0, reg_lambda=1,
 29 |         max_depth=-1, n_estimators=1500, objective='binary',
 30 |         subsample=0.7, colsample_bytree=0.7, subsample_freq=1,
 31 |         learning_rate=0.05, min_child_weight=50, random_state=2018, n_jobs=-1
 32 |     )
 33 |     clf.fit(train_x, train_y, eval_set=[(train_x, train_y)], eval_metric='auc', early_stopping_rounds=100)
 34 |     res['score'] = clf.predict_proba(test_x)[:, 1]
 35 |     res['score'] = res['score'].apply(lambda x: float('%.6f' % x))
 36 |     res.to_csv('data/submit_ffm_05_10.csv', index=False)
 37 | 
 38 |     return clf
 39 | 
 40 | 
 41 | def get_one_hot_feature(data):
 42 | 
 43 |     one_hot_feature = ['LBS', 'age', 'carrier', 'consumptionAbility', 'education', 'gender', 'house', 'os', 'ct',
 44 |                        'marriageStatus', 'advertiserId', 'campaignId', 'creativeId',
 45 |                        'adCategoryId', 'productId', 'productType']
 46 |     vector_feature = ['interest1', 'interest2', 'interest5', 'kw1', 'kw2', 'topic1', 'topic2']
 47 |     vector_feature = ['appIdAction', 'appIdInstall', 'interest1', 'interest2', 'interest3', 'interest4', 'interest5',
 48 |                       'kw1', 'kw2', 'kw3', 'topic1', 'topic2', 'topic3']
 49 | 
 50 |     print(data.label.unique())
 51 |     train = data[data.label.notnull()]
 52 |     test = data[data.label.isnull()]
 53 | 
 54 |     print(train.label.unique())
 55 |     print(test.label.unique())
 56 |     print(data.shape, train.shape, test.shape)
 57 | 
 58 |     train_y = train.pop('label').values
 59 | 
 60 |     del train['uid']
 61 |     res = test[['aid', 'uid']]
 62 |     test = test.drop('label', axis=1)
 63 |     clean_feature = [feat for feat in train if feat not in vector_feature]
 64 |     vector_feature = ['interest1', 'interest2', 'interest5', 'kw1', 'kw2', 'topic1', 'topic2']
 65 |     print(clean_feature)
 66 |     train_x = train[['creativeSize']].values
 67 |     test_x = test[['creativeSize']].values
 68 | 
 69 |     enc = OneHotEncoder()
 70 |     print('-----one-hot prepared:')
 71 |     for feature in clean_feature:
 72 |         print('     ---feature: ', feature)
 73 |         enc.fit(data[feature].values.reshape(-1, 1))
 74 |         train_a = enc.transform(train[feature].values.reshape(-1, 1))
 75 |         test_a = enc.transform(test[feature].values.reshape(-1, 1))
 76 |         train_x = sparse.hstack((train_x, train_a))
 77 |         test_x = sparse.hstack((test_x, test_a))
 78 |     print('-----cv prepared:')
 79 |     cv = CountVectorizer(token_pattern='(?u)\\b\\w+\\b')
 80 |     cv = CountVectorizer()
 81 |     for feature in vector_feature:
 82 |         print('     ---feature: ', feature)
 83 |         # data[feature] =data[feature].apply(str)
 84 |         cv.fit(data[feature])
 85 |         train_a = cv.transform(train[feature])
 86 |         test_a = cv.transform(test[feature])
 87 |         train_x = sparse.hstack((train_x, train_a))
 88 |         test_x = sparse.hstack((test_x, test_a))
 89 | 
 90 |     # sparse.save_npz("train.npz", train)
 91 |     # sparse.save_npz("train_y.npz", train_y)
 92 |     # sparse.save_npz("test_x.npz", train_y)
 93 | 
 94 |     # print(np.shape(train_x), np.shape(train_y), np.shape(test_x))
 95 |     return train_x, train_y, test_x, res
 96 | 
 97 | 
 98 | if __name__ == '__main__':
 99 |     print('------------------------read data :')
100 |     df_train = pd.read_csv('data/raw_data/train.csv')
101 |     df_test = pd.read_csv('data/raw_data/test2.csv')
102 |     df_train['label'] = df_train['label'].apply(lambda x: 0 if x == -1 else x)
103 |     user_feature = pd.read_csv('data/feature_data/clean_user_feature.csv')
104 |     ad_feature = pd.read_csv('data/feature_data/clean_ad_feature.csv')
105 |     data = pd.concat([df_train, df_test])
106 |     data = pd.merge(data, user_feature, on=['uid'], how='left')
107 |     data = pd.merge(data, ad_feature, on=['aid'], how='left')
108 |     print('user_feature.shape:', user_feature.shape)
109 |     print('ad_feature.shape:', ad_feature.shape)
110 | 
111 |     # cross_feature = pd.read_csv('data/feature_data/cross_feature.csv')
112 |     # data = pd.merge(data, cross_feature, on=['aid', 'uid'], how='left')
113 |     # print('cross_feature.shape:', cross_feature.shape)
114 | 
115 |     # cross_feature3 = pd.read_csv('data/feature_data/cross_feature3_probe.csv')
116 |     # data = pd.merge(data, cross_feature3, on=['aid', 'uid'], how='left')
117 |     # print('cross_feature3.shape:', cross_feature3.shape)
118 | 
119 |     # nlp_feature = pd.read_csv('data/feature_data/nlp_feature.csv')
120 |     # data = pd.merge(data, nlp_feature, on=['uid'], how='left')
121 |     # print('nlp_feature.shape:', nlp_feature.shape)
122 | 
123 |     # one_hot_feature = pd.read_csv('data/raw_data/userFeature.csv')
124 |     # vector_feature = ['uid', 'interest1', 'interest2', 'interest5', 'kw1', 'kw2', 'topic1', 'topic2']
125 |     #
126 |     # one_hot_feature = one_hot_feature[vector_feature]
127 |     # one_hot_feature = one_hot_feature.fillna('-1')
128 |     # data = pd.merge(data, one_hot_feature, on=['uid'], how='left')
129 |     # print('one_hot_feature.shape:', one_hot_feature.shape)
130 | 
131 |     one_hot_feature = pd.read_csv('data/raw_data/userFeature_kmeans.csv')
132 |     one_hot_feature = one_hot_feature.fillna('-1')
133 |     data = pd.merge(data, one_hot_feature, on=['uid'], how='left')
134 |     print('one_hot_feature.shape:', one_hot_feature.shape)
135 | 
136 |     # features = ['kw1', 'kw2', 'topic1', 'topic2']
137 |     # for feat in features:
138 |     #     kmeans_feature = pd.read_csv('data/w2v_feature/w2v_15' + feat + '.csv')
139 |     #     data = pd.merge(data, kmeans_feature, on=['uid'], how='left')
140 |     #     print('kmeans_feature.shape:', kmeans_feature.shape)
141 | 
142 |     # data = pd.read_csv('data/raw_data/data_5%.csv')
143 |     print('--------------------one_hot data-----------------------------------')
144 |     train_X, train_y, test_X, res = get_one_hot_feature(data)
145 |     print('--------------------train lgb model -------------------------------')
146 |     # LGB_predict(train_x, train_y, test_x, res)
147 |     train_x, test_x, train_y, test_y = train_test_split(train_X, train_y, test_size=0.002, random_state=2018)
148 | 
149 |     # lgbCV(train_x, test_x, train_y, test_y, 'null')
150 | 
151 |     base_model(train_x, train_y, test_X, res)
152 | 
153 |     # FM = pylibfm.FM(num_factors=500, num_iter=10, verbose=True, task="classification",
154 |     #                 initial_learning_rate=0.01, learning_rate_schedule="optimal")
155 |     # FM.fit(train_x, train_y)
156 | 
157 |     print('end....')


--------------------------------------------------------------------------------
/util_base.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | @author: Infaraway
 4 | @time: 2018/4/17 17:49
 5 | @Function:
 6 | """
 7 | 
 8 | import pandas as pd
 9 | from sklearn.model_selection import train_test_split
10 | from sklearn.preprocessing import LabelEncoder
11 | import numpy as np
12 | 
13 | def get_samell_data():
14 |     print('--------------------read data---------------------------------------')
15 |     df_train = pd.read_csv('data/train_small_20%.csv')
16 |     # df_train = pd.read_csv('data/raw_data/train.csv')
17 |     # df_test = pd.read_csv('data/raw_data/test1.csv')
18 |     df_userFeature = pd.read_csv('data/raw_data/userFeature.csv')
19 |     df_adFeature = pd.read_csv('data/raw_data/adFeature.csv')
20 |     df_train['label'] = df_train['label'].apply(lambda x: 0 if x == -1 else x)
21 | 
22 |     print('--------------------merge data-------------------------------------')
23 |     # data = pd.concat([df_train, df_test])
24 |     data = pd.merge(df_train, df_userFeature, on=['uid'], how='left')
25 |     data = pd.merge(data, df_adFeature, on=['aid'], how='left')
26 | 
27 |     # print(df_train.label.count())
28 |     # print(df_train[df_train.label == 1].count())
29 |     # train_y = df_train.pop('label')
30 |     # #
31 |     # X_train, X_test, y_train, y_test = train_test_split(df_train.values, train_y.values, test_size=0.2)
32 |     # #
33 |     # data = pd.DataFrame(X_test, columns=['aid', 'uid'])
34 |     # data['label'] = y_test
35 |     # print(len(data.aid.unique()))
36 |     data.to_csv('data/data_20%.csv', index=False)
37 | 
38 | 
39 | if __name__ == '__main__':
40 |     # data = pd.read_csv('data/raw_data/userFeature.csv')
41 |     # data = pd.read_csv('data/raw_data/train.csv')
42 |     # get_samell_data()
43 |     data = pd.read_csv('data/data_5%_k_base.csv', encoding='utf-8')
44 |     data = data.fillna('-1')
45 |     colunms = data.columns.values
46 |     row = data.shape[0]
47 |     test_data = data.sample(int(row * 0.2))
48 |     all_ = pd.concat([data, test_data])
49 |     train_data = all_.drop_duplicates(keep=False)
50 |     print(data.shape, test_data.shape, train_data.shape)
51 |     # data.to_csv('data/data_5%_fillna.csv', index=False)
52 |     # print(test_data[test_data.label==1].count())
53 |     train_data.to_csv('train_5%_data.csv', index=False)
54 |     test_data.to_csv('test_5%_data.csv', index=False)
55 | 


--------------------------------------------------------------------------------
/util_convert_ffm.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | @author: Infaraway
 4 | @time: 2018/5/7 20:09
 5 | @Function:
 6 | """
 7 | 
 8 | # coding=utf-8
 9 | 
10 | import hashlib
11 | 
12 | 
13 | def hashstr(str, nr_bins):
14 |     return int(hashlib.md5(str.encode('utf8')).hexdigest(), 16) % (nr_bins - 1) + 1
15 | 
16 | 
17 | def gen_hashed_fm_feats(feats, nr_bins=int(1e+6)):
18 |     feats = ['{0}:{1}:1'.format(field - 1, hashstr(feat, nr_bins)) for (field, feat) in feats]
19 |     return feats
20 | 
21 | 
22 | def get_data():
23 |     one_hot_feature = ['LBS', 'age', 'carrier', 'consumptionAbility', 'education', 'gender', 'house', 'os', 'ct',
24 |                        'marriageStatus', 'aid', 'advertiserId', 'campaignId', 'creativeId',
25 |                        'adCategoryId', 'productId', 'productType']
26 |     vector_feature = ['appIdAction', 'appIdInstall', 'interest1', 'interest2', 'interest3', 'interest4',
27 |                       'interest5', 'kw1', 'kw2', 'kw3', 'topic1', 'topic2', 'topic3']
28 | 
29 |     drop_feature = ['uid', 'label']
30 |     print("reading data")
31 |     f = open('test_5%_data.csv', 'r')
32 |     line = f.readline().strip()
33 |     features = line.split(',')
34 | 
35 |     print(features)
36 |     dict = {}
37 |     num = 0
38 |     for line in f:
39 |         datas = line.strip().split(',')
40 |         for i, d in enumerate(datas):
41 |             if not dict.__contains__(features[i]):
42 |                 dict[features[i]] = []
43 |             dict[features[i]].append(d)
44 |         num += 1
45 | 
46 |     f.close()
47 | 
48 |     print("transforming data")
49 |     ftrain = open('data/testtest.ffm', 'w')
50 | 
51 |     for i in range(num):
52 |         feats = []
53 |         for j, f in enumerate(one_hot_feature, 1):
54 |             field = j
55 |             print('-----------dict[f][i]:', field, dict[f][i])
56 |             feats.append((field, f + '_' + dict[f][i]))
57 | 
58 |         for j, f in enumerate(vector_feature, 1):
59 |             field = j + len(one_hot_feature)
60 |             xs = dict[f][i].split(' ')
61 |             print('-----------xs:', xs)
62 |             for x in xs:
63 |                 feats.append((field, f + '_' + x))
64 | 
65 |         feats = gen_hashed_fm_feats(feats)
66 |         ftrain.write(dict['label'][i] + ' ' + ' '.join(feats) + '\n')
67 |         # print(dict['label'][i] + ' ' + ' '.join(feats) + '\n')
68 | 
69 |     ftrain.close()
70 | 
71 | 
72 | if __name__ == '__main__':
73 | 
74 |     #  ../../libffm/ffm-train -l 0.00002 -k 8 -r 0.05 -s 30 -t 25 -p valid.ffm train1.ffm v1_model
75 |     get_data()


--------------------------------------------------------------------------------
/util_feature_selcet.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | @author: Infaraway
  4 | @time: 2018/5/15 15:06
  5 | @Function:
  6 | """
  7 | import random
  8 | 
  9 | import lightgbm as lgb
 10 | import time
 11 | from sklearn.metrics import auc, roc_auc_score
 12 | from sklearn.model_selection import train_test_split, StratifiedKFold
 13 | import pandas as pd
 14 | import warnings
 15 | 
 16 | warnings.filterwarnings("ignore")
 17 | import numpy as np
 18 | 
 19 | 
 20 | class Feature_selection:
 21 |     def __init__(self, data, base_feature, test_feature):
 22 | 
 23 |         self.data = data
 24 |         self.test_features = test_feature
 25 |         self.base_feature = base_feature
 26 |         self.best_features = []
 27 |         print('len base_feature: ', len(self.base_feature), '  len test feature: ', len(self.test_features))
 28 | 
 29 |     def _base_classifier(self):
 30 |         clf = lgb.LGBMClassifier(boosting_type='gbdt', num_leaves=31, reg_alpha=0.0, reg_lambda=1,
 31 |                                  max_depth=-1, n_estimators=100, objective='binary',
 32 |                                  subsample=0.7, colsample_bytree=0.7, subsample_freq=1,
 33 |                                  learning_rate=0.05, min_child_weight=50, random_state=2018, n_jobs=20)
 34 |         return clf
 35 | 
 36 |     def _get_features_score(self, features):
 37 |         # 计算三次结果的最好得分作为最终的结果，尽量消除由随机带来的误差
 38 |         best_scores = []
 39 |         model = self._base_classifier()
 40 |         X_train, X_test, y_train, y_test = train_test_split(self.data[features].values, self.data['label'].values,
 41 |                                                             test_size=0.5)
 42 |         skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=1024)
 43 |         for i, (train_i, test_i) in enumerate(skf.split(X_train, y_train)):
 44 |             model.fit(X_train[train_i], y_train[train_i])
 45 |             y_pred = model.predict_proba(X_train[test_i])[:, 1]
 46 |             score = roc_auc_score(y_train[test_i], y_pred)
 47 |             best_scores.append(score)
 48 |         return np.mean(best_scores)
 49 | 
 50 |     def find_best_features(self):
 51 |         feat_num = len(self.test_features)
 52 | 
 53 |         try:
 54 |             for num_round in range(5):
 55 |                 test_features = self.test_features[:]
 56 |                 now_feature = self.base_feature[:]
 57 |                 now_scores = self._get_features_score(self.base_feature)
 58 |                 print('-------------now best features len: ', len(self.best_features), 'best scores: ', now_scores,
 59 |                       '-------------this is round 5 \ :', num_round)
 60 |                 for i in range(feat_num):
 61 |                     start = time.time()
 62 |                     feat = test_features[random.randint(0, len(test_features) - 1)]
 63 |                     print('    --- this is feature:', feat, '  ---this is: ', i, ' / ', len(self.test_features))
 64 |                     now_feature.append(feat)
 65 |                     new_scores = self._get_features_score(now_feature)
 66 |                     # print('        --- now_scores:', now_scores, '----(now - best) score : ', new_scores - now_scores)
 67 |                     if (new_scores - now_scores) > 0.0001:
 68 |                         now_scores = new_scores
 69 |                         if feat not in self.best_features:
 70 |                             self.best_features.append(feat)
 71 |                             print('        ---now best features len: ', len(self.best_features), '   ---best scores: ',
 72 |                                   now_scores)
 73 |                     else:
 74 |                         now_feature.remove(feat)
 75 |                     test_features.remove(feat)
 76 |                     print('    ----time min:', (time.time() - start) / 60)
 77 |         except Exception as e:
 78 |             print('************************** Error ************************************')
 79 |             print(e)
 80 |             print('*********************************************************************')
 81 |         finally:
 82 |             print('       ---best features len: ', len(self.best_features))
 83 |             print(self.best_features)
 84 | 
 85 | 
 86 | def get_data():
 87 |     print('------------------------read data :')
 88 |     df_train = pd.read_csv('data/raw_data/train.csv')
 89 |     df_test = pd.read_csv('data/raw_data/test1.csv')
 90 |     df_train['label'] = df_train['label'].apply(lambda x: 0 if x == -1 else x)
 91 |     user_feature = pd.read_csv('data/feature_data/clean_user_feature.csv')
 92 |     ad_feature = pd.read_csv('data/feature_data/clean_ad_feature.csv')
 93 |     data = pd.concat([df_train, df_test])
 94 |     data = pd.merge(data, user_feature, on=['uid'], how='left')
 95 |     data = pd.merge(data, ad_feature, on=['aid'], how='left')
 96 |     print('user_feature.shape:', user_feature.shape)
 97 |     print('ad_feature.shape:', ad_feature.shape)
 98 | 
 99 |     # cross_feature = pd.read_csv('data/feature_data/cross_feature_probe.csv')
100 |     # data = pd.merge(data, cross_feature, on=['aid', 'uid'], how='left')
101 |     # print('cross_feature.shape:', cross_feature.shape)
102 | 
103 |     # cross_feature3 = pd.read_csv('data/feature_data/cross_feature3_probe.csv')
104 |     # data = pd.merge(data, cross_feature3, on=['aid', 'uid'], how='left')
105 |     # print('cross_feature3.shape:', cross_feature3.shape)
106 | 
107 |     # nlp_feature = pd.read_csv('data/feature_data/nlp_feature.csv')
108 |     # data = pd.merge(data, nlp_feature, on=['uid'], how='left')
109 |     # print('nlp_feature.shape:', nlp_feature.shape)
110 | 
111 |     kmeans_feature = pd.read_csv('data/feature_data/kmeans_feature.csv')
112 |     data = pd.merge(data, kmeans_feature, on=['uid'], how='left')
113 |     print('kmeans_feature.shape:', kmeans_feature.shape)
114 | 
115 |     # features = ['kw1', 'kw2', 'topic1', 'topic2']
116 |     # for feat in features:
117 |     #     kmeans_feature = pd.read_csv('data/w2v_feature/w2v_15' + feat + '.csv')
118 |     #     data = pd.merge(data, kmeans_feature, on=['uid'], how='left')
119 |     #     print('kmeans_feature.shape:', kmeans_feature.shape)
120 | 
121 |     train = data[data.label.notnull()]
122 |     return train
123 | 
124 | 
125 | if __name__ == '__main__':
126 |     data = get_data()
127 | 
128 |     base_feature = ['LBS', 'age', 'carrier', 'consumptionAbility', 'education', 'gender', 'house', 'os', 'ct',
129 |                     'marriageStatus', 'advertiserId', 'campaignId', 'creativeId', 'adCategoryId', 'productId',
130 |                     'productType', 'len_interest1', 'len_interest2', 'len_interest5', 'len_kw3', 'aid']
131 | 
132 |     vector_feature = ['appIdAction', 'appIdInstall', 'interest1', 'interest2', 'interest3', 'interest4', 'interest5',
133 |                       'kw1', 'kw2', 'kw3', 'topic1', 'topic2', 'topic3']
134 |     remove_feature = ['uid', 'label']
135 |     out_feature = base_feature + vector_feature + remove_feature
136 | 
137 |     test_features = [feat for feat in data.columns.tolist() if feat not in out_feature]
138 |     print(test_features)
139 |     feature_select = Feature_selection(data, base_feature, test_features)
140 |     feature_select.find_best_features()
141 |     # print(feature_select.best_features)


--------------------------------------------------------------------------------
/util_models.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | @author: Infaraway
 4 | @time: 2018/4/17 18:07
 5 | @Function:
 6 | """
 7 | import datetime
 8 | 
 9 | import pandas as pd
10 | import lightgbm as lgb
11 | from sklearn.model_selection import StratifiedKFold, train_test_split
12 | from sklearn.metrics import log_loss
13 | import json
14 | 
15 | class Model:
16 | 
17 |     def __init__(self, train_x, train_y, feaures_name, test_x=None, res=None):
18 |         """
19 |         :param train_x:
20 |         :param train_y:
21 |         :param test_x: 测试集特征
22 |         :param res: 预测的信息
23 |         :param feaures_name: 特征名称：用户输出重要程度
24 |         """
25 |         self.features = feaures_name
26 |         self.train_x = train_x
27 |         self.train_y = train_y
28 |         self.test_x = test_x
29 |         self.res = res
30 | 
31 |         self.base_model = self._base_model()
32 | 
33 |     def feature_impt_model(self):
34 |         X_train, X_test, y_train, y_test = train_test_split(self.train_x, self.train_y, test_size=0.2)
35 | 
36 |         print('-------------------feature_impt_model:')
37 |         self.base_model.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_test,  y_test)],
38 |                             eval_metric='auc', early_stopping_rounds=100)
39 |         best_iter = self.base_model.best_iteration_
40 |         print("   ---best_iter:", best_iter)
41 | 
42 |         predictors = [i for i in self.features]
43 |         feat_imp = pd.Series(self.base_model.feature_importances_, predictors).sort_values(ascending=False)
44 |         print(feat_imp)
45 |         now = datetime.datetime.now()
46 |         now = now.strftime('%m-%d-%H-%M')
47 |         feat_imp.to_csv('data/feature_data/feature_impt' + str(now) + '.csv')
48 | 
49 |     def kfold_model(self, n_folds=3):
50 |         print('------------------get kfold_model result --------------------------')
51 |         self.train_x, X_test, self.train_y, y_test = train_test_split(self.train_x, self.train_y, test_size=0.002)
52 |         skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=1024)
53 |         for i, (train_i, test_i) in enumerate(skf.split(self.train_x, self.train_y)):
54 |             print('---fold: ', i)
55 |             self.base_model.fit(self.train_x[train_i], self.train_y[train_i], eval_metric='auc',
56 |                                 eval_set=[(self.train_x[train_i], self.train_y[train_i]),
57 |                                           (self.train_x[test_i], self.train_y[test_i])],
58 |                                 early_stopping_rounds=100)
59 | 
60 |             pred = self.base_model.predict_proba(self.test_x, num_iteration=-1)[:, 1]
61 |             self.res['probe_' + str(i)] = pred
62 | 
63 |         print('----------------------predict result --------------------------')
64 |         self.res.to_csv('result_cv.csv', index=False)
65 |         self.res['score'] = self.res['probe_0']
66 |         for i in range(1, n_folds):
67 |             self.res['score'] += self.res['probe_' + str(i)]
68 |         self.res['score'] = self.res['score'] / n_folds
69 |         self.res['score'] = self.res['score'].apply(lambda x: round(x, 7))
70 | 
71 |         result = self.res[['aid', 'uid', 'score']]
72 |         now = datetime.datetime.now()
73 |         now = now.strftime('%m-%d-%H-%M')
74 |         result.to_csv('lgb_kfold_' + str(now) + '.csv', index=False)
75 | 
76 |     def _base_model(self):
77 |         base_model = lgb.LGBMClassifier(
78 |             boosting_type='gbdt', num_leaves=31, reg_alpha=0.01, reg_lambda=0.05,
79 |             max_depth=6, n_estimators=1500, objective='binary',
80 |             subsample=0.9, colsample_bytree=0.8, subsample_freq=1,
81 |             learning_rate=0.1, min_child_weight=50, random_state=2018, n_jobs=-1
82 |         )
83 |         return base_model
84 | 


--------------------------------------------------------------------------------
/util_vectorTokmeans.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | @author: Infaraway
 4 | @time: 2018/4/19 12:03
 5 | @Function:
 6 | """
 7 | 
 8 | import pickle
 9 | import pandas as pd
10 | from gensim.models.word2vec import Word2Vec
11 | from sklearn.cluster import KMeans
12 | 
13 | 
14 | def get_kmeans_feature(test, n_clusters=10):
15 |     print('-------------------------get_kmeans_feature--------')
16 |     vector_feature = ['appIdAction', 'appIdInstall', 'interest1', 'interest2', 'interest3', 'interest4', 'interest5',
17 |                       'kw1', 'kw2', 'kw3', 'topic1', 'topic2', 'topic3']
18 |     vector_feature = ['interest1', 'interest2', 'interest5', 'kw1', 'kw2', 'topic1', 'topic2']
19 | 
20 |     data = test[vector_feature]
21 |     data['uid'] = test['uid']
22 |     for i, feature in enumerate(vector_feature):
23 |         print("------this is feature: ", feature)
24 |         model = Word2Vec.load('data/w2v_model/w2v_20' + feature + '.mod')
25 |         word_vector = model.wv.syn0
26 |         if word_vector.shape[0] < 1000:
27 |             num_clusters = 25
28 |         elif word_vector.shape[0] < 50000:
29 |             num_clusters = 250
30 |         else:
31 |             num_clusters = 500
32 | 
33 |         kmeans_clustering = KMeans(n_clusters=num_clusters, n_jobs=-1)
34 |         idx = kmeans_clustering.fit_predict(word_vector)
35 |         word_centroid_map = dict(zip(model.wv.index2word, idx))
36 | 
37 |         filename = 'data/w2v_model/' + feature + 'word_to_idx_map.pickle'
38 |         with open(filename, 'bw') as f:
39 |             pickle.dump(word_centroid_map, f)
40 | 
41 |         def get_idx(x, word_centroid_map):
42 |             id_set = set([str(word_centroid_map[word]) for word in x if word_centroid_map.__contains__(word)])
43 |             return ' '.join(id_set)
44 | 
45 |         data[feature] = data[feature].apply(lambda x: str(x).split(' '))
46 |         data[feature] = data[feature].apply(lambda x: get_idx(x, word_centroid_map))
47 |     return data
48 | 
49 | 
50 | if __name__ == '__main__':
51 |     data = pd.read_csv('data/raw_data/userFeature.csv')
52 |     # test = pd.read_csv('data/raw_data/userFeature_kmeans.csv')
53 |     data = get_kmeans_feature(data)
54 |     data.to_csv('data/raw_data/userFeature_kmeans.csv', index=False)
55 |     # print(data.shape)
56 |     # model = Word2Vec.load('data/w2v_model/interest115_w2v.mod')
57 |     # print(model.most_similar('11'))
58 |     print('end...')
59 | 
60 | 


--------------------------------------------------------------------------------
/util_word2vec.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | @author: Infaraway
  4 | @time: 2018/4/19 12:03
  5 | @Function:
  6 | """
  7 | import collections
  8 | 
  9 | import gc
 10 | from gensim.models.word2vec import Word2Vec
 11 | import numpy as np
 12 | import pandas as pd
 13 | 
 14 | 
 15 | class Feature2vec:
 16 | 
 17 |     def __init__(self, data, size=15):
 18 |         self.size = size
 19 |         data = data.fillna('-1')
 20 | 
 21 |         # self.feature = ['interest1', 'interest2', 'interest5', 'kw1', 'kw2', 'topic1', 'topic2']
 22 |         self.feature = ['topic2']
 23 | 
 24 |         self.w2v_feature = pd.DataFrame()
 25 |         self.w2v_feature['uid'] = data['uid']
 26 |         for feat in self.feature:
 27 |             self.w2v_feature[feat] = data[feat]
 28 | 
 29 |     def get_feature(self):
 30 |         for feature in self.feature:
 31 |             print("this is feature:", feature)
 32 |             # self._select_topk(feature)
 33 |             self.w2v_feature[feature] = self.w2v_feature[feature].apply(lambda x: str(x).split(' '))
 34 |             model = Word2Vec(self.w2v_feature[feature], size=self.size, min_count=1, window=2, iter=5, workers=32)
 35 |             # model = Word2Vec.load('model/' + str(self.size) + feature +  '15_w2v.mod')
 36 |             model.save('data/w2v_model/w2v_' + str(self.size) + feature + '.mod')
 37 | 
 38 |             data_vec = []
 39 |             for row in self.w2v_feature[feature]:
 40 |                 data_vec.append(self._base_word2vec(row, model))
 41 |             column_names = []
 42 |             for i in range(self.size):
 43 |                 column_names.append(feature + str(i))
 44 | 
 45 |             data_vec = pd.DataFrame(data_vec, columns=column_names)
 46 |             data_vec['uid'] = self.w2v_feature['uid']
 47 |             data_vec = data_vec.round(4)
 48 |             data_vec.to_csv("data/w2v_feature/w2v_all_" + str(self.size) + feature + '.csv', index=False)
 49 |             gc.collect()
 50 | 
 51 | 
 52 |     def _base_word2vec(self, x, model):
 53 |         vec = np.zeros(self.size)
 54 |         # x = [word for word in x if model.__contains__(word)]
 55 |         for item in x:
 56 |             vec += model.wv[item]
 57 |         if len(x) == 0:
 58 |             return vec
 59 |         else:
 60 |             return vec / len(x)
 61 | 
 62 |     def _select_topk(self, feature):
 63 |         word_list = []
 64 |         for line in self.w2v_feature[feature]:
 65 |             words = str(line).split(' ')
 66 |             word_list += words
 67 |         result = collections.Counter(word_list)
 68 |         size = len(result)
 69 |         result = result.most_common(int(size))
 70 | 
 71 |         print(result[0], result[int(size * 0.01)], result[int(size * 0.05)], result[int(size * 0.1)],
 72 |               result[int(size * 0.2)], result[int(size * 0.3)], result[int(size * 0.4)], result[int(size * 0.5)])
 73 |         result = [res for res in result if 1000 < res[1] < 1000000]
 74 | 
 75 |         word_dict = {}
 76 |         for re in result:
 77 |             word_dict[re[0]] = 1
 78 | 
 79 |         self.w2v_feature[feature] = self.w2v_feature[feature].apply(
 80 |             lambda x: ' '.join([word for word in str(x).split(' ') if word_dict.__contains__(word)]))
 81 | 
 82 |     def get_topk(self):
 83 |         new_pd = pd.DataFrame()
 84 |         vector_feature = ['interest1', 'interest2', 'interest5', 'kw1', 'kw2', 'topic1', 'topic2']
 85 |         # vector_feature = ['kw2']
 86 |         for feature in vector_feature:
 87 |             print("this is feature:", feature)
 88 | 
 89 |             self.data[feature] = self.data[feature].apply(lambda x: str(x).split(' '))
 90 |             word_dict = self.select_topk(self.data[feature])
 91 |             new_pd[feature] = self.data[feature].apply(
 92 |                 lambda x: ' '.join([word for word in x if word_dict.__contains__(word)]))
 93 |             temp = new_pd[feature].apply(lambda x: len(str(x).split(' ')))
 94 |             print(temp.describe())
 95 | 
 96 |         new_pd.to_csv('./data/data_top50%feature.csv', index=False)
 97 | 
 98 | 
 99 | if __name__ == '__main__':
100 |     data = pd.read_csv('data/raw_data/userFeature.csv')
101 |     # data = pd.read_csv('data/data_5%.csv')
102 |     word_vec = Feature2vec(data, size=20)
103 |     word_vec.get_feature()
104 |     # word_vec.w2v_feature.to_csv('data/raw_data/user_w2vFeature.csv', index=False)
105 |     data = data.fillna('-1')
106 | 
107 |     # features = ['uid', 'interest1', 'interest2', 'interest5', 'kw1', 'kw2', 'topic1', 'topic2']
108 |     # data = data[features]
109 |     # print(data.shape)
110 |     # all_ = data.shape[0]
111 |     # for feat in features:
112 |     #     count = data[data[feat] == '-1'].shape[0]
113 |     #     print(feat, count / all_)
114 |     # data.to_csv('data/feature_data/user_w2vFeature_all.csv', index=False)
115 | 


--------------------------------------------------------------------------------