├── Recommender-systems
│   ├── readme
│   ├── pmf.m
│   ├── bayespmf.m
│   ├── demo.m
│   ├── pred.m
│   ├── makematrix.m
│   ├── README.txt
│   ├── 深度学习预测.py
│   └── shape.ipynb
├── README.md
├── prediction.csv
└── catdog.ipynb
/Recommender-systems/readme: -------------------------------------------------------------------------------- 1 | DataCastle "Guess What You Like" (猜你喜欢) competition, second-place code share 2 | -------------------------------------------------------------------------------- /Recommender-systems/pmf.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/AlexNet-based-on-keras/master/Recommender-systems/pmf.m -------------------------------------------------------------------------------- /Recommender-systems/bayespmf.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/AlexNet-based-on-keras/master/Recommender-systems/bayespmf.m -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DataCastle 2 | 3 | This is the sharing area for the Cats vs. Dogs competition. 4 | 5 | Download the html file and open it in a browser to view it. 6 | 7 | Download the ipynb file and it can be run directly in Jupyter Notebook. 8 | 9 | prediction.csv is the final submission for this competition. 10 | 11 | -- 12 | Suggestions for improvement are welcome. 13 | -------------------------------------------------------------------------------- /Recommender-systems/demo.m: -------------------------------------------------------------------------------- 1 | %restart=1; 2 | %fprintf(1,'Running Probabilistic Matrix Factorization (PMF) \n'); 3 | %pmf 4 | 5 | restart=1; 6 | fprintf(1,'\nRunning Bayesian PMF\n'); 7 | bayespmf 8 | 9 | -------------------------------------------------------------------------------- /Recommender-systems/pred.m: -------------------------------------------------------------------------------- 1 | function [pred_out] = pred(w1_M1_sample,w1_P1_sample,N,mean_rating) 2 | 3 | %%% Make predictions on the validation data 4 | 5 | aa_p = double(N(:,1)); 6 | aa_m = double(N(:,2)); 7 | rating = double(N(:,3)); 8 | 9 | pred_out = sum(w1_M1_sample(aa_m,:).*w1_P1_sample(aa_p,:),2) + mean_rating; 10 | ff = find(pred_out>5); pred_out(ff)=5; 11 | ff = find(pred_out<1); pred_out(ff)=1; 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /Recommender-systems/makematrix.m: -------------------------------------------------------------------------------- 1 | % Version 1.000 2 | % 3 | % Code provided by Ruslan Salakhutdinov 4 | % 5 | % Permission is granted for anyone to copy, use, modify, or distribute this 6 | % program and accompanying programs and documents for any purpose, provided 7 | % this copyright notice is retained and prominently displayed, along with 8 | % a note saying that the original programs are available from our 9 | % web page. 10 | % The programs and documents are distributed without any warranty, express or 11 | % implied. As the programs were written for research purposes only, they have 12 | % not been tested to the degree that would be advisable in any important 13 | % application. All use of these programs is entirely at the user's own risk. 14 | 15 | 16 | 17 | %% Create a matrix of size num_p by num_m from triplets {user_id, movie_id, rating} 18 | 19 | load train 20 | 21 | num_m = 14726; 22 | num_p = 223970; 23 | count = sparse(num_p,num_m); %for Netflix-sized data, use a sparse matrix instead.
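% (Comment added for clarity; not in the original file.) The loop below
% visits one movie at a time, finds every triplet whose movie_id equals mm,
% and scatters those ratings into column mm of the sparse count matrix.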
24 | 25 | for mm=1:num_m 26 | ff= find(M(:,2)==mm); 27 | fprintf(1, '\n %d / %d \t \n', mm,num_m); 28 | count(M(ff,1),mm) = M(ff,3); 29 | end 30 | 31 | save makematrix count 32 | -------------------------------------------------------------------------------- /Recommender-systems/README.txt: -------------------------------------------------------------------------------- 1 | % Code provided by Ruslan Salakhutdinov 2 | % 3 | % Permission is granted for anyone to copy, use, modify, or distribute this 4 | % program and accompanying programs and documents for any purpose, provided 5 | % this copyright notice is retained and prominently displayed, along with 6 | % a note saying that the original programs are available from our 7 | % web page. 8 | % The programs and documents are distributed without any warranty, express or 9 | % implied. As the programs were written for research purposes only, they have 10 | % not been tested to the degree that would be advisable in any important 11 | % application. All use of these programs is entirely at the user's own risk. 12 | 13 | How to make it work: 14 | 15 | 1. Create a separate directory and download all these files into the same directory 16 | 2. Download the following 7 files: 17 | * demo.m Main file for training PMF and Bayesian PMF 18 | * pmf.m Training PMF model 19 | * bayespmf.m Bayesian PMF model that implements Gibbs sampler. 20 | * moviedata.mat Sample data that contains triplets (user_id, movie_id, rating) 21 | * makematrix.m Helper function that converts triplets into large matrix. 22 | This file is used by bayespmf.m 23 | * pred.m Helper function that makes predictions on the validation set. 24 | * README.txt 25 | 26 | 3. Simply run demo.m in Matlab. It will fit PMF and then will run Bayesian PMF. 27 | 28 | This code uses Matlab stats toolbox to sample from Wishart distribution. 29 | If you don't have stats toolbox you can use Tom Minka's 30 | "The Lightspeed Matlab Toolbox" (just google it). 31 | 32 | 33 | I did not try to optimize this code, but please e-mail me if you find bugs. 
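For readers without MATLAB or its stats toolbox, the draw the README describes can be sketched in Python with SciPy. This is only an illustration of the distribution being sampled, not part of the original code, and the dimensions here are arbitrary:

import numpy as np
from scipy.stats import wishart

# One sample from a Wishart distribution with scale matrix Sigma and df
# degrees of freedom: the kind of draw the Gibbs sampler in bayespmf.m
# makes when resampling the precision matrices of its hyperpriors.
Sigma = np.eye(10)                     # example scale matrix (10 latent factors)
df = 12                                # degrees of freedom; SciPy requires df > dim - 1
W = wishart(df=df, scale=Sigma).rvs()  # a 10x10 positive-definite sample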
34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /Recommender-systems/深度学习预测.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | np.random.seed(2016) 5 | 6 | import os 7 | import glob 8 | import math 9 | import pickle 10 | import datetime 11 | 12 | from keras.layers import Input, Embedding, LSTM, Dense,Flatten, Dropout, merge 13 | from keras.models import Model 14 | 15 | def load_train(): 16 | X_train_uid=[] 17 | X_train_iid=[] 18 | Y_train_score=[] 19 | 20 | path = os.path.join('./data', 'train.csv') 21 | print('Read train data',path) 22 | 23 | f = open(path, 'r') 24 | line = f.readline() 25 | while (1): 26 | line = f.readline() 27 | if line == '': 28 | break 29 | arr = line.strip().split(',') 30 | X_train_uid.append(int(arr[0])) 31 | X_train_iid.append(int(arr[1])) 32 | Y_train_score.append(int(arr[2])) 33 | f.close() 34 | return X_train_uid,X_train_iid,Y_train_score 35 | 36 | def load_test(): 37 | X_test_uid=[] 38 | X_test_iid=[] 39 | 40 | path = os.path.join('./data', 'test.csv') 41 | print('Read test data',path) 42 | 43 | f = open(path, 'r') 44 | line = f.readline() 45 | while (1): 46 | line = f.readline() 47 | if line == '': 48 | break 49 | arr = line.strip().split(',') 50 | X_test_uid.append(int(arr[0])) 51 | X_test_iid.append(int(arr[1])) 52 | f.close() 53 | return X_test_uid,X_test_iid 54 | 55 | 56 | X_train_uid,X_train_iid,Y_train_score = load_train() 57 | #print len(X_train_uid),X_train_uid[33177260],max(X_train_uid) 58 | #print len(X_train_iid),X_train_iid[33177260],max(X_train_iid) 59 | #print len(Y_train_score),Y_train_score[33177260] 60 | print "load train data OK." 61 | 62 | X_test_uid,X_test_iid = load_test() 63 | #print len(X_test_uid),X_test_uid[100],max(X_test_uid) 64 | #print len(X_test_iid),X_test_iid[100],max(X_test_iid) 65 | print "load test data OK." 
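# (Added sanity check; not in the original script.) The Embedding layers
# defined below assume user ids in [0, 223970) and item ids in [0, 14726),
# the same num_p/num_m constants used in makematrix.m. Uncomment to verify:
# assert max(X_train_uid) < 223970 and max(X_train_iid) < 14726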
66 | 67 | # normalize train data 68 | X_train_uid=np.array(X_train_uid) 69 | X_train_uid=X_train_uid.reshape(X_train_uid.shape[0],1) 70 | 71 | X_train_iid=np.array(X_train_iid) 72 | X_train_iid=X_train_iid.reshape(X_train_iid.shape[0],1) 73 | 74 | Y_train_score = np.array(Y_train_score).astype('float32') 75 | Y_train_score = (Y_train_score - 1)/ 4 # map scores 1..5 to [0,1] for the sigmoid output 76 | 77 | # normalize test data 78 | X_test_uid=np.array(X_test_uid) 79 | X_test_uid=X_test_uid.reshape(X_test_uid.shape[0],1) 80 | 81 | X_test_iid=np.array(X_test_iid) 82 | X_test_iid=X_test_iid.reshape(X_test_iid.shape[0],1) 83 | 84 | # define model 85 | input_1=Input(shape=(1,), dtype='int32') 86 | input_2=Input(shape=(1,), dtype='int32') 87 | x1=Embedding(output_dim=128, input_dim=223970, input_length=1)(input_1) 88 | x2=Embedding(output_dim=128, input_dim=14726, input_length=1)(input_2) 89 | x1=Flatten()(x1) 90 | x2=Flatten()(x2) 91 | x = merge([x1, x2], mode='concat') 92 | x = Dropout(0.2)(x) 93 | x = Dense(512, activation='relu')(x) 94 | x = Dropout(0.2)(x) 95 | x = Dense(64, activation='relu')(x) 96 | x = Dropout(0.2)(x) 97 | out = Dense(1, activation='sigmoid')(x) 98 | model = Model(input=[input_1, input_2], output=out) 99 | model.compile(optimizer='rmsprop', 100 | loss='mean_squared_error', 101 | metrics=[]) 102 | # train model 103 | model.fit([X_train_uid, X_train_iid], Y_train_score, 104 | nb_epoch=10, batch_size=1024*6) 105 | 106 | # predict 107 | Y_test_score = model.predict([X_test_uid, X_test_iid],batch_size=2048) 108 | Y_test_score = Y_test_score * 4 + 1 # map back from [0,1] to the 1..5 score range 109 | 110 | f=open("out.csv","w") 111 | f.write("score\n") 112 | for i in range(Y_test_score.shape[0]): 113 | f.write("{:1.4f}".format(Y_test_score[i,0])) 114 | f.write("\n") 115 | f.close() 116 | 117 | -------------------------------------------------------------------------------- /prediction.csv: -------------------------------------------------------------------------------- 1 | uid 2 | 98c71123-8ea8-45f6-b8ab-4f6b28d2f7bc 3 | 9198e297-850f-4c48-aec6-3a9e99820e87 4 | 7460b03a-bedd-4f4b-a1b0-7bda91aebd7c 5 | eabd19b7-95d2-49aa-9e89-32f4c6bb28f9 6 | 98f1b7f2-79d1-490e-ae62-ee2d5799f93f 7 | 899f8b90-d8bc-408a-b7e4-8975fc837d0c 8 | feaaafb9-cde2-4c87-bf66-0e0ec0920ce3 9 | af94d1a3-5f26-4078-834b-17784da08779 10 | 78a60efc-9524-4a29-8228-2187d4b84b95 11 | c000409d-5ed5-4983-9f08-64f511e06830 12 | 17ce07e6-8577-47b6-988c-5e6de8009beb 13 | eea851fc-8936-4923-80c3-9714a4d73aee 14 | c330b9e0-92a5-4670-b050-7de1cbd0b905 15 | 4d11c8c4-a697-4a1c-9f4b-2617cbd55cd6 16 | 715907a2-1ff9-4dc3-bffc-22e9ff48e94b 17 | 2d36f463-feef-46dd-8daf-c1c78e81bbe1 18 | 42f6dfaa-f9d0-46b1-b947-edbe4f4d2653 19 | 5ce2706c-ee74-4220-9776-fdf2f02b480c 20 | 288e784d-7b4e-40b3-9f66-2b6cf61dc95f 21 | 42403abe-4a26-4199-9696-558948cf583d 22 | 1a9bfa35-b8e3-499c-a0fc-f95430ed2987 23 | b3906795-0dd5-4d0d-a28f-ae3a6845eee5 24 | 4794109a-5c2c-4436-b9be-71c6678fab67 25 | 2489cf3e-c55c-4238-bff3-f171fcfcf3ec 26 | 2aed02f1-e3de-4588-9870-d2d1c59b13af 27 | f1faaee2-01f3-4888-8e21-c8d161c3416e 28 | aae83e33-5673-44bb-87e9-38b5ccc72deb 29 | eba49705-092b-4717-b51a-c1ec52c0979a 30 | c63045c9-f0f2-457e-a315-7c6decf32c22 31 | 14eb67bc-dc31-4b8e-8e01-9dcfbc098930 32 | 8b88f36a-ecf2-42b9-b7e8-b135101ab91c 33 | e139d7fe-094b-45d3-993e-234d4d06056e 34 | 82ceb57f-da93-496b-af81-87c93b08e526 35 | 327ec6aa-5f44-4841-85c0-4e71c13dff43 36 | 0260b7b8-e41c-42b8-8baf-50d7b1108c2f 37 | 2028fd50-5f1f-4507-b297-2bbabc3a8c5f 38 | 51549a99-f18f-460b-a6a6-e174908955a6 39 | 018b0e60-a879-40c1-bc5b-7fd077182105 40 |
0d42164c-c34b-488e-a3a2-a2adb32c6a0f 41 | 7f080fc2-06e0-45cf-a2ca-a8b89a5a074b 42 | 7515d2d6-63a7-414d-ab57-327f5da89444 43 | cc412093-ca64-4ec2-9f7f-1a2ce3fe5346 44 | 049baf54-be84-4fb6-b1f9-05b56214c3f8 45 | 8f3a5bd6-b902-4a0b-83bb-69b88c730b1f 46 | 36eb1d96-a9ae-446a-906f-2224dfba8788 47 | 67a4cd4f-311c-4ea8-ad9c-cce99be9f7a5 48 | 6abdd425-deff-4564-a148-1879f4c25023 49 | 199790f3-e3c1-4891-8943-e620764e8423 50 | 66cfcf2c-7700-455d-ae97-10db4a559265 51 | 3604a301-d147-4b68-975c-916a427d8f87 52 | bf5df503-d6f9-4bfc-a757-40c0cef1d456 53 | 0da163ce-936c-4e09-a153-eadc5068079c 54 | cee60b2b-6417-4528-92ac-24ec369f8642 55 | 2b4b6788-1315-41f1-a262-955c000fa116 56 | 09e2a69e-5419-40cf-9bd1-13fb0cbe7301 57 | 0a9ef475-6921-47b2-ab80-2fd14932bade 58 | 3005e62d-ea44-4d39-8cf7-69a79a438cde 59 | 8b9d8799-8a19-4c81-9419-a03016ee6982 60 | 96705bd5-785b-416d-9871-ec19a3f65ccb 61 | 98e59dc5-eb5a-4ade-a2e5-834f1710ac5f 62 | bd1d0736-725d-4b98-bcbb-d051f2cb584a 63 | 07cbbe56-4a68-4bcd-a8d9-61f644aadf4a 64 | 992ea018-508b-45de-b5ec-163238fe26b8 65 | acae71c7-f867-4011-8ae3-90f31871279e 66 | 8c854c49-303d-4d13-b39c-cfe346f9785b 67 | 8807fe82-5074-443c-9d17-596c8ccbf2b7 68 | f3227171-312b-47dd-bdea-1b0c81ab41d6 69 | ca2bd5f2-10c0-4768-bb53-f9ba8ed944e4 70 | f5878404-bb68-441e-8a09-0e3524a87153 71 | 2d7d9cec-7944-49f8-95c0-2596eed08466 72 | 6453462d-16a8-4a8d-a0bc-29fafd4b05a0 73 | 54c8dd63-049b-465c-8d78-d761ac6bcdda 74 | ca7928ee-cdef-40b9-a7ac-3e1422450db2 75 | 2745ba4e-07e7-4537-b9ea-d5f6946f3de1 76 | 7562e0ad-bece-42f2-a2ae-61802d6bd57d 77 | 5968a65e-d087-40b8-aa90-ac05b1aaa8ca 78 | 21fafd0b-0ef3-4e5f-baa4-390a7e05baf8 79 | e2f54f15-dd23-4f65-a486-283bd9819598 80 | 7816f4bd-c2ff-4042-a746-c609809ee77a 81 | 27dc1c88-fb7b-48e1-a054-b8412c0a72ba 82 | ff2426a1-c91e-4886-ab0e-b040b020820a 83 | d7668d8e-d6f6-40a0-beb8-a90049a3d9f2 84 | 5f70610e-73af-427d-b36d-97e436c6b514 85 | e55bb5d1-5e9e-4e31-9588-ead094352451 86 | edab3925-48a4-42ac-8416-7f0c4677b325 87 | beeae62c-047f-4099-acad-162ae4889a10 88 | 0c34ec58-bbfb-4221-8066-861ff995643a 89 | 2784f212-d330-49ba-8480-2c97be2bb8a0 90 | 485babac-34d7-4e17-b500-3f564cb9be39 91 | cf264000-9b86-4dc1-8956-76644438763f 92 | 4cdf5b9f-837e-49b1-98c1-4727198d1d04 93 | c998bb80-1774-4266-93ea-0d82f685ebef 94 | 17bab7f9-ba9d-4ee3-8ce5-483461ec1f34 95 | defe2b2a-7011-48d0-a164-0f812c6cdb6e 96 | de327aff-fa5f-4f7c-9bb5-8b5e18102202 97 | 94ebf189-6a7b-4597-934b-1378692e4b00 98 | 79e941af-571f-4ea7-8db1-677ee78c7567 99 | bdd60d84-b6d8-4f61-a7a1-aa84a1a8a2a1 100 | fad4a83b-a475-42fd-8889-4e7f3b4b04e5 101 | 2dc78ee4-8fe3-4272-b7dc-1e2ee787ebc3 102 | 082cebbd-9142-4195-8551-cea70f61378e 103 | ac8185dc-4425-4f82-94fd-95defccb3d71 104 | f36c696c-f6bc-458c-8097-696e373aef42 105 | 21c3f5e0-8e78-47ae-8607-3d0a2c965484 106 | 756db94e-abed-4899-8f28-b7977bbb50d4 107 | 49d7fffc-0e45-4064-9f1b-d58411956c59 108 | e0e8b71c-ebe9-4333-bcbe-597198d47003 109 | 0b979299-02c8-4bea-89b4-0eff868fe3cd 110 | 40d44977-5d41-4361-ae95-7817c93823bd 111 | 6fd17138-d8b9-4831-b637-d9c2e5ae15db 112 | 584cb650-9fe5-4df2-970d-20ccf4ac8648 113 | 6d1854b3-dbe1-4609-9e6b-19202f15d8a0 114 | 061339f5-ef6e-4266-b814-9381d9a93595 115 | 45cdea24-e737-4af3-9108-9ae1c23bec6e 116 | 221f8ff4-2310-49bb-8de1-e81c80bbf418 117 | f81efd6e-a97e-4044-adf7-91e21ab24b08 118 | 8ef403bb-dbcf-493f-ae3e-9b38e4983808 119 | 3bb87eba-d23f-4d5b-b73d-0a2cf591f89c 120 | c5f53ba5-1a6c-4645-96db-b946b098ab21 121 | 92f58c79-dc50-49ba-a75a-9978ddac877e 122 | fa72ead1-15c1-40cb-9d34-a57c9e919540 123 | b3f6bb8e-b421-44ab-9c76-86a8c367888b 124 | 
5ae66fcc-23fc-46a3-848e-c096eb75bb16 125 | aca7541a-1474-4c83-9436-848c57e4dc4a 126 | 4930b820-0d47-4227-9762-cfe98c5294bf 127 | 2a2fda53-6978-4e7b-963f-296459fae495 128 | 7ed6ef7a-733e-4acb-9871-1ccb8a400abb 129 | 9fff6b02-c1b9-4fb2-abc0-b4cd08d3a7f7 130 | 3589dfd0-8d91-47cd-8d1d-3cb46c7c48b5 131 | 777381ec-ecc1-460c-b69a-2e6687fe7404 132 | 371569b0-a68b-4ffd-a26c-08680170d8ca 133 | 2e47f523-2948-4deb-9e06-672aef1482dc 134 | 4a594e57-f026-4571-af24-8b83008a9651 135 | 3d1c8345-0256-4675-843b-b0dd4dd19eb0 136 | 1e3c6be0-fdfc-4e65-a4f0-24374e78fae8 137 | f3a3d8d0-76d8-4a49-9e98-fc0bfca42f1d 138 | b919c436-f528-4210-9700-9eb42c7f21ec 139 | dbcd5cab-a316-48ec-98bf-e59be3c7b013 140 | 0760bcf6-25c0-4f96-b02c-18f6831280da 141 | 335985bc-120a-4208-89d6-be22bed33aca 142 | 9d40c006-8be1-4ce9-94ff-8a16f4205faf 143 | 59173289-bb10-4f2a-8bbf-0d24b123495d 144 | c33000b9-cad3-4b4a-96c1-57c311a2c406 145 | 8100e7ec-f6ab-46b5-90d5-f2e9396de63a 146 | 39eabf6f-8c03-467f-8612-7e21d5c887a2 147 | b2a5d360-8fe4-4fa3-9fcd-02ba61488d06 148 | 3ca71d82-8a99-4aeb-b842-940add0a72e1 149 | 563a079c-f106-44f5-b016-136ec01e846c 150 | a46efc2b-1486-4172-a39f-546657edb892 151 | e8f3ac8d-a7bd-4040-a0a3-d5cd8ea21910 152 | d78806b5-587d-4a75-95c8-7a562bb5080a 153 | 7a8aca35-8b07-4a2a-9831-e96c2e751788 154 | ae71c80b-74d6-4a89-af1c-989c7a74dc30 155 | 63eaf44e-081d-4ad6-b6c7-55fdc98b2bf6 156 | bef8ddfe-c1a4-4fa6-84c3-5088d61b23e8 157 | b3b3f78e-07ac-496a-ac6d-17fc394b626c 158 | 4bd403e0-ba9b-44e2-ad7d-f5a7546181e3 159 | 942ecc0e-2dfa-4752-bb09-c220a4ba2c88 160 | d0fe33f3-68fd-4c64-ac9a-8a885d081dae 161 | 12802da7-f63c-40c0-808a-1b5361309f61 162 | d5b06fc1-0734-43d3-9f13-e194e2f68484 163 | 2f75b658-49f0-4f42-b987-181265ee5add 164 | 792b570b-22c1-44a6-816d-7a7993973585 165 | 89793271-e049-47e2-9a06-8d09b48f8861 166 | 4c468349-937c-476f-bf3f-e2a3ae47149c 167 | cb971158-75af-4396-8c6b-4227541e10bb 168 | a66d24ca-cad9-4bbd-8d60-0433686070aa 169 | 8c2e64f4-b0cb-45b0-a917-e53ea767780c 170 | 8019fbfb-0c47-4598-85df-4390dc57a0b6 171 | 8855ec95-a9cc-4a0b-835b-14655c2e3374 172 | 43774179-eeb3-46f2-8e4d-f5c9ba898dc9 173 | c7a7106c-5913-47e3-85a6-04507d034c78 174 | f7133c08-bbf6-4ef3-aa4d-60a053c8b033 175 | f5586481-66dc-46a9-8aff-3e2ab134b58f 176 | 3f38a004-0cad-4761-8082-b69c6f21cff1 177 | 07975a2c-0e97-465f-abae-eb4d37b0ce84 178 | a19e17bb-7275-4817-aac0-8df51a1330fe 179 | 2b33dff2-b027-4b12-a1e8-f31dc796054b 180 | 5e8b3d30-d85a-4526-8eca-9a6da11147b4 181 | 2f996601-2ba8-4269-9a71-18b616792158 182 | 4e8836c2-26df-4a0e-b23f-0cdb5141c2b1 183 | 0c73df46-16cc-482d-9017-c03e6d45d1b2 184 | 93d24bec-f7c0-4b7e-b5d8-62d60d5e2de8 185 | 840702bd-5dac-4041-98f0-edadb04272e2 186 | 5e7d0670-a2d1-4792-93e2-100424ee02b5 187 | a65aca2f-217c-46ab-8d9a-1afe0aba183c 188 | 96ac1e1f-6d49-470f-ac1e-28ff1a38f01f 189 | 7c806e64-4068-43ac-81d1-4aa3d8e16e41 190 | 6bf77b8b-864b-45d4-b241-22d6458ad298 191 | 192 | -------------------------------------------------------------------------------- /catdog.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "I took part in the DataCastle Cats vs. Dogs competition as kuhung. My final submission scored 0.98639 in the evaluation. Below is how I prepared and what I learned. (Complete code with fairly thorough comments is at the end.)\n", 8 | "\n", 9 | "## About me\n", 10 | "I am a sophomore (soon to be junior) in the School of Mechanical Engineering at Huazhong University of Science and Technology, and I have been into data mining for almost a year. Early on I did some D3 data-visualization work on a student team, and in the first half of this year I started hands-on data mining. I want to turn this hobby into a career. I have entered Alibaba's Tianchi competitions and spent time on Kaggle. I count as a data newcomer, but I have never accepted the excuse that being new makes poor results acceptable. \n", 11 | "\n", 12 | "## First look at the competition\n",
"第一次接触数据集,就感觉有些难度。因为以前没做过图片分类的比赛,更没想过要用深度学习的神经网络进行识别。思索一番,还是觉得特征提取后,使用决策树靠谱。自己也下去找过资料,发现并不容易实现。期间,还曾一度想过肉眼识别。但打开文件,看到那1400+图片,就觉得这时间花在肉眼识别上不值。中间一度消停。\n", 14 | "\n", 15 | "## 初见曙光——yinjh战队分享\n", 16 | "后来上论坛逛过几次。一次偶然的机会,让我看到了yinjh团队分享的vgg16模型。乍一看,代码简单、效果不错。更为重要的是,这个模型自己以前从未见过。于是抱着验证学习的态度,我把代码扣了下来,打算自己照着做一遍。\n", 17 | "\n", 18 | "## 过程艰难\n", 19 | "一开始,我就把一屏的代码放进了我的jupyter notebook中,一步一步试水。很明显,我的很多依赖包都没安装,所以也是错误不断。早先是在Windows系统下,使用python2.7,需要什么包,就安装什么包。在安装keras过程中,我发现了Anaconda——很好用的一个科学计算环境,集成了各种数据挖掘包。即使是这样,仍然是满屏的错误,亟待排查。\n", 20 | "\n", 21 | "## 步步优化\n", 22 | "离比赛截止就还只有几天,一边准备期末考试,一边焦急地排查bug。Windows系统下仍有个别难以解决的错误,我索性切换到了做NAO机器人时装的Ubuntu系统下。结合keras给的官方文档,我对原代码进行了函数拆分解耦,又在循环体部分增加了异常检测。综合考虑性能,稍微修改了循环结构。下载好训练的vgg16_weights,在没有错误之后,焦急地等待25分钟后,屏幕开始打印结果。\n", 23 | "\n", 24 | "## 欣喜万分\n", 25 | "第一次提交,随便截取了前面一段,没成绩。折腾了几次,才发现是提交的格式出了问题。后面取p=0.99+部分,提交结果在0.58左右,数据集大概有90个。估计了下,狗狗总数应该在180左右。第二次提交,取了180左右,结果0.97多一点。第三次,也是最后一次提交,取了result前189个,结果0.98639,一举升到第一。\n", 26 | "\n", 27 | "---\n", 28 | "### 比赛总结\n", 29 | "这次比赛,首先还得感谢yinjh团队的yin前辈。如果没有您分享的代码,就不会有我今天的成绩。感谢您分享的代码,感想您在我写这篇分享时提供的代码指导。\n", 30 | "再者,感谢我的女票晶晶,谢谢你一直陪在我身边,谢谢你包容我写代码时不那么快的回复手速。我是新手,但我一直不觉得成绩低是理所当。立志从事这一行,就需要快速地学习、快速地成长。新人,也需要做到最好。当然,自己目前还存在很多问题。一些基本的概念只是模糊掌握,需要更多的实践,需要更多的理论积淀,而不是简单地做一个调包侠。\n", 31 | "\n", 32 | "### 给新手的建议\n", 33 | "- 善用搜索引擎,多读官方文档,不要一开始就依赖Google。\n", 34 | "- Google Groups、Stack Overflow、GitHub是好东西。\n", 35 | "- 干!就是干!" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "** ------------------------------------------------------------------------------------ **" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "# 完整代码\n", 50 | "- ** 以下操作均在Ubuntu14.04+Anaconda中进行 **\n", 51 | 52 | "### 导入python标准包 " 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": { 59 | "collapsed": true 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "import os # 处理字符串路径\n", 64 | "\n", 65 | "import glob # 用于查找文件" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "### 导入相关库\n", 73 | "- keras\n", 74 | " - keras是基于Theano的深度学习(Deep Learning)框架 \n", 75 | "\n", 76 | " - 详细信息请见[keras官方文档](http://keras.io/) \n", 77 | " \n", 78 | "##### 安装过程\n", 79 | " \n", 80 | " > conda update conda\n", 81 | " \n", 82 | " > conda update --all\n", 83 | " \n", 84 | " > conda install mingw libpython\n", 85 | " \n", 86 | " > pip install git+git://github.com/Theano/Theano.git\n", 87 | " \n", 88 | " > pip install git+git://github.com/fchollet/keras.git\n", 89 | "\n", 90 | "- cv2 \n", 91 | " - OpenCV库\n", 92 | " \n", 93 | " > conda isntall opnecv \n", 94 | "- numpy\n", 95 | " - Anaconda自带" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": { 102 | "collapsed": false 103 | }, 104 | "outputs": [], 105 | "source": [ 106 | "from keras.models import Sequential\n", 107 | "\n", 108 | "from keras.layers.core import Flatten, Dense, Dropout\n", 109 | "\n", 110 | "from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D\n", 111 | "\n", 112 | "from keras.optimizers import SGD\n", 113 | "\n", 114 | "import cv2, numpy as np" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "### 使用keras建立vgg16模型\n", 122 | " - 参考官方示例" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": { 129 | "collapsed": false 130 | }, 131 | "outputs": [], 132 | "source": [ 133 | "def 
VGG_16(weights_path=None):\n", 134 | "\n", 135 | " model = Sequential()\n", 136 | "\n", 137 | " model.add(ZeroPadding2D((1,1),input_shape=(3,224,224)))\n", 138 | "\n", 139 | " model.add(Convolution2D(64, 3, 3, activation='relu'))\n", 140 | "\n", 141 | " model.add(ZeroPadding2D((1,1)))\n", 142 | "\n", 143 | " model.add(Convolution2D(64, 3, 3, activation='relu'))\n", 144 | "\n", 145 | " model.add(MaxPooling2D((2,2), strides=(2,2)))\n", 146 | "\n", 147 | "\n", 148 | "\n", 149 | " model.add(ZeroPadding2D((1,1)))\n", 150 | "\n", 151 | " model.add(Convolution2D(128, 3, 3, activation='relu'))\n", 152 | "\n", 153 | " model.add(ZeroPadding2D((1,1)))\n", 154 | "\n", 155 | " model.add(Convolution2D(128, 3, 3, activation='relu'))\n", 156 | "\n", 157 | " model.add(MaxPooling2D((2,2), strides=(2,2)))\n", 158 | "\n", 159 | "\n", 160 | " model.add(ZeroPadding2D((1,1)))\n", 161 | "\n", 162 | " model.add(Convolution2D(256, 3, 3, activation='relu'))\n", 163 | "\n", 164 | " model.add(ZeroPadding2D((1,1)))\n", 165 | "\n", 166 | " model.add(Convolution2D(256, 3, 3, activation='relu'))\n", 167 | "\n", 168 | " model.add(ZeroPadding2D((1,1)))\n", 169 | "\n", 170 | " model.add(Convolution2D(256, 3, 3, activation='relu'))\n", 171 | "\n", 172 | " model.add(MaxPooling2D((2,2), strides=(2,2)))\n", 173 | "\n", 174 | "\n", 175 | " model.add(ZeroPadding2D((1,1)))\n", 176 | "\n", 177 | " model.add(Convolution2D(512, 3, 3, activation='relu'))\n", 178 | "\n", 179 | " model.add(ZeroPadding2D((1,1)))\n", 180 | "\n", 181 | " model.add(Convolution2D(512, 3, 3, activation='relu'))\n", 182 | "\n", 183 | " model.add(ZeroPadding2D((1,1)))\n", 184 | "\n", 185 | " model.add(Convolution2D(512, 3, 3, activation='relu'))\n", 186 | "\n", 187 | " model.add(MaxPooling2D((2,2), strides=(2,2)))\n", 188 | "\n", 189 | "\n", 190 | " model.add(ZeroPadding2D((1,1)))\n", 191 | "\n", 192 | " model.add(Convolution2D(512, 3, 3, activation='relu'))\n", 193 | "\n", 194 | " model.add(ZeroPadding2D((1,1)))\n", 195 | "\n", 196 | " model.add(Convolution2D(512, 3, 3, activation='relu'))\n", 197 | "\n", 198 | " model.add(ZeroPadding2D((1,1)))\n", 199 | "\n", 200 | " model.add(Convolution2D(512, 3, 3, activation='relu'))\n", 201 | "\n", 202 | " model.add(MaxPooling2D((2,2), strides=(2,2)))\n", 203 | "\n", 204 | "\n", 205 | " model.add(Flatten())\n", 206 | "\n", 207 | " model.add(Dense(4096, activation='relu'))\n", 208 | "\n", 209 | " model.add(Dropout(0.5))\n", 210 | "\n", 211 | " model.add(Dense(4096, activation='relu'))\n", 212 | "\n", 213 | " model.add(Dropout(0.5))\n", 214 | "\n", 215 | " model.add(Dense(1000, activation='softmax'))\n", 216 | "\n", 217 | "\n", 218 | " if weights_path:\n", 219 | "\n", 220 | " model.load_weights(weights_path)\n", 221 | "\n", 222 | "\n", 223 | " return model" 224 | ] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "metadata": {}, 229 | "source": [ 230 | "### Load the pretrained vgg16_weights\n", 231 | "** Note: ** \n", 232 | "- vgg16_weights.h5 must be downloaded separately and placed in the same folder as this code file, otherwise an error is raised.\n", 233 | " - It is available online; Baidu Cloud link: [vgg16_weights.h5 download](http://pan.baidu.com/s/1qX0CJSC)" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": { 240 | "collapsed": true 241 | }, 242 | "outputs": [], 243 | "source": [ 244 | "model = VGG_16('vgg16_weights.h5')" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": null, 250 | "metadata": { 251 | "collapsed": true 252 | }, 253 | "outputs": [], 254 | "source": [ 255 | "sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)\n", 256 | 
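"# (Added note:) the SGD settings above follow the widely shared Keras VGG16 example; this notebook never calls fit(), it only runs predictions.\n",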
"model.compile(optimizer=sgd, loss='categorical_crossentropy')" 257 | ] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "metadata": {}, 262 | "source": [ 263 | "### 猫和狗的特征" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "metadata": { 270 | "collapsed": true 271 | }, 272 | "outputs": [], 273 | "source": [ 274 | "dogs=[251, 268, 256, 253, 255, 254, 257, 159, 211, 210, 212, 214, 213, 216, 215, 219, 220, 221, 217, 218, 207, 209, 206, 205, 208, 193, 202, 194, 191, 204, 187, 203, 185, 192, 183, 199, 195, 181, 184, 201, 186, 200, 182, 188, 189, 190, 197, 196, 198, 179, 180, 177, 178, 175, 163, 174, 176, 160, 162, 161, 164, 168, 173, 170, 169, 165, 166, 167, 172, 171, 264, 263, 266, 265, 267, 262, 246, 242, 243, 248, 247, 229, 233, 234, 228, 231, 232, 230, 227, 226, 235, 225, 224, 223, 222, 236, 252, 237, 250, 249, 241, 239, 238, 240, 244, 245, 259, 261, 260, 258, 154, 153, 158, 152, 155, 151, 157, 156]\n", 275 | "\n", 276 | "cats=[281,282,283,284,285,286,287]" 277 | ] 278 | }, 279 | { 280 | "cell_type": "markdown", 281 | "metadata": {}, 282 | "source": [ 283 | "### 待处理文件导入\n", 284 | "** Note: **\n", 285 | "- 将测试集改名为test,放入imgs文件夹下,imgs文件夹又与此代码处于同一文件夹下。\n", 286 | "- 当然,你也可以修改下面的路径。" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": null, 292 | "metadata": { 293 | "collapsed": true 294 | }, 295 | "outputs": [], 296 | "source": [ 297 | "path = os.path.join('imgs', 'test', '*.jpg') #拼接路径\n", 298 | " \n", 299 | "files = glob.glob(path) #返回路径" 300 | ] 301 | }, 302 | { 303 | "cell_type": "markdown", 304 | "metadata": {}, 305 | "source": [ 306 | "### 定义几个变量" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": null, 312 | "metadata": { 313 | "collapsed": true 314 | }, 315 | "outputs": [], 316 | "source": [ 317 | "result=[]" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": null, 323 | "metadata": { 324 | "collapsed": true 325 | }, 326 | "outputs": [], 327 | "source": [ 328 | "flbase=0\n", 329 | "p=0\n", 330 | "temp=0" 331 | ] 332 | }, 333 | { 334 | "cell_type": "markdown", 335 | "metadata": {}, 336 | "source": [ 337 | "### 定义图像加载函数" 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": null, 343 | "metadata": { 344 | "collapsed": false 345 | }, 346 | "outputs": [], 347 | "source": [ 348 | "def load_image(imageurl):\n", 349 | " im = cv2.resize(temp ,(224,224)).astype(np.float32)\n", 350 | " im[:,:,0] -= 103.939\n", 351 | " im[:,:,1] -= 116.779\n", 352 | " im[:,:,2] -= 123.68\n", 353 | " im = im.transpose((2,0,1))\n", 354 | " im = np.expand_dims(im,axis=0)\n", 355 | " return im " 356 | ] 357 | }, 358 | { 359 | "cell_type": "markdown", 360 | "metadata": {}, 361 | "source": [ 362 | "### 定义预测函数" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": null, 368 | "metadata": { 369 | "collapsed": true 370 | }, 371 | "outputs": [], 372 | "source": [ 373 | "def predict(url):\n", 374 | " im = load_image(url) \n", 375 | " out = model.predict(im)\n", 376 | " flbase = os.path.basename(url)\n", 377 | " p = np.sum(out[0,dogs]) / (np.sum(out[0,dogs]) + np.sum(out[0,cats]))\n", 378 | " result.append((flbase,p))" 379 | ] 380 | }, 381 | { 382 | "cell_type": "markdown", 383 | "metadata": {}, 384 | "source": [ 385 | "### 开始预测\n", 386 | "** Note: **\n", 387 | "- 此处的if,else异常检测很重要,因为cv2.imread(fl)在遇到某几张图时会为空,抛出错误,程序中途停止,图片集得不到完全检测。\n", 388 | "- 一般配置电脑跑这部分时,大约需要20~30分钟,不是程序没有工作,请耐心等待。" 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": 
null, 394 | "metadata": { 395 | "collapsed": false 396 | }, 397 | "outputs": [], 398 | "source": [ 399 | "for fl in files:\n", 400 | " temp=cv2.imread(fl) \n", 401 | " if temp is None: # cv2.imread returns None for unreadable images; comparing with == None misbehaves on arrays\n", 402 | " pass\n", 403 | " else:\n", 404 | " predict(fl) " 405 | ] 406 | }, 407 | { 408 | "cell_type": "markdown", 409 | "metadata": {}, 410 | "source": [ 411 | "### Sort the results" 412 | ] 413 | }, 414 | { 415 | "cell_type": "code", 416 | "execution_count": null, 417 | "metadata": { 418 | "collapsed": false 419 | }, 420 | "outputs": [], 421 | "source": [ 422 | "result=sorted(result, key=lambda x:x[1], reverse=True)" 423 | ] 424 | }, 425 | { 426 | "cell_type": "markdown", 427 | "metadata": {}, 428 | "source": [ 429 | "### Print the predictions with their probabilities" 430 | ] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "execution_count": null, 435 | "metadata": { 436 | "collapsed": false 437 | }, 438 | "outputs": [], 439 | "source": [ 440 | "for x in result:\n", 441 | " print x[0],x[1]" 442 | ] 443 | }, 444 | { 445 | "cell_type": "markdown", 446 | "metadata": {}, 447 | "source": [ 448 | "### Submission\n", 449 | "- Based on the probabilities above, choose how many of the top images to keep\n", 450 | "- Copy them into a csv file and use any ordinary editor to replace \".jpg\" with a space" 451 | ] 452 | }, 453 | { 454 | "cell_type": "code", 455 | "execution_count": null, 456 | "metadata": { 457 | "collapsed": false 458 | }, 459 | "outputs": [], 460 | "source": [ 461 | "for x in result:\n", 462 | " print x[0]" 463 | ] 464 | } 465 | ], 466 | "metadata": { 467 | "anaconda-cloud": {}, 468 | "kernelspec": { 469 | "display_name": "Python [Root]", 470 | "language": "python", 471 | "name": "Python [Root]" 472 | }, 473 | "language_info": { 474 | "codemirror_mode": { 475 | "name": "ipython", 476 | "version": 2 477 | }, 478 | "file_extension": ".py", 479 | "mimetype": "text/x-python", 480 | "name": "python", 481 | "nbconvert_exporter": "python", 482 | "pygments_lexer": "ipython2", 483 | "version": "2.7.11" 484 | } 485 | }, 486 | "nbformat": 4, 487 | "nbformat_minor": 0 488 | } 489 | -------------------------------------------------------------------------------- /Recommender-systems/shape.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd\n", 12 | "import numpy as np" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "test=pd.read_csv('test.csv')" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 3, 29 | "metadata": { 30 | "collapsed": true 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "train=pd.read_csv('train.csv')" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 5, 40 | "metadata": { 41 | "collapsed": false 42 | }, 43 | "outputs": [ 44 | { 45 | "name": "stdout", 46 | "output_type": "stream", 47 | "text": [ 48 | "(33177270, 4)\n", 49 | " uid iid score time\n", 50 | "0 0 0 2 19\n", 51 | "1 0 8 4 273\n", 52 | "2 0 13 1 587\n", 53 | "3 0 18 3 15\n", 54 | "4 0 34 3 17\n", 55 | " uid iid score time\n", 56 | "33177265 223969 12729 2 1346\n", 57 | "33177266 223969 12983 1 1346\n", 58 | "33177267 223969 13000 4 1346\n", 59 | "33177268 223969 13291 3 1346\n", 60 | "33177269 223969 13531 4 1346\n" 61 | ] 62 | } 63 | ], 64 | "source": [ 65 | "print train.shape\n", 66 | "print train.head()\n", 67 | "print train.tail()" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 7,
73 | "metadata": { 74 | "collapsed": false 75 | }, 76 | "outputs": [], 77 | "source": [ 78 | "user=train['uid']\n", 79 | "item=train['iid']\n", 80 | "score=train['score']" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 8, 86 | "metadata": { 87 | "collapsed": true 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "user = user.drop_duplicates() \n", 92 | "item = item.drop_duplicates()\n", 93 | "score = score.drop_duplicates() " 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 11, 99 | "metadata": { 100 | "collapsed": false 101 | }, 102 | "outputs": [ 103 | { 104 | "name": "stdout", 105 | "output_type": "stream", 106 | "text": [ 107 | "(157949L,)\n", 108 | "(14620L,)\n", 109 | "(5L,)\n" 110 | ] 111 | } 112 | ], 113 | "source": [ 114 | "print user.shape\n", 115 | "print item.shape\n", 116 | "print score.shape" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 3, 122 | "metadata": { 123 | "collapsed": false 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "train2=train.loc[:,['uid','iid','score']]" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 5, 133 | "metadata": { 134 | "collapsed": true 135 | }, 136 | "outputs": [], 137 | "source": [ 138 | "train2.to_csv('train2.csv')" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 6, 144 | "metadata": { 145 | "collapsed": false 146 | }, 147 | "outputs": [ 148 | { 149 | "data": { 150 | "text/html": [ 151 | "
| \n", 156 | " | uid | \n", 157 | "iid | \n", 158 | "score | \n", 159 | "
|---|---|---|---|
| count | \n", 164 | "3.317727e+07 | \n", 165 | "3.317727e+07 | \n", 166 | "3.317727e+07 | \n", 167 | "
| mean | \n", 170 | "8.481261e+04 | \n", 171 | "5.109407e+03 | \n", 172 | "3.495277e+00 | \n", 173 | "
| std | \n", 176 | "6.057698e+04 | \n", 177 | "3.616496e+03 | \n", 178 | "1.088213e+00 | \n", 179 | "
| min | \n", 182 | "0.000000e+00 | \n", 183 | "0.000000e+00 | \n", 184 | "1.000000e+00 | \n", 185 | "
| 25% | \n", 188 | "3.136900e+04 | \n", 189 | "1.993000e+03 | \n", 190 | "3.000000e+00 | \n", 191 | "
| 50% | \n", 194 | "7.584700e+04 | \n", 195 | "4.417000e+03 | \n", 196 | "4.000000e+00 | \n", 197 | "
| 75% | \n", 200 | "1.314850e+05 | \n", 201 | "8.039000e+03 | \n", 202 | "4.000000e+00 | \n", 203 | "
| max | \n", 206 | "2.239690e+05 | \n", 207 | "1.472500e+04 | \n", 208 | "5.000000e+00 | \n", 209 | "
| \n", 290 | " | uid | \n", 291 | "iid | \n", 292 | "
|---|---|---|
| 0 | \n", 297 | "0 | \n", 298 | "12960 | \n", 299 | "
| 1 | \n", 302 | "1 | \n", 303 | "12726 | \n", 304 | "
| 2 | \n", 307 | "1 | \n", 308 | "11463 | \n", 309 | "
| 3 | \n", 312 | "1 | \n", 313 | "10739 | \n", 314 | "
| 4 | \n", 317 | "1 | \n", 318 | "3441 | \n", 319 | "
| 5 | \n", 322 | "1 | \n", 323 | "301 | \n", 324 | "
| 6 | \n", 327 | "1 | \n", 328 | "13291 | \n", 329 | "
| 7 | \n", 332 | "1 | \n", 333 | "2814 | \n", 334 | "
| 8 | \n", 337 | "1 | \n", 338 | "2857 | \n", 339 | "
| 9 | \n", 342 | "2 | \n", 343 | "12860 | \n", 344 | "
| 10 | \n", 347 | "2 | \n", 348 | "11091 | \n", 349 | "
| 11 | \n", 352 | "2 | \n", 353 | "13057 | \n", 354 | "
| 12 | \n", 357 | "3 | \n", 358 | "8992 | \n", 359 | "
| 13 | \n", 362 | "3 | \n", 363 | "11082 | \n", 364 | "
| 14 | \n", 367 | "3 | \n", 368 | "2665 | \n", 369 | "
| 15 | \n", 372 | "3 | \n", 373 | "12570 | \n", 374 | "
| 16 | \n", 377 | "3 | \n", 378 | "13410 | \n", 379 | "
| 17 | \n", 382 | "3 | \n", 383 | "12714 | \n", 384 | "
| 18 | \n", 387 | "3 | \n", 388 | "14649 | \n", 389 | "
| 19 | \n", 392 | "3 | \n", 393 | "2635 | \n", 394 | "
| 20 | \n", 397 | "4 | \n", 398 | "14339 | \n", 399 | "
| 21 | \n", 402 | "4 | \n", 403 | "13000 | \n", 404 | "
| 22 | \n", 407 | "5 | \n", 408 | "1326 | \n", 409 | "
| 23 | \n", 412 | "5 | \n", 413 | "2308 | \n", 414 | "
| 24 | \n", 417 | "5 | \n", 418 | "1934 | \n", 419 | "
| 25 | \n", 422 | "5 | \n", 423 | "2405 | \n", 424 | "
| 26 | \n", 427 | "5 | \n", 428 | "13509 | \n", 429 | "
| 27 | \n", 432 | "5 | \n", 433 | "12362 | \n", 434 | "
| 28 | \n", 437 | "5 | \n", 438 | "7636 | \n", 439 | "
| 29 | \n", 442 | "5 | \n", 443 | "5155 | \n", 444 | "
| ... | \n", 447 | "... | \n", 448 | "... | \n", 449 | "
| 546166 | \n", 452 | "223267 | \n", 453 | "12181 | \n", 454 | "
| 546167 | \n", 457 | "223267 | \n", 458 | "3569 | \n", 459 | "
| 546168 | \n", 462 | "223277 | \n", 463 | "11865 | \n", 464 | "
| 546169 | \n", 467 | "223686 | \n", 468 | "12983 | \n", 469 | "
| 546170 | \n", 472 | "223842 | \n", 473 | "1801 | \n", 474 | "
| 546171 | \n", 477 | "223842 | \n", 478 | "1418 | \n", 479 | "
| 546172 | \n", 482 | "223842 | \n", 483 | "146 | \n", 484 | "
| 546173 | \n", 487 | "223842 | \n", 488 | "3033 | \n", 489 | "
| 546174 | \n", 492 | "223842 | \n", 493 | "282 | \n", 494 | "
| 546175 | \n", 497 | "223842 | \n", 498 | "2883 | \n", 499 | "
| 546176 | \n", 502 | "223842 | \n", 503 | "2161 | \n", 504 | "
| 546177 | \n", 507 | "223842 | \n", 508 | "10018 | \n", 509 | "
| 546178 | \n", 512 | "223842 | \n", 513 | "11218 | \n", 514 | "
| 546179 | \n", 517 | "223842 | \n", 518 | "4753 | \n", 519 | "
| 546180 | \n", 522 | "223842 | \n", 523 | "9687 | \n", 524 | "
| 546181 | \n", 527 | "223842 | \n", 528 | "1321 | \n", 529 | "
| 546182 | \n", 532 | "223842 | \n", 533 | "601 | \n", 534 | "
| 546183 | \n", 537 | "223842 | \n", 538 | "1340 | \n", 539 | "
| 546184 | \n", 542 | "223842 | \n", 543 | "1335 | \n", 544 | "
| 546185 | \n", 547 | "223842 | \n", 548 | "3428 | \n", 549 | "
| 546186 | \n", 552 | "223842 | \n", 553 | "6155 | \n", 554 | "
| 546187 | \n", 557 | "223842 | \n", 558 | "4664 | \n", 559 | "
| 546188 | \n", 562 | "223842 | \n", 563 | "2918 | \n", 564 | "
| 546189 | \n", 567 | "223842 | \n", 568 | "6607 | \n", 569 | "
| 546190 | \n", 572 | "223842 | \n", 573 | "3576 | \n", 574 | "
| 546191 | \n", 577 | "223842 | \n", 578 | "7033 | \n", 579 | "
| 546192 | \n", 582 | "223842 | \n", 583 | "2391 | \n", 584 | "
| 546193 | \n", 587 | "223842 | \n", 588 | "2625 | \n", 589 | "
| 546194 | \n", 592 | "223842 | \n", 593 | "6477 | \n", 594 | "
| 546195 | \n", 597 | "223969 | \n", 598 | "9758 | \n", 599 | "
546196 rows × 2 columns
\n", 603 | "