└── page_identify ├── CNN_LSTM.py ├── LSTM_CNN.py ├── TextCNN.py ├── data ├── NegativeFile6.csv ├── PositiveFile6.csv ├── negative_urls.csv └── positive_urls.csv ├── data_Processer.py ├── simpleNN.py ├── train_LSTMCNN.py ├── train_simpleNN.py ├── train_textCNN.py ├── train_textCNN_w2vec.py └── word2vec_tool.py /page_identify/CNN_LSTM.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | # ____ ____ ______________ 4 | # | | | | | | 5 | # | | | | |_____ _____| 6 | # | |__| | | | 7 | # | __ | | | 8 | # | | | | | | 9 | # | | | | | | 10 | # |____| |____| |____| 11 | # 12 | # fileName:CNN_LSTM 13 | # project: Fish_learning 14 | # author: theo_hui 15 | # e-mail:Theo_hui@163.com 16 | # purpose: {文件作用描述} 17 | # creatData:2019/5/15 18 | 19 | import tensorflow as tf 20 | 21 | 22 | class CNN_LSTM(object): 23 | def __init__(self, sequence_length, num_classes, vocab_size, embedding_size, filter_sizes, num_filters, 24 | l2_reg_lambda=0.0, num_hidden=100): 25 | # PLACEHOLDERS 26 | self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x") # X - The Data 27 | self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y") # Y - The Lables 28 | self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob") # Dropout 29 | 30 | l2_loss = tf.constant(0.0) # Keeping track of l2 regularization loss 31 | 32 | # 1. EMBEDDING LAYER ################################################################ 33 | with tf.device('/cpu:0'), tf.name_scope("embedding"): 34 | self.W = tf.Variable(tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0), name="W") 35 | self.embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x) 36 | self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1) 37 | 38 | # 2. CONVOLUTION LAYER + MAXPOOLING LAYER (per filter) ############################### 39 | pooled_outputs = [] 40 | for i, filter_size in enumerate(filter_sizes): 41 | with tf.name_scope("conv-maxpool-%s" % filter_size): 42 | # CONVOLUTION LAYER 43 | filter_shape = [filter_size, embedding_size, 1, num_filters] 44 | W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W") 45 | b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b") 46 | conv = tf.nn.conv2d(self.embedded_chars_expanded, W, strides=[1, 1, 1, 1], padding="VALID", name="conv") 47 | # NON-LINEARITY 48 | h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu") 49 | # MAXPOOLING 50 | pooled = tf.nn.max_pool(h, ksize=[1, sequence_length - filter_size + 1, 1, 1], strides=[1, 1, 1, 1], 51 | padding='VALID', name="pool") 52 | pooled_outputs.append(pooled) 53 | 54 | # COMBINING POOLED FEATURES 55 | num_filters_total = num_filters * len(filter_sizes) 56 | self.h_pool = tf.concat(pooled_outputs, 3) 57 | self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total]) 58 | 59 | # 3. DROPOUT LAYER ################################################################### 60 | with tf.name_scope("dropout"): 61 | self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob) 62 | 63 | # 4. LSTM LAYER ###################################################################### 64 | cell = tf.contrib.rnn.LSTMCell(num_hidden, state_is_tuple=True) 65 | self.h_drop_exp = tf.expand_dims(self.h_drop, -1) 66 | val, state = tf.nn.dynamic_rnn(cell, self.h_drop_exp, dtype=tf.float32) 67 | 68 | # embed() 69 | 70 | val2 = tf.transpose(val, [1, 0, 2]) 71 | last = tf.gather(val2, int(val2.get_shape()[0]) - 1) 72 | 73 | out_weight = tf.Variable(tf.random_normal([num_hidden, num_classes])) 74 | out_bias = tf.Variable(tf.random_normal([num_classes])) 75 | 76 | with tf.name_scope("output"): 77 | # lstm_final_output = val[-1] 78 | # embed() 79 | self.scores = tf.nn.xw_plus_b(last, out_weight, out_bias, name="scores") 80 | self.predictions = tf.nn.softmax(self.scores, name="predictions") 81 | 82 | with tf.name_scope("loss"): 83 | self.losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.input_y) 84 | self.loss = tf.reduce_mean(self.losses, name="loss") 85 | 86 | with tf.name_scope("accuracy"): 87 | self.correct_pred = tf.equal(tf.argmax(self.predictions, 1), tf.argmax(self.input_y, 1)) 88 | self.accuracy = tf.reduce_mean(tf.cast(self.correct_pred, "float"), name="accuracy") 89 | 90 | # embed() 91 | -------------------------------------------------------------------------------- /page_identify/LSTM_CNN.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | # ____ ____ ______________ 4 | # | | | | | | 5 | # | | | | |_____ _____| 6 | # | |__| | | | 7 | # | __ | | | 8 | # | | | | | | 9 | # | | | | | | 10 | # |____| |____| |____| 11 | # 12 | # fileName:LSTM_CNN 13 | # project: Fish_learning 14 | # author: theo_hui 15 | # e-mail:Theo_hui@163.com 16 | # purpose: LSTM_CNN模型 17 | # LSTM_CNN模型兼具了RNN和CNN的优点,考虑了句子的序列信息,又能捕捉关键信息 但是 没法并行计算,因此,训练时速度要比FastText和TextCNN慢得多 18 | # creatData:2019/5/10 19 | import tensorflow as tf 20 | 21 | 22 | class LSTM_CNN(object): 23 | # 1. Embed --> LSTM 24 | # 2. LSTM --> CNN 25 | # 3. CNN --> Pooling/Output 26 | 27 | def __init__(self, 28 | sequence_length, #输入序列长度 29 | num_classes, #分类数目 30 | vocab_size, #词汇尺寸 31 | embedding_size, #隐藏层尺寸 32 | filter_sizes, #过滤器(卷积核)尺寸 33 | num_filters, #过滤器(卷积核)数量 34 | l2_reg_lambda=0.0,#l2正则化参数 35 | ): 36 | #L2正则化 37 | self.l2_loss = tf.constant(0.0) 38 | 39 | #输入训练数据与验证数据以及dropout层 40 | self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x") # X - The Data 41 | self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y") # Y - The Lables 42 | self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob") # Dropout 43 | 44 | #隐藏层 45 | self.EmbeddingLayer(vocab_size,embedding_size) 46 | 47 | #LSTM层 48 | self.LSTMLayer() 49 | 50 | #卷积层+maxpool层 51 | num_filters_total=self.ConvMaxpoolLayer(sequence_length,embedding_size,filter_sizes,num_filters) 52 | 53 | # Dropout层 54 | self.DropoutLayer() 55 | 56 | # 输出层 得分和预测 57 | self.OutputLayer(num_filters_total,num_classes) 58 | 59 | # loss 60 | self.calc_loss(l2_reg_lambda) 61 | 62 | # accuracy 63 | self.calc_acc() 64 | 65 | 66 | #------------------------------------------------------------------------------------- 67 | 68 | def EmbeddingLayer(self,vocab_size,embedding_size): 69 | ''' 70 | 隐藏层 71 | :return: 72 | ''' 73 | with tf.name_scope("embedding"): 74 | self.W = tf.Variable(tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0), name="W") 75 | self.embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x) #embedding_lookup 选取一个张量里面对应的索引元素 76 | # self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1) 77 | 78 | def LSTMLayer(self): 79 | ''' 80 | LSTM层 81 | :return: 82 | ''' 83 | self.lstm_cell = tf.contrib.rnn.LSTMCell(32, state_is_tuple=True) 84 | # self.h_drop_exp = tf.expand_dims(self.h_drop,-1) 85 | self.lstm_out, self.lstm_state = tf.nn.dynamic_rnn(self.lstm_cell, self.embedded_chars, dtype=tf.float32) 86 | 87 | self.lstm_out_expanded = tf.expand_dims(self.lstm_out, -1) 88 | 89 | def ConvMaxpoolLayer(self,sequence_length,embedding_size,filter_sizes,num_filters): 90 | ''' 91 | 卷积层+maxpool层 92 | :return: 93 | ''' 94 | pooled_outputs = [] 95 | for i, filter_size in enumerate(filter_sizes): 96 | with tf.name_scope("conv-maxpool-%s" % filter_size): 97 | #卷积层 以LSTM层的输出为输入 98 | filter_shape = [filter_size, embedding_size, 1, num_filters] 99 | 100 | W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W") 101 | b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b") 102 | 103 | conv = tf.nn.conv2d(self.lstm_out_expanded, W, strides=[1, 1, 1, 1], padding="VALID", name="conv") 104 | 105 | #非线性化 106 | h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu") 107 | 108 | #每一个卷积层的池化层 109 | pooled = tf.nn.max_pool(h, ksize=[1, sequence_length - filter_size + 1, 1, 1], strides=[1, 1, 1, 1], 110 | padding='VALID', name="pool") 111 | pooled_outputs.append(pooled) 112 | 113 | # 结合所有的池化层 114 | num_filters_total = num_filters * len(filter_sizes) 115 | self.h_pool = tf.concat(pooled_outputs, 3) 116 | self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total]) 117 | 118 | return num_filters_total 119 | 120 | def DropoutLayer(self): 121 | ''' 122 | Dropout层 123 | :return: 124 | ''' 125 | with tf.name_scope("dropout"): 126 | self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob) 127 | 128 | def OutputLayer(self,num_filters_total,num_classes): 129 | ''' 130 | 输出层 并计算L2正则化损失 得分和预测 131 | :param num_filters_total: 卷积层的所有大小 132 | :param num_classes: 分类数目 133 | :return: 134 | ''' 135 | 136 | with tf.name_scope("output"): 137 | W = tf.get_variable( 138 | "W", 139 | shape=[num_filters_total, num_classes], 140 | initializer=tf.contrib.layers.xavier_initializer()) 141 | b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b") 142 | 143 | self.l2_loss += tf.nn.l2_loss(W) 144 | self.l2_loss += tf.nn.l2_loss(b) 145 | self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores") 146 | self.predictions = tf.argmax(self.scores, 1, name="predictions") 147 | 148 | def calc_loss(self,l2_reg_lambda): 149 | ''' 150 | 计算损失率 151 | :return: 152 | ''' 153 | with tf.name_scope("loss"): 154 | losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.input_y) 155 | self.loss = tf.reduce_mean(losses) + l2_reg_lambda * self.l2_loss 156 | 157 | def calc_acc(self): 158 | ''' 159 | 计算正确率 160 | :return: 161 | ''' 162 | with tf.name_scope("accuracy"): 163 | correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1)) 164 | self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy") 165 | -------------------------------------------------------------------------------- /page_identify/TextCNN.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | # ____ ____ ______________ 4 | # | | | | | | 5 | # | | | | |_____ _____| 6 | # | |__| | | | 7 | # | __ | | | 8 | # | | | | | | 9 | # | | | | | | 10 | # |____| |____| |____| 11 | # 12 | # fileName:TextCNN 13 | # project: TextCNN_SpamSort 14 | # author: theo_hui 15 | # e-mail:Theo_hui@163.com 16 | # purpose: CNN模型 采用Yoon Kim 提出的TextCNN模型 17 | # creatData:2019/5/9 18 | 19 | import tensorflow as tf 20 | 21 | class TextCNN(): 22 | ''' 23 | 文本分类的CNN模型 24 | 25 | 1. Embeddding Layer 隐藏层 26 | 2. Convolution Layer 卷积层 27 | 3. Max-Poling Layer 最大值池化层 28 | 4. Softmax Layer softmax层 29 | ''' 30 | 31 | def __init__(self, 32 | sequence_length, #序列长度 输入定长处理 超过的截断 不足的补零 33 | num_classes, #分类的数目 分为几类 34 | embedding_size, #词向量的维度 (降维) 35 | filter_sizes, #所有过滤器的尺寸 36 | num_filters, #过滤器的数量 37 | l2_reg_lambda=0.0): 38 | 39 | #输入 40 | self.input_x=tf.placeholder(tf.float32,[None,sequence_length,embedding_size],name="input_x") #输入数据 41 | self.input_y=tf.placeholder(tf.float32,[None,num_classes],name="input_y") #验证数据 42 | self.dropout_keep_prob=tf.placeholder(tf.float32,name="dropout_keep_prob") #dropout 43 | 44 | 45 | #隐藏层 46 | self.EmbeddinfLayer() 47 | 48 | #卷积层+max_pool层 49 | self.Convoluthion_maxpoolLayer(filter_sizes,embedding_size,num_filters,sequence_length) 50 | 51 | #dropout层 (防止过拟合) 52 | self.DropoutLayer() 53 | 54 | #输出层 55 | self.OutputLayer(num_filters*len(filter_sizes),num_classes) 56 | 57 | #计算损失函数 交叉熵 58 | self.calc_loss(l2_reg_lambda) 59 | 60 | #计算准确率 61 | self.calc_accuracy() 62 | 63 | 64 | 65 | def EmbeddinfLayer(self): 66 | ''' 67 | 隐藏层 68 | 69 | 将one-hot编码的词投影到一个低维的空间中 70 | ''' 71 | 72 | self.embedded_chars=self.input_x 73 | self.embedded_chars_expended=tf.expand_dims(self.embedded_chars,-1)#增加维度 74 | 75 | def Convoluthion_maxpoolLayer(self,filter_sizes,embedding_size,num_filters,sequence_length): 76 | ''' 77 | 卷积层+maxpool层 78 | 79 | 为不同尺寸的filter都建立一个卷积层(多个feature map) 80 | ''' 81 | 82 | #循环遍历建立 83 | pooled_outputs=[] 84 | for i,filter_size in enumerate(filter_sizes): 85 | 86 | #不同的命名空间 87 | with tf.name_scope("conv-maxpool-%s"%filter_size): 88 | #卷积层 89 | filter_shape=[filter_size,embedding_size,1,num_filters] 90 | weight=tf.Variable(tf.truncated_normal(filter_shape,stddev=0.1),name="weight") 91 | bias =tf.Variable(tf.constant(0.1,shape=[num_filters]),name="biase") 92 | 93 | conv=tf.nn.conv2d(self.embedded_chars_expended,#input 输入数据 Tensor 94 | weight, #filter 卷积核 Tensor 其shape必须为[高度,宽度,通道数,个数] 95 | strides=[1,1,1,1], #strides 每一维的步长 96 | padding="VALID", #padding "SAME"或者"VALID"之一 决定不同的卷积方式 97 | name="conv") 98 | 99 | #relu激活函数(非线性) 100 | relued=tf.nn.relu(tf.nn.bias_add(conv,bias),#features 将卷积加上bias 101 | name="relu") 102 | 103 | #Maxpooling 池化 104 | pooled=tf.nn.max_pool(relued, #value 需要池化的的输入 [batch, height, width, channels]这样的shape 105 | ksize=[1,sequence_length-filter_size+1,1,1], #池化窗口大小 四维向量 一般是[1, height, width, 1],不想在batch和channels上做池化 106 | strides=[1,1,1,1], #每一维上的步长 107 | padding='VALID', #padding "SAME"或者"VALID"之一 决定不同的卷积方式 108 | name="pool") 109 | 110 | #添加到集合中 111 | pooled_outputs.append(pooled) 112 | 113 | #结合所有池化后的特征 114 | num_filter_total=num_filters*len(filter_sizes) 115 | 116 | self.h_pool = tf.concat(pooled_outputs,3) #张量连接 维度3 117 | self.h_pool_flat = tf.reshape(self.h_pool,[-1,num_filter_total]) 118 | 119 | 120 | def DropoutLayer(self): 121 | ''' 122 | Dropout层 123 | 124 | 防止过拟合 125 | ''' 126 | 127 | with tf.name_scope("dropout"): 128 | self.h_drop=tf.nn.dropout(self.h_pool_flat, #数据 129 | self.dropout_keep_prob #dropout 概率 130 | ) 131 | def OutputLayer(self,num_filters_total,num_classes): 132 | ''' 133 | 输出层 134 | ''' 135 | 136 | #L2正则化损失 137 | self.l2_loss=tf.constant(0.0) 138 | 139 | with tf.name_scope("output"): 140 | weight=tf.get_variable("weight", 141 | shape=[num_filters_total,num_classes], 142 | initializer=tf.contrib.layers.xavier_initializer()#保持每一层梯度大小差不多 143 | ) 144 | bias=tf.Variable(tf.constant(0.1,shape=[num_classes],name="bias")) 145 | 146 | self.l2_loss+=tf.nn.l2_loss(weight) 147 | self.l2_loss+=tf.nn.l2_loss(bias) 148 | 149 | #得到结果 150 | self.score=tf.nn.xw_plus_b(self.h_drop,weight,bias,name="score")#相当与matmul(x,weigt)+bias 151 | self.predictions=tf.argmax(self.score,1,name="predictions") 152 | 153 | 154 | def calc_loss(self,l2_reg_lambda): 155 | ''' 156 | 计算损失函数 157 | 158 | 使用交叉熵来计算 159 | ''' 160 | 161 | with tf.name_scope("loss"): 162 | 163 | losses=tf.nn.softmax_cross_entropy_with_logits(logits=self.score,labels=self.input_y) 164 | self.loss=tf.reduce_mean(losses)+l2_reg_lambda*self.l2_loss #总损失 165 | 166 | def calc_accuracy(self): 167 | ''' 168 | 计算准确率 169 | ''' 170 | 171 | with tf.name_scope("accuracy"): 172 | correct_predictions = tf.equal(self.predictions,tf.argmax(self.input_y,1)) 173 | self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,"float"),name="accuracy") -------------------------------------------------------------------------------- /page_identify/data/PositiveFile6.csv: -------------------------------------------------------------------------------- 1 | filename,actionFlag,nullHyperlinkFlag,suspecLinkFlag,outLinkFlag,zeroLinkFlag,outCssFlag 2 | http://qq.com,1,0.0294,0,0.0364,0,0 3 | http://sohu.com,0,0,0,0.0947,0,0 4 | http://weibo.com,0,0,0,0,1,0 5 | http://sina.com.cn,1,0.0283,0.022,0.0991,0,0 6 | http://163.com,0,0.1891,0.0005,0.0373,0,0 7 | http://douban.com,0,0.0174,0,0.0217,0,0 8 | http://tv.sohu.com,0,0.0074,0,0.0135,0,0 9 | http://iqiyi.com,0,0.3684,0,0.0145,0,0 10 | http://youku.com,0,0,0,0.0375,0,0 11 | http://autohome.com.cn,0,0.0488,0,0.0383,0,0 12 | http://ifeng.com,0,0,0.0016,0.0129,0,0 13 | http://zol.com.cn,0,0.0012,0.0695,0.0857,0,0 14 | http://v.qq.com,0,0.0057,0,0.0105,0,0 15 | http://eastmoney.com,0,0.0142,0.0049,0.074,0,0 16 | http://sogou.com,0,0,0.0253,0.0759,0,0 17 | http://people.com.cn,0,0.0011,0.0155,0.182,0,1 18 | http://so.com,0,0.1282,0,0,0,0 19 | http://taobao.com,0,0.0379,0.0303,0.1364,0,0 20 | http://58.com,0,0.0013,0,0.0151,0,0 21 | http://bilibili.com,0,0.0132,0,0.0132,0,0 22 | http://qidian.com,0,0.0047,0.0189,0.0284,0,0 23 | http://ctrip.com,0,0.0498,0.017,0.079,0,0 24 | http://jd.com,0,0.0219,0,0,0,0 25 | http://mafengwo.cn,0,0,0,0,1,0 26 | http://fang.com,0,0,0,0,1,0 27 | http://360doc.com,0,0,0,0,1,0 28 | http://movie.douban.com,1,0.0187,0,0.0312,0,1 29 | http://pconline.com.cn,0,0,0.0008,0.0212,0,0 30 | http://pcbaby.com.cn,0,0.0144,0.0004,0.1614,0,0 31 | http://kugou.com,0,0.1085,0,0.1038,0,0 32 | http://dict.cn,0,0.0238,0,0.0714,0,1 33 | http://weather.com.cn,0,0.0062,0.0031,0.0679,0,1 34 | http://kuwo.cn,0,0.2199,0,0.1411,0,0 35 | http://39.net,0,0.0038,0.0011,0.021,0,0 36 | http://1905.com,0,0,0,0.089,0,0 37 | http://120ask.com,0,0,0,0,1,0 38 | http://onlinedown.net,0,0.0005,0,0.0389,0,0 39 | http://tianqi.com,0,0,0.0164,0.1589,0,0 40 | http://911cha.com,0,0,0,0,1,0 41 | http://csdn.net,0,0,0,0.0565,0,0 42 | http://gushiwen.org,0,0,0.5642,0.0467,0,0 43 | http://blog.csdn.net,0,0,0,0.011,0,0 44 | http://cnblogs.com,0,0.0055,0.0055,0.1099,0,0 45 | http://y.qq.com,0,0.4797,0,0.1138,0,0 46 | http://pclady.com.cn,0,0.0174,0.0007,0.1256,0,0 47 | http://kuaidi100.com,0,0.037,0,0.2037,0,0 48 | http://baike.pcbaby.com.cn,0,0.0713,0,0.0876,0,0 49 | http://v.ifeng.com,0,0,0,0,0,0 50 | http://ruiwen.com,0,0.0237,0,0.013,0,0 51 | http://baike.com,0,0.0078,0.0039,0.0936,0,1 52 | http://meishij.net,1,0.0036,0.0325,0.13,0,0 53 | http://360.cn,1,0.0094,0.0258,0.1643,0,0 54 | http://zhihu.com,0,0,0,0,1,0 55 | http://cncn.com,0,0.25,0.0041,0.0826,0,0 56 | http://4399.com,0,0.025,0.0009,0.1426,0,0 57 | http://guba.eastmoney.com,0,0.0063,0.0084,0.0422,0,0 58 | http://1688.com,0,0.1395,0,0.1337,0,0 59 | http://tvmao.com,0,0,0,0,1,0 60 | http://hao123.com,1,0.0635,0,0.7328,0,0 61 | http://xinshipu.com,0,0.2059,0,0,0,0 62 | http://xiachufang.com,0,0,0,0.0163,0,0 63 | http://music.163.com,0,0.5024,0,0.0047,0,0 64 | http://mama.cn,0,0.0366,0.0065,0.1427,0,0 65 | http://iciba.com,0,0,0,0,1,0 66 | http://jia.com,0,0.0281,0,0.0734,0,0 67 | http://you.ctrip.com,0,0.0238,0.0079,0.0576,0,0 68 | http://tieba.baidu.com,0,0.125,0,0.0076,0,0 69 | http://wenku.baidu.com,0,0.0017,0,0.0153,0,0 70 | http://zhidao.baidu.com,0,0.0508,0,0,0,1 71 | http://image.baidu.com,0,0.0714,0,0,0,0 72 | http://xh.5156edu.com,1,0,0.0118,0,0,0 73 | http://jingyan.baidu.com,0,0.0154,0,0,0,0 74 | http://map.baidu.com,0,0,0,0,1,0 75 | http://www.zybang.com,0,0.1333,0,0.2,0,1 76 | http://v.baidu.com,0,0.0184,0,0.0429,0,1 77 | http://www.douyu.com,0,0.0267,0,0.0978,0,1 78 | http://biquge.info,0,0.0141,0.0047,0.0516,0,0 79 | http://tianya.cn,0,0,0,0.5,0,1 80 | http://hupu.com,0,0.0098,0.0049,0.113,0,0 81 | http://eastday.com,1,0.0019,0.0019,0.2111,0,1 82 | http://china.com.cn,0,0,0,0.1199,0,0 83 | http://lvyou.baidu.com,0,0,0,0.0971,0,0 84 | http://gaosan.com,0,0.0044,0,0.0789,0,0 85 | http://jjwxc.net,0,0.0303,0.0909,0.2121,0,0 86 | http://chinaz.com,0,0.0011,0.0022,0.0223,0,0 87 | http://smzdm.com,0,0.0864,0,0.0354,0,0 88 | http://bbs.tianya.cn,0,0.0168,0,0.002,0,1 89 | http://gamersky.com,0,0,0,0,1,0 90 | http://huya.com,0,0,0.0078,0.1783,0,1 91 | http://ali213.net,0,0,0,0.0463,0,0 92 | http://3dmgame.com,0,0.006,0,0.0732,0,0 93 | http://fanyi.baidu.com,0,0.0085,0,0.0684,0,0 94 | http://music.baidu.com,0,0.2508,0,0.0735,0,0 95 | http://pptv.com,0,0.0562,0,0.0226,0,0 96 | http://finance.sina.com.cn,0,0.0081,0.074,0.0569,0,0 97 | http://hexun.com,0,0.0093,0.0093,0.0481,0,0 98 | http://17173.com,0,0.0611,0.0009,0.1282,0,0 99 | http://mtime.com,0,0.0124,0,0.0166,0,0 100 | http://xcar.com.cn,0,0,0,0,1,0 101 | http://news.ifeng.com,0,0,0.0045,0.2768,0,0 102 | http://ccb.com,0,0,0,0,1,0 103 | http://zdic.net,0,0,0.0938,0.1094,0,0 104 | http://7k7k.com,0,0.0037,0.011,0.0154,0,0 105 | http://pcauto.com.cn,0,0.1438,0.0004,0.0342,0,0 106 | http://mbalib.com,0,0,0,0.04,0,0 107 | http://w3school.com.cn,1,0,0.9231,0.0462,0,0 108 | http://cntv.cn,0,0.0149,0,0.9538,0,0 109 | http://book.douban.com,0,0.0445,0.0342,0.0103,0,1 110 | http://xiaohongshu.com,0,0,0,0,1,0 111 | http://bitauto.com,1,0.0056,0.0014,0.2193,0,0 112 | http://51240.com,0,0.0079,0,0.0237,0,0 113 | http://51test.net,0,0,0.0026,0.0571,0,0 114 | http://pc6.com,0,0,0,0.029,0,0 115 | http://quote.eastmoney.com,0,0.0259,0.0029,0.0605,0,0 116 | http://8684.cn,0,0,0.0083,0.1736,0,0 117 | http://360kan.com,0,0.0596,0.1836,0.0099,0,0 118 | http://booktxt.net,0,0,0.0566,0,0,0 119 | http://xinhuanet.com,1,0.0139,0.0013,0.2393,0,0 120 | http://liuxue86.com,0,0.0038,0,0.0073,0,0 121 | http://youdao.com,0,0.0238,0.0238,0.1429,0,0 122 | http://51job.com,0,0,0.0603,0.0261,0,0 123 | http://google.cn,0,0,0,0.75,0,0 124 | http://zhibo8.cc,0,0,0.2269,0.508,0,0 125 | http://blog.sina.com.cn,0,0.0112,0.0074,0.0223,0,0 126 | http://zhaopin.com,0,0.0014,0.0014,0.1048,0,0 127 | http://alipay.com,0,0.1818,0.0455,0.3182,0,0 128 | http://anjuke.com,0,0,0,0.0138,0,1 129 | http://xiami.com,0,0,0,0,1,0 130 | http://toutiao.com,0,0,0,0,1,0 131 | http://chinadaily.com.cn,0,0,0,0,1,0 132 | http://wiki.mbalib.com,0,0.0021,0.0172,0,0,0 133 | http://58pic.com,0,0.0456,0.0977,0.0684,0,0 134 | http://qunar.com,0,0.0809,0.0108,0.035,0,0 135 | http://hao.360.cn,1,0.0438,0.0018,0.2676,0,0 136 | http://weixin.qq.com,0,0,0,0.0714,0,0 137 | http://hujiang.com,0,0,0,0,1,0 138 | http://yesky.com,0,0.0013,0,0.1093,0,0 139 | http://xuexila.com,0,0.0021,0.0021,0.0761,0,0 140 | http://baixing.com,0,0.0088,0.0018,0.0864,0,0 141 | http://news.qq.com,0,0,0,0,1,0 142 | http://www.boc.cn,1,0.0089,0,0.2552,0,0 143 | http://ip138.com,1,0,0.0109,0.2283,0,0 144 | http://xiangha.com,0,0.0556,0,0.0787,0,0 145 | http://china.com,0,0.003,0,0.0417,0,0 146 | http://kanzhun.com,0,0,0,0,1,0 147 | http://fund.eastmoney.com,0,0.0006,0.0042,0.1941,0,0 148 | http://unjs.com,0,0,0,0.1351,0,0 149 | http://yxdown.com,0,0.0303,0,0.0636,0,0 150 | http://ganji.com,0,0.0013,0.0467,0.0214,0,0 151 | http://yinyuetai.com,0,0.1026,0,0.0385,0,0 152 | http://faloo.com,0,0,0.0427,0.122,0,0 153 | http://66law.cn,0,0,0.389,0.0209,0,0 154 | http://bendibao.com,1,0.0075,0,0.005,0,0 155 | http://docin.com,0,0.0036,0,0.0873,0,1 156 | http://b.faloo.com,0,0,0.0191,0.0322,0,0 157 | http://offcn.com,1,0.003,0.0049,0.3633,0,0 158 | http://sports.sina.com.cn,0,0.0064,0.0046,0.0285,0,0 159 | http://mp.weixin.qq.com,0,0.0312,0,0.0312,0,0 160 | http://mail.163.com,0,0,0,0.0488,0,0 161 | http://haodf.com,0,0,0,0,1,0 162 | http://pingan.com,0,0,0.0049,0.3024,0,0 163 | http://detail.zol.com.cn,0,0.0496,0.0349,0.0317,0,0 164 | http://zongheng.com,0,0.0023,0.0023,0.1285,0,0 165 | http://jiameng.com,0,0,0,1,0,0 166 | http://news.sohu.com,0,0.0045,0,0.1514,0,0 167 | http://cnki.net,0,0,0.0185,0.0481,0,0 168 | http://www.dianping.com,0,0,0,0.0714,0,0 169 | http://122.gov.cn,0,0,0,0,1,0 170 | http://huawei.com,0,0.0609,0.0128,0.0833,0,0 171 | http://eol.cn,0,0.0208,0.0016,0.0817,0,0 172 | http://nba.com,0,0.015,0,0.5935,0,0 173 | http://lianjia.com,0,0,0,0,1,0 174 | http://price.pcauto.com.cn,0,0.0303,0,0,0,0 175 | http://ent.qq.com,1,0.0048,0.0008,0.0787,0,0 176 | http://zdface.com,0,0.0284,0,0.1225,0,0 177 | http://sc.chinaz.com,0,0,0.004,0.03,0,0 178 | http://lz13.cn,0,0,0,0.0257,0,0 179 | http://doyo.cn,0,0.0033,0,0.0196,0,0 180 | http://redocn.com,0,0.0455,0.0017,0.1171,0,0 181 | http://d1xz.net,0,0.0186,0.5315,0.0516,0,0 182 | http://doc88.com,0,0.1375,0.15,0.025,0,0 183 | http://aizhan.com,0,0,0.0108,0.0649,0,0 184 | http://juzimi.com,0,0,0,0,1,0 185 | http://translate.google.cn,0,0,0,0.5556,0,1 186 | http://car.autohome.com.cn,0,0.0024,0,0,0,0 187 | http://manhuatai.com,0,0,0,0.0476,0,0 188 | http://dmzj.com,0,0.0577,0,0.0096,0,0 189 | http://www.mi.com,0,0.0088,0,0.0658,0,0 190 | http://suning.com,0,0.0702,0,0.0246,0,0 191 | http://tvsou.com,0,0,0.0036,0.2071,0,0 192 | http://mail.qq.com,0,0,0,0.0625,0,1 193 | http://ximalaya.com,0,0,0,0,1,0 194 | http://meishichina.com,0,0.0023,0,0.0519,0,0 195 | http://news.sina.com.cn,0,0.0068,0.0301,0.1191,0,0 196 | http://dict.youdao.com,0,0.0238,0.0238,0.1429,0,0 197 | http://dytt8.net,0,0.0036,0,0.0089,0,0 198 | http://maigoo.com,0,0.0363,0,0.0544,0,0 199 | http://18183.com,1,0.0005,0.0005,0.1174,0,0 200 | http://xywy.com,0,0.0865,0.0198,0.0743,0,0 201 | http://news.163.com,1,0.0181,0,0.2155,0,0 202 | http://so.gushiwen.org,0,0,0,0,1,0 203 | http://guancha.cn,0,0.0032,0.0008,0.0142,0,0 204 | http://sports.qq.com,0,0.0971,0.0022,0.0993,0,0 205 | http://flights.ctrip.com,0,0.2097,0.0334,0.1277,0,0 206 | http://nuomi.com,0,0.0177,0,0.0638,0,0 207 | http://sports.sohu.com,0,0,0.0808,0.0053,0,0 208 | http://qq.ip138.com,0,0,0.0952,0.0952,0,0 209 | http://xbiao.com,0,0.0209,0.0417,0.0654,0,0 210 | http://gkcx.eol.cn,0,0,0,0,1,0 211 | http://maoyan.com,0,0,0.03,0.09,0,0 212 | http://juqingba.cn,1,0.0055,0,0.0771,0,0 213 | http://ent.sina.com.cn,0,0.019,0.0011,0.1196,0,0 214 | http://jianshu.com,0,0,0,0,1,0 215 | http://ivsky.com,0,0,0,0.1087,0,0 216 | http://bytravel.cn,0,0,0,0,1,0 217 | http://jmw.com.cn,0,0.0087,0.0515,0.0228,0,0 218 | http://51credit.com,0,0.0222,0,0.1333,0,0 219 | http://cr173.com,0,0.0024,0,0.0782,0,0 220 | http://renren.com,0,0.0192,0,0.25,0,1 221 | http://tuliu.com,0,0.0071,0.0048,0.1619,0,0 222 | http://www.chsi.com.cn,0,0,0,0.3097,0,1 223 | http://fh21.com.cn,0,0.0188,0,0.1009,0,0 224 | http://2345.com,0,0.0381,0.0296,0.4271,0,0 225 | http://yw11.com,0,0,0.0488,0.0395,0,0 226 | http://hydcd.com,0,0,0,0.0736,0,0 227 | http://leha.com,0,0.0099,0,0.0377,0,0 228 | http://sh.qihoo.com,1,0.0312,0.0156,0.5859,0,0 229 | http://80txt.com,0,0,0.0016,0.0459,0,0 230 | http://ask.39.net,0,0,0,0,1,0 231 | http://elecfans.com,1,0.0035,0.0177,0.1062,0,0 232 | http://china-10.com,0,0.078,0,0.0683,0,0 233 | http://cngold.org,0,0.0008,0,0.0017,0,1 234 | http://haodou.com,0,0,0.2203,0.2938,0,0 235 | http://wx.qq.com,0,0.2353,0,0.1765,0,0 236 | http://fanyi.youdao.com,0,0.7273,0,0.0152,0,1 237 | http://360kad.com,0,0.002,0.2454,0.033,0,0 238 | http://yue365.com,0,0,0,0.0623,0,0 239 | http://enterdesk.com,0,0.0026,0,0.0885,0,0 240 | http://shangc.net,0,0.0056,0,0.1732,0,0 241 | http://xzw.com,0,0.0017,0,0.0915,0,0 242 | http://manmankan.com,0,0,0,0.0258,0,0 243 | http://ac.qq.com,0,0,0,0.0048,0,0 244 | http://91jm.com,0,0.0005,0.0005,0.0189,0,0 245 | http://9ku.com,0,0.0051,0,0.0212,0,0 246 | http://64365.com,0,0,0.1416,0.0838,0,0 247 | http://7y7.com,0,0.0082,0,0.0655,0,0 248 | http://9game.cn,0,0.0024,0,0.1014,0,0 249 | http://finance.ifeng.com,0,0.0144,0.0072,0.0072,0,0 250 | http://manhua.dmzj.com,0,0,0,0.0172,0,0 251 | http://chachaba.com,0,0,0,0,1,0 252 | http://im.qq.com,0,0.075,0,0.1,0,0 253 | http://jianke.com,0,0,0,0.04,0,0 254 | http://1ting.com,0,0.0042,0.0014,0.0407,0,0 255 | http://mgtv.com,0,0.0235,0.0157,0.0706,0,0 256 | http://cndzys.com,0,0.0158,0,0.0585,0,0 257 | http://ent.ifeng.com,0,0,0,0,0,0 258 | http://ccoo.cn,1,0.0016,0,0.0574,0,1 259 | http://17500.cn,0,0.0344,0.047,0.0919,0,0 260 | http://aliyun.com,0,0.0033,0,0.0426,0,0 261 | http://rednet.cn,1,0.0053,0.0011,0.2577,0,0 262 | http://www.tmall.com,0,0,0,0,0,0 263 | http://www.microsoft.com,0,0,0.0549,0.2857,0,0 264 | http://yy.com,0,0.0833,0,0.2361,0,0 265 | http://che168.com,0,0.0127,0,0.0056,0,0 266 | http://qj.com.cn,0,0.005,0.0006,0.0599,0,0 267 | http://jrj.com.cn,0,0.0541,0,0.0239,0,0 268 | http://dangdang.com,0,0.0039,0.0941,0.0353,0,0 269 | http://le.com,0,0.0152,0.0015,0.0412,0,0 270 | http://baofeng.com,0,0.0024,0,0.1415,0,0 271 | http://nipic.com,0,0,0.0164,0.1639,0,0 272 | http://www.chinairn.com,0,0,0,0.1365,0,0 273 | http://6.cn,0,0.0893,0.1202,0.031,0,0 274 | http://nba.hupu.com,0,0,0,0.1133,0,0 275 | http://zcool.com.cn,0,0.0381,0.0286,0.1238,0,0 276 | http://178.com,0,0.0248,0,0.1801,0,0 277 | http://qqtn.com,0,0.0012,0,0.0826,0,0 278 | http://aicai.com,0,0.0396,0,0.0422,0,0 279 | http://bing.com,0,0.0323,0,0.5806,0,0 280 | http://mydrivers.com,0,0.0712,0,0.0556,0,0 281 | http://jiayuan.com,0,0.8359,0.0808,0.0177,0,1 282 | http://www.adobe.com,0,0.1287,0,0,0,0 283 | http://liebiao.com,0,0,0.0025,0.0639,0,1 284 | http://lofter.com,0,0,0,0,1,0 285 | http://boosj.com,0,0.0047,0,0.1745,0,0 286 | http://chinapp.com,0,0.0121,0,0.0492,0,0 287 | http://buyiju.com,0,0,0.0595,0.1488,0,0 288 | http://safetree.com.cn,0,0.0764,0.1338,0.0828,0,0 289 | http://qfang.com,0,0,0,0.2687,0,0 290 | http://xpgod.com,0,0,0,0.062,0,0 291 | http://www.99zuowen.com,0,0.0026,0.0077,0.0166,0,0 292 | http://dm5.com,0,0,0,0.0163,0,0 293 | http://vip.com,0,0.0294,0.0882,0,0,0 294 | http://xunlei.com,0,0.381,0.0159,0.1587,0,0 295 | http://focus.cn,0,0.0534,0,0.0354,0,0 296 | http://www.chazidian.com,0,0.0026,0,0.0078,0,0 297 | http://moegirl.org,0,0,0.1818,0.1364,0,0 298 | http://sto.cn,0,0.02,0,0.09,0,0 299 | http://www.10086.cn,0,0,0,0,1,0 300 | http://qqyou.com,0,0.0019,0.1029,0.0524,0,0 301 | http://job592.com,0,0,0,0.1935,0,0 302 | http://pan.baidu.com,0,0,0.0455,0,0,0 303 | http://so.redocn.com,0,0.005,0.0025,0.0451,0,0 304 | http://cp2y.com,0,0.0196,0.0065,0.1699,0,0 305 | http://qzone.qq.com,0,0,0,0.05,0,0 306 | http://zgjm.org,0,0,0.0039,0.03,0,0 307 | http://book118.com,0,0.0049,0.0148,0.0345,0,0 308 | http://tv.cctv.com,0,0.0216,0,0.0086,0,1 309 | http://ooopic.com,0,0.2013,0.0941,0.071,0,0 310 | http://tgbus.com,0,0,0,0.027,0,0 311 | http://china.cn,0,0.0156,0.0182,0.0195,0,0 312 | http://zh.moegirl.org,0,0,0.0299,0.009,0,0 313 | http://liqucn.com,0,0,0.0395,0,0,0 314 | http://www.12306.cn,0,0.7754,0,0,0,0 315 | http://sdo.com,0,0,0,0,0,0 316 | http://to8to.com,0,0.062,0.0155,0.0827,0,0 317 | http://www.56.com,0,0.004,0,0.0121,0,0 318 | http://zjtcn.com,0,0,0,0,1,0 319 | http://jisutiyu.com,0,0.0016,0.0501,0.3006,0,0 320 | http://www.189.cn,0,0,0,0.0588,0,0 321 | http://www.cmbchina.com,0,0,0.1135,0.0871,0,0 322 | http://www.10010.com,0,0,0,0,1,0 323 | http://19lou.com,0,0.0643,0,0.1607,0,0 324 | http://chinaacc.com,0,0,0,0.0415,0,0 325 | http://www.icbc.com.cn,0,0,0,0,1,0 326 | http://uuu9.com,0,0.0016,0.0048,0.1356,0,0 327 | http://tuniu.com,0,0.0132,0,0.0245,0,0 328 | http://china.nba.com,0,0.015,0,0.5935,0,0 329 | http://99166.com,0,0.0084,0.0232,0.0989,0,0 330 | http://duanmeiwen.com,0,0,0,0.0106,0,0 331 | http://cn.bing.com,0,0.0323,0,0.5806,0,0 332 | http://amazon.cn,0,0,0.0075,0.1493,0,1 333 | http://tech.ifeng.com,0,0,0,0.2465,0,0 334 | http://yingjiesheng.com,0,0.0008,0.0047,0.0267,0,0 335 | http://5173.com,0,0.2216,0.0324,0.1243,0,1 336 | http://alibaba.com,0,0.0268,0.0179,0.2143,0,0 337 | http://runoob.com,1,0.0165,0,0,0,0 338 | http://sanguosha.com,0,0.0922,0.0184,0.1382,0,0 339 | http://52pk.com,0,0.0132,0.0237,0.0908,0,0 340 | http://fashion.ifeng.com,0,0.012,0,0,0,0 341 | http://movie.mtime.com,0,0.0992,0,0,0,0 342 | http://tencent.com,0,0,0,0,1,0 343 | http://51cto.com,0,0.0559,0.2148,0.0537,0,0 344 | http://51zxw.net,1,0.0299,0.0212,0.0405,0,0 345 | http://www.huanqiu.com,0,0,0,0.0257,0,0 346 | http://3d66.com,0,0.1836,0.0015,0.0262,0,0 347 | http://dzwww.com,0,0.0115,0.0069,0.2676,0,0 348 | http://gome.com.cn,0,0,0.0056,0.067,0,0 349 | http://meitu.com,0,0.1538,0,0.0879,0,0 350 | http://360-bo.com,0,0.0769,0.0063,0.0895,0,0 351 | http://huatu.com,0,0,0.0352,0.1262,0,0 352 | http://17k.com,0,0.0292,0,0.0251,0,0 353 | http://hp.com,0,0,0.003,0.0973,0,0 354 | http://114la.com,1,0.0159,0.0264,0.5509,0,0 355 | http://thepaper.cn,1,0.0292,0,0.0117,0,0 356 | http://nike.com,0,0.0034,0,0.0201,0,0 357 | http://wps.cn,0,0.0105,0,0.3368,0,0 358 | http://news.4399.com,1,0.0025,0.0302,0.1935,0,1 359 | http://99114.com,0,0.0123,0,0.0062,0,0 360 | http://co188.com,0,0,0,0.1765,0,0 361 | http://yundaex.com,0,0,0,0,1,0 362 | http://youth.cn,0,0.0152,0,0.0884,0,0 363 | http://bbs.hupu.com,0,0,0.0055,0.011,0,0 364 | http://mozilla.org,0,0,0,0.2126,0,0 365 | http://ttrar.com,0,0.0009,0,0.0379,0,0 366 | http://meituan.com,0,0,0,0.0158,0,0 367 | http://findlaw.cn,0,0.0075,0.0045,0.006,0,0 368 | http://126.com,0,0,0,0.6757,0,0 369 | http://liepin.com,0,0.0128,0,0.0291,0,0 370 | http://zhms.cn,0,0,0,0.3333,0,1 371 | http://vmall.com,0,0.0563,0.0036,0.0853,0,1 372 | http://so.ooopic.com,0,0.0187,0.0688,0.125,0,0 373 | http://cz89.com,0,0,0.0042,0.2046,0,0 374 | http://jiwu.com,0,0,0,0,1,0 375 | http://gaokao.com,0,0.0004,0,0.0251,0,1 376 | http://linkedin.com,0,0,0,0,0,0 377 | http://lagou.com,1,0.0127,0,0.094,0,0 378 | http://qyer.com,0,0.0759,0.0063,0.2278,0,0 379 | http://kuai8.com,0,0.001,0.0048,0.0534,0,0 380 | http://cnkang.com,0,0.3593,0,0.054,0,0 381 | http://xdf.cn,0,0.0045,0,0.1091,0,0 382 | http://xici.net,0,0.0138,0.0138,0.1,0,0 383 | http://www.acfun.cn,0,0.0361,0.1271,0.0095,0,0 384 | http://gmw.cn,0,0.0034,0,0.0809,0,0 385 | http://zxxk.com,0,0.0033,0.0017,0.0294,0,0 386 | http://kaijiang.500.com,0,0.0365,0.1642,0.0401,0,0 387 | http://12371.cn,0,0.0275,0.1181,0.1016,0,1 388 | http://www.tudou.com,1,0.0203,0,0.0983,0,0 389 | http://xilu.com,0,0.0028,0,0.006,0,0 390 | http://jxedt.com,0,0.0075,0.03,0.015,0,0 391 | http://xueqiu.com,0,0,0,0,1,0 392 | http://1ppt.com,0,0,0.0058,0.1433,0,0 393 | http://yuedu.baidu.com,0,0.0039,0,0.0039,0,0 394 | http://kdnet.net,0,0,0.0321,0.1526,0,0 395 | http://52pojie.cn,1,0.0329,0.4042,0.0749,0,0 396 | http://www.bankcomm.com,0,0,0,0,1,0 397 | http://woshipm.com,0,0.0106,0,0.3564,0,0 398 | http://china.findlaw.cn,0,0.0059,0.004,0.0435,0,0 399 | http://qulishi.com,0,0,0,0,1,0 400 | http://ithome.com,0,0,0,0,1,0 401 | http://kongfz.com,0,0,0,0,1,0 402 | http://jiwu.com,0,0,0,0,1,0 403 | http://gaokao.com,0,0.0004,0,0.0251,0,1 404 | http://linkedin.com,0,0.0154,0,0.0308,0,1 405 | http://lagou.com,1,0.0127,0,0.094,0,0 406 | http://qyer.com,0,0.0759,0.0063,0.2278,0,0 407 | http://kuai8.com,0,0.001,0.0048,0.0534,0,0 408 | http://cnkang.com,0,0.3593,0,0.054,0,0 409 | http://xdf.cn,0,0.0045,0,0.1091,0,0 410 | http://xici.net,0,0.0137,0.0137,0.099,0,0 411 | http://www.acfun.cn,0,0.0361,0.1271,0.0095,0,0 412 | http://gmw.cn,0,0.0034,0,0.0809,0,0 413 | http://zxxk.com,0,0.0033,0.0017,0.0294,0,0 414 | http://kaijiang.500.com,0,0.0365,0.1642,0.0401,0,0 415 | http://12371.cn,0,0.0275,0.1185,0.1019,0,1 416 | http://www.tudou.com,1,0.0203,0,0.0983,0,0 417 | http://xilu.com,0,0.0028,0,0.006,0,0 418 | http://jxedt.com,0,0.0075,0.03,0.015,0,0 419 | http://xueqiu.com,0,0,0,0,1,0 420 | http://1ppt.com,0,0,0.0058,0.1433,0,0 421 | http://yuedu.baidu.com,0,0.0039,0,0.0039,0,0 422 | http://pku.edu.cn,1,0.1321,0.0142,0.0755,0,0 423 | http://kdnet.net,0,0,0.0323,0.1532,0,0 424 | http://52pojie.cn,1,0.0301,0.4036,0.0753,0,0 425 | http://www.bankcomm.com,0,0,0,0,1,0 426 | http://woshipm.com,0,0.0106,0,0.3564,0,0 427 | http://china.findlaw.cn,0,0.0059,0.004,0.0435,0,0 428 | http://qulishi.com,0,0,0,0,1,0 429 | http://ithome.com,0,0,0,0,1,0 430 | http://kongfz.com,0,0,0,0,1,0 431 | http://fx.cp2y.com,0,0.0183,0.0046,0.0274,0,0 432 | http://kankan.com,0,0.0026,0,0.0154,0,0 433 | http://stockpage.10jqka.com.cn,0,0.9222,0.0039,0,0,1 434 | http://zaobao.com,0,0.0028,0.0028,0.0028,0,0 435 | http://lenovo.com.cn,0,0.0054,0.0067,0.4973,0,0 436 | http://baomihua.com,0,0,0.0567,0,0,0 437 | http://woyaogexing.com,0,0.0327,0.0065,0.1242,0,0 438 | http://cyol.com,0,0,0,0,1,0 439 | http://1kkk.com,0,0,0,0.0102,0,0 440 | http://okooo.com,0,0.021,0.021,0.021,0,1 441 | http://samsung.com,0,0.1313,0.0303,0.202,0,0 442 | http://pchome.net,0,0.0032,0.0016,0.0429,0,0 443 | http://dongao.com,0,0.0112,0.0072,0.0337,0,0 444 | http://koolearn.com,0,0.0043,0,0.1066,0,0 445 | http://sz.gov.cn,0,0,0,0,1,0 446 | http://elong.com,0,0,0,0.0443,0,1 447 | http://www.cidianwang.com,0,0,0,0.1615,0,0 448 | http://www.court.gov.cn,0,0,0,0,1,0 449 | http://southcn.com,0,0.0033,0.0066,0.1549,0,0 450 | http://oschina.net,0,0,0,0,0,0 451 | http://znds.com,0,0.0232,0.0659,0.0525,0,1 452 | http://xingzuo360.cn,0,0.0127,0,0.0445,0,0 453 | http://jstv.com,0,0.0507,0.0054,0.0652,0,0 454 | http://guokr.com,0,0.0513,0,0.0513,0,0 455 | http://u17.com,0,0.0054,0.0044,0.0446,0,1 456 | http://mail.126.com,0,0,0,0.6757,0,0 457 | http://qichacha.com,0,1,0,0,0,0 458 | http://hotels.ctrip.com,0,0.0188,0.0258,0.1056,0,0 459 | http://hc360.com,0,0.0043,0.0008,0.033,0,0 460 | http://lawtime.cn,0,0,0.0048,0.0887,0,0 461 | http://cicpa.org.cn,0,0.0081,0.0081,0.0565,0,0 462 | http://caixin.com,0,0.0043,0,0.0239,0,0 463 | http://duote.com,0,0.0212,0,0.0296,0,1 464 | http://kekenet.com,0,0.0125,0.0274,0.0615,0,0 465 | http://ickd.cn,0,0.0177,0,0.0354,0,0 466 | http://jzb.com,0,0,0,0.0864,0,0 467 | http://flight.qunar.com,0,0.2561,0.014,0.1263,0,0 468 | http://quanjing.com,0,0,0.1171,0.045,0,0 469 | http://aipai.com,0,0.0048,0.128,0.0106,0,1 470 | http://wanmei.com,0,0,0,0.2,0,0 471 | http://jb51.net,0,0,0.0034,0.0425,0,0 472 | http://fengniao.com,0,0.0016,0.0031,0.0343,0,0 473 | http://onegreen.net,0,0,0,0.2222,0,0 474 | http://duanwenxue.com,0,0.0676,0.018,0.0541,0,0 475 | http://cnmo.com,0,0.0076,0.0038,0.1943,0,0 476 | http://cnr.cn,0,0.017,0,0.1449,0,0 477 | http://duwenzhang.com,0,0,0.0314,0.2147,0,0 478 | http://qpgyy.com,0,0.0108,0,0.0036,0,0 479 | http://tianyancha.com,0,0,0,0.0398,0,0 480 | http://leju.com,0,0.0793,0.0006,0.1354,0,0 481 | http://netbian.com,0,0,0,0.5104,0,0 482 | http://globrand.com,0,0,0.0014,0.0801,0,0 483 | http://downza.cn,0,0,0,1,0,0 484 | http://nga.cn,0,0,0.6196,0.25,0,0 485 | http://www.233.com,0,0.022,0,0.002,0,0 486 | http://360che.com,0,0.0015,0.0405,0.0474,0,0 487 | http://99.com.cn,0,0,0.003,0.0376,0,0 488 | http://product.pconline.com.cn,0,0.0177,0,0.0063,0,0 489 | http://csai.cn,0,0.051,0,0.0637,0,0 490 | http://cjdby.net,1,0.0205,0.1818,0.2229,0,0 491 | http://17house.com,0,0,0,0,1,0 492 | http://booking.com,0,0.0088,0,0.0088,0,1 493 | http://ikea.com,0,0.2195,0.0081,0.2033,0,0 494 | http://ifanr.com,0,0,0.037,0.1343,0,1 495 | http://home.meishichina.com,0,0.0068,0,0,0,0 496 | http://fudan.edu.cn,0,0.1086,0,0.08,0,0 497 | http://online.sh.cn,0,0,0,0.0341,0,0 498 | http://cztv.com,0,0,0,0,1,0 499 | http://ijq.tv,0,0.0016,0,0.0143,0,0 500 | http://588ku.com,0,0,0,0,1,0 501 | http://www.pcsoft.com.cn,0,0.0114,0,0.0171,0,0 502 | http://home.meishichina.com,0,0.0068,0,0,0,0 503 | http://fudan.edu.cn,0,0.1086,0,0.08,0,0 504 | http://online.sh.cn,0,0,0,0.0341,0,0 505 | http://cztv.com,0,0,0,0,1,0 506 | http://ijq.tv,0,0.0016,0,0.0143,0,0 507 | http://588ku.com,0,0,0,0,1,0 508 | http://www.pcsoft.com.cn,0,0.0114,0,0.0171,0,0 509 | http://zhongkao.com,0,0.0033,0,0.0473,0,1 510 | http://xxsy.net,0,0.0026,0,0.0385,0,0 511 | http://chaoxing.com,0,0.4375,0,0.125,0,0 512 | http://11467.com,0,0,0.0028,0.0615,0,0 513 | http://guazi.com,0,0,0,0,1,0 514 | http://crsky.com,0,0,0,0.0652,0,0 515 | http://win4000.com,0,0.0117,0,0.1016,0,0 516 | http://zhenai.com,0,0,0,0.1212,0,0 517 | http://ce.cn,1,0,0,0.1,0,0 518 | http://win007.com,0,0,0.025,0.0442,0,0 519 | http://zhulong.com,0,0.0274,0,0.1222,0,0 520 | http://tangdou.com,0,0,0,0.4571,0,0 521 | http://newmotor.com.cn,0,0.0012,0.0023,0.0808,0,0 522 | http://downcc.com,0,0,0,0.0172,0,0 523 | http://iqilu.com,0,0.0234,0.0047,0.0973,0,0 524 | http://973.com,0,0.0011,0,0.0432,0,0 525 | http://taoguba.com.cn,0,0,0,0.1338,0,0 526 | http://lt.cjdby.net,1,0.0205,0.1818,0.2229,0,0 527 | http://25pp.com,0,0.0278,0.0056,0.2167,0,0 528 | http://jiaoyimao.com,0,0.0047,0,0.0292,0,0 529 | http://leleketang.com,0,0.0172,0.069,0.7931,0,0 530 | http://bj.lianjia.com,0,0,0,0,1,0 531 | http://meijutt.com,0,0,0,0.0326,0,0 532 | http://5sing.kugou.com,0,0.1504,0,0.0155,0,0 533 | http://gxnews.com.cn,1,0.0125,0.0196,0.098,0,0 534 | http://lvmama.com,0,0.0202,0,0.0808,0,0 535 | http://amap.com,0,0,0,0,1,0 536 | http://vivo.com.cn,0,0,0.025,0.2687,0,0 537 | http://weimeicun.com,0,0,0,0.3125,0,0 538 | http://meilele.com,0,0.0308,0,0.0408,0,0 539 | http://louisvuitton.com,0,0,0,0.0357,0,0 540 | http://sinaapp.com,0,0,0,0,0,0 541 | http://data.eastmoney.com,0,0,0.0024,0.0012,0,0 542 | http://ele.me,0,0,0,0,1,0 543 | http://job5156.com,0,0.0155,0,0.2481,0,0 544 | http://36kr.com,0,0,0,0.0494,0,0 545 | http://ems.com.cn,1,0.0164,0,0.1475,0,0 546 | http://damai.cn,0,0,0,0.1395,0,0 547 | http://bjrbj.gov.cn,0,0.014,0,0.1402,0,0 548 | http://forex.hexun.com,0,0.0033,0.0132,0.1258,0,0 549 | http://ftchinese.com,0,0.0025,0.0203,0.0304,0,0 550 | http://chinagwy.org,0,0,0,0,0,0 551 | http://meet99.com,0,0,0,0.0926,0,1 552 | http://www.meizu.com,0,0,0,0,1,0 553 | http://tom.com,0,0,0,0.0161,0,0 554 | http://sports.163.com,1,0.0764,0,0.044,0,1 555 | http://live.bilibili.com,0,0,0,0,0,0 556 | http://2018.cn,0,0,0,0.1485,0,0 557 | http://yoka.com,0,0.0409,0,0.0409,0,0 558 | http://ci123.com,0,0,0,0,1,0 559 | http://55125.cn,0,0.0013,0.0687,0.186,0,0 560 | http://club.autohome.com.cn,0,0.0391,0.0036,0.0036,0,0 561 | http://cnfol.com,0,0.0127,0.0382,0.0921,0,0 562 | http://enorth.com.cn,1,0,0,0.1086,0,0 563 | http://post.smzdm.com,0,0,0,0,1,0 564 | http://cankaoxiaoxi.com,0,0.0376,0,0.032,0,0 565 | http://downxia.com,0,0.0198,0,0.0229,0,0 566 | http://wbiao.cn,0,0.0202,0,0.0587,0,0 567 | http://huangye88.com,0,0,0,0.0218,0,0 568 | http://1010jiajiao.com,0,0,0.1514,0.2615,0,1 569 | http://km.com,0,0,0,0.1304,0,0 570 | http://dl.pconline.com.cn,0,0,0,0.0378,0,0 571 | http://zysj.com.cn,0,0.065,0.0569,0.0813,0,0 572 | http://news.china.com,0,0.003,0,0,0,0 573 | http://yhd.com,0,0.0263,0,0.0702,0,0 574 | http://xiaopi.com,0,0,0,0.1293,0,0 575 | http://5nd.com,0,0.2189,0,0.0287,0,0 576 | http://39yst.com,0,0.0132,0,0.2456,0,0 577 | http://keaidian.com,0,0,0,0.0348,0,0 578 | http://kuaikanmanhua.com,0,0,0,0.0229,0,0 579 | http://ixiumei.com,0,0.0021,0,0.0719,0,0 580 | http://xoyo.com,0,0,0,0,0,0 581 | http://15tianqi.com,0,0,0,0,0,0 582 | http://qingting.fm,0,0.0028,0,0.0114,0,0 583 | http://duitang.com,0,0.0377,0,0.1509,0,0 584 | http://developer.mozilla.org,0,0,0,0.1385,0,0 585 | http://b2b168.com,0,0,0.0024,0.0209,0,0 586 | http://mayi.com,0,0,0,0.2116,0,0 587 | http://nanrenwo.net,0,0.0275,0.0013,0.0525,0,0 588 | http://qupu123.com,0,0,0,0.0144,0,0 589 | http://aoshu.com,0,0.0068,0.0012,0.0371,0,1 590 | http://skycn.com,0,0.0319,0,0.0887,0,0 591 | http://yangshitianqi.com,0,0.0204,0,0.3571,0,0 592 | http://ddooo.com,0,0.0032,0.0037,0.0277,0,0 593 | http://myzaker.com,0,0,0.0586,0.1016,0,0 594 | http://bmlink.com,0,0.0052,0.0052,0.3571,0,0 595 | http://jobs.zhaopin.com,0,0.0029,0,0.0019,0,0 596 | http://yyets.com,0,0,0,0,1,0 597 | http://tiexue.net,0,0,0.0031,0.135,0,0 598 | http://gaodun.com,1,0.0022,0.0054,0.0574,0,0 599 | http://mnw.cn,0,0.0127,0.0382,0.2166,0,0 600 | http://5tu.cn,0,0.004,0.008,0.0478,0,0 601 | http://rong360.com,0,0,0,0,1,0 602 | http://southmoney.com,0,0.0088,0,0.1029,0,0 603 | http://ent.163.com,1,0.0293,0,0.2122,0,1 604 | http://go007.com,0,0.0153,0.0044,0.0675,0,0 605 | http://www.pansoso.com,0,0,0,0,1,0 606 | http://bmw.com.cn,0,0.0909,0,0.0164,0,0 607 | http://61ertong.com,1,0,0.1627,0.098,0,0 608 | http://yz.chsi.com.cn,0,0,0.0035,0.0319,0,1 609 | http://cheshi.com,0,0.0106,0.0342,0.0386,0,0 610 | http://3158.cn,0,0.0007,0,0.0074,0,0 611 | http://baike.1688.com,0,0.0777,0,0.0303,0,0 612 | http://med66.com,0,0.0068,0,0.0901,0,0 613 | http://xinshangmeng.com,0,0,0,0,1,0 614 | http://pingshu8.com,0,0,0.0008,0.0131,0,0 615 | http://4399.cn,0,0.0154,0.0028,0.6152,0,1 616 | http://tv.cntv.cn,0,0.0149,0,0.9538,0,0 617 | http://cncrk.com,0,0.0014,0.0007,0.0477,0,0 618 | http://xiziwang.net,0,0.2,0,0.6,0,0 619 | http://fun.tv,0,0.0368,0.0074,0.2022,0,1 620 | http://xinnet.com,0,0.0497,0,0.0821,0,0 621 | http://zixun.jia.com,0,0.0505,0,0.0599,0,0 622 | http://bbs.pinggu.org,0,0,0,0,1,0 623 | http://nowscore.com,0,0,0.01,0.0448,0,0 624 | http://www.zto.com,0,0.0087,0,0.113,0,0 625 | http://xinli001.com,0,0.0069,0,0.0242,0,0 626 | http://shiyebian.net,0,0,0,0,1,0 627 | http://p2peye.com,0,0,0,0,1,0 628 | http://www.cebbank.com,0,0.0156,0,0.0469,0,0 629 | http://yicai.com,0,0.023,0,0.0345,0,0 630 | http://228.com.cn,0,0,0,0,1,0 631 | http://gezila.com,0,0.0073,0.0228,0.0529,0,1 632 | http://zgjsks.com,0,0.0286,0.0015,0.0573,0,0 633 | http://jobui.com,0,0,0,0,1,0 634 | http://5068.com,1,0,0,0.0389,0,0 635 | http://chemicalbook.com,0,0,0.0508,0.1111,0,0 636 | http://puercn.com,0,0,0.0018,0.065,0,0 637 | http://appchina.com,0,0.0062,0.0062,0.1636,0,1 638 | http://e23.cn,0,0.0031,0.0368,0.1626,0,0 639 | http://cnhubei.com,0,0.0241,0.0193,0.1614,0,0 640 | http://rouding.com,0,0,0,0.2,0,0 641 | http://www.17jita.com,0,0,0,0,1,0 642 | http://110.com,0,0,0.0392,0.0114,0,0 643 | http://bbs.jjwxc.net,0,0.0263,0.7105,0.1053,0,0 644 | http://paypal.com,0,0.0161,0,0.129,0,1 645 | http://trains.ctrip.com,0,0.029,0.0343,0.1029,0,0 646 | http://oppo.com,0,0,0,0.1633,0,0 647 | http://chashebao.com,0,0,0,0.1406,0,0 648 | http://lol.qq.com,0,0,0.0172,0,0,0 649 | http://boqii.com,0,0,0,0,1,0 650 | http://jiemeng8.com,0,0.0315,0.0037,0.1074,0,0 651 | http://mmbang.com,0,0,0,0,1,0 652 | http://gdtv.cn,0,0,0,0.1592,0,0 653 | http://aies.cn,0,0,0,0,0,0 654 | http://100ppi.com,0,0,0.0017,0.0833,0,0 655 | http://zhan.com,0,0.1154,0,0.0308,0,0 656 | http://youzan.com,0,0.044,0.022,0.044,0,0 657 | http://csair.com,0,0,0,0,1,0 658 | http://hudong.com,0,0,0,0.8571,0,1 659 | http://lizhi.fm,0,0.0054,0,0.0323,0,0 660 | http://alimama.com,0,0,0,0.2639,0,1 661 | http://www.beijing.gov.cn,0,0,0,0,1,0 662 | http://acadsoc.com.cn,0,0.0588,0.3922,0.0392,0,0 663 | http://hinews.cn,0,0.0047,0.0388,0.3163,0,0 664 | http://familydoctor.com.cn,0,0.0463,0.0026,0.0718,0,0 665 | http://www.volvocars.com,0,0,0,0,0,0 666 | http://youbianku.com,0,0,0,0,1,0 667 | http://jj.cn,0,0.0847,0.0678,0.3051,0,0 668 | http://scol.com.cn,0,0,0.0021,0.1684,0,0 669 | http://yanyue.cn,0,0,0,0,1,0 670 | http://23us.la,0,0,0,0,1,0 671 | http://xitek.com,0,0.0102,0.034,0.0714,0,0 672 | http://house365.com,0,0,0.4615,0,0,0 673 | http://www.mohurd.gov.cn,0,0,0,0.123,0,0 674 | http://supfree.net,0,0.0039,0.0118,0.0315,0,0 675 | http://taoche.com,0,0.0553,0.0008,0.0261,0,0 676 | http://wmzhe.com,0,0.031,0,0.0853,0,0 677 | http://ibm.com,0,0,0,0,0,0 678 | http://www.sznews.com,0,0.0064,0.0181,0.1906,0,0 679 | http://gongchang.com,0,0.0335,0.0457,0.0762,0,0 680 | http://soso.com,0,0,0,1,0,0 681 | http://xzbu.com,0,0.0556,0,0.3472,0,0 682 | http://yy8844.cn,1,0,0,0.0474,0,0 683 | http://guahao.com,0,0.014,0.0016,0.0217,0,1 684 | http://jqw.com,0,0.0011,0.0429,0.0506,0,0 685 | http://xs8.cn,0,0,0,0.0268,0,0 686 | http://zuzuche.com,0,0.0339,0.1235,0.1255,0,0 687 | http://wandoujia.com,0,0.0044,0.0087,0.262,0,0 688 | http://vodjk.com,0,0.0345,0,0.0651,0,0 689 | http://live.qq.com,0,0.0115,0.0115,0.0632,0,0 690 | http://tuchong.com,0,0,0,0.1053,0,0 691 | http://health.pclady.com.cn,0,0.0888,0,0.0133,0,0 692 | http://qianlima.com,0,0,0,0,1,0 693 | http://mipang.com,0,0.0069,0,0.128,0,0 694 | http://jandan.net,1,0.3447,0.0062,0.0062,0,0 695 | http://aliexpress.com,0,0.0379,0,0.2045,0,0 696 | http://07073.com,0,0.0424,0,0.1042,0,0 697 | http://dota2.com.cn,0,0,0,0,1,0 698 | http://sports.cctv.com,0,0.0309,0,0.037,0,1 699 | http://mofangge.com,0,0,0,0.2593,0,0 700 | http://zjut.cc,0,0,0,0.0183,0,0 701 | http://wenming.cn,0,0.0066,0.0033,0.1579,0,0 702 | http://doupocangqiong1.com,0,0,0,0.0144,0,0 703 | http://dajie.com,0,0.0064,0,0.0051,0,1 704 | http://www.fedex.com,0,0,0,0.0176,0,0 705 | http://wanfangdata.com.cn,0,0.007,0.007,0.0559,0,0 706 | http://battlenet.com.cn,0,0,0,0.7732,0,0 707 | http://efu.com.cn,0,0.003,0.0023,0.039,0,1 708 | http://huochepiao.com,0,0,0,0,0,0 709 | http://cnbeta.com,0,0,0.0045,0.1161,0,0 710 | http://audi.cn,0,0.0366,0.0037,0.0201,0,0 711 | http://jobs.51job.com,0,0,0.011,0,0,0 712 | http://www.tsinghua.edu.cn,0,0,0,0,1,0 713 | http://cndesign.com,0,0,0.0032,0.1022,0,0 714 | http://pchouse.com.cn,0,0.0025,0.0006,0.0324,0,0 715 | http://dxy.cn,0,0.0295,0.0369,0.262,0,1 716 | http://cqmmgo.com,0,0.0635,0,0.1436,0,0 717 | http://fwol.cn,0,0.0616,0.9058,0.024,0,0 718 | http://699pic.com,1,0.0294,0.0034,0.0646,0,0 719 | http://voc.com.cn,0,0.0024,0.0459,0.1932,0,0 720 | http://www.cardbaobao.com,0,0,0.0588,0.1197,0,0 721 | http://ppkao.com,0,0.0031,0.0008,0.0173,0,0 722 | http://tingclass.net,0,0.0014,0.0036,0.0499,0,0 723 | http://55128.cn,0,0,0.153,0.0388,0,0 724 | http://naver.com,0,0.3423,0,0.0238,0,1 725 | http://sccnn.com,0,0,0.0038,0.1547,0,0 726 | http://danci.911cha.com,0,0,0,0,1,0 727 | http://sciencenet.cn,1,0.0041,0.1837,0.0735,0,0 728 | http://hongxiu.com,0,0,0,0.0232,0,0 729 | http://jsyks.com,0,0.0026,0,0.112,0,0 730 | http://nvsay.com,0,0,0,0.0762,0,0 731 | http://shejiben.com,0,0.0021,0.0404,0.1021,0,0 732 | http://liansuo.com,0,0.0467,0,0.0299,0,0 733 | http://kan300.com,0,0,0,0,1,0 734 | http://fccs.com,0,0,0,0.007,0,0 735 | http://zhicheng.com,0,0,0,0.2113,0,0 736 | http://zgzcw.com,0,0.0288,0,0.0535,0,0 737 | http://qzone.cc,0,0.1058,0,0.0212,0,0 738 | http://oracle.com,0,0,0,0.0941,0,0 739 | http://www.duowan.com,0,0.0054,0.0076,0.075,0,0 740 | http://upantool.com,0,0,0,0.0117,0,0 741 | http://soku.com,0,0,0,0,1,0 742 | http://kaixin001.com,0,0.0244,0.0244,0.1463,0,1 743 | http://bus.mapbar.com,0,0.0091,0,0.0114,0,0 744 | http://12333sb.com,0,0,0,0.1017,0,0 745 | http://keyunzhan.com,0,0,0.0115,0.0885,0,0 746 | http://auto.sina.com.cn,0,0.0014,0,0.0593,0,0 747 | http://www.saic.gov.cn,1,0.1407,0,0.4885,0,0 748 | http://www.10jqka.com.cn,0,0,0,0,1,0 749 | http://qdaily.com,0,0.1731,0,0.0128,0,0 750 | http://kongzhong.com,0,0,0,0.0789,0,0 751 | http://bbs.3dmgame.com,1,0.0053,0.0286,0.0233,0,0 752 | http://zbj.com,0,0.0084,0,0.092,0,0 753 | http://lamabang.com,0,0.0038,0,0.0804,0,0 754 | http://phb123.com,0,0.0263,0.8907,0.083,0,0 755 | http://wdzj.com,0,0,0,0,1,0 756 | http://kf.qq.com,0,0,0,0.1667,0,0 757 | http://iyiou.com,0,0.0345,0,0.0517,0,0 758 | http://jiakaobaodian.com,0,0,0,0.1098,0,0 759 | http://youboy.com,0,0.0024,0,0.0142,0,0 760 | http://www.dyhjw.com,0,0,0,0.1616,0,0 761 | http://zhanqi.tv,0,0.2148,0.0067,0.2953,0,0 762 | http://howbuy.com,0,0.0371,0,0.1335,0,0 763 | http://s.cn,0,0.0285,0.0032,0.0507,0,0 764 | http://99.com,0,0,0.0278,0.0833,0,0 765 | http://weidian.com,0,0,0,0,1,0 766 | http://kingdee.com,0,0.1029,0,0.0625,0,0 767 | http://www.sipo.gov.cn,0,0.0183,0,0.3973,0,1 768 | http://xhd.cn,0,0,0,0,1,0 769 | http://dajiazhao.com,0,0,0.2419,0.13,0,0 770 | http://douguo.com,0,0,0,0.0211,0,0 771 | http://msn.com,1,0.0164,0,0.541,0,0 772 | http://kdslife.com,0,0.0216,0.0216,0.0216,0,1 773 | http://eqxiu.com,0,0,0,0,0,0 774 | http://louisvuitton.cn,0,0,0,0.0074,0,0 775 | http://mercedes-benz.com.cn,0,0.0294,0.0294,0.0441,0,0 776 | http://diaoyur.com,0,0,0,0.0255,0,0 777 | http://west.cn,0,0.0038,0.2308,0.0712,0,0 778 | http://news.hexun.com,0,0,0,0,1,0 779 | http://ltaaa.com,0,0.004,0.161,0.4036,0,0 780 | http://sh.lianjia.com,0,0,0,0,1,0 781 | http://teamviewer.com,0,0.049,0.0196,0.1176,0,0 782 | http://dhl.com,0,0.0049,0.0049,0.4261,0,0 783 | http://21food.cn,0,0.0075,0,0.0386,0,0 784 | http://miit.gov.cn,0,0,0,0.0284,0,0 785 | http://hunliji.com,0,0.0115,0,0.1374,0,0 786 | http://yuemei.com,0,0.042,0,0.0252,0,0 787 | http://kaoyan.com,0,0.0016,0.03,0.0632,0,0 788 | http://t.qq.com,0,0,0,0,1,0 789 | http://huxiu.com,0,0,0,0,1,0 790 | http://go.cqmmgo.com,0,0.0635,0,0.1436,0,0 791 | http://cnbanbao.cn,0,0.2,0,0.6,0,0 792 | http://1234567.com.cn,0,0,0,0,1,0 793 | http://wallstreetcn.com,0,0,0,0.1413,0,0 794 | http://78dm.net,0,0.0236,0.2827,0.0995,0,0 795 | http://watch.xbiao.com,0,0.0513,0,0.0292,0,0 796 | http://nsfc.gov.cn,0,0,0,0.0694,0,0 797 | http://tiancity.com,0,0.0294,0,0.0588,0,0 798 | http://388g.com,0,0.0303,0,0.0909,0,0 799 | http://lanrentuku.com,0,0.0054,0.0108,0.0432,0,0 800 | http://verycd.com,0,0.0183,0,0.0248,0,1 801 | http://pvp.qq.com,0,0.0996,0,0.0111,0,0 802 | http://news.mydrivers.com,0,0.0127,0,0.1556,0,0 803 | http://sports.eastday.com,0,0.0068,0,0.0612,0,0 804 | http://web.sanguosha.com,0,0.4412,0,0.0294,0,0 805 | http://cfi.cn,0,0,0.0094,0.0126,0,0 806 | http://fantizi5.com,0,0,0,0.2676,0,0 807 | http://lol.duowan.com,0,0.0457,0.0074,0.0568,0,0 808 | http://szhome.com,0,0,0.0017,0.0101,0,0 809 | http://artron.net,0,0.0097,0.114,0.0231,0,0 810 | http://wenzhangba.com,0,0,0.1096,0.0365,0,0 811 | http://stock.eastmoney.com,0,0.002,0.002,0.0219,0,0 812 | http://www.ceair.com,0,0.0047,0.0233,0.2744,0,0 813 | http://cli.im,0,0.1429,0,0.0102,0,0 814 | http://xunjiepdf.com,0,0,0.0161,0.1935,0,0 815 | http://jiazhao.com,0,0,0,0.0233,0,0 816 | http://mkzhan.com,0,0.0121,0,0.0922,0,0 817 | http://shop.bytravel.cn,0,0.0013,0,0.0188,0,0 818 | http://pc.qq.com,0,0,0,0,1,0 819 | http://doc.mbalib.com,0,0,0.0066,0.0066,0,0 820 | http://eduyun.cn,0,0,0,0,1,0 821 | http://www.csrc.gov.cn,0,0,0,0,1,0 822 | http://cnwav.com,0,0,0.4582,0.1045,0,0 823 | http://duba.com,1,0.0204,0.0092,0.6293,0,0 824 | http://it168.com,0,0,0,0.0679,0,0 825 | http://icauto.com.cn,0,0.0326,0,0.0196,0,0 826 | http://77nt.com,0,0,0,0,0,0 827 | http://chengyu.t086.com,1,0.0231,0.2409,0.0693,0,0 828 | http://gfan.com,1,0.0244,0.0756,0.1732,0,1 829 | http://hikvision.com,0,0,0,0,1,0 830 | http://jitaba.cn,0,0,0,0,1,0 831 | http://zhe800.com,0,0,0,0.0659,0,0 832 | http://travel.qunar.com,0,0.0476,0.004,0.0079,0,0 833 | http://dnf.qq.com,0,0,0,0,0,0 834 | http://www.cmbc.com.cn,0,0.0314,0.0052,0.1152,0,0 835 | http://gucheng.com,0,0,0,0.2009,0,0 836 | http://114piaowu.com,0,0.0034,0,0.1275,0,1 837 | http://v.6.cn,0,0.0869,0.1188,0.0301,0,0 838 | http://cqu.edu.cn,0,0.0256,0.0769,0.1026,0,0 839 | http://zjzwfw.gov.cn,0,0.2727,0,0.1818,0,0 840 | http://www.pbc.gov.cn,0,0,0,0,1,0 841 | http://8264.com,0,0.0122,0.0091,0.076,0,0 842 | http://cdstm.cn,0,0.0368,0.0167,0.0736,0,0 843 | http://chinachugui.com,0,0,0.0089,0.0653,0,0 844 | http://xiaomi.cn,0,0,0,0.0627,0,0 845 | http://yidianzixun.com,0,0.027,0,0,0,0 846 | http://365jia.cn,0,0.0702,0,0.0439,0,0 847 | http://kujiale.com,0,0,0,0,1,0 848 | http://fanjian.net,0,0,0,0,1,0 849 | http://mt.sohu.com,0,0.0058,0,0,0,0 850 | http://www.cib.com.cn,0,0.0355,0,0.0508,0,0 851 | http://jxnews.com.cn,0,0.0042,0.0572,0.2524,0,0 852 | http://ea3w.com,0,0,0,0,1,0 853 | http://fobshanghai.com,0,0,0,0,1,0 854 | http://ebrun.com,0,0,0,0.1509,0,0 855 | http://newasp.net,0,0,0,0.0336,0,0 856 | http://qingdaonews.com,0,0.0148,0.021,0.1233,0,0 857 | http://download.csdn.net,0,0.1205,0,0.0045,0,0 858 | http://kanunu8.com,0,0,0,0,1,0 859 | http://www.customs.gov.cn,0,0.0048,0.0944,0.0242,0,0 860 | http://www.baihe.com,0,0.4211,0.0526,0,0,0 861 | http://pedaily.cn,0,0.0042,0.0063,0.2292,0,0 862 | http://cwl.gov.cn,0,0.0165,0.0028,0.0909,0,0 863 | http://www.imooc.com,0,0.0082,0.0041,0.0327,0,0 864 | http://club.kdnet.net,0,0.006,0.018,0.1018,0,0 865 | http://midea.com,0,0.0991,0,0.2252,0,0 866 | http://chiphell.com,0,0.2,0,0.6,0,0 867 | http://feng.com,0,0.0582,0.1799,0.1852,0,0 868 | http://17zwd.com,0,0.2759,0.0033,0.0116,0,0 869 | http://instrument.com.cn,0,0,0.0101,0.0113,0,0 870 | http://chem17.com,0,0.0108,0.005,0.054,0,0 871 | http://bjx.com.cn,0,0,0,0,1,0 872 | http://qbaobei.com,0,0,0,0.0548,0,0 873 | http://ibaotu.com,0,0.034,0,0.0615,0,0 874 | http://account.chsi.com.cn,0,0,0,0.25,0,0 875 | http://vvvdj.com,0,0,0,0.0562,0,0 876 | http://bbs.tiexue.net,0,0,0.0071,0.2146,0,0 877 | http://search.liqucn.com,0,0.005,0.005,0.015,0,0 878 | http://37.com,0,0.0255,0.2593,0.0864,0,0 879 | http://d1ev.com,0,0,0,0,1,0 880 | http://news.eastday.com,0,0,0.037,0.358,0,0 881 | http://dingtalk.com,0,0.0516,0.0065,0.0839,0,0 882 | http://www.cgbchina.com.cn,1,0.0216,0,0.039,0,0 883 | http://huoche.net,0,0,0,0.0164,0,0 884 | http://oray.com,0,0,0,0.1102,0,0 885 | http://ef43.com.cn,0,0.0015,0.0081,0.1759,0,0 886 | http://xa.gov.cn,0,0,0,0,1,0 887 | http://1024sj.com,1,0,0.0209,0.0026,0,0 888 | http://uisdc.com,0,0.0085,0,0.1603,0,0 889 | http://windows7en.com,0,0.0048,0,0.0481,0,0 890 | http://anruan.com,0,0.0017,0.0009,0.0497,0,0 891 | http://soso.nipic.com,0,0.0068,0.0135,0.1486,0,0 892 | http://smm.cn,0,0.0162,0,0.1136,0,0 893 | http://gxrc.com,0,0.0833,0.0417,0.25,0,0 894 | http://woniu.com,0,0.0833,0,0.0965,0,0 895 | http://gz.gov.cn,0,0.0142,0,0.6023,0,0 896 | http://gaokaopai.com,0,0.0725,0,0.1451,0,0 897 | http://ccutu.com,0,0,0,0.1167,0,0 898 | http://www.zju.edu.cn,0,0,0,0,1,0 899 | http://mail.10086.cn,0,0.5,0,0,0,0 900 | http://zgpingshu.com,0,0.0384,0.0024,0.0312,0,0 901 | http://www.cjcp.com.cn,0,0.0086,0.256,0.0584,0,0 902 | http://mop.com,0,0.0806,0.0081,0.0323,0,0 903 | http://kankanews.com,0,0,0,0.0583,0,0 904 | http://uc129.com,0,0,0,0.0996,0,0 905 | http://s.weibo.com,0,0,0,0.1176,0,0 906 | http://90sheji.com,0,0.2522,0.0025,0.0443,0,1 907 | http://www.gaoxiaojob.com,0,0.0006,0.0019,0.0208,0,0 908 | http://gamedog.cn,0,0,0,0,1,0 909 | http://16888.com,0,0.0124,0.0012,0.0547,0,0 910 | http://money.163.com,1,0.0229,0.0014,0.0844,0,1 911 | http://dongqiudi.com,0,0,0,0,1,0 912 | http://vmware.com,0,0,0,0.0702,0,0 913 | http://258.com,0,0,0,0,1,0 914 | http://www.cpic.com.cn,0,0.0447,0,0.0279,0,1 915 | http://newcger.com,0,0.1801,0,0.0621,0,0 916 | http://zzidc.com,0,0,0,0,1,0 917 | http://bookschina.com,0,0,0.0468,0.009,0,0 918 | http://cantonfair.org.cn,0,0.0048,0.2319,0.1256,0,0 919 | http://asus.com.cn,0,0,0,0,1,0 920 | http://scu.edu.cn,0,0.5714,0,0,0,0 921 | http://okbuy.com,0,0.0062,0,0.0123,0,1 922 | http://tieyou.com,0,0.0042,0.0126,0.1048,0,0 923 | http://cf.qq.com,0,0.3605,0,0,0,0 924 | http://a9vg.com,0,0,0.0016,0.0714,0,0 925 | http://vvic.com,0,0.0085,0.0015,0.0216,0,0 926 | http://alitrip.com,1,0.0035,0.0142,0.5887,0,0 927 | http://hm.com,0,0,0,0,0,0 928 | http://www.sjtu.edu.cn,0,0,0,0,0,0 929 | http://17track.net,0,0,0,0.2,0,0 930 | http://biquku.com,0,0,0.0051,0.0051,0,0 931 | http://dd373.com,0,0.0289,0,0.0675,0,0 932 | http://100bt.com,0,0,0,0.2537,0,1 933 | http://jiaodong.net,0,0.0142,0.016,0.0872,0,0 934 | http://ustc.edu.cn,0,0,0.0813,0.0407,0,0 935 | http://hanfan.cc,0,0.0072,0,0.0072,0,1 936 | http://ktkkt.com,0,0,0,0.0543,0,0 937 | http://sbj.saic.gov.cn,0,0,0,0.0924,0,0 938 | http://military.china.com,0,0,0,0,0,0 939 | http://medsci.cn,0,0,0,0,1,0 940 | http://faxingzhan.com,0,0.0204,0,0.1145,0,0 941 | http://stock.jrj.com.cn,0,0.0085,0,0.0393,0,1 942 | http://justeasy.cn,0,0.0129,0.1587,0.0443,0,0 943 | http://most.gov.cn,0,0,0.005,0.2525,0,0 944 | http://www.dongfeng-nissan.com.cn,0,0.021,0.007,0.049,0,0 945 | http://jiankang.com,0,0.0175,0,0.1329,0,0 946 | http://games.qq.com,1,0.0179,0.0028,0.2455,0,1 947 | http://chinatimes.com,0,0.0203,0,0.0122,0,0 948 | http://ups.com,0,0.027,0,0,0,0 949 | http://vjshi.com,0,0.0092,0.0062,0.1354,0,0 950 | http://cqvip.com,0,0.0429,0.0714,0.2286,0,0 951 | http://962.net,0,0.0019,0,0.1078,0,0 952 | http://city8.com,0,0,0,0,1,0 953 | http://wenjuan.com,0,0.041,0,0.082,0,0 954 | http://tingroom.com,0,0.0058,0.0487,0.0288,0,1 955 | http://hangzhou.com.cn,0,0.0062,0.0404,0.172,0,0 956 | http://fanli.com,0,0.0059,0,0.0742,0,0 957 | http://55haitao.com,0,0,0.004,0.0054,0,0 958 | http://chemcp.com,0,0,0.1512,0.0522,0,0 959 | http://joyme.com,0,0,0,0,1,0 960 | http://iplaysoft.com,0,0,0,0.0379,0,0 961 | http://loupan.com,0,0,0,0.0307,0,0 962 | http://53.com.cn,0,0.0061,0.0009,0.0509,0,0 963 | http://66rpg.com,0,0.156,0,0.0071,0,0 964 | http://u51.com,0,0,0,0,1,0 965 | http://310win.com,0,0,0.1538,0.0625,0,0 966 | http://chinayigui.com,0,0,0.0096,0.0663,0,0 967 | http://fengj.com,0,0,0.0215,0.0789,0,0 968 | http://zanmeishi.com,0,0.1366,0.0058,0.0349,0,0 969 | http://pgyer.com,0,0.0476,0,0.1619,0,0 970 | http://blogchina.com,0,0.0225,0,0.015,0,0 971 | http://chunyuyisheng.com,0,0,0,0,0,0 972 | http://exam8.com,0,0.0059,0.0164,0.1565,0,0 973 | http://dict.hjenglish.com,0,0,0,0,1,0 974 | http://xiaojukeji.com,0,0,0,0,1,0 975 | http://lkong.net,1,0.028,0.3925,0.1729,0,0 976 | http://zgsydw.com,0,0.012,0,0.1222,0,0 977 | http://yinhang123.net,0,0.0136,0.0218,0.0536,0,0 978 | http://space.bilibili.com,0,0,0,0,1,0 979 | http://115.com,0,0.3043,0,0.2174,0,0 980 | http://ch.com,0,0.0338,0.0045,0.1104,0,0 981 | http://www.haier.com,0,0.0068,0,0.0317,0,0 982 | http://21cnjy.com,0,0.0166,0.0281,0.0215,0,0 983 | http://52z.com,0,0.0062,0,0.0741,0,0 984 | http://msdn.microsoft.com,0,0.0132,0,0.25,0,1 985 | http://open.com.cn,0,0.0236,0,0.0708,0,0 986 | http://hsw.cn,0,0.0006,0,0.0254,0,0 987 | http://www.psbc.com,0,0,0,0,1,0 988 | http://jihaoba.com,0,0.0099,0,0.0496,0,0 989 | http://vacations.ctrip.com,0,0.0394,0.0255,0.1111,0,0 990 | http://28.com,0,0.0013,0.0013,0.0126,0,0 991 | http://wmxa.cn,0,0,0,0.0277,0,0 992 | http://xitongzhijia.net,0,0,0,0,1,0 993 | http://longzhu.com,0,0,0,0.1694,0,0 994 | http://gb.cri.cn,0,0.0264,0.0053,0.0722,0,0 995 | http://5tps.com,0,0,0,0.0198,0,0 996 | http://www.zhipin.com,0,0.0111,0,0.0238,0,0 997 | http://wtoip.com,0,0.0687,0,0.0473,0,0 998 | http://cnool.net,0,0.0459,0.0077,0.1684,0,0 999 | http://oeeee.com,0,0.0435,0.205,0.3043,0,0 1000 | http://xincheping.com,0,0.0029,0,0.0162,0,0 1001 | http://todayonhistory.com,0,0.0163,0,0.049,0,0 1002 | -------------------------------------------------------------------------------- /page_identify/data/positive_urls.csv: -------------------------------------------------------------------------------- 1 | normalURL 2 | http://plato.stanford.edu/entries/aesthetics-18th-french/ 3 | http://plato.stanford.edu/entries/aesthetic-judgment/ 4 | http://www.iep.utm.edu/aestheti/ 5 | http://www2.lib.virginia.edu/artsandmedia/artmuseum/africanart/index.html 6 | http://beautymatters.blogspot.com/2000_01_02_beautymatters_archive.html 7 | http://beautymatters.blogspot.com/ 8 | http://www.csulb.edu/~jvancamp/361/ 9 | http://www.biddingtons.com/content/shechtertest.html 10 | http://ndpr.nd.edu/news/23396/?id=1260 11 | http://faculty.frostburg.edu/phil/forum/Art.htm 12 | http://www.infography.com/content/411158825462.html 13 | http://plato.stanford.edu/entries/japanese-aesthetics/ 14 | http://thinkexist.com/quotes/john_berger 15 | http://muse.jhu.edu/journal/158 16 | http://www.gutenberg.org/ebooks/2176 17 | http://www.terraingallery.org/ 18 | http://theasif.info/ 19 | http://www.tbecker.net/main/philosophy-english.htm 20 | http://transcendentalism-legacy.tamu.edu/ 21 | http://ndpr.nd.edu/news/23300/?id=1210 22 | http://www.selectedworks.co.uk/ 23 | http://theasif.info/ 24 | http://muse.jhu.edu/journal/158 25 | http://www.gutenberg.org/ebooks/2176 26 | http://transcendentalism-legacy.tamu.edu/ 27 | http://ndpr.nd.edu/news/23396/?id=1260 28 | http://www2.lib.virginia.edu/artsandmedia/artmuseum/africanart/index.html 29 | http://www.iep.utm.edu/aestheti/ 30 | http://ndpr.nd.edu/news/23300/?id=1210 31 | http://thinkexist.com/quotes/john_berger 32 | http://beautymatters.blogspot.com/ 33 | http://www.tbecker.net/main/philosophy-english.htm 34 | http://www.terraingallery.org/ 35 | http://plato.stanford.edu/entries/aesthetics-18th-french/ 36 | http://plato.stanford.edu/entries/japanese-aesthetics/ 37 | http://www.infography.com/content/411158825462.html 38 | http://beautymatters.blogspot.com/2000_01_02_beautymatters_archive.html 39 | http://www.csulb.edu/~jvancamp/361/ 40 | http://faculty.frostburg.edu/phil/forum/Art.htm 41 | http://plato.stanford.edu/entries/aesthetic-judgment/ 42 | http://www.biddingtons.com/content/shechtertest.html 43 | http://www.selectedworks.co.uk/ 44 | http://www.americanmuseumofmagic.org/ 45 | https://www.bauhaus.de/en/ 46 | http://www.bonamuseum.com/ 47 | https://www.google.com/culturalinstitute/project/art-project 48 | http://www.seeing-stars.com/Museums/ 49 | http://theimasonline.org/ 50 | http://www.jancodada.co.il/ 51 | http://www.logicpuzzlemuseum.org/ 52 | http://www.madametussauds.com/ 53 | http://www.museodellemaschere.it/ 54 | http://www.mcny.org/ 55 | http://www.museumsyndicate.com/ 56 | http://www.smb.museum/ 57 | http://www.nms.ac.uk/ 58 | http://www.roberson.org/ 59 | http://russianimperial.tripod.com/ 60 | http://www.washingtonpavilion.org/ 61 | http://theimasonline.org/ 62 | https://www.bauhaus.de/en/ 63 | http://www.jancodada.co.il/ 64 | https://www.google.com/culturalinstitute/project/art-project 65 | http://www.nms.ac.uk/ 66 | http://www.smb.museum/ 67 | http://www.madametussauds.com/ 68 | http://www.museumsyndicate.com/ 69 | http://www.mcny.org/ 70 | http://www.americanmuseumofmagic.org/ 71 | http://www.museodellemaschere.it/ 72 | http://www.bonamuseum.com/ 73 | http://www.logicpuzzlemuseum.org/ 74 | http://www.roberson.org/ 75 | http://russianimperial.tripod.com/ 76 | http://www.washingtonpavilion.org/ 77 | http://www.seeing-stars.com/Museums/ 78 | http://artsculture.newsandmediarepublic.org/ 79 | http://artsculture.newsandmediarepublic.org/ 80 | http://www.library.upenn.edu/finearts/ 81 | http://the-bac.edu/students/library 82 | http://library.rice.edu/brown-fine-arts-library 83 | http://library.columbia.edu/locations/avery.html 84 | http://finearts.library.cornell.edu/ 85 | http://www.frick.org/library/ 86 | http://www.hrc.utexas.edu/ 87 | http://scholes.alfred.edu/ 88 | http://ryerson.artic.edu/ 89 | http://www.library.ucla.edu/arts 90 | http://www.library.ucsb.edu/depts/arts/ 91 | http://www.umanitoba.ca/libraries/units/archfa/ 92 | http://www.lib.umich.edu/finearts/ 93 | http://artlibrary.vassar.edu/ 94 | http://the-bac.edu/students/library 95 | http://library.columbia.edu/locations/avery.html 96 | http://library.rice.edu/brown-fine-arts-library 97 | http://www.library.ucla.edu/arts 98 | http://finearts.library.cornell.edu/ 99 | http://www.library.ucsb.edu/depts/arts/ 100 | http://www.frick.org/library/ 101 | http://www.hrc.utexas.edu/ 102 | http://www.library.upenn.edu/finearts/ 103 | http://artlibrary.vassar.edu/ 104 | http://www.umanitoba.ca/libraries/units/archfa/ 105 | http://www.lib.umich.edu/finearts/ 106 | http://ryerson.artic.edu/ 107 | http://scholes.alfred.edu/ 108 | http://artistree.org/ 109 | http://artistree.org/ 110 | http://www.art4net.com/ 111 | http://www.artincontext.org/ 112 | http://www.artoffer.com/ 113 | http://www.artsearch.us/ 114 | http://www.artboomer.com/ 115 | http://www.artcyclopedia.com/ 116 | http://www.artfacts.net/ 117 | http://www.artnet.com/ 118 | http://www.artweblinks.com/ 119 | http://askart.com/AskART/index.aspx 120 | http://eserver.org/ 121 | http://www.euran.com/ 122 | http://www.thefanlistings.org/ 123 | http://www.floridaartistsregistry.com/ 124 | http://www.net-art.it/search/ 125 | http://www.noteaccess.com/DIRECTORIES/index.htm 126 | http://www.oriscus.com/ 127 | http://www.paaw.com/ 128 | http://worldartistdirectory.com/ 129 | http://www.zeroland.co.nz/ 130 | http://www.net-art.it/search/ 131 | http://eserver.org/ 132 | http://www.artboomer.com/ 133 | http://www.artoffer.com/ 134 | http://askart.com/AskART/index.aspx 135 | http://www.artfacts.net/ 136 | http://www.thefanlistings.org/ 137 | http://www.zeroland.co.nz/ 138 | http://worldartistdirectory.com/ 139 | http://www.euran.com/ 140 | http://www.artincontext.org/ 141 | http://www.floridaartistsregistry.com/ 142 | http://www.artcyclopedia.com/ 143 | http://www.artsearch.us/ 144 | http://www.paaw.com/ 145 | http://www.oriscus.com/ 146 | http://www.artweblinks.com/ 147 | http://www.noteaccess.com/DIRECTORIES/index.htm 148 | http://www.art4net.com/ 149 | http://www.artnet.com/ 150 | http://dreamersglade.proboards.com/ 151 | http://dreamersglade.proboards.com/ 152 | http://www.wellfedwriter.com/ 153 | http://www.enitharmon.co.uk/ 154 | http://www.fearofwriting.com/ 155 | http://www.fimp.net/ 156 | http://www.flairnet.com.au/ 157 | http://www.hennesseyingalls.com/ 158 | http://www.thehourstories.com/ 159 | http://www.kalwriters.com/ 160 | http://www.lundhumphries.com/ 161 | http://www.mckendry.net/ 162 | http://www.rock-art.com/ 163 | http://www.publishedart.com.au/ 164 | http://www.smartartpress.com/ 165 | http://www.writingtheartiststatement.com/ 166 | http://www.yellowmoon.com/ 167 | http://www.kalwriters.com/ 168 | http://www.flairnet.com.au/ 169 | http://www.wellfedwriter.com/ 170 | http://www.thehourstories.com/ 171 | http://www.fearofwriting.com/ 172 | http://www.writingtheartiststatement.com/ 173 | http://www.publishedart.com.au/ 174 | http://www.lundhumphries.com/ 175 | http://www.fimp.net/ 176 | http://www.smartartpress.com/ 177 | http://www.enitharmon.co.uk/ 178 | http://www.hennesseyingalls.com/ 179 | http://www.yellowmoon.com/ 180 | http://www.rock-art.com/ 181 | http://www.mckendry.net/ 182 | http://bafta.org/ 183 | http://www.latimes.com/entertainment/envelope/ 184 | http://www.kennedy-center.org/pages/SpecialEvents/honors 185 | http://www.kennedy-center.org/pages/SpecialEvents/honors 186 | http://www.latimes.com/entertainment/envelope/ 187 | http://bafta.org/ 188 | http://www.emerson.edu/comedy/ 189 | http://www.arcmusic.org/ 190 | http://www.indiana.edu/~aaamc/ 191 | http://www.aaa.si.edu/ 192 | http://www.cartoons.ac.uk/ 193 | http://www.lib.uchicago.edu/e/su/cja/ 194 | http://www.chitralekha.org/ 195 | http://www.shubertarchive.org/ 196 | http://wcftr.commarts.wisc.edu/ 197 | http://www.cartoons.ac.uk/ 198 | http://wcftr.commarts.wisc.edu/ 199 | http://www.aaa.si.edu/ 200 | http://www.emerson.edu/comedy/ 201 | http://www.lib.uchicago.edu/e/su/cja/ 202 | http://www.arcmusic.org/ 203 | http://www.chitralekha.org/ 204 | http://www.indiana.edu/~aaamc/ 205 | http://www.shubertarchive.org/ 206 | http://umaine.edu/wcenter/resources/ 207 | http://www.sfwa.org/beware/ 208 | http://writingcorner.com/ 209 | http://www.writing-world.com/ 210 | http://writingcorner.com/ 211 | http://umaine.edu/wcenter/resources/ 212 | http://www.sfwa.org/beware/ 213 | http://www.writing-world.com/ 214 | http://theartinquirer.blogspot.com/ 215 | http://towerofbabel.com/ 216 | http://beautifuldecay.com/ 217 | http://brandonbuckner.blogspot.com/ 218 | http://cfye.com/ 219 | http://blog.amystewart.com/ 220 | http://greg.org/ 221 | http://www.headbutler.com/ 222 | http://ialwayswantedtobeatenenbaum.blogspot.com/ 223 | http://suzannekaufman.blogspot.com/ 224 | http://www.lot49.com/ 225 | http://www.openculture.com/ 226 | http://www.pallalink.net/ 227 | http://sharocksthe.blogspot.com/ 228 | http://www.storyguru.co.uk/ 229 | http://www.trebuchet-magazine.com/ 230 | http://www.kevinsmokler.com/ 231 | http://blog.amystewart.com/ 232 | http://cfye.com/ 233 | http://greg.org/ 234 | http://www.storyguru.co.uk/ 235 | http://www.trebuchet-magazine.com/ 236 | http://sharocksthe.blogspot.com/ 237 | http://www.openculture.com/ 238 | http://beautifuldecay.com/ 239 | http://towerofbabel.com/ 240 | http://theartinquirer.blogspot.com/ 241 | http://www.lot49.com/ 242 | http://ialwayswantedtobeatenenbaum.blogspot.com/ 243 | http://brandonbuckner.blogspot.com/ 244 | http://www.headbutler.com/ 245 | http://suzannekaufman.blogspot.com/ 246 | http://www.kevinsmokler.com/ 247 | http://www.pallalink.net/ 248 | http://www.mcny.org/collections/theater-collection 249 | http://www.mcny.org/collections/theater-collection 250 | http://www.opendemocracy.net/ecology-foodwithoutfrontiers/debate.jsp 251 | http://www.gastronomica.org/ 252 | http://www.pvspade.com/Sartre/cookbook.html 253 | http://ebooks.adelaide.edu.au/b/brillat/savarin/b85p/ 254 | http://www.slowfood.com/ 255 | http://ebooks.adelaide.edu.au/b/brillat/savarin/b85p/ 256 | http://www.opendemocracy.net/ecology-foodwithoutfrontiers/debate.jsp 257 | http://www.pvspade.com/Sartre/cookbook.html 258 | http://www.slowfood.com/ 259 | http://www.gastronomica.org/ 260 | http://www.npr.org/rss/rss.php?id=1047 261 | http://www.npr.org/rss/rss.php?id=1047 262 | http://www.loc.gov/rr/perform/ 263 | http://www.loc.gov/rr/perform/ 264 | http://www.arkitip.com/ 265 | http://www.artdaily.com/ 266 | http://www.artmonthly.org.au/ 267 | http://www.artezine.com/ 268 | http://www.artpix.org/ 269 | http://www.arttalk.com/ 270 | http://www.flashartonline.com/ 271 | http://glasstire.com/ 272 | http://www.jca-online.com/ 273 | http://www.newyorkartworld.com/mag/artmag.html 274 | http://www.plug-in.ch/rss.asp 275 | http://www.preview-art.com/ 276 | http://www.stunned.org/ 277 | http://www.jca-online.com/ 278 | http://glasstire.com/ 279 | http://www.preview-art.com/ 280 | http://www.artdaily.com/ 281 | http://www.stunned.org/ 282 | http://www.artmonthly.org.au/ 283 | http://www.artezine.com/ 284 | http://www.arkitip.com/ 285 | http://www.arttalk.com/ 286 | http://www.flashartonline.com/ 287 | http://www.plug-in.ch/rss.asp 288 | http://www.newyorkartworld.com/mag/artmag.html 289 | http://www.artpix.org/ 290 | http://hawaiiflow.com/SpeleoArt/ 291 | http://hawaiiflow.com/SpeleoArt/ 292 | http://www.3daynovel.com/ 293 | http://www.apexawards.com/ 294 | http://www.awpwriter.org/ 295 | http://www.bulwer-lytton.com/ 296 | http://www.communicatorawards.com/ 297 | http://www.fishpublishing.com/writing-contests/ 298 | http://www.indiebookawards.com/ 299 | https://www2.mmu.ac.uk/writingcompetition/ 300 | http://www.milkweed.org/ 301 | http://www.brashcyber.com/mona.htm 302 | http://www.newguardreview.com/tng-contests/ 303 | http://www.parkpublications.co.uk/ 304 | http://www.pomegranatewords.com/ 305 | http://www.pronghornpress.org/ 306 | http://salamandermag.org/contests/ 307 | http://www.sfwp.com/ 308 | http://www.shelfmediagroup.com/pages/competition.html 309 | http://www.tennesseewilliams.net/ 310 | http://library.stanford.edu/projects/william-saroyan-international-prize-writing 311 | http://www.awomanswrite.com/ 312 | http://www.wordsmitten.com/fiction.html 313 | http://www.writersdigest.com/competitions 314 | http://www.wyrmsgauntlet.com/ 315 | https://www2.mmu.ac.uk/writingcompetition/ 316 | http://www.newguardreview.com/tng-contests/ 317 | http://library.stanford.edu/projects/william-saroyan-international-prize-writing 318 | http://www.shelfmediagroup.com/pages/competition.html 319 | http://www.indiebookawards.com/ 320 | http://salamandermag.org/contests/ 321 | http://www.tennesseewilliams.net/ 322 | http://www.wyrmsgauntlet.com/ 323 | http://www.fishpublishing.com/writing-contests/ 324 | http://www.writersdigest.com/competitions 325 | http://www.communicatorawards.com/ 326 | http://www.3daynovel.com/ 327 | http://www.brashcyber.com/mona.htm 328 | http://www.apexawards.com/ 329 | http://www.wordsmitten.com/fiction.html 330 | http://www.pronghornpress.org/ 331 | http://www.awpwriter.org/ 332 | http://www.sfwp.com/ 333 | http://www.pomegranatewords.com/ 334 | http://www.bulwer-lytton.com/ 335 | http://www.awomanswrite.com/ 336 | http://www.parkpublications.co.uk/ 337 | http://www.milkweed.org/ 338 | http://www.artrenewal.org/ 339 | http://www.colormatters.com/ 340 | http://www.worqx.com/color/ 341 | http://www.ccpvideos.com/ 342 | http://ebtx.com/artfrm.htm 343 | http://www.leaningpost.com/ 344 | http://www.rehsgalleries.com/newsletterarchives.htm 345 | http://www.ccpvideos.com/ 346 | http://www.rehsgalleries.com/newsletterarchives.htm 347 | http://ebtx.com/artfrm.htm 348 | http://www.leaningpost.com/ 349 | http://www.worqx.com/color/ 350 | http://www.colormatters.com/ 351 | http://www.artrenewal.org/ 352 | http://www.famouscreativewomen.com/ 353 | http://www.femalepersuasion.net/ 354 | http://www.womenarts.org/ 355 | http://www.studioxx.org/ 356 | http://www.womenfolk.com/ 357 | http://www.femalepersuasion.net/ 358 | http://www.womenfolk.com/ 359 | http://www.womenarts.org/ 360 | http://www.studioxx.org/ 361 | http://www.famouscreativewomen.com/ 362 | http://www.theartlist.com/ 363 | http://www.callsforart.com/ 364 | http://www.daveyawards.com/ 365 | http://www.daveyawards.com/ 366 | http://www.theartlist.com/ 367 | http://www.callsforart.com/ 368 | http://www.author-network.com/ 369 | http://www.burryman.com/ 370 | http://www.everywritersresource.com/ 371 | http://everyonewhosanyone.com/ 372 | http://greatwritersites.blogspot.com/ 373 | http://literaryterms.net/ 374 | http://www.newpages.com/ 375 | http://www.poewar.com/ 376 | http://andromeda.rutgers.edu/~jlynch/Writing/links.html 377 | http://www.espressographics.com/text/writers.html 378 | http://www.writers-free-reference.com/ 379 | http://writersresourcedirectory.com/ 380 | http://www.thewriterslounge.webs.com/ 381 | http://www.writers-free-reference.com/ 382 | http://greatwritersites.blogspot.com/ 383 | http://writersresourcedirectory.com/ 384 | http://literaryterms.net/ 385 | http://everyonewhosanyone.com/ 386 | http://www.everywritersresource.com/ 387 | http://www.burryman.com/ 388 | http://www.poewar.com/ 389 | http://www.thewriterslounge.webs.com/ 390 | http://www.newpages.com/ 391 | http://www.author-network.com/ 392 | http://www.espressographics.com/text/writers.html 393 | http://andromeda.rutgers.edu/~jlynch/Writing/links.html 394 | http://www.absolutewrite.com/forums/index.php 395 | http://accentuatewriters.com/ 396 | http://groups.yahoo.com/group/born2write/ 397 | http://www.writingforums.org/ 398 | http://forums.bellaonline.com/ubbthreads.php/forums/363/1/Fiction_Writing 399 | http://mywriterscircle.com/ 400 | https://www.reddit.com/r/StorylWritersGroup/ 401 | http://groups.yahoo.com/group/ticket2write/ 402 | http://www.wordtrip.com/ 403 | http://www.writersbeat.com/ 404 | http://www.writersdigest.com/forum/ 405 | http://www.writeupcafe.com/ 406 | http://groups.yahoo.com/group/writingandpublishing/ 407 | http://www.writingforums.com/ 408 | https://www.reddit.com/r/StorylWritersGroup/ 409 | http://mywriterscircle.com/ 410 | http://www.writersdigest.com/forum/ 411 | http://accentuatewriters.com/ 412 | http://www.writeupcafe.com/ 413 | http://www.absolutewrite.com/forums/index.php 414 | http://forums.bellaonline.com/ubbthreads.php/forums/363/1/Fiction_Writing 415 | http://groups.yahoo.com/group/writingandpublishing/ 416 | http://www.writingforums.org/ 417 | http://groups.yahoo.com/group/born2write/ 418 | http://www.wordtrip.com/ 419 | http://groups.yahoo.com/group/ticket2write/ 420 | http://www.writersbeat.com/ 421 | http://www.writingforums.com/ 422 | http://www.sagawards.org/ 423 | http://www.seeing-stars.com/Awards/SAGawards.shtml 424 | http://www.sagawards.org/ 425 | http://www.seeing-stars.com/Awards/SAGawards.shtml 426 | http://www.arslibri.com/ 427 | http://www.jmcohenrarebooks.com/ 428 | http://www.colophon.com/ 429 | http://www.hanshan.com/ 430 | http://www.brunias.com/ 431 | http://www.mullenbooks.com/ 432 | http://www.foxrarebooks.com/ 433 | http://www.violinsandbows.com/ 434 | http://www.simongoughbooks.com/Default.htm 435 | http://www.sullivangoss.com/ 436 | http://www.colophon.com/ 437 | http://www.mullenbooks.com/ 438 | http://www.violinsandbows.com/ 439 | http://www.brunias.com/ 440 | http://www.hanshan.com/ 441 | http://www.simongoughbooks.com/Default.htm 442 | http://www.foxrarebooks.com/ 443 | http://www.sullivangoss.com/ 444 | http://www.arslibri.com/ 445 | http://www.jmcohenrarebooks.com/ 446 | http://www.afi.com/about/library.aspx 447 | http://www.anthologyfilmarchives.org/ 448 | http://www.animationarchive.org/ 449 | http://www.amianet.org/ 450 | http://www.bampfa.berkeley.edu/ 451 | http://www.contemporaryfootage.com/ 452 | http://douglasfairbanks.wordpress.com/ 453 | http://web.library.yale.edu/testimonies 454 | https://library.harvard.edu/film/index.html 455 | http://www.moviemice.com/ 456 | http://www.movinghistory.ac.uk/ 457 | http://www.nfsa.gov.au/ 458 | http://www.loc.gov/film/ 459 | http://www.filmarchive.org.nz/ 460 | http://www.digitalfilmarchive.net/ 461 | http://www.open-video.org/ 462 | http://www.prelinger.com/ 463 | http://www.hrc.utexas.edu/collections/film/ 464 | http://www.brighton.ac.uk/screenarchive/ 465 | http://www.cinema.ucla.edu/ 466 | https://library.harvard.edu/film/index.html 467 | http://www.moviemice.com/ 468 | http://web.library.yale.edu/testimonies 469 | http://www.nfsa.gov.au/ 470 | http://douglasfairbanks.wordpress.com/ 471 | http://www.prelinger.com/ 472 | http://www.contemporaryfootage.com/ 473 | http://www.loc.gov/film/ 474 | http://www.animationarchive.org/ 475 | http://www.movinghistory.ac.uk/ 476 | http://www.brighton.ac.uk/screenarchive/ 477 | http://www.cinema.ucla.edu/ 478 | http://www.bampfa.berkeley.edu/ 479 | http://www.open-video.org/ 480 | http://www.filmarchive.org.nz/ 481 | http://www.digitalfilmarchive.net/ 482 | http://www.anthologyfilmarchives.org/ 483 | http://www.hrc.utexas.edu/collections/film/ 484 | http://www.afi.com/about/library.aspx 485 | http://www.amianet.org/ 486 | http://thehamletweblog.blogspot.com/ 487 | http://shakespearemag.blogspot.com/ 488 | http://thehamletweblog.blogspot.com/ 489 | http://shakespearemag.blogspot.com/ 490 | http://alcoholicoutsiderartist.blogspot.com/ 491 | http://anaba.blogspot.com/ 492 | http://arrestedmotion.com/ 493 | http://artblogbybob.blogspot.com/ 494 | http://artpaintingartist.org/ 495 | http://www.artgenossen.org/ 496 | http://baihuili.blogspot.com/ 497 | http://katedsmith.blogspot.com/ 498 | http://blogschmlog.blogspot.com/ 499 | http://www.chamuconegro.com/ 500 | http://www.creepmachine.com/ 501 | http://www.artsjournal.com/culturegrrl/ 502 | http://dailyartfixx.com/ 503 | http://eatsleepdraw.com/ 504 | http://www.edwardwinkleman.com/ 505 | http://ellenmcdermott.blogspot.com/ 506 | http://www.elvisrichardson.com/ 507 | http://escapeintolife.com/ 508 | http://flobberlob.blogspot.com/ 509 | http://fwaaldijk.blogspot.com/ 510 | https://hiking.org/ 511 | http://www.thejealouscurator.com/blog/ 512 | http://liamcrockard.tumblr.com/ 513 | http://marcbelldept.blogspot.com/ 514 | http://www.spacegirl.com/ 515 | http://squigglepage.blogspot.com/ 516 | http://artofthemystic.blogspot.com/ 517 | http://new-art.blogspot.com/ 518 | http://nickgazin.tumblr.com/ 519 | http://papercraftparadise.blogspot.com/ 520 | http://paperkraft.blogspot.com/ 521 | http://peryer.blogspot.com/ 522 | http://polvora-spigot.blogspot.com/ 523 | http://renaissanceandart.blogspot.com/ 524 | http://ryantravischristian.blogspot.com/ 525 | http://www.sharesomecandy.com/ 526 | http://sophiaallison.blogspot.com/ 527 | http://suckerpunchdaily.com/ 528 | http://tempesthole.com/blog/ 529 | http://theartblog.org/ 530 | http://weburbanist.com/ 531 | http://williampowhida.blogspot.com/ 532 | http://www.irishart.com/blog/blogindex.html 533 | http://artofthemystic.blogspot.com/ 534 | http://nickgazin.tumblr.com/ 535 | http://www.thejealouscurator.com/blog/ 536 | http://ellenmcdermott.blogspot.com/ 537 | http://baihuili.blogspot.com/ 538 | http://www.edwardwinkleman.com/ 539 | https://hiking.org/ 540 | http://artpaintingartist.org/ 541 | http://www.spacegirl.com/ 542 | http://www.sharesomecandy.com/ 543 | http://theartblog.org/ 544 | http://www.creepmachine.com/ 545 | http://eatsleepdraw.com/ 546 | http://escapeintolife.com/ 547 | http://www.artsjournal.com/culturegrrl/ 548 | http://arrestedmotion.com/ 549 | http://dailyartfixx.com/ 550 | http://liamcrockard.tumblr.com/ 551 | http://www.chamuconegro.com/ 552 | http://marcbelldept.blogspot.com/ 553 | http://williampowhida.blogspot.com/ 554 | http://www.elvisrichardson.com/ 555 | http://ryantravischristian.blogspot.com/ 556 | http://sophiaallison.blogspot.com/ 557 | http://fwaaldijk.blogspot.com/ 558 | http://anaba.blogspot.com/ 559 | http://new-art.blogspot.com/ 560 | http://www.artgenossen.org/ 561 | http://www.irishart.com/blog/blogindex.html 562 | http://papercraftparadise.blogspot.com/ 563 | http://tempesthole.com/blog/ 564 | http://flobberlob.blogspot.com/ 565 | http://peryer.blogspot.com/ 566 | http://polvora-spigot.blogspot.com/ 567 | http://squigglepage.blogspot.com/ 568 | http://weburbanist.com/ 569 | http://alcoholicoutsiderartist.blogspot.com/ 570 | http://katedsmith.blogspot.com/ 571 | http://paperkraft.blogspot.com/ 572 | http://artblogbybob.blogspot.com/ 573 | http://suckerpunchdaily.com/ 574 | http://renaissanceandart.blogspot.com/ 575 | http://blogschmlog.blogspot.com/ 576 | http://www.webring.org/hub?ring=finewildlifewild 577 | http://www.webring.org/hub?ring=puzzmann 578 | http://www.webring.org/hub?ring=artonthewebmanag 579 | http://www.webring.org/hub?ring=visualnet 580 | http://www.webring.org/hub?ring=artistsofthewebn 581 | http://www.webring.org/hub?ring=fineyoung 582 | http://www.webring.org/hub?ring=artring 583 | http://www.webring.org/hub?ring=artring 584 | http://www.webring.org/hub?ring=artonthewebmanag 585 | http://www.webring.org/hub?ring=puzzmann 586 | http://www.webring.org/hub?ring=visualnet 587 | http://www.webring.org/hub?ring=fineyoung 588 | http://www.webring.org/hub?ring=finewildlifewild 589 | http://www.webring.org/hub?ring=artistsofthewebn 590 | http://www.49th.net/ 591 | http://aardvarkwriting.com/ 592 | http://www.authoronestop.com/ 593 | http://authorassist.com/ 594 | http://www.azconsulting.us/ 595 | http://www.beaconlit.com/ 596 | http://www.belindabuckley.com/ 597 | http://www.cascobayliteraryservices.com/ 598 | http://www.copyclinic.com/ 599 | http://diannej.com/ 600 | http://www.ualberta.ca/~macraig 601 | http://www.escribo.com.au/ 602 | http://www.tchastings.com/ 603 | http://www.ideascapeinc.com/ 604 | http://inathememoircoach.com/ 605 | http://www.janerobertson.com/ 606 | http://gaushaus.tripod.com/ 607 | http://managedediting.com/ 608 | http://www.wrightscontent.com/ 609 | http://www.masterplayworks.com/ 610 | http://www.rebeccamcbride.net/ 611 | http://www.marketing-freelancer.com/ 612 | http://www.monkeytext.com/ 613 | http://www.persuasivepen.com/ 614 | http://www.clearest.co.uk/ 615 | http://www.rocketwords.com/ 616 | http://www.editingandwriting.com/ 617 | http://webspinstudios.com/ 618 | http://www.starrynyte.com/ 619 | http://www.togsolutions.com/ 620 | http://www.writewaydesigns.com/ 621 | http://www.writerfind.com/ 622 | http://www.writerspost.com/ 623 | http://www.writestyle.com/ 624 | http://diannej.com/ 625 | http://www.escribo.com.au/ 626 | http://managedediting.com/ 627 | http://www.writerfind.com/ 628 | http://www.authoronestop.com/ 629 | http://authorassist.com/ 630 | http://www.beaconlit.com/ 631 | http://www.marketing-freelancer.com/ 632 | http://inathememoircoach.com/ 633 | http://www.wrightscontent.com/ 634 | http://www.togsolutions.com/ 635 | http://www.rebeccamcbride.net/ 636 | http://www.belindabuckley.com/ 637 | http://www.cascobayliteraryservices.com/ 638 | http://www.ideascapeinc.com/ 639 | http://www.writerspost.com/ 640 | http://www.copyclinic.com/ 641 | http://www.masterplayworks.com/ 642 | http://www.azconsulting.us/ 643 | http://www.writestyle.com/ 644 | http://webspinstudios.com/ 645 | http://www.tchastings.com/ 646 | http://aardvarkwriting.com/ 647 | http://www.persuasivepen.com/ 648 | http://gaushaus.tripod.com/ 649 | http://www.clearest.co.uk/ 650 | http://www.editingandwriting.com/ 651 | http://www.monkeytext.com/ 652 | http://www.janerobertson.com/ 653 | http://www.49th.net/ 654 | http://www.writewaydesigns.com/ 655 | http://www.rocketwords.com/ 656 | http://www.ualberta.ca/~macraig 657 | http://www.starrynyte.com/ 658 | http://www.art-e-zine.co.uk/ 659 | http://groups.yahoo.com/group/TheGleanerZine/ 660 | http://www.art-e-zine.co.uk/ 661 | http://groups.yahoo.com/group/TheGleanerZine/ 662 | http://www.amscan.org/translation.html 663 | http://poetrysociety.org.uk/event/popescu/ 664 | https://frenchamerican.org/translationprize 665 | https://bcla.org/prizes-and-competitions/john-dryden-translation-competition/ 666 | http://ec.europa.eu/translatores/index_en.htm 667 | http://www.societyofauthors.org/Prizes/Translation-Prizes 668 | http://www.susansontag.com/ 669 | http://www.stephen-spender.org/SSMTrust/times_ss_prize/ssmt_evTransPrize.htm 670 | http://www.societyofauthors.org/Prizes/Translation-Prizes 671 | https://frenchamerican.org/translationprize 672 | https://bcla.org/prizes-and-competitions/john-dryden-translation-competition/ 673 | http://poetrysociety.org.uk/event/popescu/ 674 | http://www.amscan.org/translation.html 675 | http://ec.europa.eu/translatores/index_en.htm 676 | http://www.stephen-spender.org/SSMTrust/times_ss_prize/ssmt_evTransPrize.htm 677 | http://www.susansontag.com/ 678 | http://www.accessart.org.uk/ 679 | http://www.artinstructionblog.com/ 680 | http://www.artyfactory.com/ 681 | http://www.beginningartist.com/ 682 | http://www.bobross.com/Articles.asp?ID=305 683 | http://www.creativespotlite.com/ 684 | http://www.from-sketch-to-oil-painting.com/ 685 | http://www.funartlessons.com/ 686 | http://www.how-to-draw-and-paint.com/ 687 | http://www.magenta-sky.com/ 688 | http://www.onlinesculptureclass.com/ 689 | http://www.studioarts.net/ 690 | http://thevirtualinstructor.com/ 691 | http://psych.hanover.edu/Krantz/art/ 692 | http://www.creativewayart.com/ 693 | http://www.onlinesculptureclass.com/ 694 | http://www.beginningartist.com/ 695 | http://thevirtualinstructor.com/ 696 | http://www.artinstructionblog.com/ 697 | http://www.bobross.com/Articles.asp?ID=305 698 | http://www.creativewayart.com/ 699 | http://www.from-sketch-to-oil-painting.com/ 700 | http://www.funartlessons.com/ 701 | http://www.creativespotlite.com/ 702 | http://www.how-to-draw-and-paint.com/ 703 | http://www.magenta-sky.com/ 704 | http://www.accessart.org.uk/ 705 | http://www.studioarts.net/ 706 | http://www.artyfactory.com/ 707 | http://psych.hanover.edu/Krantz/art/ 708 | http://www.guerrillagirlsontour.com/ 709 | http://www.themagdalenaproject.org/ 710 | http://spec.lib.miamioh.edu/home/nawpa/ 711 | http://spec.lib.miamioh.edu/home/nawpa/ 712 | http://www.guerrillagirlsontour.com/ 713 | http://www.themagdalenaproject.org/ 714 | http://www.amazonradio.com/ 715 | http://music.minnesota.publicradio.org/features/0102_instrumentalwomen/index.shtml 716 | http://www.satellitesisters.com/ 717 | http://www.satellitesisters.com/ 718 | http://music.minnesota.publicradio.org/features/0102_instrumentalwomen/index.shtml 719 | http://www.amazonradio.com/ 720 | http://www.adelegriffin.com/ 721 | http://amy-alexander.com/ 722 | https://aabrahams.wordpress.com/ 723 | http://www.adrianpiper.com/ 724 | http://www.veryscarycarnival.20m.com/ 725 | http://www.arthistoryarchive.com/arthistory/feminist/20thcentury_feministartists.html 726 | http://www.artcyclopedia.com/artists/women.html 727 | http://barbaranessim.com/ 728 | http://www.archive-it.org/collections/2973 729 | http://www.cynthiabrodyart.com/ 730 | http://elupton.com/ 731 | http://feministartproject.rutgers.edu/ 732 | http://www.feministstudies.org/ArtGallery.html 733 | http://figurationfeminine.blogspot.com.br/ 734 | http://www.guerrillagirls.com/ 735 | http://www.imogencunningham.com/ 736 | http://www.lucidplanet.com/IWA/ 737 | http://www.judychicago.com/ 738 | http://lallaessaydi.com/ 739 | http://www.louisenevelsonfoundation.org/ 740 | http://www.mai-hudson.org/ 741 | http://www.acha-kutscher.com/ 742 | http://www.nmwa.org/ 743 | http://www.patrafeathers.com/ 744 | http://www.reactfeminism.org/prog_overview.php 745 | http://sallymann.com/ 746 | http://www.sandyskoglund.com/ 747 | http://www.society-women-artists.org.uk/ 748 | http://www.astudiooftheirown.org/ 749 | http://sugswritersblog.blogspot.co.uk/ 750 | http://varoregistry.org/ 751 | http://blogs.indiewire.com/womenandhollywood/ 752 | http://womenartrevolution.com/ 753 | http://www.collectionscanada.gc.ca/women/030001-1150-e.html 754 | https://www.cla.purdue.edu/waaw/ 755 | http://womenoutwest.blogspot.com.br/ 756 | https://www.facebook.com/Womens-Art-Resource-Centre-412289932186210/ 757 | http://womensmuseumca.org/ 758 | http://www.wsworkshop.org/ 759 | http://www.acha-kutscher.com/ 760 | http://www.astudiooftheirown.org/ 761 | http://www.archive-it.org/collections/2973 762 | http://www.adelegriffin.com/ 763 | http://www.adrianpiper.com/ 764 | http://elupton.com/ 765 | http://www.patrafeathers.com/ 766 | http://www.judychicago.com/ 767 | http://www.artcyclopedia.com/artists/women.html 768 | https://www.facebook.com/Womens-Art-Resource-Centre-412289932186210/ 769 | http://barbaranessim.com/ 770 | http://womenartrevolution.com/ 771 | http://womensmuseumca.org/ 772 | http://www.imogencunningham.com/ 773 | http://varoregistry.org/ 774 | https://www.cla.purdue.edu/waaw/ 775 | http://lallaessaydi.com/ 776 | https://aabrahams.wordpress.com/ 777 | http://feministartproject.rutgers.edu/ 778 | http://www.cynthiabrodyart.com/ 779 | http://www.arthistoryarchive.com/arthistory/feminist/20thcentury_feministartists.html 780 | http://www.nmwa.org/ 781 | http://www.mai-hudson.org/ 782 | http://sallymann.com/ 783 | http://amy-alexander.com/ 784 | http://blogs.indiewire.com/womenandhollywood/ 785 | http://www.louisenevelsonfoundation.org/ 786 | http://www.sandyskoglund.com/ 787 | http://www.society-women-artists.org.uk/ 788 | http://womenoutwest.blogspot.com.br/ 789 | http://sugswritersblog.blogspot.co.uk/ 790 | http://figurationfeminine.blogspot.com.br/ 791 | http://www.guerrillagirls.com/ 792 | http://www.feministstudies.org/ArtGallery.html 793 | http://www.reactfeminism.org/prog_overview.php 794 | http://www.collectionscanada.gc.ca/women/030001-1150-e.html 795 | http://www.veryscarycarnival.20m.com/ 796 | http://www.lucidplanet.com/IWA/ 797 | http://www.wsworkshop.org/ 798 | http://www.people.virginia.edu/~pm9k/libsci/womFilm.html 799 | http://womeninfilm.org/ 800 | http://www.wmm.com/ 801 | http://womeninfilm.org/ 802 | http://www.people.virginia.edu/~pm9k/libsci/womFilm.html 803 | http://www.wmm.com/ 804 | https://iwwg.wildapricot.org/ 805 | http://digital.nypl.org/schomburg/writers_aa19/ 806 | http://digital.library.upenn.edu/women/ 807 | http://www.womenplaywrights.org/ 808 | http://www.jaws.org/ 809 | http://www.marywshelley.com/ 810 | http://www.nlapw.org/ 811 | http://webapp1.dlib.indiana.edu/vwwp/ 812 | http://www.angelfire.com/journal2/wickedalicemag/ 813 | http://research.umbc.edu/~korenman/wmst/links_arts.html#lit 814 | http://www.wwp.brown.edu/ 815 | https://iwwg.wildapricot.org/ 816 | http://webapp1.dlib.indiana.edu/vwwp/ 817 | http://www.marywshelley.com/ 818 | http://www.womenplaywrights.org/ 819 | http://digital.nypl.org/schomburg/writers_aa19/ 820 | http://research.umbc.edu/~korenman/wmst/links_arts.html#lit 821 | http://www.nlapw.org/ 822 | http://www.jaws.org/ 823 | http://digital.library.upenn.edu/women/ 824 | http://www.angelfire.com/journal2/wickedalicemag/ 825 | http://www.wwp.brown.edu/ 826 | http://www.soltys.ca/techcomm.html 827 | http://www.soltys.ca/techcomm.html 828 | http://www.angrylittlegirls.com/ 829 | http://www.damedarcy.com/ 830 | http://womenincomics.wikia.com/wiki/Friends_of_Lulu 831 | http://www.grrl.com/grrlcomix.html 832 | http://jillthompson.tumblr.com/ 833 | http://mollykiely.com/ 834 | http://en.wikipedia.org/wiki/Women_in_comics 835 | http://lambiek.net/magazines/wimmenscomix.htm 836 | http://mollykiely.com/ 837 | http://jillthompson.tumblr.com/ 838 | http://womenincomics.wikia.com/wiki/Friends_of_Lulu 839 | http://www.angrylittlegirls.com/ 840 | http://en.wikipedia.org/wiki/Women_in_comics 841 | http://www.grrl.com/grrlcomix.html 842 | http://lambiek.net/magazines/wimmenscomix.htm 843 | http://www.damedarcy.com/ 844 | http://www.bawifm.org/ 845 | http://www.hawaiiwomeninfilmmaking.org/ 846 | http://www.kcwift.com/ 847 | http://mnwift.org/ 848 | http://wiftnashville.org/ 849 | http://www.nmwif.com/ 850 | http://www.pswift.org/ 851 | https://www.facebook.com/ReelWomen/ 852 | http://www.wiftlouisiana.org/ 853 | http://www.wiftnz.org.nz/ 854 | http://womeninfilm.org/ 855 | http://www.wiftnsw.org.au/ 856 | http://www.wifv.org/ 857 | http://www.womeninfilmfl.org/ 858 | http://www.wifdallas.org/ 859 | http://womeninfilmseattle.org/ 860 | http://www.wifmpit.org/ 861 | http://www.wifta.org/ 862 | http://www.nywift.org/ 863 | https://www.wift.com/ 864 | http://www.wifta.ca/ 865 | http://www.wift-houston.org/ 866 | https://www.facebook.com/wiftin/ 867 | http://www.wift.se/in-english/ 868 | https://www.facebook.com/Women-in-Film-and-Television-South-Africa-WIFTSA-141217032567634/ 869 | http://www.wftv.org.uk/ 870 | http://www.womeninfilm.ca/ 871 | http://wiftvic.com.au/ 872 | http://www.wifchicago.net/ 873 | http://wifpdx.org/ 874 | http://www.womeninfilmvideo.org/ 875 | http://www.wifmpit.org/ 876 | http://mnwift.org/ 877 | https://www.facebook.com/wiftin/ 878 | http://www.womeninfilm.ca/ 879 | http://www.nmwif.com/ 880 | http://www.wftv.org.uk/ 881 | https://www.facebook.com/Women-in-Film-and-Television-South-Africa-WIFTSA-141217032567634/ 882 | https://www.facebook.com/ReelWomen/ 883 | http://www.kcwift.com/ 884 | http://wiftvic.com.au/ 885 | http://wifpdx.org/ 886 | http://www.wiftlouisiana.org/ 887 | http://www.wiftnsw.org.au/ 888 | http://www.wifta.ca/ 889 | http://www.wift.se/in-english/ 890 | http://www.wiftnz.org.nz/ 891 | http://www.bawifm.org/ 892 | http://www.hawaiiwomeninfilmmaking.org/ 893 | http://wiftnashville.org/ 894 | http://www.pswift.org/ 895 | http://www.wifv.org/ 896 | http://www.wifchicago.net/ 897 | http://womeninfilm.org/ 898 | http://womeninfilmseattle.org/ 899 | http://www.wift-houston.org/ 900 | http://www.wifta.org/ 901 | http://www.wifdallas.org/ 902 | http://www.womeninfilmvideo.org/ 903 | http://www.nywift.org/ 904 | http://www.womeninfilmfl.org/ 905 | https://www.wift.com/ 906 | http://www.anti-robot.org/ 907 | http://www.acmailart.blogspot.com/ 908 | http://fripsmailart.blogspot.com/ 909 | http://wimplet-heldinnen.blogspot.com/ 910 | http://digitalmailart.blogspot.com/ 911 | https://minkranch.wordpress.com/ 912 | http://sideshowpost.blogspot.com/ 913 | http://spiralmail.blogspot.com/ 914 | https://minkranch.wordpress.com/ 915 | http://sideshowpost.blogspot.com/ 916 | http://wimplet-heldinnen.blogspot.com/ 917 | http://www.anti-robot.org/ 918 | http://spiralmail.blogspot.com/ 919 | http://www.acmailart.blogspot.com/ 920 | http://digitalmailart.blogspot.com/ 921 | http://fripsmailart.blogspot.com/ 922 | http://www.1000words-a-day.com/ 923 | http://the1940mysterywriter.weebly.com/ 924 | http://www.julieduffy.com/ 925 | http://www.absolutewrite.com/ 926 | http://www.aintiawriter.blogspot.com/ 927 | http://www.alanbaxteronline.com/ 928 | http://allkindsofwriting.blogspot.com/ 929 | http://copywriter.typepad.com/copywriter 930 | https://booksandsinging.wordpress.com/ 931 | http://annotationnation.wordpress.com/ 932 | http://hbaum.blogspot.com/ 933 | http://allisonwinnscotch.blogspot.com/ 934 | http://blogsbite.blogspot.com/ 935 | http://bobsanchez1.blogspot.com/ 936 | http://bookpublishingnews.com/ 937 | http://booksist.net/ 938 | http://www.booksquare.com/ 939 | http://blogs.chron.com/bookwoman/ 940 | http://www.overthehillchick.blogspot.com/ 941 | http://bridgetwhelan.com/ 942 | http://pjparrish.blogspot.com/ 943 | http://www.carrieabutler.com/ 944 | http://scottwbaker.net/ 945 | http://www.cherylreif.com/ 946 | http://www.christopherfielden.com/ 947 | https://cleasaal.wordpress.com/ 948 | http://aconservatoryofone.blogspot.com/ 949 | http://www.thecreativepenn.com/ 950 | http://cynthialeecartierblogs.com/ 951 | http://danerickson.net/ 952 | http://www.davidcrystal.com/ 953 | http://www.davideide.com/ 954 | http://www.smithwriter.com/ 955 | http://www.aneclecticmind.com/ 956 | http://fictioneditorsopinions.com/ 957 | http://editorialanonymous.blogspot.com/ 958 | http://elizabethmeltonparsons.wordpress.com/ 959 | http://www.evileditor.blogspot.com/ 960 | http://fairyhedgehog.blogspot.com/ 961 | http://www.fearofwriting.com/blog 962 | http://www.writinglit-newgeneration.blogspot.com/ 963 | http://kendralynn.blogspot.com/ 964 | http://www.fundsforwriters.com/ 965 | http://francesdinkelspiel.blogspot.com/ 966 | http://www.gloriaoliver.com/blog/ 967 | http://www.glynnjames.co.uk/ 968 | http://thehappybooker.blogs.com/ 969 | http://hortorian.com/ 970 | http://ideaboutique.blogspot.com/ 971 | http://ilonasworld.blogspot.com/atom.xml 972 | http://inkinmycoffee.blogspot.com/ 973 | http://inkthinkerblog.com/ 974 | http://inkygirl.com/ 975 | http://www.irregardlessmagazine.com/ 976 | http://ithadbetterbegood.blogspot.com/ 977 | http://jalanerwine.blogspot.com/ 978 | http://goinswriter.com/blog/ 979 | http://jerz.setonhill.edu/ 980 | http://www.kenwriting.com/ 981 | http://lighthouse-writing-tips.blogspot.com/ 982 | http://pages.intnet.mu/linx/index.html 983 | http://lisaburks.typepad.com/ 984 | http://lbc.typepad.com/blog/ 985 | http://www.markdcooper.com/ 986 | http://marthaoconnor.blogspot.com/ 987 | http://maudnewton.com/blog/ 988 | http://michel-cruz.rimontgo.com/ 989 | http://www.michellerichmond.com/sanserif/ 990 | http://misssnarksfirstvictim.blogspot.com/ 991 | http://misssnark.blogspot.com/ 992 | http://www.mkanderson.com/ 993 | http://nancybartholomew.blogspot.com/ 994 | http://museinks.blogspot.com/ 995 | https://katrich.wordpress.com/ 996 | http://writetotravel.blogspot.com/ 997 | http://blog.nathanbransford.com/ 998 | http://www.nationalpasquinade.com/ 999 | http://newcenturynotebook.blogspot.com/ 1000 | http://jakonrath.blogspot.com/ 1001 | http://brendacoulter.blogspot.com/ 1002 | http://www.theoldhag.com/ 1003 | http://www.oursimplejoys.com/ 1004 | http://pbackwriter.blogspot.com/ 1005 | http://paranormalityuniverse.blogspot.com/ 1006 | http://www.patrickjames.ie/blog 1007 | https://penandthepad.com/ 1008 | https://penlighten.com/ 1009 | http://picturebookillustrators.typepad.com/ 1010 | http://publishinginsider.net/ 1011 | http://queenswriteaboutwriting.blogspot.com/ 1012 | http://queryletters.blogspot.com/ 1013 | http://www.rebeccarosenblum.com/ 1014 | http://www.regularspelling.com/ 1015 | http://ritahubbard.com/ 1016 | http://theroadlesswritten.com/ 1017 | http://robertfay.com/ 1018 | http://www.sarahsalway.co.uk/ 1019 | http://www.paullima.com/blog/ 1020 | https://www.thesneakykittycritic.com/ 1021 | http://stephenlloydwebber.com/ 1022 | http://www.sutanumitra.com/ 1023 | http://theswivet.blogspot.com/ 1024 | http://teenybooks.blogspot.com/ 1025 | http://salutor.blogs.com/tenebris/ 1026 | https://thinkinglazy.wordpress.com/ 1027 | http://translationpost.com/ 1028 | http://www.unpublishedguy.com/ 1029 | http://thewildwriters.com/ 1030 | http://www.kerismith.com/blog 1031 | http://wordgrrls.com/ 1032 | http://wordmothers.com/ 1033 | http://wordsdontcomeeasily.blogspot.com/ 1034 | http://lianespicer.blogspot.com/ 1035 | http://theworldofmyimagination.blogspot.com/ 1036 | http://www.writefarmlive.com/ 1037 | http://writefem.blogspot.com/ 1038 | http://thewritepractice.com/ 1039 | http://writesuccess.com/ 1040 | http://www.writethismoment.com/ 1041 | https://accrispin.blogspot.com/ 1042 | http://thewritermama.wordpress.com/ 1043 | http://www.writerunboxed.com/ 1044 | http://www.writerscookbook.com/ 1045 | http://leegoldberg.typepad.com/ 1046 | http://www.thewriterslife.blogspot.com/ 1047 | http://writerstricksofthetrade.blogspot.com/ 1048 | http://www.writersandeditors.com/ 1049 | http://writersnoonereads.tumblr.com/ 1050 | http://writerschecklist.blogspot.com/ 1051 | http://writinginwonderland.blogspot.com/ 1052 | http://terrywhalin.blogspot.com/ 1053 | http://nienkehinton.blogspot.com/ 1054 | http://writingnag.blogspot.com/ 1055 | http://blog.writingspirit.com/ 1056 | http://www.writingthoughts.com/ 1057 | http://ylogs.com/ 1058 | http://www.michellemiles.net/blog/ 1059 | https://booksandsinging.wordpress.com/ 1060 | https://penlighten.com/ 1061 | https://www.thesneakykittycritic.com/ 1062 | http://wordmothers.com/ 1063 | https://penandthepad.com/ 1064 | https://accrispin.blogspot.com/ 1065 | http://writersnoonereads.tumblr.com/ 1066 | http://www.davideide.com/ 1067 | http://www.writefarmlive.com/ 1068 | http://www.nationalpasquinade.com/ 1069 | http://www.irregardlessmagazine.com/ 1070 | http://thewildwriters.com/ 1071 | http://www.glynnjames.co.uk/ 1072 | http://www.davidcrystal.com/ 1073 | http://www.sutanumitra.com/ 1074 | http://fairyhedgehog.blogspot.com/ 1075 | http://stephenlloydwebber.com/ 1076 | http://publishinginsider.net/ 1077 | http://the1940mysterywriter.weebly.com/ 1078 | http://www.unpublishedguy.com/ 1079 | http://robertfay.com/ 1080 | http://www.markdcooper.com/ 1081 | http://cynthialeecartierblogs.com/ 1082 | http://www.carrieabutler.com/ 1083 | http://elizabethmeltonparsons.wordpress.com/ 1084 | http://ritahubbard.com/ 1085 | http://fictioneditorsopinions.com/ 1086 | http://www.alanbaxteronline.com/ 1087 | http://www.mkanderson.com/ 1088 | http://scottwbaker.net/ 1089 | http://translationpost.com/ 1090 | http://www.gloriaoliver.com/blog/ 1091 | http://www.rebeccarosenblum.com/ 1092 | http://www.aneclecticmind.com/ 1093 | http://lianespicer.blogspot.com/ 1094 | http://bobsanchez1.blogspot.com/ 1095 | http://blog.writingspirit.com/ 1096 | http://www.sarahsalway.co.uk/ 1097 | http://www.regularspelling.com/ 1098 | http://www.patrickjames.ie/blog 1099 | http://inkygirl.com/ 1100 | http://www.writerscookbook.com/ 1101 | https://katrich.wordpress.com/ 1102 | http://bookpublishingnews.com/ 1103 | http://writesuccess.com/ 1104 | http://www.cherylreif.com/ 1105 | http://www.oursimplejoys.com/ 1106 | http://thewritepractice.com/ 1107 | http://writinginwonderland.blogspot.com/ 1108 | http://www.christopherfielden.com/ 1109 | http://www.thecreativepenn.com/ 1110 | http://terrywhalin.blogspot.com/ 1111 | http://www.fundsforwriters.com/ 1112 | http://theroadlesswritten.com/ 1113 | http://jerz.setonhill.edu/ 1114 | http://www.writethismoment.com/ 1115 | https://cleasaal.wordpress.com/ 1116 | http://booksist.net/ 1117 | http://www.fearofwriting.com/blog 1118 | http://michel-cruz.rimontgo.com/ 1119 | http://www.writersandeditors.com/ 1120 | https://thinkinglazy.wordpress.com/ 1121 | http://writerstricksofthetrade.blogspot.com/ 1122 | http://hortorian.com/ 1123 | http://danerickson.net/ 1124 | http://theworldofmyimagination.blogspot.com/ 1125 | http://www.aintiawriter.blogspot.com/ 1126 | http://writerschecklist.blogspot.com/ 1127 | http://annotationnation.wordpress.com/ 1128 | http://www.kenwriting.com/ 1129 | http://goinswriter.com/blog/ 1130 | http://writingnag.blogspot.com/ 1131 | http://www.1000words-a-day.com/ 1132 | http://wordgrrls.com/ 1133 | http://www.smithwriter.com/ 1134 | http://www.absolutewrite.com/ 1135 | http://inkthinkerblog.com/ 1136 | http://blog.nathanbransford.com/ 1137 | http://bridgetwhelan.com/ 1138 | http://ithadbetterbegood.blogspot.com/ 1139 | http://lisaburks.typepad.com/ 1140 | http://ilonasworld.blogspot.com/atom.xml 1141 | http://misssnarksfirstvictim.blogspot.com/ 1142 | http://editorialanonymous.blogspot.com/ 1143 | http://theswivet.blogspot.com/ 1144 | http://pages.intnet.mu/linx/index.html 1145 | http://lighthouse-writing-tips.blogspot.com/ 1146 | http://www.paullima.com/blog/ 1147 | http://blogs.chron.com/bookwoman/ 1148 | http://allkindsofwriting.blogspot.com/ 1149 | http://museinks.blogspot.com/ 1150 | http://ideaboutique.blogspot.com/ 1151 | http://teenybooks.blogspot.com/ 1152 | http://www.writinglit-newgeneration.blogspot.com/ 1153 | http://www.writerunboxed.com/ 1154 | http://copywriter.typepad.com/copywriter 1155 | http://allisonwinnscotch.blogspot.com/ 1156 | http://writetotravel.blogspot.com/ 1157 | http://www.writingthoughts.com/ 1158 | http://thewritermama.wordpress.com/ 1159 | http://salutor.blogs.com/tenebris/ 1160 | http://newcenturynotebook.blogspot.com/ 1161 | http://nancybartholomew.blogspot.com/ 1162 | http://ylogs.com/ 1163 | http://www.thewriterslife.blogspot.com/ 1164 | http://maudnewton.com/blog/ 1165 | http://picturebookillustrators.typepad.com/ 1166 | http://www.evileditor.blogspot.com/ 1167 | http://jalanerwine.blogspot.com/ 1168 | http://wordsdontcomeeasily.blogspot.com/ 1169 | http://www.overthehillchick.blogspot.com/ 1170 | http://www.theoldhag.com/ 1171 | http://aconservatoryofone.blogspot.com/ 1172 | http://thehappybooker.blogs.com/ 1173 | http://francesdinkelspiel.blogspot.com/ 1174 | http://www.booksquare.com/ 1175 | http://nienkehinton.blogspot.com/ 1176 | http://leegoldberg.typepad.com/ 1177 | http://pjparrish.blogspot.com/ 1178 | http://blogsbite.blogspot.com/ 1179 | http://www.julieduffy.com/ 1180 | http://paranormalityuniverse.blogspot.com/ 1181 | http://www.michellemiles.net/blog/ 1182 | http://inkinmycoffee.blogspot.com/ 1183 | http://queryletters.blogspot.com/ 1184 | http://pbackwriter.blogspot.com/ 1185 | http://jakonrath.blogspot.com/ 1186 | http://lbc.typepad.com/blog/ 1187 | http://queenswriteaboutwriting.blogspot.com/ 1188 | http://misssnark.blogspot.com/ 1189 | http://brendacoulter.blogspot.com/ 1190 | http://hbaum.blogspot.com/ 1191 | http://kendralynn.blogspot.com/ 1192 | http://marthaoconnor.blogspot.com/ 1193 | http://writefem.blogspot.com/ 1194 | http://www.kerismith.com/blog 1195 | http://www.michellerichmond.com/sanserif/ 1196 | http://www.allartsupplies.com/ 1197 | http://www.in2art.com/ 1198 | http://aswexpress.com/ 1199 | http://www.artxpress.com/ 1200 | http://www.artacademy.com/ 1201 | http://www.artsupplies.co.nz/ 1202 | http://www.artsuppliesonline.com/ 1203 | http://www.artsupply.com/ 1204 | http://www.dickblick.com/ 1205 | http://www.dixieart.com/ 1206 | http://www.oilpaintingtechniques.com/ 1207 | http://www.artdiscount.co.uk/ 1208 | http://www.hellermans.com/ 1209 | http://www.herwecks.com/ 1210 | http://www.islandblue.com/ 1211 | http://www.jaysartshop.com/ 1212 | http://www.jerrysartarama.com/ 1213 | http://www.londonart-shop.co.uk/ 1214 | http://www.meininger.com/ 1215 | http://www.misterart.com/ 1216 | http://www.deserres.ca/ 1217 | http://www.rexart.com/ 1218 | http://www.scaramouch.co.uk/ 1219 | http://www.simplypainting.com/ 1220 | http://www.teachingart.co.uk/ 1221 | http://www.ternartsupplies.com/ 1222 | http://www.ukframingsupplies.net/ 1223 | http://www.utrechtart.com/ 1224 | http://www.artstuff.net/ 1225 | http://www.ukframingsupplies.net/ 1226 | http://www.ternartsupplies.com/ 1227 | http://www.deserres.ca/ 1228 | http://www.teachingart.co.uk/ 1229 | http://www.jaysartshop.com/ 1230 | http://www.utrechtart.com/ 1231 | http://www.londonart-shop.co.uk/ 1232 | http://www.artsuppliesonline.com/ 1233 | http://www.artsupplies.co.nz/ 1234 | http://www.meininger.com/ 1235 | http://www.artxpress.com/ 1236 | http://www.hellermans.com/ 1237 | http://www.jerrysartarama.com/ 1238 | http://www.artsupply.com/ 1239 | http://www.simplypainting.com/ 1240 | http://www.in2art.com/ 1241 | http://www.artdiscount.co.uk/ 1242 | http://www.rexart.com/ 1243 | http://www.dickblick.com/ 1244 | http://www.misterart.com/ 1245 | http://www.scaramouch.co.uk/ 1246 | http://www.allartsupplies.com/ 1247 | http://www.islandblue.com/ 1248 | http://www.artstuff.net/ 1249 | http://www.herwecks.com/ 1250 | http://aswexpress.com/ 1251 | http://www.artacademy.com/ 1252 | http://www.oilpaintingtechniques.com/ 1253 | http://www.dixieart.com/ 1254 | http://www.learn2paintsigns.com/ 1255 | http://www.noteaccess.com/MATERIALS/Painting.htm 1256 | http://www.noteaccess.com/MATERIALS/Painting.htm 1257 | http://www.learn2paintsigns.com/ 1258 | http://www.mixedgreens.com/ 1259 | http://www.mixedgreens.com/ 1260 | http://www.guerrillagirls.com/ 1261 | http://www.guerrillagirls.com/ 1262 | http://www.artgallery.nsw.gov.au/ 1263 | http://www.artinstituteshop.org/ 1264 | http://www.britishmuseumshoponline.org/ 1265 | http://www.brooklynmuseum.org/ 1266 | http://shop.architecture.org/ 1267 | http://shopchicagohistory.com/ 1268 | http://www.cincinnatiartmuseum.org/ 1269 | http://www.clevelandart.org/ 1270 | https://www.cmog.org/shop 1271 | http://www.cortezculturalcenter.org/ 1272 | https://shop.dma.org/ 1273 | http://www.farnsworthmuseum.org/ 1274 | http://store.fieldmuseum.org/ 1275 | http://shop.fredericremington.org/ 1276 | http://www.guggenheimstore.org/ 1277 | http://www.heard.org/ 1278 | http://thehermitage.com/shop/ 1279 | http://shop.getty.edu/ 1280 | https://www.janeausten.co.uk/shop/ 1281 | http://www.janm.org/ 1282 | http://www.thejewishmuseum.org/ 1283 | https://shop.khm.at/ 1284 | http://www.boutiquesdemusees.fr/en/ 1285 | https://library-of-congress-shop.myshopify.com/ 1286 | http://www.mmoca.org/shop-mmoca/museum-store 1287 | http://www.massmoca.org/ 1288 | http://store.metmuseum.org/ 1289 | http://store.mam.org/16/milwaukee-art-museum-store.htm 1290 | http://www.mocastore.org/ 1291 | http://shop.themodern.org/ 1292 | https://store.moma.org/ 1293 | http://www.montclairartmuseum.org/ 1294 | http://www.monticello.org/ 1295 | http://www.themorgan.org/ 1296 | http://musee.com/ 1297 | http://www.mcachicagostore.org/ 1298 | http://www.shopmuseum.org/ 1299 | https://www.shopbethelwoods.com/ 1300 | https://secure2.convio.net/nbm/site/Ecommerce?store_id=1161 1301 | http://shop.nga.gov/ 1302 | http://shop.nmwa.org/ 1303 | http://www.noguchi.org/ 1304 | http://www.norton.org/ 1305 | https://store.philamuseum.org/ 1306 | https://www.reaganfoundation.org/store 1307 | http://www.royalacademy.org.uk/?lid=254&shoppage=brws&cat=7 1308 | http://shop.thedali.org/ 1309 | http://museumstore.sfmoma.org/ 1310 | http://www.sciencemuseumshop.co.uk/ 1311 | https://americanart.si.edu/books 1312 | http://www.smithsonianstore.com/ 1313 | https://www.hermitageshop.org/ 1314 | http://www.stormking.org/ 1315 | http://swedishamericanmuseum.org/2.0/shop/ 1316 | http://www.vangoghmuseumshop.com/ 1317 | https://shop.vam.ac.uk/ 1318 | https://shop.walkerart.org/ 1319 | http://www.warholstore.com/ 1320 | http://www.whitney.org/ 1321 | https://library-of-congress-shop.myshopify.com/ 1322 | https://shop.dma.org/ 1323 | https://store.moma.org/ 1324 | https://americanart.si.edu/books 1325 | https://www.reaganfoundation.org/store 1326 | https://store.philamuseum.org/ 1327 | https://www.cmog.org/shop 1328 | http://swedishamericanmuseum.org/2.0/shop/ 1329 | https://www.janeausten.co.uk/shop/ 1330 | https://www.shopbethelwoods.com/ 1331 | http://store.mam.org/16/milwaukee-art-museum-store.htm 1332 | https://shop.vam.ac.uk/ 1333 | http://www.farnsworthmuseum.org/ 1334 | http://www.montclairartmuseum.org/ 1335 | http://www.shopmuseum.org/ 1336 | http://shop.fredericremington.org/ 1337 | https://www.hermitageshop.org/ 1338 | http://shop.architecture.org/ 1339 | http://thehermitage.com/shop/ 1340 | http://www.vangoghmuseumshop.com/ 1341 | http://museumstore.sfmoma.org/ 1342 | http://shop.nmwa.org/ 1343 | http://www.mocastore.org/ 1344 | http://shop.themodern.org/ 1345 | http://shop.thedali.org/ 1346 | https://shop.khm.at/ 1347 | http://www.mmoca.org/shop-mmoca/museum-store 1348 | http://store.fieldmuseum.org/ 1349 | http://shopchicagohistory.com/ 1350 | http://www.boutiquesdemusees.fr/en/ 1351 | http://shop.getty.edu/ 1352 | http://www.sciencemuseumshop.co.uk/ 1353 | https://secure2.convio.net/nbm/site/Ecommerce?store_id=1161 1354 | http://www.britishmuseumshoponline.org/ 1355 | http://store.metmuseum.org/ 1356 | http://shop.nga.gov/ 1357 | http://www.guggenheimstore.org/ 1358 | http://www.clevelandart.org/ 1359 | http://www.massmoca.org/ 1360 | http://www.themorgan.org/ 1361 | http://www.heard.org/ 1362 | http://www.artgallery.nsw.gov.au/ 1363 | http://www.whitney.org/ 1364 | http://www.brooklynmuseum.org/ 1365 | http://www.cortezculturalcenter.org/ 1366 | http://www.norton.org/ 1367 | http://www.thejewishmuseum.org/ 1368 | http://www.stormking.org/ 1369 | http://www.monticello.org/ 1370 | https://shop.walkerart.org/ 1371 | http://www.janm.org/ 1372 | http://www.noguchi.org/ 1373 | http://www.smithsonianstore.com/ 1374 | http://www.mcachicagostore.org/ 1375 | http://musee.com/ 1376 | http://www.artinstituteshop.org/ 1377 | http://www.cincinnatiartmuseum.org/ 1378 | http://www.warholstore.com/ 1379 | http://www.royalacademy.org.uk/?lid=254&shoppage=brws&cat=7 1380 | http://muse.jhu.edu/journal/35 1381 | http://www.upress.umn.edu/Books/C/carson_multiple.html 1382 | http://muse.jhu.edu/journal/35 1383 | http://www.upress.umn.edu/Books/C/carson_multiple.html 1384 | http://www.aawm-ngo.com/ 1385 | https://www.facebook.com/GRIMEGender-Research-in-Music-Education-302125169848573/ 1386 | http://www.iawm.org/ 1387 | http://www.ladyslipper.org/ 1388 | http://www.newyorkwomencomposers.org/ 1389 | http://www.soundsandfuries.com/ 1390 | http://tunesbaby.com/ 1391 | http://www.womeninjazz.org/ 1392 | http://www.womeninmusic.org/ 1393 | http://www.womeninmusic.org.uk/ 1394 | https://www.facebook.com/GRIMEGender-Research-in-Music-Education-302125169848573/ 1395 | http://www.newyorkwomencomposers.org/ 1396 | http://www.womeninjazz.org/ 1397 | http://tunesbaby.com/ 1398 | http://www.iawm.org/ 1399 | http://www.ladyslipper.org/ 1400 | http://www.womeninmusic.org.uk/ 1401 | http://www.womeninmusic.org/ 1402 | http://www.aawm-ngo.com/ 1403 | http://www.soundsandfuries.com/ 1404 | http://luciesbookreview.blogspot.com/ 1405 | http://presidentofherbookclub.blogspot.com/ 1406 | http://presidentofherbookclub.blogspot.com/ 1407 | http://luciesbookreview.blogspot.com/ 1408 | http://www.harley.com/womens-quotes/index.html 1409 | http://www.harley.com/womens-quotes/index.html 1410 | http://jessicaabel.com/ 1411 | http://www.thestranger.com/seattle/under-the-volcano/Content?oid=12956 1412 | http://magazine.uchicago.edu/0004/features/ 1413 | http://jessicaabel.com/ 1414 | http://www.thestranger.com/seattle/under-the-volcano/Content?oid=12956 1415 | http://magazine.uchicago.edu/0004/features/ 1416 | http://www.ireneogarden.com/ 1417 | http://www.peggyorenstein.com/ 1418 | http://www.suburbandiva.com/ 1419 | http://www.ireneogarden.com/ 1420 | http://www.suburbandiva.com/ 1421 | http://www.peggyorenstein.com/ 1422 | -------------------------------------------------------------------------------- /page_identify/data_Processer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | # ____ ____ ______________ 4 | # | | | | | | 5 | # | | | | |_____ _____| 6 | # | |__| | | | 7 | # | __ | | | 8 | # | | | | | | 9 | # | | | | | | 10 | # |____| |____| |____| 11 | # 12 | # fileName:data_Processer 13 | # project: Fish_learning 14 | # author: theo_hui 15 | # e-mail:Theo_hui@163.com 16 | # purpose: 对数据进行处理 17 | # creatData:2019/5/9 18 | import copy 19 | import re 20 | import string 21 | 22 | import tensorflow as tf 23 | import numpy as np 24 | import pandas as pd 25 | 26 | 27 | def load_csv_file(input_file,cols=7): 28 | ''' 29 | 加载特征 30 | :param input_file: csv文件 默认第一行为文件名 要去除 31 | cols 行数 32 | :return: 33 | ''' 34 | 35 | #加载文件 36 | csv_data=pd.read_csv(input_file,usecols=[i for i in range(1,cols)]) 37 | 38 | #转换数据 39 | data=np.float32(csv_data) 40 | 41 | #返回数据 42 | return data 43 | 44 | def load_positive_negtive_data_files(positive_file,negative_file): 45 | ''' 46 | 加载正向数据和负向数据 并添加标签混合 47 | :param positive_file: 正向数据 48 | :param negative_file: 负向数据 49 | :return: 50 | ''' 51 | 52 | positive_examples=load_csv_file(positive_file) 53 | negative_examples=load_csv_file(negative_file) 54 | 55 | #生成标签 56 | positive_labels=[[0,1] for _ in positive_examples] 57 | negative_labels=[[1,0] for _ in negative_examples] 58 | 59 | #分别混合数据与标签 60 | x=np.concatenate([positive_examples,negative_examples],0) 61 | y=np.concatenate([positive_labels,negative_labels],0) 62 | 63 | return (x,y) 64 | 65 | def batch_iter(data, batch_size, num_epochs, shuffle=True): 66 | ''' 67 | 产生数据batch的迭代 68 | ''' 69 | data = np.array(data) 70 | data_size = len(data) 71 | num_batches_per_epoch = int((data_size - 1) / batch_size) + 1 72 | for epoch in range(num_epochs): 73 | if shuffle: 74 | # Shuffle the data at each epoch 75 | shuffle_indices = np.random.permutation(np.arange(data_size)) 76 | shuffled_data = data[shuffle_indices] 77 | else: 78 | shuffled_data = data 79 | for batch_num in range(num_batches_per_epoch): 80 | start_idx = batch_num * batch_size 81 | end_idx = min((batch_num + 1) * batch_size, data_size) 82 | yield shuffled_data[start_idx : end_idx] 83 | 84 | def clean_URL(url_str): 85 | ''' 86 | 清理URL 87 | :param url_str: 处理之前的URL 88 | :return: 处理之后的URL 89 | ''' 90 | #先将字节数组转换为字符 91 | url_str=str(url_str) 92 | 93 | #去掉常见的字符 94 | url_str=re.sub(r"http://","",url_str) 95 | url_str=re.sub(r"https://","",url_str) 96 | url_str=re.sub(r"www\.","",url_str) 97 | url_str=re.sub(r"/$"," ",url_str) 98 | 99 | return url_str 100 | 101 | def load_positive_negative_url_files(positive_url_file,negative_url_file): 102 | ''' 103 | 加载恶意的还有正常的URL并进行清理 混合处理 104 | :param positive_url_file: 105 | :param negative_url_file: 106 | :return: 107 | ''' 108 | 109 | #从CSV文件中读取数据 110 | positive_url_data = pd.read_csv(positive_url_file,usecols=[0]).values 111 | negative_url_data = pd.read_csv(negative_url_file,usecols=[0]).values 112 | 113 | # print(positive_url_data) 114 | # print(negative_url_data) 115 | 116 | #对数据进行简单的清洗 117 | positive_clean_url_data=[clean_URL(url_data[0]) for url_data in positive_url_data] 118 | negative_clean_url_data=[clean_URL(url_data[0]) for url_data in negative_url_data] 119 | 120 | 121 | # print(positive_clean_url_data) 122 | # print(negative_clean_url_data) 123 | 124 | # 将数据进行结合 125 | x_text = positive_clean_url_data + negative_clean_url_data 126 | 127 | # 产生标签 128 | positive_labels = [[0, 1] for _ in positive_clean_url_data] 129 | negative_labels = [[1, 0] for _ in negative_clean_url_data] 130 | y = np.concatenate([positive_labels, negative_labels], 0) 131 | 132 | return [x_text,y] 133 | 134 | 135 | def padding_url(urls,padding_token='0', padding_url_length = None): 136 | ''' 137 | 对URL集合进行对齐处理 138 | :param url:要处理的url集合 139 | :return: 140 | ''' 141 | 142 | #最大长度 143 | max_url_length=padding_url_length if padding_url_length else max([len(url) for url in urls]) 144 | 145 | print("\n********padding url {} *******\n".format(max_url_length)) 146 | 147 | #对齐处理 148 | for i in range(len(urls)): 149 | url=urls[i] 150 | 151 | if len(url)>max_url_length: 152 | urls[i]=url[:max_url_length]#截断 153 | else: 154 | urls[i]=url+padding_token*(max_url_length-len(url))#填充 155 | 156 | return (urls,max_url_length) 157 | 158 | 159 | 160 | def map_code_char(urls,max_sequece_length): 161 | ''' 162 | 对URL字符进行字符级别的映射编码处理 163 | :param urls: url集合 164 | :param max_sequece_length: 最大的字符集合 165 | :return: 每个URL为一个一维向量 166 | ''' 167 | # 字符对应表 168 | characters = string.printable # 返回字符串,所有可打印的 ASCII 字符 169 | token_index = dict(zip(range(1, len(characters) + 1), characters)) # 给字符添加索引 170 | num_characters = max(token_index.keys()) 171 | 172 | print("\n-------------------编码对应表--------------------\n") 173 | print(token_index) 174 | 175 | # 二维矩阵 176 | embed_matrix = np.zeros((len(urls), max_sequece_length)) 177 | 178 | # 对每个url进行处理 179 | for url in urls: 180 | for i, sample in enumerate(url): 181 | for j, character in enumerate(sample): 182 | index = token_index.get(character) 183 | embed_matrix[i, j] = index 184 | return embed_matrix 185 | 186 | def one_hot_char(urls,max_sequence_length): 187 | ''' 188 | 对URL进行字符级别的one-hot处理 189 | 190 | :param urls: url集合 191 | :param embedding_size: 后面嵌入层大小 192 | :return: 每一个URL输出为一个二维矩阵 193 | ''' 194 | 195 | #字符对应表 196 | characters=string.printable#返回字符串,所有可打印的 ASCII 字符 197 | token_index=dict(zip(range(1,len(characters)+1),characters))#给字符添加索引 198 | num_characters=max(token_index.keys()) 199 | 200 | print("\n-------------------编码对应表--------------------\n") 201 | print(token_index) 202 | 203 | embed_matrix = np.zeros((len(urls), max_sequence_length, num_characters)) 204 | 205 | #对每个url进行处理 206 | for url in urls: 207 | for i,sample in enumerate(url): 208 | for j,character in enumerate(sample): 209 | index=token_index.get(character) 210 | embed_matrix[i,j,index]=1. 211 | return (embed_matrix,num_characters) 212 | 213 | def clean_split_url_w2vec(url_str): 214 | # 先将字节数组转换为字符 215 | url_str = str(url_str) 216 | 217 | # 去掉常见的字符 218 | url_str = re.sub(r"http://", "", url_str) 219 | url_str = re.sub(r"https://", "", url_str) 220 | url_str = re.sub(r".html$", "", url_str) 221 | url_str = re.sub(r".htm$", "", url_str) 222 | 223 | #分隔字符 224 | url_str=re.split(r"[/=-?.&]",url_str) 225 | 226 | return url_str 227 | 228 | 229 | 230 | def load_positive_negative_url_files_w2vec(positive_url_file,negative_url_file): 231 | ''' 232 | 加载恶意的还有正常的URL word2vec方案 (对单词进行处理) 233 | :param positive_url_file: 234 | :param negative_url_file: 235 | :return: 236 | ''' 237 | 238 | #从CSV文件中读取数据 239 | positive_url_data = pd.read_csv(positive_url_file,usecols=[0]).values 240 | negative_url_data = pd.read_csv(negative_url_file,usecols=[0]).values 241 | 242 | print(positive_url_data) 243 | print(negative_url_data) 244 | 245 | #对数据进行简单的清洗 246 | positive_clean_split_url_data=[clean_split_url_w2vec(url_data[0]) for url_data in positive_url_data] 247 | negative_clean_split_url_data=[clean_split_url_w2vec(url_data[0]) for url_data in negative_url_data] 248 | 249 | 250 | print(positive_clean_split_url_data) 251 | print(negative_clean_split_url_data) 252 | 253 | # 将数据进行结合 254 | x_text = positive_clean_split_url_data + negative_clean_split_url_data 255 | 256 | # 产生标签 257 | positive_labels = [[0, 1] for _ in positive_clean_split_url_data] 258 | negative_labels = [[1, 0] for _ in negative_clean_split_url_data] 259 | y = np.concatenate([positive_labels, negative_labels], 0) 260 | 261 | return [x_text,y] 262 | 263 | def padding_url_w2vec(urls,padding_token="", padding_url_length = None): 264 | ''' 265 | 对URL集合进行对齐处理 266 | :param url:要处理的url集合 267 | :return: 268 | ''' 269 | 270 | #最大长度 271 | max_url_length=padding_url_length if padding_url_length else max([len(url) for url in urls]) 272 | 273 | print("\n********padding url {} *******\n".format(max_url_length)) 274 | 275 | #对齐处理 276 | padding_urls=[] 277 | for url in urls: 278 | if len(url)>max_url_length: 279 | url=url[:max_url_length] 280 | padding_urls.append(copy.deepcopy(url)) 281 | else: 282 | url.extend([padding_token for _ in range(max_url_length-len(url))]) 283 | padding_urls.append(url) 284 | 285 | 286 | return (padding_urls,max_url_length) 287 | 288 | 289 | if __name__ == '__main__': 290 | 291 | # print(load_csv_file("./data/NegativeFile6.csv")) 292 | # print(load_csv_file("./data/PositiveFile6.csv")) 293 | #print(load_positive_negative_url_files_w2vec("./data/PositiveFile6.csv","./data/NegativeFile6.csv")) 294 | 295 | #load_url_csv_file("./data/normalURL/Arts.csv") 296 | load_positive_negative_url_files_w2vec("./data/positive_urls.csv","./data/negative_urls.csv") 297 | print("he") -------------------------------------------------------------------------------- /page_identify/simpleNN.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | # ____ ____ ______________ 4 | # | | | | | | 5 | # | | | | |_____ _____| 6 | # | |__| | | | 7 | # | __ | | | 8 | # | | | | | | 9 | # | | | | | | 10 | # |____| |____| |____| 11 | # 12 | # fileName:simpleNN 13 | # project: Fish_learning 14 | # author: theo_hui 15 | # e-mail:Theo_hui@163.com 16 | # purpose: 简单的神经网络 17 | # creatData:2019/5/9 18 | 19 | import tensorflow as tf 20 | 21 | class simpleNN(): 22 | ''' 23 | 简单的神经网络 24 | 25 | 输入层 26 | 隐藏层 (1层) 27 | 输出层 28 | ''' 29 | 30 | # 辅助函数 获取变量 使用 get_Variable来获取 31 | def get_weight_variable(self,shape, regularizer=None): 32 | # 获取一个变量 名字为weights 形状由参数指定 33 | weights = tf.get_variable( 34 | "weight", shape=shape, initializer=tf.truncated_normal_initializer(stddev=0.1)) 35 | 36 | # 当给出正则化损失函数时候 37 | # 将当前变量的正则化损失加入到losses集合 38 | 39 | if regularizer != None: 40 | tf.add_to_collection('losses', regularizer(weights)) 41 | 42 | return weights 43 | 44 | #构造函数 45 | def __init__(self,input_node,embedding_node,output_node,l2_reg_lambda): 46 | 47 | # 输入数据与验证的placeholder 48 | self.input_x = tf.placeholder(tf.float32, [None,input_node], name="x-input") 49 | self.input_y = tf.placeholder(tf.float32, [None,output_node], name="y-input") 50 | 51 | # 正则化函数 52 | regularizer = tf.contrib.layers.l2_regularizer(l2_reg_lambda) 53 | 54 | # 输入层到隐藏层 input->embed 55 | with tf.variable_scope('layer1'): 56 | weights = self.get_weight_variable([input_node,embedding_node], regularizer) # 权重 57 | biases = tf.get_variable("biases", [embedding_node], initializer=tf.constant_initializer(0.0)) # 偏置项 58 | 59 | # 向前传播 60 | self.layer1 = tf.nn.relu(tf.matmul(self.input_x, weights) + biases) 61 | 62 | # 隐藏层到输出层 embed->output 63 | with tf.variable_scope('layer2'): 64 | weights = self.get_weight_variable([embedding_node, output_node], regularizer) 65 | biases = tf.get_variable("biases", [output_node], initializer=tf.constant_initializer(0.0)) 66 | self.y = tf.matmul(self.layer1, weights) + biases 67 | self.predictions = tf.argmax(self.y, 1, name="predictions") 68 | 69 | # 损失函数 使用交叉熵 70 | with tf.name_scope("loss"): 71 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.y, labels=tf.arg_max(self.input_y, 1)) 72 | cross_entropy_mean = tf.reduce_mean(cross_entropy) 73 | 74 | # 总损失函数 交叉熵+L2正则化损失函数 75 | self.loss = cross_entropy_mean + tf.add_n(tf.get_collection("losses")) 76 | 77 | #正确率 78 | with tf.name_scope("accuracy"): 79 | correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1)) 80 | self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy") -------------------------------------------------------------------------------- /page_identify/train_LSTMCNN.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | # ____ ____ ______________ 4 | # | | | | | | 5 | # | | | | |_____ _____| 6 | # | |__| | | | 7 | # | __ | | | 8 | # | | | | | | 9 | # | | | | | | 10 | # |____| |____| |____| 11 | # 12 | # fileName:train_LSTMCNN 13 | # project: Fish_learning 14 | # author: theo_hui 15 | # e-mail:Theo_hui@163.com 16 | # purpose: 对LSTM_CNN模型进行训练 17 | # creatData:2019/5/14 18 | 19 | import os 20 | import datetime 21 | import time 22 | import tensorflow as tf 23 | import numpy as np 24 | 25 | from page_identify.LSTM_CNN import LSTM_CNN 26 | from page_identify.CNN_LSTM import CNN_LSTM 27 | from page_identify.TextCNN import TextCNN 28 | from page_identify import data_Processer 29 | 30 | # 参数设置 31 | #================================================================ 32 | 33 | tf.flags.DEFINE_string("positive_url_file","./data/positive_urls.csv","正常URL数据集") 34 | tf.flags.DEFINE_string("negative_url_file","./data/negative_urls.csv","恶意URL数据集") 35 | 36 | #模型超参数 37 | tf.flags.DEFINE_integer("embedding_size",100,"隐藏层的维度") 38 | tf.flags.DEFINE_integer("max_seq_length",100,"输入序列的最大长度") 39 | tf.flags.DEFINE_string("filter_sizes","3,4,5","卷积核(滤波器)的尺寸") 40 | tf.flags.DEFINE_integer("num_filters",32,"卷积核的数目") 41 | tf.flags.DEFINE_float("dropout_keep_prob",0.5,"DropOut层选择概率") 42 | tf.flags.DEFINE_float("l2_reg_lambda",0.0,"l2正则化比例") 43 | #tf.flags.DEFINE_boolean("use_glove",True,"是否使用GloVe模型") 44 | 45 | #训练参数 46 | tf.flags.DEFINE_integer("batch_size",500,"batch 大小") 47 | tf.flags.DEFINE_integer("num_steps", 200, "训练的次数") 48 | tf.flags.DEFINE_integer("evaluate_every", 100, "评价的间隔步数") 49 | tf.flags.DEFINE_integer("checkpoint_every", 100, "保存模型的间隔步数") 50 | tf.flags.DEFINE_integer("num_checkpoints", 5, "保存的checkpoints数") 51 | tf.flags.DEFINE_float("validation_percentage", 0.2, "验证数据集比例") 52 | 53 | # session配置的一些参数 54 | tf.flags.DEFINE_boolean("allow_soft_placement", True, "允许tf自动分配设备") 55 | tf.flags.DEFINE_boolean("log_device_placement", False, "日志记录") 56 | 57 | # 解析参数 58 | #================================================================= 59 | # 解析参数 60 | FLAGS = tf.flags.FLAGS 61 | FLAGS.flag_values_dict() 62 | print("\n*SETED FLAGS AS FOLLOW*\nFLAG_NAME\tFLAG_VALUE\n") 63 | for attr, value in sorted(FLAGS.__flags.items()): 64 | print("{}\t{}".format(attr.upper(), value)) 65 | print("==========================================================================") 66 | 67 | # 输出数据和模型的目录 68 | # ======================================================= 69 | timestamp = str(int(time.time())) 70 | out_dir = os.path.abspath(os.path.join(os.path.curdir, "output/LSTM_CNN/runs",timestamp)) 71 | print("\nWriting to {}\n".format(out_dir)) 72 | if not os.path.exists(out_dir): 73 | os.makedirs(out_dir) 74 | 75 | 76 | # 加载数据 77 | # ======================================================= 78 | 79 | print("\nLoading data...") 80 | x_text, y = data_Processer.load_positive_negative_url_files(FLAGS.positive_url_file,FLAGS.negative_url_file) 81 | print(x_text) 82 | print("total:",len(x_text)) 83 | print("\nloaded!") 84 | 85 | # 数据规范化编码 86 | #========================================================= 87 | 88 | # 对齐 89 | print("\n Loading embedding Layer tensor(padding)....") 90 | x_padding, max_x_length = data_Processer.padding_url(x_text,padding_url_length=FLAGS.max_seq_length) 91 | 92 | # 映射编码 每个字符对应一个数字 93 | print("padding done!") 94 | # x= data_Processer.map_code_char(x_padding,max_x_length) 95 | x,vocab_size=data_Processer.one_hot_char(x_padding,max_x_length) 96 | print("x.shape = {}".format(x.shape)) 97 | print("y.shape = {}".format(y.shape)) 98 | 99 | # 数据处理,最终得到训练数据集 100 | #===================================================== 101 | 102 | # 随机打乱数据 103 | np.random.seed(10) 104 | shuffle_indices = np.random.permutation(np.arange(len(y))) 105 | x_shuffled = x[shuffle_indices] 106 | y_shuffled = y[shuffle_indices] 107 | 108 | # 分隔验证和训练数据集 109 | # TODO: This is very crude, should use cross-validation 110 | dev_sample_index = -1 * int(FLAGS.validation_percentage * float(len(y))) 111 | x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:] 112 | y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:] 113 | print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev))) 114 | 115 | # 训练 116 | # ======================================================= 117 | with tf.Graph().as_default(): 118 | #配置会话 119 | session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) 120 | sess = tf.Session(config=session_conf) 121 | 122 | with sess.as_default(): 123 | lstm_cnn = TextCNN(x_train.shape[1], #输入序列长度 124 | y_train.shape[1], #分类数目 125 | FLAGS.embedding_size,#隐藏层大小 126 | list(map(int, FLAGS.filter_sizes.split(","))),#卷积核尺寸 127 | FLAGS.num_filters,#卷积核数据 128 | FLAGS.l2_reg_lambda)#l2正则化参数 129 | # 定义训练过程 130 | global_step = tf.Variable(0, name="global_step", trainable=False) # 训练次数 131 | optimizer= tf.train.AdamOptimizer(1e-3) # 优化算法 132 | grads_and_vars = optimizer.compute_gradients(lstm_cnn.loss) # 计算相关的梯度 133 | train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) # 运用梯度(gradients) 134 | 135 | # 追踪梯度值和稀疏值 136 | grad_summaries = [] 137 | for g, v in grads_and_vars: 138 | if g is not None: 139 | grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g) 140 | sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g)) 141 | grad_summaries.append(grad_hist_summary) 142 | grad_summaries.append(sparsity_summary) 143 | grad_summaries_merged = tf.summary.merge(grad_summaries) 144 | 145 | # 输出的路径 146 | print("Writing to {}\n".format(out_dir)) 147 | 148 | # 正确率与损失率 149 | loss_summary = tf.summary.scalar("loss", lstm_cnn.loss) 150 | acc_summary = tf.summary.scalar("accuracy", lstm_cnn.accuracy) 151 | 152 | # 训练总结 153 | train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged]) 154 | train_summary_dir = os.path.join(out_dir, "summaries", "train") 155 | train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph) 156 | 157 | # 验证总结 158 | dev_summary_op = tf.summary.merge([loss_summary, acc_summary]) 159 | dev_summary_dir = os.path.join(out_dir, "summaries", "dev") 160 | dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph) 161 | 162 | # 存储检查点 163 | checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints")) 164 | checkpoint_prefix = os.path.join(checkpoint_dir, "model") 165 | if not os.path.exists(checkpoint_dir): 166 | os.makedirs(checkpoint_dir) 167 | saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints) 168 | 169 | # 初始化所有变量 170 | sess.run(tf.global_variables_initializer()) 171 | 172 | 173 | # 训练的一个步骤 174 | def train_step(x_batch, y_batch): 175 | """ 176 | A single training step 177 | """ 178 | feed_dict = { 179 | lstm_cnn.input_x: x_batch, 180 | lstm_cnn.input_y: y_batch, 181 | lstm_cnn.dropout_keep_prob: FLAGS.dropout_keep_prob 182 | } 183 | _, step, summaries, loss, accuracy = sess.run( 184 | [train_op, global_step, train_summary_op, lstm_cnn.loss, lstm_cnn.accuracy], 185 | feed_dict) 186 | time_str = datetime.datetime.now().isoformat() 187 | print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy)) 188 | train_summary_writer.add_summary(summaries, step) 189 | 190 | 191 | # 验证的一个步骤 192 | def dev_step(x_batch, y_batch, writer=None): 193 | """ 194 | Evaluates model on a dev set 195 | """ 196 | feed_dict = { 197 | lstm_cnn.input_x: x_batch, 198 | lstm_cnn.input_y: y_batch, 199 | lstm_cnn.dropout_keep_prob: 1.0 200 | } 201 | step, summaries, loss, accuracy = sess.run( 202 | [global_step, dev_summary_op, lstm_cnn.loss, lstm_cnn.accuracy], 203 | feed_dict) 204 | time_str = datetime.datetime.now().isoformat() 205 | print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy)) 206 | if writer: 207 | writer.add_summary(summaries, step) 208 | 209 | 210 | # 产生batch 211 | batches = data_Processer.batch_iter( 212 | list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_steps) 213 | 214 | # 循环训练 215 | for batch in batches: 216 | x_batch, y_batch = zip(*batch) 217 | train_step(x_batch, y_batch) 218 | current_step = tf.train.global_step(sess, global_step) 219 | if current_step % FLAGS.evaluate_every == 0: 220 | print("\nEvaluation:") 221 | dev_step(x_dev, y_dev, writer=dev_summary_writer) 222 | print("") 223 | if current_step % FLAGS.checkpoint_every == 0: 224 | path = saver.save(sess, checkpoint_prefix, global_step=current_step) 225 | print("Saved model checkpoint to {}\n".format(path)) -------------------------------------------------------------------------------- /page_identify/train_simpleNN.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | # ____ ____ ______________ 4 | # | | | | | | 5 | # | | | | |_____ _____| 6 | # | |__| | | | 7 | # | __ | | | 8 | # | | | | | | 9 | # | | | | | | 10 | # |____| |____| |____| 11 | # 12 | # fileName:train_simpleNN 13 | # project: Fish_learning 14 | # author: theo_hui 15 | # e-mail:Theo_hui@163.com 16 | # purpose: 对simpleNN模型进行训练 17 | # creatData:2019/5/9 18 | 19 | import os 20 | import datetime 21 | import time 22 | import tensorflow as tf 23 | import numpy as np 24 | 25 | from page_identify import simpleNN 26 | from page_identify import data_Processer 27 | 28 | 29 | # 参数设置 30 | #============================================================ 31 | 32 | # 输入数据的一些参数 33 | tf.flags.DEFINE_float("validation_percentage",0.1,"所有的训练数据用来验证的比例") 34 | tf.flags.DEFINE_string("positive_data_file", "./data/PositiveFile6.csv", "正样本数据") 35 | tf.flags.DEFINE_string("negative_data_file", "./data/NegativeFile6.csv", "负样本数据") 36 | 37 | #模型中的参数 38 | tf.flags.DEFINE_integer("input_node", 6, "输入的维度") #8个特征 39 | tf.flags.DEFINE_integer("embedding_node", 200, "隐藏层的维度") 40 | tf.flags.DEFINE_integer("output_node", 2, "输出层的维度") #2分类 41 | tf.flags.DEFINE_float("l2_reg_lambda", 0.0001, "L2 正则化比例") 42 | 43 | # 训练的一些参数 44 | tf.flags.DEFINE_float("learning_rate_base", 0.2, "基础学习率") 45 | tf.flags.DEFINE_float("learning_rate_decay", 0.999, "学习率衰减率") 46 | tf.flags.DEFINE_float("moving_average_decay", 0.99, "滑动平均衰减率") 47 | tf.flags.DEFINE_integer("batch_size", 500, "Batch大小") 48 | tf.flags.DEFINE_integer("num_examples", 4800, "输入数据的数目") 49 | tf.flags.DEFINE_integer("num_steps", 200, "训练的次数") 50 | tf.flags.DEFINE_integer("evaluate_every", 100, "评价的间隔步数") 51 | tf.flags.DEFINE_integer("checkpoint_every", 100, "保存模型的间隔步数") 52 | tf.flags.DEFINE_integer("num_checkpoints", 5, "保存的checkpoints数") 53 | 54 | # session配置的一些参数 55 | tf.flags.DEFINE_boolean("allow_soft_placement", True, "允许tf自动分配设备") 56 | tf.flags.DEFINE_boolean("log_device_placement", False, "日志记录") 57 | 58 | # 解析参数 59 | FLAGS = tf.flags.FLAGS 60 | FLAGS.flag_values_dict() 61 | print("\n*SETED FLAGS AS FOLLOW*\nFLAG_NAME\tFLAG_VALUE\n===========================================================") 62 | for attr, value in sorted(FLAGS.__flags.items()): 63 | print("{}\t{}".format(attr.upper(), value)) 64 | print("==========================================================================") 65 | 66 | 67 | 68 | # 设置输出的目录 69 | #===================================================== 70 | timestamp = str(int(time.time())) 71 | out_dir = os.path.abspath(os.path.join(os.path.curdir, "output",timestamp)) 72 | print("\n\nWriting to {}\n".format(out_dir)) 73 | if not os.path.exists(out_dir): 74 | os.makedirs(out_dir) 75 | 76 | 77 | # 加载数据 78 | #===================================================== 79 | print("\nLoading data...") 80 | x, y_ = data_Processer.load_positive_negtive_data_files(FLAGS.positive_data_file,FLAGS.negative_data_file) 81 | print(x,y_) 82 | print("\nloaded!") 83 | 84 | # 随机打乱数据 85 | #====================================================== 86 | np.random.seed(10) 87 | shuffle_indices = np.random.permutation(np.arange(len(y_))) 88 | x_shuffled = x[shuffle_indices] 89 | y_shuffled = y_[shuffle_indices] 90 | 91 | # 分隔验证和训练数据集 92 | # TODO: This is very crude, should use cross-validation 93 | dev_sample_index = -1 * int(FLAGS.validation_percentage * float(len(y_))) 94 | x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:] 95 | y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:] 96 | print("\nTrain/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev))) 97 | 98 | 99 | 100 | # 开始训练 101 | #========================================================= 102 | with tf.Graph().as_default(): 103 | 104 | #参数配置 105 | session_conf = tf.ConfigProto( 106 | allow_soft_placement = FLAGS.allow_soft_placement,#如果指定的设备不存在 是否允许tf自动分配 107 | log_device_placement = FLAGS.log_device_placement) #是否打印设备分配日志 108 | 109 | #创建会话 110 | sess = tf.Session(config = session_conf) 111 | with sess.as_default(): 112 | 113 | #建立神经网络 114 | simple_nn=simpleNN.simpleNN(FLAGS.input_node,FLAGS.embedding_node,FLAGS.output_node,FLAGS.l2_reg_lambda) 115 | 116 | #定义训练过程 117 | global_step = tf.Variable(0, name="global_step", trainable=False)#训练次数 118 | # 滑动平均 119 | variable_averages = tf.train.ExponentialMovingAverage(FLAGS.moving_average_decay, global_step) # 初始滑动平均类 120 | variable_averages_op = variable_averages.apply(tf.trainable_variables()) # 所有变量使用滑动平均类 121 | 122 | # 指数衰减的学习率 123 | learning_rate = tf.train.exponential_decay(FLAGS.learning_rate_base,global_step,FLAGS.num_examples /FLAGS.batch_size,FLAGS.learning_rate_decay) 124 | # 使用优化算法来优化损失函数 125 | train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(simple_nn.loss, global_step=global_step) 126 | 127 | #使用滑动平均来应用于训练步骤 128 | with tf.control_dependencies([train_step, variable_averages_op]): 129 | train_op = tf.no_op(name='train') 130 | 131 | #正确率与损失率 132 | loss_summary = tf.summary.scalar("loss", simple_nn.loss) 133 | acc_summary = tf.summary.scalar("accuracy", simple_nn.accuracy) 134 | 135 | #训练总结 136 | train_summary_op = tf.summary.merge([loss_summary, acc_summary]) 137 | train_summary_dir = os.path.join(out_dir, "summaries", "train") 138 | train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph) 139 | 140 | #验证总结 141 | dev_summary_op = tf.summary.merge([loss_summary, acc_summary]) 142 | dev_summary_dir = os.path.join(out_dir, "summaries", "dev") 143 | dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph) 144 | 145 | #存储检查点 146 | checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints")) 147 | checkpoint_prefix = os.path.join(checkpoint_dir, "model") 148 | if not os.path.exists(checkpoint_dir): 149 | os.makedirs(checkpoint_dir) 150 | saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints) 151 | 152 | #初始化所有变量 153 | sess.run(tf.global_variables_initializer()) 154 | 155 | #训练的一个步骤 156 | def train_step(x_batch, y_batch): 157 | feed_dict = { 158 | simple_nn.input_x: x_batch, 159 | simple_nn.input_y: y_batch, 160 | } 161 | _, step, summaries, loss, accuracy = sess.run( 162 | [train_op, global_step, train_summary_op, simple_nn.loss, simple_nn.accuracy], 163 | feed_dict) 164 | time_str = datetime.datetime.now().isoformat() 165 | print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy)) 166 | train_summary_writer.add_summary(summaries, step) 167 | 168 | #验证的一个步骤 169 | def dev_step(x_batch, y_batch, writer=None): 170 | """ 171 | Evaluates model on a dev set 172 | """ 173 | feed_dict = { 174 | simple_nn.input_x: x_batch, 175 | simple_nn.input_y: y_batch, 176 | } 177 | step, summaries, loss, accuracy = sess.run( 178 | [global_step, dev_summary_op, simple_nn.loss, simple_nn.accuracy], 179 | feed_dict) 180 | time_str = datetime.datetime.now().isoformat() 181 | print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy)) 182 | if writer: 183 | writer.add_summary(summaries, step) 184 | 185 | #产生batch 186 | batches = data_Processer.batch_iter( 187 | list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_steps) 188 | 189 | #循环训练 190 | for batch in batches: 191 | x_batch, y_batch = zip(*batch) 192 | train_step(x_batch, y_batch) 193 | current_step = tf.train.global_step(sess, global_step) 194 | if current_step % FLAGS.evaluate_every == 0: 195 | print("\nEvaluation:") 196 | dev_step(x_dev, y_dev, writer=dev_summary_writer) 197 | print("") 198 | if current_step % FLAGS.checkpoint_every == 0: 199 | path = saver.save(sess, checkpoint_prefix, global_step=current_step) 200 | print("Saved model checkpoint to {}\n".format(path)) 201 | -------------------------------------------------------------------------------- /page_identify/train_textCNN.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | # ____ ____ ______________ 4 | # | | | | | | 5 | # | | | | |_____ _____| 6 | # | |__| | | | 7 | # | __ | | | 8 | # | | | | | | 9 | # | | | | | | 10 | # |____| |____| |____| 11 | # 12 | # fileName:train_LSTMCNN 13 | # project: Fish_learning 14 | # author: theo_hui 15 | # e-mail:Theo_hui@163.com 16 | # purpose: 对TextCNN模型进行训练 直接对URL进行训练 17 | # creatData:2019/5/14 18 | 19 | import os 20 | import datetime 21 | import time 22 | import tensorflow as tf 23 | import numpy as np 24 | 25 | from page_identify.TextCNN import TextCNN 26 | from page_identify import data_Processer 27 | 28 | # 参数设置 29 | #================================================================ 30 | 31 | tf.flags.DEFINE_string("positive_url_file","./data/positive_urls.csv","正常URL数据集") 32 | tf.flags.DEFINE_string("negative_url_file","./data/negative_urls.csv","恶意URL数据集") 33 | 34 | #模型超参数 35 | tf.flags.DEFINE_integer("embedding_size",100,"隐藏层的维度") 36 | tf.flags.DEFINE_integer("max_seq_length",100,"输入序列的最大长度") 37 | tf.flags.DEFINE_string("filter_sizes","3,4,5","卷积核(滤波器)的尺寸") 38 | tf.flags.DEFINE_integer("num_filters",32,"卷积核的数目") 39 | tf.flags.DEFINE_float("dropout_keep_prob",0.5,"DropOut层选择概率") 40 | tf.flags.DEFINE_float("l2_reg_lambda",0.0,"l2正则化比例") 41 | #tf.flags.DEFINE_boolean("use_glove",True,"是否使用GloVe模型") 42 | 43 | #训练参数 44 | tf.flags.DEFINE_integer("batch_size",500,"batch 大小") 45 | tf.flags.DEFINE_integer("num_steps", 200, "训练的次数") 46 | tf.flags.DEFINE_integer("evaluate_every", 100, "评价的间隔步数") 47 | tf.flags.DEFINE_integer("checkpoint_every", 100, "保存模型的间隔步数") 48 | tf.flags.DEFINE_integer("num_checkpoints", 5, "保存的checkpoints数") 49 | tf.flags.DEFINE_float("validation_percentage", 0.2, "验证数据集比例") 50 | 51 | # session配置的一些参数 52 | tf.flags.DEFINE_boolean("allow_soft_placement", True, "允许tf自动分配设备") 53 | tf.flags.DEFINE_boolean("log_device_placement", False, "日志记录") 54 | 55 | # 解析参数 56 | #================================================================= 57 | # 解析参数 58 | FLAGS = tf.flags.FLAGS 59 | FLAGS.flag_values_dict() 60 | print("\n*SETED FLAGS AS FOLLOW*\nFLAG_NAME\tFLAG_VALUE\n") 61 | for attr, value in sorted(FLAGS.__flags.items()): 62 | print("{}\t{}".format(attr.upper(), value)) 63 | print("==========================================================================") 64 | 65 | # 输出数据和模型的目录 66 | # ======================================================= 67 | timestamp = str(int(time.time())) 68 | out_dir = os.path.abspath(os.path.join(os.path.curdir, "output/textCNN/runs",timestamp)) 69 | print("\nWriting to {}\n".format(out_dir)) 70 | if not os.path.exists(out_dir): 71 | os.makedirs(out_dir) 72 | 73 | 74 | # 加载数据 75 | # ======================================================= 76 | 77 | print("\nLoading data...") 78 | x_text, y = data_Processer.load_positive_negative_url_files(FLAGS.positive_url_file,FLAGS.negative_url_file) 79 | print(x_text) 80 | print("total:",len(x_text)) 81 | print("\nloaded!") 82 | 83 | # 数据规范化编码 84 | #========================================================= 85 | 86 | # 对齐 87 | print("\n Loading embedding Layer tensor(padding)....") 88 | x_padding, max_x_length = data_Processer.padding_url(x_text,padding_url_length=FLAGS.max_seq_length) 89 | 90 | # 映射编码 每个字符对应一个数字 91 | print("padding done!") 92 | # x= data_Processer.map_code_char(x_padding,max_x_length) 93 | x,vocab_size=data_Processer.one_hot_char(x_padding,max_x_length) 94 | print("x.shape = {}".format(x.shape)) 95 | print("y.shape = {}".format(y.shape)) 96 | 97 | # 数据处理,最终得到训练数据集 98 | #===================================================== 99 | 100 | # 随机打乱数据 101 | np.random.seed(10) 102 | shuffle_indices = np.random.permutation(np.arange(len(y))) 103 | x_shuffled = x[shuffle_indices] 104 | y_shuffled = y[shuffle_indices] 105 | 106 | # 分隔验证和训练数据集 107 | # TODO: This is very crude, should use cross-validation 108 | dev_sample_index = -1 * int(FLAGS.validation_percentage * float(len(y))) 109 | x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:] 110 | y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:] 111 | print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev))) 112 | 113 | # 训练 114 | # ======================================================= 115 | with tf.Graph().as_default(): 116 | #配置会话 117 | session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) 118 | sess = tf.Session(config=session_conf) 119 | 120 | with sess.as_default(): 121 | text_cnn = TextCNN(x_train.shape[1], #输入序列长度 122 | y_train.shape[1], #分类数目 123 | FLAGS.embedding_size,#隐藏层大小 124 | list(map(int, FLAGS.filter_sizes.split(","))),#卷积核尺寸 125 | FLAGS.num_filters,#卷积核数据 126 | FLAGS.l2_reg_lambda)#l2正则化参数 127 | # 定义训练过程 128 | global_step = tf.Variable(0, name="global_step", trainable=False) # 训练次数 129 | optimizer= tf.train.AdamOptimizer(1e-3) # 优化算法 130 | grads_and_vars = optimizer.compute_gradients(text_cnn.loss) # 计算相关的梯度 131 | train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) # 运用梯度(gradients) 132 | 133 | # 追踪梯度值和稀疏值 134 | grad_summaries = [] 135 | for g, v in grads_and_vars: 136 | if g is not None: 137 | grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g) 138 | sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g)) 139 | grad_summaries.append(grad_hist_summary) 140 | grad_summaries.append(sparsity_summary) 141 | grad_summaries_merged = tf.summary.merge(grad_summaries) 142 | 143 | # 输出的路径 144 | print("Writing to {}\n".format(out_dir)) 145 | 146 | # 正确率与损失率 147 | loss_summary = tf.summary.scalar("loss", text_cnn.loss) 148 | acc_summary = tf.summary.scalar("accuracy", text_cnn.accuracy) 149 | 150 | # 训练总结 151 | train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged]) 152 | train_summary_dir = os.path.join(out_dir, "summaries", "train") 153 | train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph) 154 | 155 | # 验证总结 156 | dev_summary_op = tf.summary.merge([loss_summary, acc_summary]) 157 | dev_summary_dir = os.path.join(out_dir, "summaries", "dev") 158 | dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph) 159 | 160 | # 存储检查点 161 | checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints")) 162 | checkpoint_prefix = os.path.join(checkpoint_dir, "model") 163 | if not os.path.exists(checkpoint_dir): 164 | os.makedirs(checkpoint_dir) 165 | saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints) 166 | 167 | # 初始化所有变量 168 | sess.run(tf.global_variables_initializer()) 169 | 170 | 171 | # 训练的一个步骤 172 | def train_step(x_batch, y_batch): 173 | """ 174 | A single training step 175 | """ 176 | feed_dict = { 177 | text_cnn.input_x: x_batch, 178 | text_cnn.input_y: y_batch, 179 | text_cnn.dropout_keep_prob: FLAGS.dropout_keep_prob 180 | } 181 | _, step, summaries, loss, accuracy = sess.run( 182 | [train_op, global_step, train_summary_op, text_cnn.loss, text_cnn.accuracy], 183 | feed_dict) 184 | time_str = datetime.datetime.now().isoformat() 185 | print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy)) 186 | train_summary_writer.add_summary(summaries, step) 187 | 188 | 189 | # 验证的一个步骤 190 | def dev_step(x_batch, y_batch, writer=None): 191 | """ 192 | Evaluates model on a dev set 193 | """ 194 | feed_dict = { 195 | text_cnn.input_x: x_batch, 196 | text_cnn.input_y: y_batch, 197 | text_cnn.dropout_keep_prob: 1.0 198 | } 199 | step, summaries, loss, accuracy = sess.run( 200 | [global_step, dev_summary_op, text_cnn.loss, text_cnn.accuracy], 201 | feed_dict) 202 | time_str = datetime.datetime.now().isoformat() 203 | print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy)) 204 | if writer: 205 | writer.add_summary(summaries, step) 206 | 207 | 208 | # 产生batch 209 | batches = data_Processer.batch_iter( 210 | list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_steps) 211 | 212 | # 循环训练 213 | for batch in batches: 214 | x_batch, y_batch = zip(*batch) 215 | train_step(x_batch, y_batch) 216 | current_step = tf.train.global_step(sess, global_step) 217 | if current_step % FLAGS.evaluate_every == 0: 218 | print("\nEvaluation:") 219 | dev_step(x_dev, y_dev, writer=dev_summary_writer) 220 | print("") 221 | if current_step % FLAGS.checkpoint_every == 0: 222 | path = saver.save(sess, checkpoint_prefix, global_step=current_step) 223 | print("Saved model checkpoint to {}\n".format(path)) -------------------------------------------------------------------------------- /page_identify/train_textCNN_w2vec.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | # ____ ____ ______________ 4 | # | | | | | | 5 | # | | | | |_____ _____| 6 | # | |__| | | | 7 | # | __ | | | 8 | # | | | | | | 9 | # | | | | | | 10 | # |____| |____| |____| 11 | # 12 | # fileName:train_textCNN_w2vec 13 | # project: Fish_learning 14 | # author: theo_hui 15 | # e-mail:Theo_hui@163.com 16 | # purpose: {文件作用描述} 17 | # creatData:2019/5/16 18 | 19 | 20 | import os 21 | import datetime 22 | import time 23 | import tensorflow as tf 24 | import numpy as np 25 | 26 | from page_identify.TextCNN import TextCNN 27 | from page_identify import data_Processer, word2vec_tool 28 | 29 | # 参数设置 30 | #================================================================ 31 | 32 | tf.flags.DEFINE_string("positive_url_file","./data/positive_urls.csv","正常URL数据集") 33 | tf.flags.DEFINE_string("negative_url_file","./data/negative_urls.csv","恶意URL数据集") 34 | 35 | #模型超参数 36 | tf.flags.DEFINE_integer("embedding_size",10,"隐藏层的维度") 37 | tf.flags.DEFINE_integer("max_seq_length",10,"输入序列的最大长度") 38 | tf.flags.DEFINE_string("filter_sizes","3,4,5","卷积核(滤波器)的尺寸") 39 | tf.flags.DEFINE_integer("num_filters",32,"卷积核的数目") 40 | tf.flags.DEFINE_float("dropout_keep_prob",0.5,"DropOut层选择概率") 41 | tf.flags.DEFINE_float("l2_reg_lambda",0.0,"l2正则化比例") 42 | #tf.flags.DEFINE_boolean("use_glove",True,"是否使用GloVe模型") 43 | 44 | #训练参数 45 | tf.flags.DEFINE_integer("batch_size",500,"batch 大小") 46 | tf.flags.DEFINE_integer("num_steps", 200, "训练的次数") 47 | tf.flags.DEFINE_integer("evaluate_every", 100, "评价的间隔步数") 48 | tf.flags.DEFINE_integer("checkpoint_every", 100, "保存模型的间隔步数") 49 | tf.flags.DEFINE_integer("num_checkpoints", 5, "保存的checkpoints数") 50 | tf.flags.DEFINE_float("validation_percentage", 0.2, "验证数据集比例") 51 | 52 | # session配置的一些参数 53 | tf.flags.DEFINE_boolean("allow_soft_placement", True, "允许tf自动分配设备") 54 | tf.flags.DEFINE_boolean("log_device_placement", False, "日志记录") 55 | 56 | # 解析参数 57 | #================================================================= 58 | # 解析参数 59 | FLAGS = tf.flags.FLAGS 60 | FLAGS.flag_values_dict() 61 | print("\n*SETED FLAGS AS FOLLOW*\nFLAG_NAME\tFLAG_VALUE\n") 62 | for attr, value in sorted(FLAGS.__flags.items()): 63 | print("{}\t{}".format(attr.upper(), value)) 64 | print("==========================================================================") 65 | 66 | # 输出数据和模型的目录 67 | # ======================================================= 68 | timestamp = str(int(time.time())) 69 | out_dir = os.path.abspath(os.path.join(os.path.curdir, "output/textCNN/runs",timestamp)) 70 | wVec_dir = os.path.abspath(os.path.join(os.path.curdir,"output/wordVec")) 71 | print("\nWriting to {}\n".format(out_dir)) 72 | if not os.path.exists(out_dir): 73 | os.makedirs(out_dir) 74 | if not os.path.exists(wVec_dir): 75 | os.makedirs(wVec_dir) 76 | 77 | # 加载数据 78 | # ======================================================= 79 | 80 | print("\nLoading data...") 81 | x_text, y = data_Processer.load_positive_negative_url_files_w2vec(FLAGS.positive_url_file,FLAGS.negative_url_file) 82 | print(x_text) 83 | print("total:",len(x_text)) 84 | print("\nloaded!") 85 | 86 | # 数据规范化编码 87 | #========================================================= 88 | 89 | # 对齐 90 | print("\n Loading embedding Layer tensor(padding)....") 91 | x_padding, max_x_length = data_Processer.padding_url_w2vec(x_text,padding_url_length=FLAGS.max_seq_length) 92 | 93 | # 映射编码 每个字符对应一个数字 94 | print("padding done!") 95 | 96 | 97 | print(x_padding[0]) 98 | x = np.array(word2vec_tool.embedding_sentences(x_padding, embedding_size = FLAGS.embedding_size, file_to_load = os.path.join(wVec_dir, 'trained_word2vec.model'))) 99 | 100 | #print(x) 101 | print("x.shape = {}".format(x.shape)) 102 | print("y.shape = {}".format(y.shape)) 103 | 104 | 105 | # 数据处理,最终得到训练数据集 106 | #===================================================== 107 | 108 | # 随机打乱数据 109 | np.random.seed(10) 110 | shuffle_indices = np.random.permutation(np.arange(len(y))) 111 | x_shuffled = x[shuffle_indices] 112 | y_shuffled = y[shuffle_indices] 113 | 114 | # 分隔验证和训练数据集 115 | # TODO: This is very crude, should use cross-validation 116 | dev_sample_index = -1 * int(FLAGS.validation_percentage * float(len(y))) 117 | x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:] 118 | y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:] 119 | print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev))) 120 | 121 | # 训练 122 | # ======================================================= 123 | with tf.Graph().as_default(): 124 | #配置会话 125 | session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) 126 | sess = tf.Session(config=session_conf) 127 | 128 | with sess.as_default(): 129 | text_cnn = TextCNN(x_train.shape[1], #输入序列长度 130 | y_train.shape[1], #分类数目 131 | FLAGS.embedding_size,#隐藏层大小 132 | list(map(int, FLAGS.filter_sizes.split(","))),#卷积核尺寸 133 | FLAGS.num_filters,#卷积核数据 134 | FLAGS.l2_reg_lambda)#l2正则化参数 135 | # 定义训练过程 136 | global_step = tf.Variable(0, name="global_step", trainable=False) # 训练次数 137 | optimizer= tf.train.AdamOptimizer(1e-3) # 优化算法 138 | grads_and_vars = optimizer.compute_gradients(text_cnn.loss) # 计算相关的梯度 139 | train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) # 运用梯度(gradients) 140 | 141 | # 追踪梯度值和稀疏值 142 | grad_summaries = [] 143 | for g, v in grads_and_vars: 144 | if g is not None: 145 | grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g) 146 | sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g)) 147 | grad_summaries.append(grad_hist_summary) 148 | grad_summaries.append(sparsity_summary) 149 | grad_summaries_merged = tf.summary.merge(grad_summaries) 150 | 151 | # 输出的路径 152 | print("Writing to {}\n".format(out_dir)) 153 | 154 | # 正确率与损失率 155 | loss_summary = tf.summary.scalar("loss", text_cnn.loss) 156 | acc_summary = tf.summary.scalar("accuracy", text_cnn.accuracy) 157 | 158 | # 训练总结 159 | train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged]) 160 | train_summary_dir = os.path.join(out_dir, "summaries", "train") 161 | train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph) 162 | 163 | # 验证总结 164 | dev_summary_op = tf.summary.merge([loss_summary, acc_summary]) 165 | dev_summary_dir = os.path.join(out_dir, "summaries", "dev") 166 | dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph) 167 | 168 | # 存储检查点 169 | checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints")) 170 | checkpoint_prefix = os.path.join(checkpoint_dir, "model") 171 | if not os.path.exists(checkpoint_dir): 172 | os.makedirs(checkpoint_dir) 173 | saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints) 174 | 175 | # 初始化所有变量 176 | sess.run(tf.global_variables_initializer()) 177 | 178 | 179 | # 训练的一个步骤 180 | def train_step(x_batch, y_batch): 181 | """ 182 | A single training step 183 | """ 184 | feed_dict = { 185 | text_cnn.input_x: x_batch, 186 | text_cnn.input_y: y_batch, 187 | text_cnn.dropout_keep_prob: FLAGS.dropout_keep_prob 188 | } 189 | _, step, summaries, loss, accuracy = sess.run( 190 | [train_op, global_step, train_summary_op, text_cnn.loss, text_cnn.accuracy], 191 | feed_dict) 192 | time_str = datetime.datetime.now().isoformat() 193 | print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy)) 194 | train_summary_writer.add_summary(summaries, step) 195 | 196 | 197 | # 验证的一个步骤 198 | def dev_step(x_batch, y_batch, writer=None): 199 | """ 200 | Evaluates model on a dev set 201 | """ 202 | feed_dict = { 203 | text_cnn.input_x: x_batch, 204 | text_cnn.input_y: y_batch, 205 | text_cnn.dropout_keep_prob: 1.0 206 | } 207 | step, summaries, loss, accuracy = sess.run( 208 | [global_step, dev_summary_op, text_cnn.loss, text_cnn.accuracy], 209 | feed_dict) 210 | time_str = datetime.datetime.now().isoformat() 211 | print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy)) 212 | if writer: 213 | writer.add_summary(summaries, step) 214 | 215 | 216 | # 产生batch 217 | batches = data_Processer.batch_iter( 218 | list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_steps) 219 | 220 | # 循环训练 221 | for batch in batches: 222 | x_batch, y_batch = zip(*batch) 223 | train_step(x_batch, y_batch) 224 | current_step = tf.train.global_step(sess, global_step) 225 | if current_step % FLAGS.evaluate_every == 0: 226 | print("\nEvaluation:") 227 | dev_step(x_dev, y_dev, writer=dev_summary_writer) 228 | print("") 229 | if current_step % FLAGS.checkpoint_every == 0: 230 | path = saver.save(sess, checkpoint_prefix, global_step=current_step) 231 | print("Saved model checkpoint to {}\n".format(path)) -------------------------------------------------------------------------------- /page_identify/word2vec_tool.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | # ____ ____ ______________ 4 | # | | | | | | 5 | # | | | | |_____ _____| 6 | # | |__| | | | 7 | # | __ | | | 8 | # | | | | | | 9 | # | | | | | | 10 | # |____| |____| |____| 11 | # 12 | # fileName:word2vec_tool 13 | # project: Fish_learning 14 | # author: theo_hui 15 | # e-mail:Theo_hui@163.com 16 | # purpose: {文件作用描述} 17 | # creatData:2019/5/16 18 | import multiprocessing 19 | import numpy as np 20 | 21 | from gensim.models import Word2Vec 22 | 23 | 24 | def embedding_sentences(sentences, embedding_size=128, window=5, min_count=5, file_to_load=None, file_to_save=None): 25 | if file_to_load is not None: 26 | w2vModel = Word2Vec.load(file_to_load) 27 | else: 28 | print("w2vec") 29 | w2vModel = Word2Vec(sentences, size=embedding_size, window=window, min_count=min_count, 30 | workers=multiprocessing.cpu_count()) 31 | 32 | 33 | if file_to_save is not None: 34 | w2vModel.save(file_to_save) 35 | all_vectors = [] 36 | embeddingDim = w2vModel.vector_size 37 | embeddingUnknown = [0 for i in range(embeddingDim)] 38 | 39 | for sentence in sentences: 40 | print("processing "+str(sentence)) 41 | 42 | this_vector = [] 43 | for word in sentence: 44 | if word in w2vModel.wv.vocab: 45 | this_vector.append(w2vModel[word]) 46 | else: 47 | this_vector.append(embeddingUnknown) 48 | 49 | if np.array(this_vector).shape != (10,10): 50 | print(np.array(this_vector).shape) 51 | all_vectors.append(this_vector) 52 | 53 | return all_vectors --------------------------------------------------------------------------------