]*class\s*=\s*["']?(?:posted|plugin-\w+)['"]?[^>]*>/ 110 | ) 111 | list.forEach(function(block) { 112 | if (!block) { 113 | return 114 | } 115 | block = block.trim() 116 | if (has_only_tags(block)) { 117 | return 118 | } 119 | if (body.length > 0) { 120 | continuous /= self.continuous_factor 121 | } 122 | 123 | // リンク除外&リンクリスト判定 124 | var notlinked = eliminate_link(block) 125 | if (notlinked.length < self.min_length) { 126 | return 127 | } 128 | 129 | // スコア算出 130 | var c = 131 | (notlinked.length + 132 | str_scan(notlinked, self.punctuations).length * 133 | self.punctuation_weight) * 134 | factor 135 | factor *= self.decay_factor 136 | var not_body_rate = 137 | str_scan(block, self.waste_expressions).length + 138 | str_scan(block, /amazon[a-z0-9\.\/\-\?&]+-22/i).length / 2.0 139 | if (not_body_rate > 0) { 140 | c *= Math.pow(0.72, not_body_rate) 141 | } 142 | var c1 = c * continuous 143 | 144 | if (self.debug) { 145 | console.log(c, '*', continuous, '=', c1, notlinked.length) 146 | } 147 | 148 | // ブロック抽出&スコア加算 149 | if (c1 > self.threshold) { 150 | body += block.trim() + '\n' 151 | score += c1 152 | continuous = self.continuous_factor 153 | } else if (c > self.threshold) { 154 | // continuous block end 155 | bodylist.push([body, score]) 156 | body = block.trim() + '\n' 157 | score = c 158 | continuous = self.continuous_factor 159 | } 160 | }) 161 | bodylist.push([body, score]) 162 | body = bodylist.reduce( 163 | function(a, b) { 164 | if (a[1] >= b[1]) { 165 | return a 166 | } else { 167 | return b 168 | } 169 | }, 170 | ['', 0] 171 | ) 172 | self.mainText = strip_tags(body[0], self.dom_separator) 173 | return self.mainText 174 | }) 175 | return promise 176 | } 177 | 178 | BodyExtractor.prototype.__defineGetter__('title', function() { 179 | var m = this.html.match(/