├── .gitmodules ├── node.js ├── lib ├── twitter-text-node.js └── twitter-text.js ├── LICENSE ├── Rakefile ├── README.textile └── test ├── test.html.erb └── test.html /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "test/twitter-text-conformance"] 2 | path = test/twitter-text-conformance 3 | url = http://github.com/mzsanford/twitter-text-conformance.git 4 | -------------------------------------------------------------------------------- /node.js: -------------------------------------------------------------------------------- 1 | // just to test out the twitter module 2 | sys = require('sys'); 3 | twitter = require('./lib/twitter-text-node'); 4 | 5 | sys.puts(twitter.auto_link("@hello #there http://google.com")); -------------------------------------------------------------------------------- /lib/twitter-text-node.js: -------------------------------------------------------------------------------- 1 | var Script = process.binding('evals').Script; 2 | read = require('fs').readFileSync 3 | sys = require('sys') 4 | 5 | Script.runInThisContext(read('lib/twitter-text.js')); 6 | 7 | // exported functions 8 | [ 9 | 'auto_link_urls_custom', 10 | 'auto_link', 11 | 'auto_link_usernames_or_lists', 12 | 'auto_link_hashtags', 13 | 'extract_mentioned_screen_names', 14 | 'extract_reply_screen_name', 15 | 'extract_urls', 16 | 'extract_hashtags' 17 | ].forEach(function(method){ 18 | exports[method] = TwitterText[method] 19 | }) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2010 Twitter, Inc. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not 4 | use this file except in compliance with the License. 
You may obtain a copy of 5 | the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 11 | WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 12 | License for the specific language governing permissions and limitations under 13 | the License. 14 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require 'rubygems' 2 | require 'rake' 3 | require 'yaml' 4 | require 'erb' 5 | require 'active_support' 6 | 7 | desc "Generate test.html" 8 | task :generate_test do 9 | template = File.read('test/test.html.erb') 10 | autolink = File.read('test/twitter-text-conformance/autolink.yml'); 11 | extract = File.read('test/twitter-text-conformance/extract.yml'); 12 | hit_highlighting = File.read('test/twitter-text-conformance/hit_highlighting.yml') 13 | 14 | class TestTemplate 15 | 16 | def initialize(autolink_json, extract_json, hit_highlighting_json) 17 | @autolink_json = autolink_json 18 | @extract_json = extract_json 19 | @hit_highlighting_json = hit_highlighting_json 20 | end 21 | 22 | def get_binding 23 | binding 24 | end 25 | end 26 | 27 | template = ERB.new(template) 28 | t = TestTemplate.new(YAML.load(autolink).to_json, YAML.load(extract).to_json, YAML.load(hit_highlighting).to_json) 29 | html = template.result(t.get_binding) 30 | File.open('test/test.html', 'w+') do |file| 31 | file.print(html) 32 | end 33 | end -------------------------------------------------------------------------------- /README.textile: -------------------------------------------------------------------------------- 1 | h1. Twitter Text in javascript 2 | 3 | An autolinking and extracting library for use in the DOM. 
4 | 5 | More info on twitter-text "here":http://engineering.twitter.com/2010/02/introducing-open-source-twitter-text.html 6 | 7 | Based off the "twitter-text-rb library":http://github.com/mzsanford/twitter-text-rb 8 | 9 | Main goal is to achieve 100 percent conformance. More about conformance "here":http://github.com/mzsanford/twitter-text-conformance 10 | 11 | To run the conformance tests just open test/test.html in your browser. 12 | 13 | h2. How To: 14 | 15 | * Download the project from "github":http://github.com/rubymaverick/twitter-text-js. 16 | * Include the lib/twitter-text.js file in your html. 17 | 18 | h2. Testing: 19 | 20 | * First, generate the test.html file by running @rake generate_test@ in the project root. This uses the test.html.erb file to generate tests, converting the twitter-text-conformance yaml files to json and writing tests against those. Then just open up test.html in your browser to run the tests. If you change the test.html.erb file then you have to run @rake generate_test@ again. 21 | 22 | h2. Examples: 23 | 24 | @TwitterText.auto_link("This is a #tweet with a @username");@ 25 | @// This is a #tweet with a username@ 26 | 27 | h2. TODO: 28 | 29 | * Figure out the last couple of url regexp bugs 30 | * Convert to Node.js? 31 | -------------------------------------------------------------------------------- /test/test.html.erb: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | 6 | 7 | Test twitter-text-js 8 | 9 | 10 | 11 | 12 | 13 | 79 | 80 | 81 | 82 |

QUnit example

83 |

84 |

85 |
    // /lib/twitter-text.js
    //
    // TwitterText: auto-linking, entity extraction and hit highlighting for
    // tweet text. Everything is exposed as methods on the single `TwitterText`
    // object; the compiled regular expressions are available as
    // `TwitterText.regexes`.
    var TwitterText = (function () {
      var my = {};
      var regexes = {};

      // Characters excluded from URL domain labels (used by 'valid_domain').
      var punct = "!'#%&'()*+,\\-./:;<=>?@\\[/\\]^_{|}~";

      regexes['spaces'] = new RegExp("\\s");
      // ASCII "@" and the fullwidth commercial at (U+FF20).
      regexes['at_signs'] = new RegExp("[@\uFF20]");
      // Group 1: preceding char, group 2: screen name, group 3 (lookahead):
      // the character following the name, or "" at end of input.
      regexes['extract_mentions'] = new RegExp("(^|[^a-zA-Z0-9_])" + regexes['at_signs'].source + "([a-zA-Z0-9_]{1,20})(?=(.|$))", "g");
      regexes['extract_reply'] = new RegExp("^(?:" + regexes['spaces'].source + ")*" + regexes['at_signs'].source + "([a-zA-Z0-9_]{1,20})", "g");
      regexes['list_name'] = /^[a-zA-Z\x80-\xff].{0,79}$/;

      // U+00C0-U+00FF minus the multiplication and division signs. The last
      // character is a literal "ÿ": an octal byte escape such as \303\277 would
      // be read by a JS regex as the two characters U+00C3 and U+00BF instead.
      var latin_accents = "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ";
      var hashtag_characters = "[a-z0-9_" + latin_accents + "]";
      // Group 1: text before the hash, group 2: "#" or fullwidth "#" (U+FF03),
      // group 3: the tag text (must contain at least one letter or underscore).
      regexes['auto_link_hashtags'] = new RegExp("(^|[^0-9A-Z&\\/]+)(#|\uFF03)([0-9A-Z_]*[A-Z_]+" + hashtag_characters + "*)", "ig");
      // Group 1: preceding char, group 2: at sign(s), group 3: screen name,
      // group 4: optional "/list-slug".
      regexes['auto_link_usernames_or_lists'] = /([^a-zA-Z0-9_]|^)([@\uFF20]+)([a-zA-Z0-9_]{1,20})(\/[a-zA-Z][a-zA-Z0-9\x80-\xff\-]{0,79})?/g;
      regexes['auto_link_emoticon'] = /(8\-\#|8\-E|\+\-\(|\`\@|\`O|\<\|:~\(|\}:o\{|:\-\[|\>o\<|X\-\/|\[:-\]\-I\-|\/\/\/\/Ö\\\\\\\\|\(\|:\|\/\)|∑:\*\)|\( \| \))/g;

      regexes['valid_preceding_chars'] = new RegExp("(?:[^\\/\"':!=]|^|\\:)", "ig");
      regexes['valid_domain'] = new RegExp("(?:[^" + punct + "\\s][.-](?=[^" + punct + "\\s])|[^" + punct + "\\s]){1,}\\.[a-z]{2,}(?::[0-9]+)?", "ig");
      regexes['valid_url_path_chars'] = new RegExp("(?:[.,]?[a-z0-9!*'();:=+$/%#\\[\\]\\-_,~@])", "ig");
      regexes['valid_url_path_ending_chars'] = new RegExp("[a-z0-9)=#/]", "ig");
      regexes['valid_url_query_chars'] = new RegExp("[a-z0-9!*'();:&=+$/%#\\[\\]\\-_.,~]", "ig");
      regexes['valid_url_query_ending_chars'] = new RegExp("[a-z0-9_&=#]", "ig");

      // Capture groups of 'valid_url':
      //   1 whole match, 2 preceding char, 3 URL, 4 protocol ("http://",
      //   "https://" or "www."), 5 domain, 6 path, 7 query string.
      var valid_url = [
        "(",
          "(", regexes['valid_preceding_chars'].source, ")",
          "(",
            "(https?:\\/\\/|www\\.)",
            "(", regexes['valid_domain'].source, ")",
            "(/", regexes['valid_url_path_chars'].source, "*", regexes['valid_url_path_ending_chars'].source, "?)?",
            "(\\?", regexes['valid_url_query_chars'].source, "*", regexes['valid_url_query_ending_chars'].source, ")?",
          ")",
        ")"
      ].join("");

      regexes['valid_url'] = new RegExp(valid_url, "gi");

      var www_regex = /www\./i;

      // Default CSS class for auto-linked URLs
      var default_url_class = "tweet-url";
      // Default CSS class for auto-linked lists (along with the url class)
      var default_list_class = "list-slug";
      // Default CSS class for auto-linked usernames (along with the url class)
      var default_username_class = "username";
      // Default CSS class for auto-linked hashtags (along with the url class)
      var default_hashtag_class = "hashtag";
      // HTML attribute for robot nofollow behavior (default)
      var html_attr_no_follow = " rel=\"nofollow\"";

      // Turns an "options" object into an HTML attributes string of the form
      // ` key="value"` for every enumerable key whose value is truthy.
      var tag_options = function (opt) {
        var result = "";
        var attr;
        for (attr in opt) {
          if (opt[attr]) {
            result += " " + attr + "=\"" + opt[attr] + "\"";
          }
        }
        return result;
      };

      // Creates and returns a shallow copy of the opt object. A null or
      // undefined opt yields an empty object.
      var copy_options = function (opt) {
        var options = {};
        var key;
        for (key in opt) {
          options[key] = opt[key];
        }
        return options;
      };

      // Add <a></a> tags around the usernames, lists, hashtags and URLs in the
      // provided text. Hashtags are linked first, then URLs, then usernames and
      // lists. The <a> tags can be controlled with the following entries in
      // the options hash:
      //
      // url_class::          class to add to all <a> tags
      // list_class::         class to add to list <a> tags
      // username_class::     class to add to username <a> tags
      // hashtag_class::      class to add to hashtag <a> tags
      // username_url_base::  href prefix for username links; the username (minus the @) is appended
      // list_url_base::      href prefix for list links; username/list (minus the @) is appended
      // hashtag_url_base::   href prefix for hashtag links; the hashtag (minus the #) is appended
      // suppress_lists::     disable auto-linking to lists
      // suppress_no_follow:: do not add rel="nofollow" to auto-linked items
      //
      // Note that the same options object is also handed to
      // auto_link_urls_custom, which renders every truthy entry as an HTML
      // attribute on URL links.
      my.auto_link = function (text, options) {
        // Calls go through `my` rather than `this` so the function still works
        // when it is detached from the TwitterText object.
        return my.auto_link_usernames_or_lists(
          my.auto_link_urls_custom(
            my.auto_link_hashtags(text, options),
            options),
          options);
      };

      // Add <a></a> tags around the usernames and lists in the provided text.
      // The at sign stays outside the link. Recognised options: url_class,
      // list_class, username_class, username_url_base, list_url_base,
      // suppress_lists and suppress_no_follow (see auto_link).
      //
      // If func is given it is called with the list ("user/list") or screen
      // name and its return value is used as the link text; the href always
      // uses the original value.
      my.auto_link_usernames_or_lists = function (text, opt, func) {
        var options = copy_options(opt);
        options['url_class'] = options['url_class'] || default_url_class;
        options['list_class'] = options['list_class'] || default_list_class;
        options['username_class'] = options['username_class'] || default_username_class;
        options['username_url_base'] = options['username_url_base'] || "http://twitter.com/";
        options['list_url_base'] = options['list_url_base'] || "http://twitter.com/";

        var extra_html = "";
        if (!options['suppress_no_follow']) {
          extra_html = html_attr_no_follow;
        }

        return text.replace(regexes['auto_link_usernames_or_lists'], function (str, p1, p2, p3, p4) {
          var link_text;

          if (p4 && !options['suppress_lists']) {
            // the link is a list
            var list = p3 + p4;
            link_text = func ? func(list) : list;

            return p1 + p2 +
              "<a class=\"" + options['url_class'] + " " + options['list_class'] + "\"" +
              " href=\"" + options['list_url_base'] + list + "\"" + extra_html + ">" +
              link_text + "</a>";
          }

          // this is a screen name
          link_text = func ? func(p3) : p3;

          // When lists are suppressed the "/slug" part was still consumed by
          // the match, so it is appended unlinked instead of being dropped.
          return p1 + p2 +
            "<a class=\"" + options['url_class'] + " " + options['username_class'] + "\"" +
            " href=\"" + options['username_url_base'] + p3 + "\"" + extra_html + ">" +
            link_text + "</a>" + (p4 || "");
        });
      };

      // Add <a></a> tags around the URLs in the provided text. Every truthy
      // entry of opt is rendered as an HTML attribute on the link;
      // rel="nofollow" is added unless opt.suppress_no_follow is set. URLs
      // that start with "www." get "http://" prepended in the href only.
      my.auto_link_urls_custom = function (text, opt) {
        var options = copy_options(opt);

        if (!options['suppress_no_follow']) {
          options['rel'] = "nofollow";
        } else {
          // Cleared so the flag itself is not rendered as an attribute.
          options['suppress_no_follow'] = null;
        }

        return text.replace(regexes['valid_url'], function (str, all, before, url, protocol) {
          var html_attrs = tag_options(options);
          var full_url = protocol.match(www_regex) ? "http://" + url : url;

          return before + "<a href=\"" + full_url + "\"" + html_attrs + ">" + url + "</a>";
        });
      };

      // Add <a></a> tags around the hashtags in the provided text. Recognised
      // options: url_class, hashtag_class, hashtag_url_base and
      // suppress_no_follow (see auto_link).
      //
      // If func is given it is called with the hashtag text (without the #)
      // and its return value replaces that text in the href, title and link
      // body.
      my.auto_link_hashtags = function (text, opt, func) {
        var options = copy_options(opt);

        options['url_class'] = options['url_class'] || default_url_class;
        options['hashtag_class'] = options['hashtag_class'] || default_hashtag_class;
        options['hashtag_url_base'] = options['hashtag_url_base'] || "http://twitter.com/search?q=%23";

        var extra_html = "";
        if (!options['suppress_no_follow']) {
          extra_html = html_attr_no_follow;
        }

        return text.replace(regexes['auto_link_hashtags'], function (str, before, hash, tag_text) {
          if (func) {
            tag_text = func(tag_text);
          }
          return before +
            "<a href=\"" + options['hashtag_url_base'] + tag_text + "\"" +
            " title=\"#" + tag_text + "\"" +
            " class=\"" + options['url_class'] + " " + options['hashtag_class'] + "\"" +
            extra_html + ">" + hash + tag_text + "</a>";
        });
      };

      // Extracts a list of all usernames mentioned in the Tweet text. If the
      // text is empty or contains no username mentions an empty array
      // will be returned. A name immediately followed by another at sign is
      // skipped.
      //
      // If func is given then it will be called for each username.
      my.extract_mentioned_screen_names = function (text, func) {
        if (!text) { return []; }

        var possible_screen_names = [];

        text.replace(regexes['extract_mentions'], function (str, before, sn, after) {
          if (!after.match(regexes['at_signs'])) {
            possible_screen_names.push(sn);
          }
        });

        if (func) {
          for (var i = 0; i < possible_screen_names.length; i++) {
            func(possible_screen_names[i]);
          }
        }

        return possible_screen_names;
      };

      // Extracts the username replied to in the Tweet text, i.e. a mention at
      // the very start of the text (leading whitespace allowed). If the text
      // is empty or is not a reply, null will be returned.
      //
      // If func is given then it will be called with the username replied to (if any).
      my.extract_reply_screen_name = function (text, func) {
        if (!text) { return null; }

        var screen_name = null;
        text.replace(regexes['extract_reply'], function (str, username) {
          screen_name = username;
          if (func) { func(screen_name); }
        });

        return screen_name;
      };

      // Extracts a list of all URLs included in the Tweet text. If the
      // text is empty or contains no URLs an empty array will be returned.
      // URLs that start with "www." are returned with "http://" prepended.
      //
      // If func is given then it will be called for each URL.
      my.extract_urls = function (text, func) {
        if (!text) { return []; }

        var urls = [];

        text.replace(regexes['valid_url'], function (str, all, before, url, protocol) {
          urls.push(protocol.match(www_regex) ? "http://" + url : url);
        });

        if (func) {
          for (var i = 0; i < urls.length; i++) {
            func(urls[i]);
          }
        }

        return urls;
      };

      // Extracts a list of all hashtags included in the Tweet text. If the
      // text is empty or contains no hashtags an empty array will be
      // returned. The array returned will not include the leading #
      // character.
      //
      // If func is given then it will be called for each hashtag.
      my.extract_hashtags = function (text, func) {
        if (!text) { return []; }

        var tags = [];

        text.replace(regexes['auto_link_hashtags'], function (str, before, hash, hash_text) {
          tags.push(hash_text);
        });

        if (func) {
          for (var i = 0; i < tags.length; i++) {
            func(tags[i]);
          }
        }

        return tags;
      };

      // Debug helper: logs `this.unicode_spaces`.
      // NOTE(review): nothing in this module assigns `unicode_spaces`, so this
      // currently logs undefined - confirm whether it is still needed.
      my.printUnicodeSpaces = function () {
        console.log(this.unicode_spaces);
      };

      my.regexes = regexes;


      var default_highlight_tag = "em";

      // Add <em></em> tags around the hits provided in the text. The
      // hits should be an array of (start, end) index pairs, relative to the
      // original text, before auto-linking (but the text may already be
      // auto-linked if desired): existing <...> tags in the text are copied
      // through and do not count towards the indices.
      //
      // The tag can be overridden using the `tag` option. For example:
      //
      //   >>> hit_highlight("test hit here", [[5, 8]], {tag: 'strong'});
      //   "test <strong>hit</strong> here"
      //
      // Returns the text unchanged when hits is empty or missing.
      my.hit_highlight = function (text, hits, options) {
        hits = hits || [];
        options = options || {};

        if (hits.length === 0) {
          return text;
        }

        var tag_name = options.tag || default_highlight_tag,
            tags = ["<" + tag_name + ">", "</" + tag_name + ">"],
            first_splits = text.split("<"), second_splits,
            chunks = [],
            split,
            i,
            j;

        // Split the text on "<" and ">" so that even-indexed chunks are plain
        // text and odd-indexed chunks are the insides of existing tags.
        for (i = 0; i < first_splits.length; i += 1) {
          split = first_splits[i];
          if (!split) {
            chunks.push("");
          } else {
            second_splits = split.split(">");
            for (j = 0; j < second_splits.length; j += 1) {
              chunks.push(second_splits[j]);
            }
          }
        }

        var result = "",
            chunk_index = 0,
            chunk = chunks[0],
            prev_chunks_len = 0,   // plain-text characters in chunks already emitted
            chunk_cursor = 0,      // position inside the current text chunk
            start_in_chunk = false,
            chunk_chars = chunk,
            flat_hits = [],
            index,
            hit,
            tag,
            placed,
            hit_spot;

        // Flatten [[start, end], ...] so even positions are opening indices
        // and odd positions are closing indices.
        for (i = 0; i < hits.length; i += 1) {
          for (j = 0; j < hits[i].length; j += 1) {
            flat_hits.push(hits[i][j]);
          }
        }

        for (index = 0; index < flat_hits.length; index += 1) {
          hit = flat_hits[index];
          tag = tags[index % 2];
          placed = false;

          // Emit whole text chunks (and the existing tag that follows each)
          // until the chunk containing this hit index is reached.
          while (chunk != null && hit >= prev_chunks_len + chunk.length) {
            result += chunk_chars.slice(chunk_cursor);
            if (start_in_chunk && hit === prev_chunks_len + chunk_chars.length) {
              // A highlight opened in this chunk ends exactly at its end:
              // close it before the existing tag that follows.
              result += tag;
              placed = true;
            }

            if (chunks[chunk_index + 1]) {
              result += "<" + chunks[chunk_index + 1] + ">";
            }

            prev_chunks_len += chunk_chars.length;
            chunk_cursor = 0;
            chunk_index += 2;
            chunk = chunks[chunk_index];
            chunk_chars = chunk;
            start_in_chunk = false;
          }

          if (!placed && chunk != null) {
            hit_spot = hit - prev_chunks_len;
            result += chunk_chars.slice(chunk_cursor, hit_spot) + tag;
            chunk_cursor = hit_spot;
            if (index % 2 === 0) {
              start_in_chunk = true;
            }
          }
        }

        // Emit whatever is left after the last hit.
        if (chunk != null) {
          if (chunk_cursor < chunk_chars.length) {
            result += chunk_chars.slice(chunk_cursor);
          }
          for (index = chunk_index + 1; index < chunks.length; index += 1) {
            result += (index % 2 === 0 ? chunks[index] : "<" + chunks[index] + ">");
          }
        }

        return result;
      };

      return my;
    }());

    // /test/test.html

    QUnit example

    83 |

    84 |

    85 |
      86 | 87 | 88 | --------------------------------------------------------------------------------