├── .gitmodules ├── LICENSE ├── README ├── README.markdown ├── TODO ├── lib └── Twitter │ ├── Autolink.php │ ├── Extractor.php │ ├── HitHighlighter.php │ └── Regex.php ├── phpunit.xml └── tests ├── Twitter ├── AutolinkTest.php ├── ExtractorTest.php └── HitHighlighterTest.php ├── bootstrap.php ├── example.php └── runtests.php /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "tests/data/twitter-text-conformance"] 2 | path = tests/data/twitter-text-conformance 3 | url = git://github.com/mzsanford/twitter-text-conformance.git 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2010 Mike Cochrane 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not 4 | use this file except in compliance with the License. You may obtain a copy of 5 | the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 11 | WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 12 | License for the specific language governing permissions and limitations under 13 | the License. 14 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | README.markdown -------------------------------------------------------------------------------- /README.markdown: -------------------------------------------------------------------------------- 1 | # Twitter Text (PHP Edition) # 2 | 3 | A library of PHP classes that provide auto-linking and extraction of usernames, 4 | lists, hashtags and URLs from tweets. 
Originally created from twitter-text-rb 5 | and twitter-text-java projects by Matt Sanford and ported to PHP by Mike 6 | Cochrane, this library has been improved and made more complete by Nick Pope. 7 | 8 | ## Features ## 9 | 10 | ### Autolink ### 11 | 12 | - Add links to all matching Twitter usernames (no account verification). 13 | - Add links to all user lists (of the form @username/list-name). 14 | - Add links to all valid hashtags. 15 | - Add links to all URLs. 16 | - Support for international character sets. 17 | 18 | ### Extractor ### 19 | 20 | - Extract mentioned Twitter usernames (from anywhere in the tweet). 21 | - Extract replied to Twitter usernames (from start of the tweet). 22 | - Extract all user lists (of the form @username/list-name). 23 | - Extract all valid hashtags. 24 | - Extract all URLs. 25 | - Support for international character sets. 26 | 27 | ### Hit Highlighter ### 28 | 29 | - Highlight text specified by a range by surrounding with a tag. 30 | - Support for highlighting when tweet has already been autolinked. 31 | - Support for international character sets. 32 | 33 | ## Examples ## 34 | 35 | For examples, please see `tests/example.php` which you can view in a browser or 36 | run from the command line. 37 | 38 | ## Conformance ## 39 | 40 | You'll need the test data which is in YAML format from the following 41 | repository: 42 | 43 | http://github.com/mzsanford/twitter-text-conformance 44 | 45 | It has already been added as a git submodule so you should just need to run: 46 | 47 | git submodule init 48 | git submodule update 49 | 50 | As PHP has no native support for YAML you'll need to checkout spyc from svn 51 | into `tests/spyc`: 52 | 53 | svn checkout http://spyc.googlecode.com/svn/trunk/ tests/spyc 54 | 55 | There are a couple of options for testing conformance: 56 | 57 | 1. Run `phpunit` from the root folder of the project. 58 | 2. Run `tests/runtests.php` from the command line. 59 | 3. 
Make `tests/runtests.php` accessible on a web server and view it in your 60 | browser. 61 | 62 | ## Thanks & Contributions ## 63 | 64 | The bulk of this library is from the heroic efforts of: 65 | 66 | - Matt Sanford (https://github.com/mzsanford): For the original Ruby and Java implementations. 67 | - Mike Cochrane (https://github.com/mikenz): For the initial PHP code. 68 | - Nick Pope (https://github.com/ngnpope): For the bulk of the maintenance work to date. 69 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | - Generate PHP Documentation 2 | - Add a phing build.xml file for generating documentation and running tests. 3 | - Tidy up regular expressions: 4 | - Modularise repeated sections. 5 | - Remove unnecessary capturing. 6 | - Tidy up tests/runtests.php to refactor repeated sections. 7 | - Check unicode/mbstring handling... 8 | -------------------------------------------------------------------------------- /lib/Twitter/Autolink.php: -------------------------------------------------------------------------------- 1 | 4 | * @author Nick Pope 5 | * @copyright Copyright © 2010, Mike Cochrane, Nick Pope 6 | * @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0 7 | * @package Twitter 8 | */ 9 | 10 | require_once 'Regex.php'; 11 | 12 | /** 13 | * Twitter Autolink Class 14 | * 15 | * Parses tweets and generates HTML anchor tags around URLs, usernames, 16 | * username/list pairs and hashtags. 17 | * 18 | * Originally written by {@link http://github.com/mikenz Mike Cochrane}, this 19 | * is based on code by {@link http://github.com/mzsanford Matt Sanford} and 20 | * heavily modified by {@link http://github.com/ngnpope Nick Pope}. 
21 | * 22 | * @author Mike Cochrane 23 | * @author Nick Pope 24 | * @copyright Copyright © 2010, Mike Cochrane, Nick Pope 25 | * @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0 26 | * @package Twitter 27 | */ 28 | class Twitter_Autolink extends Twitter_Regex { 29 | 30 | /** 31 | * CSS class for auto-linked URLs. 32 | * 33 | * @var string 34 | */ 35 | protected $class_url = 'url'; 36 | 37 | /** 38 | * CSS class for auto-linked username URLs. 39 | * 40 | * @var string 41 | */ 42 | protected $class_user = 'username'; 43 | 44 | /** 45 | * CSS class for auto-linked list URLs. 46 | * 47 | * @var string 48 | */ 49 | protected $class_list = 'list'; 50 | 51 | /** 52 | * CSS class for auto-linked hashtag URLs. 53 | * 54 | * @var string 55 | */ 56 | protected $class_hash = 'hashtag'; 57 | 58 | /** 59 | * URL base for username links (the username without the @ will be appended). 60 | * 61 | * @var string 62 | */ 63 | protected $url_base_user = 'http://twitter.com/'; 64 | 65 | /** 66 | * URL base for list links (the username/list without the @ will be appended). 67 | * 68 | * @var string 69 | */ 70 | protected $url_base_list = 'http://twitter.com/'; 71 | 72 | /** 73 | * URL base for hashtag links (the hashtag without the # will be appended). 74 | * 75 | * @var string 76 | */ 77 | protected $url_base_hash = 'http://twitter.com/search?q=%23'; 78 | 79 | /** 80 | * Whether to include the value 'nofollow' in the 'rel' attribute. 81 | * 82 | * @var bool 83 | */ 84 | protected $nofollow = true; 85 | 86 | /** 87 | * Whether to include the value 'external' in the 'rel' attribute. 88 | * 89 | * Often this is used to be matched on in JavaScript for dynamically adding 90 | * the 'target' attribute which is deprecated in HTML 4.01. In HTML 5 it has 91 | * been undeprecated and thus the 'target' attribute can be used. If this is 92 | * set to false then the 'target' attribute will be output. 
93 | * 94 | * @var bool 95 | */ 96 | protected $external = true; 97 | 98 | /** 99 | * The scope to open the link in. 100 | * 101 | * Support for the 'target' attribute was deprecated in HTML 4.01 but has 102 | * since been reinstated in HTML 5. To output the 'target' attribute you 103 | * must disable the adding of the string 'external' to the 'rel' attribute. 104 | * 105 | * @var string 106 | */ 107 | protected $target = '_blank'; 108 | 109 | /** 110 | * Static factory: builds a new instance so that calls can be chained fluently. 111 | * 112 | * @param string $tweet The tweet to be converted. 113 | * @param bool $full_encode Whether to encode all special characters. NOTE(review): this value is forwarded positionally as the constructor's second argument, which is $escape rather than $full_encode - confirm intent before relying on it. 114 | * 115 | * @see __construct() 116 | * 117 | * @return Twitter_Autolink 118 | */ 119 | public static function create($tweet, $full_encode = false) { 120 | return new self($tweet, $full_encode); 121 | } 122 | 123 | /** 124 | * Reads in a tweet to be parsed and converted to contain links. 125 | * 126 | * As the intent is to produce links and output the modified tweet to the 127 | * user, we take this opportunity to ensure that we escape user input. 128 | * 129 | * @see htmlspecialchars() 130 | * 131 | * @param string $tweet The tweet to be converted. 132 | * @param bool $escape Whether to escape the tweet (default: true). 133 | * @param bool $full_encode When escaping, use htmlentities() instead of htmlspecialchars() (default: false). 134 | */ 135 | public function __construct($tweet, $escape = true, $full_encode = false) { 136 | if ($escape) { 137 | if ($full_encode) { 138 | parent::__construct(htmlentities($tweet, ENT_QUOTES, 'UTF-8', false)); 139 | } else { 140 | parent::__construct(htmlspecialchars($tweet, ENT_QUOTES, 'UTF-8', false)); 141 | } 142 | } else { 143 | parent::__construct($tweet); 144 | } 145 | } 146 | 147 | /** 148 | * Gets the CSS class for auto-linked URLs. 149 | * 150 | * @return string CSS class for URL links. 151 | */ 152 | public function getURLClass() { 153 | return $this->class_url; 154 | } 155 | 156 | /** 157 | * CSS class for auto-linked URLs. 
158 | * 159 | * @param string $v CSS class for URL links. 160 | * 161 | * @return Twitter_Autolink Fluid method chaining. 162 | */ 163 | public function setURLClass($v) { 164 | $this->class_url = trim($v); 165 | return $this; 166 | } 167 | 168 | /** 169 | * CSS class for auto-linked username URLs. 170 | * 171 | * @return string CSS class for username links. 172 | */ 173 | public function getUsernameClass() { 174 | return $this->class_user; 175 | } 176 | 177 | /** 178 | * CSS class for auto-linked username URLs. 179 | * 180 | * @param string $v CSS class for username links. 181 | * 182 | * @return Twitter_Autolink Fluid method chaining. 183 | */ 184 | public function setUsernameClass($v) { 185 | $this->class_user = trim($v); 186 | return $this; 187 | } 188 | 189 | /** 190 | * CSS class for auto-linked username/list URLs. 191 | * 192 | * @return string CSS class for username/list links. 193 | */ 194 | public function getListClass() { 195 | return $this->class_list; 196 | } 197 | 198 | /** 199 | * CSS class for auto-linked username/list URLs. 200 | * 201 | * @param string $v CSS class for username/list links. 202 | * 203 | * @return Twitter_Autolink Fluid method chaining. 204 | */ 205 | public function setListClass($v) { 206 | $this->class_list = trim($v); 207 | return $this; 208 | } 209 | 210 | /** 211 | * CSS class for auto-linked hashtag URLs. 212 | * 213 | * @return string CSS class for hashtag links. 214 | */ 215 | public function getHashtagClass() { 216 | return $this->class_hash; 217 | } 218 | 219 | /** 220 | * CSS class for auto-linked hashtag URLs. 221 | * 222 | * @param string $v CSS class for hashtag links. 223 | * 224 | * @return Twitter_Autolink Fluid method chaining. 225 | */ 226 | public function setHashtagClass($v) { 227 | $this->class_hash = trim($v); 228 | return $this; 229 | } 230 | 231 | /** 232 | * Whether to include the value 'nofollow' in the 'rel' attribute. 233 | * 234 | * @return bool Whether to add 'nofollow' to the 'rel' attribute. 
235 | */ 236 | public function getNoFollow() { 237 | return $this->nofollow; 238 | } 239 | 240 | /** 241 | * Sets whether to include the value 'nofollow' in the 'rel' attribute. 242 | * 243 | * @param bool $v Whether to add 'nofollow' to the 'rel' attribute. 244 | * 245 | * @return Twitter_Autolink Fluid method chaining. 246 | */ 247 | public function setNoFollow($v) { 248 | $this->nofollow = $v; 249 | return $this; 250 | } 251 | 252 | /** 253 | * Gets whether to include the value 'external' in the 'rel' attribute. 254 | * 255 | * Often this is used to be matched on in JavaScript for dynamically adding 256 | * the 'target' attribute which is deprecated in HTML 4.01. In HTML 5 it has 257 | * been undeprecated and thus the 'target' attribute can be used. If this is 258 | * set to false then the 'target' attribute will be output. 259 | * 260 | * @return bool Whether to add 'external' to the 'rel' attribute. 261 | */ 262 | public function getExternal() { 263 | return $this->external; 264 | } 265 | 266 | /** 267 | * Sets whether to include the value 'external' in the 'rel' attribute. 268 | * 269 | * Often this is used to be matched on in JavaScript for dynamically adding 270 | * the 'target' attribute which is deprecated in HTML 4.01. In HTML 5 it has 271 | * been undeprecated and thus the 'target' attribute can be used. If this is 272 | * set to false then the 'target' attribute will be output. 273 | * 274 | * @param bool $v Whether to add 'external' to the 'rel' attribute. 275 | * 276 | * @return Twitter_Autolink Fluid method chaining. 277 | */ 278 | public function setExternal($v) { 279 | $this->external = $v; 280 | return $this; 281 | } 282 | 283 | /** 284 | * The scope to open the link in. 285 | * 286 | * Support for the 'target' attribute was deprecated in HTML 4.01 but has 287 | * since been reinstated in HTML 5. To output the 'target' attribute you 288 | * must disable the adding of the string 'external' to the 'rel' attribute. 
289 | * 290 | * @return string The value to add to the 'target' attribute. 291 | */ 292 | public function getTarget() { 293 | return $this->target; 294 | } 295 | 296 | /** 297 | * The scope to open the link in. 298 | * 299 | * Support for the 'target' attribute was deprecated in HTML 4.01 but has 300 | * since been reinstated in HTML 5. To output the 'target' attribute you 301 | * must disable the adding of the string 'external' to the 'rel' attribute. 302 | * 303 | * @param string $v The value to add to the 'target' attribute. 304 | * 305 | * @return Twitter_Autolink Fluid method chaining. 306 | */ 307 | public function setTarget($v) { 308 | $this->target = trim($v); 309 | return $this; 310 | } 311 | 312 | /** 313 | * Adds links to all elements in the tweet. 314 | * 315 | * @return string The modified tweet. 316 | */ 317 | public function addLinks() { 318 | $original = $this->tweet; 319 | $this->tweet = $this->addLinksToURLs(); 320 | $this->tweet = $this->addLinksToHashtags(); 321 | $this->tweet = $this->addLinksToUsernamesAndLists(); 322 | $modified = $this->tweet; 323 | $this->tweet = $original; 324 | return $modified; 325 | } 326 | 327 | /** 328 | * Adds links to hashtag elements in the tweet. 329 | * 330 | * @return string The modified tweet. 331 | */ 332 | public function addLinksToHashtags() { 333 | return preg_replace_callback( 334 | self::REGEX_HASHTAG, 335 | array($this, '_addLinksToHashtags'), 336 | $this->tweet); 337 | } 338 | 339 | /** 340 | * Adds links to URL elements in the tweet. 341 | * 342 | * @return string The modified tweet. 343 | */ 344 | public function addLinksToURLs() { 345 | return preg_replace_callback( 346 | self::$REGEX_VALID_URL, 347 | array($this, '_addLinksToURLs'), 348 | $this->tweet); 349 | } 350 | 351 | /** 352 | * Adds links to username/list elements in the tweet. 353 | * 354 | * @return string The modified tweet. 
355 | */ 356 | public function addLinksToUsernamesAndLists() { 357 | return preg_replace_callback( 358 | self::REGEX_USERNAME_LIST, 359 | array($this, '_addLinksToUsernamesAndLists'), 360 | $this->tweet); 361 | } 362 | 363 | /** 364 | * Wraps a tweet element in an HTML anchor tag using the provided URL. 365 | * 366 | * This is a helper function to perform the generation of the link. 367 | * 368 | * @param string $url The URL to use as the href. 369 | * @param string $class The CSS class(es) to apply (space separated). 370 | * @param string $element The tweet element to wrap. 371 | * 372 | * @return string The tweet element with a link applied. 373 | */ 374 | protected function wrap($url, $class, $element) { 375 | $link = 'external) $rel[] = 'external'; 380 | if ($this->nofollow) $rel[] = 'nofollow'; 381 | if (!empty($rel)) $link .= ' rel="'.implode(' ', $rel).'"'; 382 | if ($this->target) $link .= ' target="'.$this->target.'"'; 383 | $link .= '>'.$element.''; 384 | return $link; 385 | } 386 | 387 | /** 388 | * Callback used by the method that adds links to hashtags. 389 | * 390 | * @see addLinksToHashtags() 391 | * 392 | * @param array $matches The regular expression matches. 393 | * 394 | * @return string The link-wrapped hashtag. 395 | */ 396 | protected function _addLinksToHashtags($matches) { 397 | $replacement = $matches[1]; 398 | $element = $matches[2] . $matches[3]; 399 | $url = $this->url_base_hash . $matches[3]; 400 | $replacement .= $this->wrap($url, $this->class_hash, $element); 401 | return $replacement; 402 | } 403 | 404 | /** 405 | * Callback used by the method that adds links to URLs. 406 | * 407 | * @see addLinksToURLs() 408 | * 409 | * @param array $matches The regular expression matches. 410 | * 411 | * @return string The link-wrapped URL. 
412 | */ 413 | protected function _addLinksToURLs($matches) { 414 | list($all, $before, $url, $protocol, $domain, $path, $query) = array_pad($matches, 7, ''); 415 | $url = htmlspecialchars($url, ENT_QUOTES, 'UTF-8', false); 416 | if (!$protocol) return $all; 417 | return $before . $this->wrap($url, $this->class_url, $url); 418 | } 419 | 420 | /** 421 | * Callback used by the method that adds links to username/list pairs. 422 | * 423 | * @see addLinksToUsernamesAndLists() 424 | * 425 | * @param array $matches The regular expression matches. 426 | * 427 | * @return string The link-wrapped username/list pair. 428 | */ 429 | protected function _addLinksToUsernamesAndLists($matches) { 430 | list($all, $before, $at, $username, $slash_listname, $after) = array_pad($matches, 6, ''); 431 | # If $after is not empty, there is an invalid character. 432 | if (!empty($after)) return $all; 433 | if (!empty($slash_listname)) { 434 | # Replace the list and username 435 | $element = $username . substr($slash_listname, 0, 26); 436 | $class = $this->class_list; 437 | $url = $this->url_base_list . $element; 438 | $postfix = substr($slash_listname, 26); 439 | } else { 440 | # Replace the username 441 | $element = $username; 442 | $class = $this->class_user; 443 | $url = $this->url_base_user . $element; 444 | $postfix = ''; 445 | } 446 | return $before . $at . $this->wrap($url, $class, $element) . $postfix . 
$after; 447 | } 448 | 449 | } 450 | -------------------------------------------------------------------------------- /lib/Twitter/Extractor.php: -------------------------------------------------------------------------------- 1 | 4 | * @author Nick Pope 5 | * @copyright Copyright © 2010, Mike Cochrane, Nick Pope 6 | * @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0 7 | * @package Twitter 8 | */ 9 | 10 | require_once 'Regex.php'; 11 | 12 | /** 13 | * Twitter Extractor Class 14 | * 15 | * Parses tweets and extracts URLs, usernames, username/list pairs and 16 | * hashtags. 17 | * 18 | * Originally written by {@link http://github.com/mikenz Mike Cochrane}, this 19 | * is based on code by {@link http://github.com/mzsanford Matt Sanford} and 20 | * heavily modified by {@link http://github.com/ngnpope Nick Pope}. 21 | * 22 | * @author Mike Cochrane 23 | * @author Nick Pope 24 | * @copyright Copyright © 2010, Mike Cochrane, Nick Pope 25 | * @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0 26 | * @package Twitter 27 | */ 28 | class Twitter_Extractor extends Twitter_Regex { 29 | 30 | /** 31 | * Provides fluent method chaining. 32 | * 33 | * @param string $tweet The tweet to be converted. 34 | * 35 | * @see __construct() 36 | * 37 | * @return Twitter_Extractor 38 | */ 39 | public static function create($tweet) { 40 | return new self($tweet); 41 | } 42 | 43 | /** 44 | * Reads in a tweet to be parsed and extracts elements from it. 45 | * 46 | * Extracts various parts of a tweet including URLs, usernames, hashtags... 47 | * 48 | * @param string $tweet The tweet to extract. 49 | */ 50 | public function __construct($tweet) { 51 | parent::__construct($tweet); 52 | } 53 | 54 | /** 55 | * Extracts all parts of a tweet and returns an associative array containing 56 | * the extracted elements. 57 | * 58 | * @return array The elements in the tweet. 
59 | */ 60 | public function extract() { 61 | return array( 62 | 'hashtags' => $this->extractHashtags(), 63 | 'urls' => $this->extractURLs(), 64 | 'mentions' => $this->extractMentionedUsernames(), 65 | 'replyto' => $this->extractRepliedUsernames(), 66 | 'hashtags_with_indices' => $this->extractHashtagsWithIndices(), 67 | 'urls_with_indices' => $this->extractURLsWithIndices(), 68 | 'mentions_with_indices' => $this->extractMentionedUsernamesWithIndices(), 69 | ); 70 | } 71 | 72 | /** 73 | * Extracts all the hashtags from the tweet. 74 | * 75 | * @return array The hashtag elements in the tweet. 76 | */ 77 | public function extractHashtags() { 78 | preg_match_all(self::REGEX_HASHTAG, $this->tweet, $matches); 79 | return $matches[3]; 80 | } 81 | 82 | /** 83 | * Extracts all the URLs from the tweet. 84 | * 85 | * @return array The URL elements in the tweet. 86 | */ 87 | public function extractURLs() { 88 | preg_match_all(self::$REGEX_VALID_URL, $this->tweet, $matches); 89 | list($all, $before, $url, $protocol, $domain, $path, $query) = array_pad($matches, 7, ''); 90 | return $url; 91 | } 92 | 93 | /** 94 | * Extract all the usernames from the tweet. 95 | * 96 | * A mention is an occurrence of a username anywhere in a tweet. 97 | * 98 | * @return array The usernames elements in the tweet. 99 | */ 100 | public function extractMentionedUsernames() { 101 | preg_match_all(self::REGEX_USERNAME_MENTION, $this->tweet, $matches); 102 | list($all, $before, $username, $after) = array_pad($matches, 4, ''); 103 | $usernames = array(); 104 | for ($i = 0; $i < count($username); $i ++) { 105 | # If $after is not empty, there is an invalid character. 106 | if (!empty($after[$i])) continue; 107 | array_push($usernames, $username[$i]); 108 | } 109 | return $usernames; 110 | } 111 | 112 | /** 113 | * Extract all the usernames replied to from the tweet. 114 | * 115 | * A reply is an occurrence of a username at the beginning of a tweet. 
116 | * 117 | * @return string The username replied to, or an empty string if there is none. 118 | */ 119 | public function extractRepliedUsernames() { 120 | preg_match(self::$REGEX_REPLY_USERNAME, $this->tweet, $matches); 121 | return isset($matches[2]) ? $matches[2] : ''; 122 | } 123 | 124 | /** 125 | * Extracts all the hashtags and the indices they occur at from the tweet. 126 | * 127 | * @return array A list of arrays, each with the keys 'hashtag' and 'indices' (start, end). 128 | */ 129 | public function extractHashtagsWithIndices() { 130 | preg_match_all(self::REGEX_HASHTAG, $this->tweet, $matches, PREG_OFFSET_CAPTURE); 131 | $m = &$matches[3]; 132 | for ($i = 0; $i < count($m); $i++) { 133 | $m[$i] = array_combine(array('hashtag', 'indices'), $m[$i]); 134 | # PREG_OFFSET_CAPTURE returns byte offsets, so recompute the start in characters with mb_strlen(): 135 | $start = mb_strlen(substr($this->tweet, 0, $matches[1][$i][1])); 136 | $start += mb_strlen($matches[1][$i][0]); 137 | $length = mb_strlen($m[$i]['hashtag']); 138 | $m[$i]['indices'] = array($start, $start + $length + 1); 139 | } 140 | return $m; 141 | } 142 | 143 | /** 144 | * Extracts all the URLs and the indices they occur at from the tweet. 145 | * 146 | * @return array A list of arrays, each with the keys 'url' and 'indices' (start, end). 147 | */ 148 | public function extractURLsWithIndices() { 149 | preg_match_all(self::$REGEX_VALID_URL, $this->tweet, $matches, PREG_OFFSET_CAPTURE); 150 | $m = &$matches[2]; 151 | for ($i = 0; $i < count($m); $i++) { 152 | $m[$i] = array_combine(array('url', 'indices'), $m[$i]); 153 | # PREG_OFFSET_CAPTURE returns byte offsets, so recompute the start in characters with mb_strlen(): 154 | $start = mb_strlen(substr($this->tweet, 0, $matches[1][$i][1])); 155 | $start += mb_strlen($matches[1][$i][0]); 156 | $length = mb_strlen($m[$i]['url']); 157 | $m[$i]['indices'] = array($start, $start + $length); 158 | } 159 | return $m; 160 | } 161 | 162 | /** 163 | * Extracts all the usernames and the indices they occur at from the tweet. 164 | * 165 | * @return array The username elements in the tweet. 
166 | */ 167 | public function extractMentionedUsernamesWithIndices() { 168 | preg_match_all(self::REGEX_USERNAME_MENTION, $this->tweet, $matches, PREG_OFFSET_CAPTURE); 169 | $m = &$matches[2]; 170 | for ($i = 0; $i < count($m); $i++) { 171 | $m[$i] = array_combine(array('screen_name', 'indices'), $m[$i]); 172 | # XXX: Fix for PREG_OFFSET_CAPTURE returning byte offsets... 173 | $start = mb_strlen(substr($this->tweet, 0, $matches[1][$i][1])); 174 | $start += mb_strlen($matches[1][$i][0]); 175 | $length = mb_strlen($m[$i]['screen_name']); 176 | $m[$i]['indices'] = array($start, $start + $length + 1); 177 | } 178 | return $m; 179 | } 180 | 181 | } 182 | -------------------------------------------------------------------------------- /lib/Twitter/HitHighlighter.php: -------------------------------------------------------------------------------- 1 | 4 | * @copyright Copyright © 2010, Nick Pope 5 | * @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0 6 | * @package Twitter 7 | */ 8 | 9 | require_once 'Regex.php'; 10 | 11 | /** 12 | * Twitter HitHighlighter Class 13 | * 14 | * Performs "hit highlighting" on tweets that have been auto-linked already. 15 | * Useful with the results returned from the search API. 16 | * 17 | * Originally written by {@link http://github.com/mikenz Mike Cochrane}, this 18 | * is based on code by {@link http://github.com/mzsanford Matt Sanford} and 19 | * heavily modified by {@link http://github.com/ngnpope Nick Pope}. 20 | * 21 | * @author Nick Pope 22 | * @copyright Copyright © 2010, Nick Pope 23 | * @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0 24 | * @package Twitter 25 | */ 26 | class Twitter_HitHighlighter extends Twitter_Regex { 27 | 28 | /** 29 | * The tag to surround hits with. 30 | * 31 | * @var string 32 | */ 33 | protected $tag = 'em'; 34 | 35 | /** 36 | * Provides fluent method chaining. 37 | * 38 | * @param string $tweet The tweet to be hit highlighted. 
39 | * @param bool $full_encode Whether to encode all special characters. NOTE(review): this value is forwarded positionally as the constructor's second argument, which is $escape rather than $full_encode - confirm intent before relying on it. 40 | * 41 | * @see __construct() 42 | * 43 | * @return Twitter_HitHighlighter 44 | */ 45 | public static function create($tweet, $full_encode = false) { 46 | return new self($tweet, $full_encode); 47 | } 48 | 49 | /** 50 | * Reads in a tweet to be parsed and hit highlighted. 51 | * 52 | * We take this opportunity to ensure that we escape user input. 53 | * 54 | * @see htmlspecialchars() 55 | * 56 | * @param string $tweet The tweet to be hit highlighted. 57 | * @param bool $escape Whether to escape the tweet (default: true). 58 | * @param bool $full_encode When escaping, use htmlentities() instead of htmlspecialchars() (default: false). 59 | */ 60 | public function __construct($tweet, $escape = true, $full_encode = false) { 61 | if ($escape) { 62 | if ($full_encode) { 63 | parent::__construct(htmlentities($tweet, ENT_QUOTES, 'UTF-8', false)); 64 | } else { 65 | parent::__construct(htmlspecialchars($tweet, ENT_QUOTES, 'UTF-8', false)); 66 | } 67 | } else { 68 | parent::__construct($tweet); 69 | } 70 | } 71 | 72 | /** 73 | * Get the highlighting tag to surround hits with. The default tag is 'em'. 74 | * 75 | * @return string The tag name. 76 | */ 77 | public function getTag() { 78 | return $this->tag; 79 | } 80 | 81 | /** 82 | * Set the highlighting tag to surround hits with. The default tag is 'em'. 83 | * 84 | * @param string $v The tag name. 85 | * 86 | * @return Twitter_HitHighlighter Fluid method chaining. 87 | */ 88 | public function setTag($v) { 89 | $this->tag = $v; 90 | return $this; 91 | } 92 | 93 | /** 94 | * Hit highlights the tweet. 95 | * 96 | * @param array $hits An array containing the start and end index pairs 97 | * for the highlighting. 98 | * 99 | * @return string The hit highlighted tweet. 
100 | */ 101 | public function addHitHighlighting(array $hits) { 102 | if (empty($hits)) return $this->tweet; 103 | $tweet = ''; 104 | $tags = array('<'.$this->tag.'>', 'tag.'>'); 105 | # Check whether we can simply replace or whether we need to chunk... 106 | if (strpos($this->tweet, '<') === false) { 107 | $ti = 0; // tag increment (for added tags) 108 | $tweet = $this->tweet; 109 | foreach ($hits as $hit) { 110 | $tweet = self::mb_substr_replace($tweet, $tags[0], $hit[0] + $ti, 0); 111 | $ti += mb_strlen($tags[0]); 112 | $tweet = self::mb_substr_replace($tweet, $tags[1], $hit[1] + $ti, 0); 113 | $ti += mb_strlen($tags[1]); 114 | } 115 | } else { 116 | $chunks = preg_split('/[<>]/iu', $this->tweet); 117 | $chunk = $chunks[0]; 118 | $chunk_index = 0; 119 | $chunk_cursor = 0; 120 | $offset = 0; 121 | $start_in_chunk = false; 122 | # Flatten the multidimensional hits array: 123 | $hits_flat = array(); 124 | foreach ($hits as $hit) $hits_flat = array_merge($hits_flat, $hit); 125 | # Loop over the hit indices: 126 | for ($index = 0; $index < count($hits_flat); $index++) { 127 | $hit = $hits_flat[$index]; 128 | $tag = $tags[$index % 2]; 129 | $placed = false; 130 | while ($chunk !== null && $hit >= ($i = $offset + mb_strlen($chunk))) { 131 | $tweet .= mb_substr($chunk, $chunk_cursor); 132 | if ($start_in_chunk && $hit === $i) { 133 | $tweet .= $tag; 134 | $placed = true; 135 | } 136 | if (isset($chunks[$chunk_index+1])) $tweet .= '<' . $chunks[$chunk_index+1] . '>'; 137 | $offset += mb_strlen($chunk); 138 | $chunk_cursor = 0; 139 | $chunk_index += 2; 140 | $chunk = (isset($chunks[$chunk_index]) ? $chunks[$chunk_index] : null); 141 | $start_in_chunk = false; 142 | } 143 | if (!$placed && $chunk !== null) { 144 | $hit_spot = $hit - $offset; 145 | $tweet .= mb_substr($chunk, $chunk_cursor, $hit_spot - $chunk_cursor) . 
$tag; 146 | $chunk_cursor = $hit_spot; 147 | $start_in_chunk = ($index % 2 === 0); 148 | $placed = true; 149 | } 150 | # Ultimate fallback - hits that run off the end get a closing tag: 151 | if (!$placed) $tweet .= $tag; 152 | } 153 | if ($chunk !== null) { 154 | if ($chunk_cursor < mb_strlen($chunk)) { 155 | $tweet .= mb_substr($chunk, $chunk_cursor); 156 | } 157 | for ($index = $chunk_index + 1; $index < count($chunks); $index++) { 158 | $tweet .= ($index % 2 === 0 ? $chunks[$index] : '<' . $chunks[$index] . '>'); 159 | } 160 | } 161 | } 162 | return $tweet; 163 | } 164 | 165 | /** 166 | * A multibyte-aware substring replacement function. 167 | * 168 | * @param string $string The string to modify. 169 | * @param string $replacement The replacement string. 170 | * @param int $start The start of the replacement. 171 | * @param int $length The number of characters to replace. 172 | * @param string $encoding The encoding of the string. 173 | * 174 | * @return string The modified string. 175 | * 176 | * @see http://www.php.net/manual/en/function.substr-replace.php#90146 177 | */ 178 | protected static function mb_substr_replace($string, $replacement, $start, $length = null, $encoding = null) { 179 | if (extension_loaded('mbstring') === true) { 180 | $string_length = (is_null($encoding) === true) ? mb_strlen($string) : mb_strlen($string, $encoding); 181 | if ($start < 0) { 182 | $start = max(0, $string_length + $start); 183 | } else if ($start > $string_length) { 184 | $start = $string_length; 185 | } 186 | if ($length < 0) { 187 | $length = max(0, $string_length - $start + $length); 188 | } else if ((is_null($length) === true) || ($length > $string_length)) { 189 | $length = $string_length; 190 | } 191 | if (($start + $length) > $string_length) { 192 | $length = $string_length - $start; 193 | } 194 | if (is_null($encoding) === true) { 195 | return mb_substr($string, 0, $start) . $replacement . 
mb_substr($string, $start + $length, $string_length - $start - $length); 196 | } 197 | return mb_substr($string, 0, $start, $encoding) . $replacement . mb_substr($string, $start + $length, $string_length - $start - $length, $encoding); 198 | } 199 | return (is_null($length) === true) ? substr_replace($string, $replacement, $start) : substr_replace($string, $replacement, $start, $length); 200 | } 201 | 202 | } 203 | -------------------------------------------------------------------------------- /lib/Twitter/Regex.php: -------------------------------------------------------------------------------- 1 | 4 | * @author Nick Pope 5 | * @copyright Copyright © 2010, Mike Cochrane, Nick Pope 6 | * @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0 7 | * @package Twitter 8 | */ 9 | 10 | /** 11 | * Twitter Regex Abstract Class 12 | * 13 | * Used by subclasses that need to parse tweets. 14 | * 15 | * Originally written by {@link http://github.com/mikenz Mike Cochrane}, this 16 | * is based on code by {@link http://github.com/mzsanford Matt Sanford} and 17 | * heavily modified by {@link http://github.com/ngnpope Nick Pope}. 18 | * 19 | * @author Mike Cochrane 20 | * @author Nick Pope 21 | * @copyright Copyright © 2010, Mike Cochrane, Nick Pope 22 | * @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0 23 | * @package Twitter 24 | */ 25 | abstract class Twitter_Regex { 26 | 27 | /** 28 | * Expression to at sign characters 29 | * 30 | * @var string 31 | */ 32 | const REGEX_AT_SIGNS = '[@@]'; 33 | 34 | /** 35 | * Expression to match characters that may come before a URL. 36 | * 37 | * @var string 38 | */ 39 | const REGEX_URL_CHARS_BEFORE = '(?:[^-\\/"\':!=a-z0-9_@@]|^|\\:)'; 40 | 41 | /** 42 | * Expression to match the domain portion of a URL. 
43 | * 44 | * @var string 45 | */ 46 | const REGEX_URL_DOMAIN = '(?:[^\\p{P}\\p{Lo}\\s][\\.-](?=[^\\p{P}\\p{Lo}\\s])|[^\\p{P}\\p{Lo}\\s])+\\.[a-z]{2,}(?::[0-9]+)?'; 47 | 48 | /** 49 | * Expression to match characters that may come in the URL path. 50 | * 51 | * @var string 52 | */ 53 | const REGEX_URL_CHARS_PATH = '(?:(?:\\([a-z0-9!\\*\';:=\\+\\$\\/%#\\[\\]\\-_,~]+\\))|@[a-z0-9!\\*\';:=\\+\\$\\/%#\\[\\]\\-_,~]+\\/|[\\.\\,]?(?:[a-z0-9!\\*\';:=\\+\\$\\/%#\\[\\]\\-_~]|,(?!\s)))'; 54 | 55 | /** 56 | * Expression to match characters that may come at the end of the URL path. 57 | * 58 | * @var string 59 | */ 60 | const REGEX_URL_CHARS_PATH_END = '[a-z0-9=#\\/]'; 61 | 62 | /** 63 | * Expression to match characters that may come in the URL query string. 64 | * 65 | * @var string 66 | */ 67 | const REGEX_URL_CHARS_QUERY = '[a-z0-9!\\*\'\\(\\);:&=\\+\\$\\/%#\\[\\]\\-_\\.,~]'; 68 | 69 | /** 70 | * Expression to match characters that may come at the end of the URL query 71 | * string. 72 | * 73 | * @var string 74 | */ 75 | const REGEX_URL_CHARS_QUERY_END = '[a-z0-9_&=#\\/]'; 76 | 77 | /** 78 | * Expression to match a username followed by a list. 79 | * 80 | * @var string 81 | */ 82 | const REGEX_USERNAME_LIST = '/([^a-z0-9_\/]|^|RT:?)([@@]+)([a-z0-9_]{1,20})(\/[a-z][-_a-z0-9\x80-\xFF]{0,24})?([@@\xC0-\xD6\xD8-\xF6\xF8-\xFF]?)/iu'; 83 | 84 | /** 85 | * Expression to match a username mentioned anywhere in a tweet. 86 | * 87 | * @var string 88 | */ 89 | const REGEX_USERNAME_MENTION = '/(^|[^a-z0-9_])[@@]([a-z0-9_]{1,20})([@@\xC0-\xD6\xD8-\xF6\xF8-\xFF]?)/iu'; 90 | 91 | /** 92 | * Expression to match a hashtag. 93 | * 94 | * @var string 95 | */ 96 | const REGEX_HASHTAG = '/(^|[^0-9A-Z&\/\?]+)([##]+)([0-9A-Z_]*[A-Z_]+[a-z0-9_üÀ-ÖØ-öø-ÿ]*)/iu'; 97 | 98 | /** 99 | * Expression to match whitespace. 100 | * 101 | * Single byte whitespace characters 102 | * 0x0009-0x000D White_Space # Cc # .. 
103 | * 0x0020 White_Space # Zs # SPACE 104 | * 0x0085 White_Space # Cc # 105 | * 0x00A0 White_Space # Zs # NO-BREAK SPACE 106 | * Multi byte whitespace characters 107 | * 0x1680 White_Space # Zs # OGHAM SPACE MARK 108 | * 0x180E White_Space # Zs # MONGOLIAN VOWEL SEPARATOR 109 | * 0x2000-0x200A White_Space # Zs # EN QUAD..HAIR SPACE 110 | * 0x2028 White_Space # Zl # LINE SEPARATOR 111 | * 0x2029 White_Space # Zp # PARAGRAPH SEPARATOR 112 | * 0x202F White_Space # Zs # NARROW NO-BREAK SPACE 113 | * 0x205F White_Space # Zs # MEDIUM MATHEMATICAL SPACE 114 | * 0x3000 White_Space # Zs # IDEOGRAPHIC SPACE 115 | * 116 | * @var string 117 | */ 118 | const REGEX_WHITESPACE = '[\x09-\x0D\x20\x85\xA0]|\xe1\x9a\x80|\xe1\xa0\x8e|\xe2\x80[\x80-\x8a,\xa8,\xa9,\xaf\xdf]|\xe3\x80\x80'; 119 | 120 | /** 121 | * Contains the complete valid URL pattern string. 122 | * 123 | * This should be generated the first time the constructor is called. 124 | * 125 | * @var string The regex pattern for a valid URL. 126 | */ 127 | protected static $REGEX_VALID_URL = null; 128 | 129 | /** 130 | * Contains the reply username pattern string. 131 | * 132 | * This should be generated the first time the constructor is called. 133 | * 134 | * @var string The regex pattern for a reply username. 135 | */ 136 | protected static $REGEX_REPLY_USERNAME = null; 137 | 138 | /** 139 | * The tweet to be used in parsing. This should be populated by the 140 | * constructor of all subclasses. 141 | * 142 | * @var string 143 | */ 144 | protected $tweet = ''; 145 | 146 | /** 147 | * This constructor is used to populate some variables. 148 | * 149 | * @param string $tweet The tweet to parse. 150 | */ 151 | protected function __construct($tweet) { 152 | if (is_null(self::$REGEX_VALID_URL)) { 153 | self::$REGEX_VALID_URL = '/(?:' # $1 Complete match (preg_match already matches everything.) 154 | . '('.self::REGEX_URL_CHARS_BEFORE.')' # $2 Preceding character 155 | . '(' # $3 Complete URL 156 | . 
'(https?:\\/\\/)' # $4 Protocol (or www) 157 | . '('.self::REGEX_URL_DOMAIN.')' # $5 Domain(s) (and port) 158 | . '(\\/'.self::REGEX_URL_CHARS_PATH.'*' # $6 URL Path 159 | . self::REGEX_URL_CHARS_PATH_END.'?)?' 160 | . '(\\?'.self::REGEX_URL_CHARS_QUERY.'*' # $7 Query String 161 | . self::REGEX_URL_CHARS_QUERY_END.')?' 162 | . ')' 163 | . ')/iux'; 164 | } 165 | if (is_null(self::$REGEX_REPLY_USERNAME)) { 166 | self::$REGEX_REPLY_USERNAME = '/^('.self::REGEX_WHITESPACE.')*[@@]([a-zA-Z0-9_]{1,20})/'; 167 | } 168 | $this->tweet = $tweet; 169 | } 170 | 171 | } 172 | -------------------------------------------------------------------------------- /phpunit.xml: -------------------------------------------------------------------------------- 1 | 8 | 9 | 10 | 11 | tests/Twitter 12 | 13 | 14 | 15 | 16 | 17 | lib/Twitter 18 | 19 | 20 | 21 | 22 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /tests/Twitter/AutolinkTest.php: -------------------------------------------------------------------------------- 1 | 4 | * @copyright Copyright © 2010, Mike Cochrane, Nick Pope 5 | * @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0 6 | * @package Twitter 7 | */ 8 | 9 | /** 10 | * Twitter Autolink Class Unit Tests 11 | * 12 | * @author Nick Pope 13 | * @copyright Copyright © 2010, Mike Cochrane, Nick Pope 14 | * @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0 15 | * @package Twitter 16 | */ 17 | class Twitter_AutolinkTest extends PHPUnit_Framework_TestCase { 18 | 19 | /** 20 | * A helper function for providers. 21 | * 22 | * @param string $test The test to fetch data for. 23 | * 24 | * @return array The test data to provide. 25 | */ 26 | protected function providerHelper($test) { 27 | $data = Spyc::YAMLLoad(DATA.'/autolink.yml'); 28 | return isset($data['tests'][$test]) ? 
$data['tests'][$test] : array(); 29 | } 30 | 31 | /** 32 | * @dataProvider addLinksToUsernamesProvider 33 | */ 34 | public function testAddLinksToUsernames($description, $text, $expected) { 35 | $linked = Twitter_Autolink::create($text, false) 36 | ->setNoFollow(false)->setExternal(false)->setTarget('') 37 | ->setUsernameClass('tweet-url username') 38 | ->setListClass('tweet-url list-slug') 39 | ->setHashtagClass('tweet-url hashtag') 40 | ->setURLClass('') 41 | ->addLinksToUsernamesAndLists(); 42 | $this->assertSame($expected, $linked, $description); 43 | } 44 | 45 | /** 46 | * 47 | */ 48 | public function addLinksToUsernamesProvider() { 49 | return $this->providerHelper('usernames'); 50 | } 51 | 52 | /** 53 | * @dataProvider addLinksToListsProvider 54 | */ 55 | public function testAddLinksToLists($description, $text, $expected) { 56 | $linked = Twitter_Autolink::create($text, false) 57 | ->setNoFollow(false)->setExternal(false)->setTarget('') 58 | ->setUsernameClass('tweet-url username') 59 | ->setListClass('tweet-url list-slug') 60 | ->setHashtagClass('tweet-url hashtag') 61 | ->setURLClass('') 62 | ->addLinksToUsernamesAndLists(); 63 | $this->assertSame($expected, $linked, $description); 64 | } 65 | 66 | /** 67 | * 68 | */ 69 | public function addLinksToListsProvider() { 70 | return $this->providerHelper('lists'); 71 | } 72 | 73 | /** 74 | * @dataProvider addLinksToHashtagsProvider 75 | */ 76 | public function testAddLinksToHashtags($description, $text, $expected) { 77 | $linked = Twitter_Autolink::create($text, false) 78 | ->setNoFollow(false)->setExternal(false)->setTarget('') 79 | ->setUsernameClass('tweet-url username') 80 | ->setListClass('tweet-url list-slug') 81 | ->setHashtagClass('tweet-url hashtag') 82 | ->setURLClass('') 83 | ->addLinksToHashtags(); 84 | # XXX: Need to re-order for hashtag as it is written out differently... 85 | # We use the same wrapping function for adding links for all methods. 
86 | $linked = preg_replace(array( 87 | '!([^<]*)!', 88 | '!title="#([^"]+)"!' 89 | ), array( 90 | '$3', 91 | 'title="#$1"' 92 | ), $linked); 93 | $this->assertSame($expected, $linked, $description); 94 | } 95 | 96 | /** 97 | * 98 | */ 99 | public function addLinksToHashtagsProvider() { 100 | return $this->providerHelper('hashtags'); 101 | } 102 | 103 | /** 104 | * @dataProvider addLinksToURLsProvider 105 | */ 106 | public function testAddLinksToURLs($description, $text, $expected) { 107 | $linked = Twitter_Autolink::create($text, false) 108 | ->setNoFollow(false)->setExternal(false)->setTarget('') 109 | ->setUsernameClass('tweet-url username') 110 | ->setListClass('tweet-url list-slug') 111 | ->setHashtagClass('tweet-url hashtag') 112 | ->setURLClass('') 113 | ->addLinksToURLs(); 114 | $this->assertSame($expected, $linked, $description); 115 | } 116 | 117 | /** 118 | * 119 | */ 120 | public function addLinksToURLsProvider() { 121 | return $this->providerHelper('urls'); 122 | } 123 | 124 | /** 125 | * @dataProvider addLinksProvider 126 | */ 127 | public function testAddLinks($description, $text, $expected) { 128 | $linked = Twitter_Autolink::create($text, false) 129 | ->setNoFollow(false)->setExternal(false)->setTarget('') 130 | ->setUsernameClass('tweet-url username') 131 | ->setListClass('tweet-url list-slug') 132 | ->setHashtagClass('tweet-url hashtag') 133 | ->setURLClass('') 134 | ->addLinks(); 135 | $this->assertSame($expected, $linked, $description); 136 | } 137 | 138 | /** 139 | * 140 | */ 141 | public function addLinksProvider() { 142 | return $this->providerHelper('all'); 143 | } 144 | 145 | } 146 | -------------------------------------------------------------------------------- /tests/Twitter/ExtractorTest.php: -------------------------------------------------------------------------------- 1 | 4 | * @copyright Copyright © 2010, Mike Cochrane, Nick Pope 5 | * @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0 6 | * @package Twitter 7 
| */ 8 | 9 | /** 10 | * Twitter Extractor Class Unit Tests 11 | * 12 | * @author Nick Pope 13 | * @copyright Copyright © 2010, Mike Cochrane, Nick Pope 14 | * @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0 15 | * @package Twitter 16 | */ 17 | class Twitter_ExtractorTest extends PHPUnit_Framework_TestCase { 18 | 19 | /** 20 | * A helper function for providers. 21 | * 22 | * @param string $test The test to fetch data for. 23 | * 24 | * @return array The test data to provide. 25 | */ 26 | protected function providerHelper($test) { 27 | $data = Spyc::YAMLLoad(DATA.'/extract.yml'); 28 | return isset($data['tests'][$test]) ? $data['tests'][$test] : array(); 29 | } 30 | 31 | /** 32 | * @dataProvider extractMentionedUsernamesProvider 33 | */ 34 | public function testExtractMentionedUsernames($description, $text, $expected) { 35 | $extracted = Twitter_Extractor::create($text)->extractMentionedUsernames(); 36 | $this->assertSame($expected, $extracted, $description); 37 | } 38 | 39 | /** 40 | * 41 | */ 42 | public function extractMentionedUsernamesProvider() { 43 | return $this->providerHelper('mentions'); 44 | } 45 | 46 | /** 47 | * @dataProvider extractRepliedUsernamesProvider 48 | */ 49 | public function testExtractRepliedUsernames($description, $text, $expected) { 50 | $extracted = Twitter_Extractor::create($text)->extractRepliedUsernames(); 51 | $this->assertSame($expected, $extracted, $description); 52 | } 53 | 54 | /** 55 | * 56 | */ 57 | public function extractRepliedUsernamesProvider() { 58 | return $this->providerHelper('replies'); 59 | } 60 | 61 | /** 62 | * @dataProvider extractURLsProvider 63 | */ 64 | public function testExtractURLs($description, $text, $expected) { 65 | $extracted = Twitter_Extractor::create($text)->extractURLs(); 66 | $this->assertSame($expected, $extracted, $description); 67 | } 68 | 69 | /** 70 | * 71 | */ 72 | public function extractURLsProvider() { 73 | return $this->providerHelper('urls'); 74 | } 75 | 76 | /** 77 
| * @dataProvider extractHashtagsProvider 78 | */ 79 | public function testExtractHashtags($description, $text, $expected) { 80 | $extracted = Twitter_Extractor::create($text)->extractHashtags(); 81 | $this->assertSame($expected, $extracted, $description); 82 | } 83 | 84 | /** 85 | * 86 | */ 87 | public function extractHashtagsProvider() { 88 | return $this->providerHelper('hashtags'); 89 | } 90 | 91 | /** 92 | * @dataProvider extractHashtagsWithIndicesProvider 93 | */ 94 | public function testExtractHashtagsWithIndices($description, $text, $expected) { 95 | $extracted = Twitter_Extractor::create($text)->extractHashtagsWithIndices(); 96 | $this->assertSame($expected, $extracted, $description); 97 | } 98 | 99 | /** 100 | * 101 | */ 102 | public function extractHashtagsWithIndicesProvider() { 103 | return $this->providerHelper('hashtags_with_indices'); 104 | } 105 | 106 | /** 107 | * @dataProvider extractURLsWithIndicesProvider 108 | */ 109 | public function testExtractURLsWithIndices($description, $text, $expected) { 110 | $extracted = Twitter_Extractor::create($text)->extractURLsWithIndices(); 111 | $this->assertSame($expected, $extracted, $description); 112 | } 113 | 114 | /** 115 | * 116 | */ 117 | public function extractURLsWithIndicesProvider() { 118 | return $this->providerHelper('urls_with_indices'); 119 | } 120 | 121 | /** 122 | * @dataProvider extractMentionedUsernamesWithIndicesProvider 123 | */ 124 | public function testExtractMentionedUsernamesWithIndices($description, $text, $expected) { 125 | $extracted = Twitter_Extractor::create($text)->extractMentionedUsernamesWithIndices(); 126 | $this->assertSame($expected, $extracted, $description); 127 | } 128 | 129 | /** 130 | * 131 | */ 132 | public function extractMentionedUsernamesWithIndicesProvider() { 133 | return $this->providerHelper('mentions_with_indices'); 134 | } 135 | 136 | } 137 | -------------------------------------------------------------------------------- 
/tests/Twitter/HitHighlighterTest.php: -------------------------------------------------------------------------------- 1 | 4 | * @copyright Copyright © 2010, Nick Pope 5 | * @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0 6 | * @package Twitter 7 | */ 8 | 9 | /** 10 | * Twitter HitHighlighter Class Unit Tests 11 | * 12 | * @author Nick Pope 13 | * @copyright Copyright © 2010, Nick Pope 14 | * @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0 15 | * @package Twitter 16 | */ 17 | class Twitter_HitHighlighterTest extends PHPUnit_Framework_TestCase { 18 | 19 | /** 20 | * A helper function for providers. 21 | * 22 | * @param string $test The test to fetch data for. 23 | * 24 | * @return array The test data to provide. 25 | */ 26 | protected function providerHelper($test) { 27 | $data = Spyc::YAMLLoad(DATA.'/hit_highlighting.yml'); 28 | return isset($data['tests'][$test]) ? $data['tests'][$test] : array(); 29 | } 30 | 31 | /** 32 | * @dataProvider addHitHighlightingProvider 33 | */ 34 | public function testAddHitHighlighting($description, $text, $hits, $expected) { 35 | $extracted = Twitter_HitHighlighter::create($text)->addHitHighlighting($hits); 36 | $this->assertSame($expected, $extracted, $description); 37 | } 38 | 39 | /** 40 | * 41 | */ 42 | public function addHitHighlightingProvider() { 43 | return array_merge($this->providerHelper('plain_text'), $this->providerHelper('with_links')); 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /tests/bootstrap.php: -------------------------------------------------------------------------------- 1 | 8 | * @author Nick Pope 9 | * @copyright Copyright © 2010, Mike Cochrane, Nick Pope 10 | * @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0 11 | */ 12 | 13 | if (!defined('E_DEPRECATED')) define('E_DEPRECATED', 8192); 14 | error_reporting(E_ALL | E_STRICT | E_DEPRECATED); 15 | 16 | $ROOT = 
dirname(dirname(__FILE__)); 17 | 18 | require_once $ROOT.'/lib/Twitter/Autolink.php'; 19 | require_once $ROOT.'/lib/Twitter/Extractor.php'; 20 | require_once $ROOT.'/lib/Twitter/HitHighlighter.php'; 21 | 22 | $browser = (PHP_SAPI != 'cli'); 23 | 24 | function print_array(array $a) { 25 | $p = print_r($a, true); 26 | $p = str_replace(' ', ' ', $p); 27 | echo preg_replace(array( 28 | '!^Array\s+\(\s+!', 29 | '!=> Array\s+\(!', 30 | '! (\[\d|\))!', 31 | '!\s+\)\s*$!', 32 | ), array( 33 | ' ', '=> (', '\1', '', 34 | ), $p); 35 | } 36 | 37 | $tweet = 'Tweet mentioning @mikenz and referring to his list @mikeNZ/sports and website http://mikenz.geek.nz #awesome'; 38 | 39 | if ($browser) echo << 41 | 42 | 43 | 44 | Twitter Text (PHP Edition) Library » Examples 45 | 77 | 78 | 79 | EOHTML; 80 | 81 | if ($browser) echo '

'; 82 | echo 'Twitter Text (PHP Edition) Library » Examples'; 83 | if ($browser) echo '

'; 84 | else echo PHP_EOL, '============================================', PHP_EOL; 85 | echo PHP_EOL; 86 | 87 | if ($browser) echo '

'; 88 | echo 'Extraction Examples'; 89 | if ($browser) echo '

'; 90 | else echo PHP_EOL, '-------------------', PHP_EOL; 91 | echo PHP_EOL; 92 | 93 | $code = <<extract(); 98 | print_r(\$data); 99 | EOPHP; 100 | if ($browser) { 101 | echo '

Source

', PHP_EOL; 102 | echo '
';
103 |   highlight_string($code);
104 |   echo '
', PHP_EOL; 105 | } else { 106 | echo 'Source:', PHP_EOL, PHP_EOL; 107 | echo $code; 108 | echo PHP_EOL, PHP_EOL; 109 | } 110 | 111 | $data = Twitter_Extractor::create($tweet) 112 | ->extract(); 113 | 114 | if ($browser) { 115 | echo '

Output

', PHP_EOL; 116 | echo '
';
117 |   print_array($data);
118 |   echo '
', PHP_EOL; 119 | } else { 120 | echo 'Output:', PHP_EOL, PHP_EOL; 121 | print_array($data); 122 | echo PHP_EOL, PHP_EOL; 123 | } 124 | 125 | if ($browser) echo '

'; 126 | echo 'Autolink Examples'; 127 | if ($browser) echo '

'; 128 | else echo PHP_EOL, '-----------------', PHP_EOL; 129 | echo PHP_EOL; 130 | 131 | $code = <<setNoFollow(false) 136 | ->addLinks(); 137 | echo \$html; 138 | EOPHP; 139 | if ($browser) { 140 | echo '

Source

', PHP_EOL; 141 | echo '
';
142 |   highlight_string($code);
143 |   echo '
', PHP_EOL; 144 | } else { 145 | echo 'Source:', PHP_EOL, PHP_EOL; 146 | echo $code; 147 | echo PHP_EOL, PHP_EOL; 148 | } 149 | 150 | $html = Twitter_Autolink::create($tweet) 151 | ->setNoFollow(false) 152 | ->addLinks(); 153 | 154 | if ($browser) { 155 | echo '

Markup

', PHP_EOL; 156 | echo '
';
157 |   echo htmlspecialchars($html, ENT_QUOTES, 'UTF-8', false);
158 |   echo '
', PHP_EOL; 159 | } else { 160 | echo 'Markup:', PHP_EOL, PHP_EOL; 161 | echo wordwrap(htmlspecialchars($html, ENT_QUOTES, 'UTF-8', false)); 162 | echo PHP_EOL, PHP_EOL; 163 | } 164 | 165 | if ($browser) { 166 | echo '

Output

', PHP_EOL; 167 | echo '
'; 168 | echo $html; 169 | echo '
', PHP_EOL; 170 | } else { 171 | echo 'Output:', PHP_EOL, PHP_EOL; 172 | echo wordwrap($html); 173 | echo PHP_EOL, PHP_EOL; 174 | } 175 | 176 | if ($browser) echo '

'; 177 | echo 'Hit Highlighter Examples'; 178 | if ($browser) echo '

'; 179 | else echo PHP_EOL, '------------------------', PHP_EOL; 180 | echo PHP_EOL; 181 | 182 | $code = <<addHitHighlighting(\$hits); 188 | echo \$html; 189 | EOPHP; 190 | if ($browser) { 191 | echo '

Source

', PHP_EOL; 192 | echo '
';
193 |   highlight_string($code);
194 |   echo '
', PHP_EOL; 195 | } else { 196 | echo 'Source:', PHP_EOL, PHP_EOL; 197 | echo $code; 198 | echo PHP_EOL, PHP_EOL; 199 | } 200 | 201 | $html = Twitter_HitHighlighter::create($tweet) 202 | ->addHitHighlighting(array(array(70, 77), array(101, 108))); 203 | 204 | if ($browser) { 205 | echo '

Markup

', PHP_EOL; 206 | echo '
';
207 |   echo htmlspecialchars($html, ENT_QUOTES, 'UTF-8', false);
208 |   echo '
', PHP_EOL; 209 | } else { 210 | echo 'Markup:', PHP_EOL, PHP_EOL; 211 | echo wordwrap(htmlspecialchars($html, ENT_QUOTES, 'UTF-8', false)); 212 | echo PHP_EOL, PHP_EOL; 213 | } 214 | 215 | if ($browser) { 216 | echo '

Output

', PHP_EOL; 217 | echo '
'; 218 | echo $html; 219 | echo '
', PHP_EOL; 220 | } else { 221 | echo 'Output:', PHP_EOL, PHP_EOL; 222 | echo wordwrap($html); 223 | echo PHP_EOL, PHP_EOL; 224 | } 225 | 226 | if ($browser) echo << 228 | 229 | EOHTML; 230 | -------------------------------------------------------------------------------- /tests/runtests.php: -------------------------------------------------------------------------------- 1 | 8 | * @author Nick Pope 9 | * @copyright Copyright © 2010, Mike Cochrane, Nick Pope 10 | * @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0 11 | */ 12 | 13 | require_once dirname(__FILE__).'/bootstrap.php'; 14 | 15 | $browser = (PHP_SAPI != 'cli'); 16 | 17 | function pretty_format($a) { 18 | return preg_replace(array( 19 | "/\n/", '/ +\[/', '/ +\)/', '/Array +\(/', '/(? 33 | 34 | 35 | 36 | Twitter Text (PHP Edition) Library » Conformance 37 | 47 | 48 | 49 | EOHTML; 50 | 51 | echo ($browser ? '

' : "\033[1m"); 52 | echo 'Twitter Text (PHP Edition) Library » Conformance'; 53 | echo ($browser ? '

' : "\033[0m".PHP_EOL.'==============================================='.PHP_EOL); 54 | echo PHP_EOL; 55 | 56 | echo ($browser ? '

' : "\033[1m"); 57 | echo 'Extraction Conformance'; 58 | echo ($browser ? '

' : "\033[0m".PHP_EOL.'----------------------'.PHP_EOL); 59 | echo PHP_EOL; 60 | 61 | # Load the test data. 62 | $data = Spyc::YAMLLoad($DATA.'/extract.yml'); 63 | 64 | # Define the functions to be tested. 65 | $functions = array( 66 | 'hashtags' => 'extractHashtags', 67 | 'urls' => 'extractURLs', 68 | 'mentions' => 'extractMentionedUsernames', 69 | 'replies' => 'extractRepliedUsernames', 70 | 'hashtags_with_indices' => 'extractHashtagsWithIndices', 71 | 'urls_with_indices' => 'extractURLsWithIndices', 72 | 'mentions_with_indices' => 'extractMentionedUsernamesWithIndices', 73 | ); 74 | 75 | # Perform testing. 76 | foreach ($data['tests'] as $group => $tests) { 77 | 78 | echo ($browser ? '

' : "\033[1m"); 79 | echo 'Test Group - '.ucfirst(str_replace('_', ' ', $group)); 80 | echo ($browser ? '

' : ":\033[0m".PHP_EOL); 81 | echo PHP_EOL; 82 | 83 | if (!array_key_exists($group, $functions)) { 84 | echo ($browser ? '

' : " \033[1;35m"); 85 | echo 'Skipping Test...'; 86 | echo ($browser ? '

' : "\033[0m".PHP_EOL); 87 | echo PHP_EOL; 88 | continue; 89 | } 90 | $function = $functions[$group]; 91 | $pass_group = 0; 92 | $fail_group = 0; 93 | if ($browser) echo '
    ', PHP_EOL; 94 | foreach ($tests as $test) { 95 | echo ($browser ? '
  • ' : ' - '); 96 | echo $test['description'], ' ... '; 97 | $extracted = Twitter_Extractor::create($test['text'])->$function(); 98 | if ($test['expected'] == $extracted) { 99 | $pass_group++; 100 | echo ($browser ? 'PASS' : "\033[1;32mPASS\033[0m"); 101 | } else { 102 | $fail_group++; 103 | echo ($browser ? 'FAIL' : "\033[1;31mFAIL\033[0m"); 104 | if ($browser) { 105 | echo '
    ';
    106 |         echo 'Original: '.htmlspecialchars($test['text'], ENT_QUOTES, 'UTF-8', false), PHP_EOL;
    107 |         echo 'Expected: '.pretty_format($test['expected']), PHP_EOL;
    108 |         echo 'Actual:   '.pretty_format($extracted);
    109 |         echo '
    '; 110 | } else { 111 | echo PHP_EOL, PHP_EOL; 112 | echo ' Original: '.$test['text'], PHP_EOL; 113 | echo ' Expected: '.pretty_format($test['expected']), PHP_EOL; 114 | echo ' Actual: '.pretty_format($extracted), PHP_EOL; 115 | } 116 | } 117 | if ($browser) echo '
  • '; 118 | echo PHP_EOL; 119 | } 120 | if ($browser) echo '
'; 121 | echo PHP_EOL; 122 | $pass_total += $pass_group; 123 | $fail_total += $fail_group; 124 | echo ($browser ? '

' : " \033[1;33m"); 125 | printf('Group Results: %d passes, %d failures', $pass_group, $fail_group); 126 | echo ($browser ? '

' : "\033[0m".PHP_EOL); 127 | echo PHP_EOL; 128 | } 129 | 130 | echo ($browser ? '

' : "\033[1m"); 131 | echo 'Autolink Conformance'; 132 | echo ($browser ? '

' : "\033[0m".PHP_EOL.'--------------------'.PHP_EOL); 133 | echo PHP_EOL; 134 | 135 | # Load the test data. 136 | $data = Spyc::YAMLLoad($DATA.'/autolink.yml'); 137 | 138 | # Define the functions to be tested. 139 | $functions = array( 140 | 'usernames' => 'addLinksToUsernamesAndLists', 141 | 'lists' => 'addLinksToUsernamesAndLists', 142 | 'hashtags' => 'addLinksToHashtags', 143 | 'urls' => 'addLinksToURLs', 144 | 'all' => 'addLinks', 145 | ); 146 | 147 | # Perform testing. 148 | foreach ($data['tests'] as $group => $tests) { 149 | 150 | echo ($browser ? '

' : "\033[1m"); 151 | echo 'Test Group - '.ucfirst(str_replace('_', ' ', $group)); 152 | echo ($browser ? '

' : ":\033[0m".PHP_EOL); 153 | echo PHP_EOL; 154 | 155 | if (!array_key_exists($group, $functions)) { 156 | echo ($browser ? '

' : " \033[1;35m"); 157 | echo 'Skipping Test...'; 158 | echo ($browser ? '

' : "\033[0m".PHP_EOL); 159 | echo PHP_EOL; 160 | continue; 161 | } 162 | $function = $functions[$group]; 163 | $pass_group = 0; 164 | $fail_group = 0; 165 | if ($browser) echo '
    ', PHP_EOL; 166 | foreach ($tests as $test) { 167 | echo ($browser ? '
  • ' : ' - '); 168 | echo $test['description'], ' ... '; 169 | $linked = Twitter_Autolink::create($test['text'], false) 170 | ->setNoFollow(false)->setExternal(false)->setTarget('') 171 | ->setUsernameClass('tweet-url username') 172 | ->setListClass('tweet-url list-slug') 173 | ->setHashtagClass('tweet-url hashtag') 174 | ->setURLClass('') 175 | ->$function(); 176 | # XXX: Need to re-order for hashtag as it is written out differently... 177 | # We use the same wrapping function for adding links for all methods. 178 | if ($group == 'hashtags') { 179 | $linked = preg_replace(array( 180 | '!([^<]*)!', 181 | '!title="#([^"]+)"!' 182 | ), array( 183 | '$3', 184 | 'title="#$1"' 185 | ), $linked); 186 | } 187 | if ($test['expected'] == $linked) { 188 | $pass_group++; 189 | echo ($browser ? 'PASS' : "\033[1;32mPASS\033[0m"); 190 | } else { 191 | $fail_group++; 192 | echo ($browser ? 'FAIL' : "\033[1;31mFAIL\033[0m"); 193 | if ($browser) { 194 | echo '
    ';
    195 |         echo 'Original: '.htmlspecialchars($test['text'], ENT_QUOTES, 'UTF-8', false), PHP_EOL;
    196 |         echo 'Expected: '.pretty_format($test['expected']), PHP_EOL;
    197 |         echo 'Actual:   '.pretty_format($linked);
    198 |         echo '
    '; 199 | } else { 200 | echo PHP_EOL, PHP_EOL; 201 | echo ' Original: '.$test['text'], PHP_EOL; 202 | echo ' Expected: '.pretty_format($test['expected']), PHP_EOL; 203 | echo ' Actual: '.pretty_format($linked), PHP_EOL; 204 | } 205 | } 206 | if ($browser) echo '
  • '; 207 | echo PHP_EOL; 208 | } 209 | if ($browser) echo '
'; 210 | echo PHP_EOL; 211 | $pass_total += $pass_group; 212 | $fail_total += $fail_group; 213 | echo ($browser ? '

' : " \033[1;33m"); 214 | printf('Group Results: %d passes, %d failures', $pass_group, $fail_group); 215 | echo ($browser ? '

' : "\033[0m".PHP_EOL); 216 | echo PHP_EOL; 217 | } 218 | 219 | echo ($browser ? '

' : "\033[1m"); 220 | echo 'Hit Highlighter Conformance'; 221 | echo ($browser ? '

' : "\033[0m".PHP_EOL.'---------------------------'.PHP_EOL); 222 | echo PHP_EOL; 223 | 224 | # Load the test data. 225 | $data = Spyc::YAMLLoad($DATA.'/hit_highlighting.yml'); 226 | 227 | # Define the functions to be tested. 228 | $functions = array( 229 | 'plain_text' => 'addHitHighlighting', 230 | 'with_links' => 'addHitHighlighting', 231 | ); 232 | 233 | # Perform testing. 234 | foreach ($data['tests'] as $group => $tests) { 235 | 236 | echo ($browser ? '

' : "\033[1m"); 237 | echo 'Test Group - '.ucfirst(str_replace('_', ' ', $group)); 238 | echo ($browser ? '

' : ":\033[0m".PHP_EOL); 239 | echo PHP_EOL; 240 | 241 | if (!array_key_exists($group, $functions)) { 242 | echo ($browser ? '

' : " \033[1;35m"); 243 | echo 'Skipping Test...'; 244 | echo ($browser ? '

' : "\033[0m".PHP_EOL); 245 | echo PHP_EOL; 246 | continue; 247 | } 248 | $function = $functions[$group]; 249 | $pass_group = 0; 250 | $fail_group = 0; 251 | if ($browser) echo '
    ', PHP_EOL; 252 | foreach ($tests as $test) { 253 | echo ($browser ? '
  • ' : ' - '); 254 | echo $test['description'], ' ... '; 255 | $highlighted = Twitter_HitHighlighter::create($test['text'])->$function($test['hits']); 256 | if ($test['expected'] == $highlighted) { 257 | $pass_group++; 258 | echo ($browser ? 'PASS' : "\033[1;32mPASS\033[0m"); 259 | } else { 260 | $fail_group++; 261 | echo ($browser ? 'FAIL' : "\033[1;31mFAIL\033[0m"); 262 | if ($browser) { 263 | echo '
    ';
    264 |         echo 'Original: '.htmlspecialchars($test['text'], ENT_QUOTES, 'UTF-8', false), PHP_EOL;
    265 |         echo 'Expected: '.pretty_format($test['expected']), PHP_EOL;
    266 |         echo 'Actual:   '.pretty_format($highlighted);
    267 |         echo '
    '; 268 | } else { 269 | echo PHP_EOL, PHP_EOL; 270 | echo ' Original: '.$test['text'], PHP_EOL; 271 | echo ' Expected: '.pretty_format($test['expected']), PHP_EOL; 272 | echo ' Actual: '.pretty_format($highlighted), PHP_EOL; 273 | } 274 | } 275 | if ($browser) echo '
  • '; 276 | echo PHP_EOL; 277 | } 278 | if ($browser) echo '
'; 279 | echo PHP_EOL; 280 | $pass_total += $pass_group; 281 | $fail_total += $fail_group; 282 | echo ($browser ? '

' : " \033[1;33m"); 283 | printf('Group Results: %d passes, %d failures', $pass_group, $fail_group); 284 | echo ($browser ? '

' : "\033[0m".PHP_EOL); 285 | echo PHP_EOL; 286 | } 287 | 288 | echo ($browser ? '

' : " \033[1;36m"); 289 | printf('Total Results: %d passes, %d failures', $pass_total, $fail_total); 290 | echo ($browser ? '

' : "\033[0m".PHP_EOL); 291 | echo PHP_EOL; 292 | 293 | if ($browser) echo << 295 | 296 | EOHTML; 297 | --------------------------------------------------------------------------------