├── README.md ├── lib ├── Scripto.php └── Scripto │ ├── Adapter │ ├── Example.php │ ├── Exception.php │ └── Interface.php │ ├── Document.php │ ├── Exception.php │ └── Service │ ├── Exception.php │ └── MediaWiki.php └── tests ├── README.markdown ├── adapter_test.php ├── all_tests.php ├── config.php.changeme ├── document_test.php ├── mediawiki_test.php └── scripto_test.php /README.md: -------------------------------------------------------------------------------- 1 | Scripto 2 | ============= 3 | 4 | © 2010-2012, Center for History and New Media 5 | License: [GNU GPL v3](http://www.gnu.org/licenses/gpl-3.0.txt) 6 | 7 | Scripto is an open source documentary transcription tool written in PHP. It 8 | features a lightweight library that interfaces MediaWiki and potentially any 9 | content management system that serves transcribable resources, including text, 10 | still image, moving image, and audio files. 11 | 12 | Scripto is not a content management system. Scripto is not a graphical user 13 | interface. Scripto is a software library powered by wiki technology that 14 | developers can use to integrate a custom transcription GUI into an existing CMS. 15 | You provide the CMS and GUI; Scripto provides the engine for crowdsourcing the 16 | transcription of your content. 17 | 18 | Why MediaWiki? 19 | ------------- 20 | 21 | MediaWiki is a good choice for the transcription database for several reasons: 22 | 23 | * It is the most popular wiki application and has a sizable and active developer community; 24 | * It offers helpful features, such as talk pages, version history, and user administration; 25 | * [Wiki markup](http://en.wikipedia.org/wiki/Help:Wiki_markup) is easy to learn; 26 | * It comes with a powerful, fully-featured [API](http://www.mediawiki.org/wiki/API). 
27 | 28 | Requirements 29 | ------------- 30 | 31 | * PHP 5.2.4+ 32 | * Zend Framework 1.10+ 33 | * MediaWiki 1.15.4+ 34 | * Custom adapter interface to (and possibly an API for) the external CMS 35 | 36 | Installation 37 | ------------- 38 | 39 | * Download and install [MediaWiki](http://www.mediawiki.org/wiki/MediaWiki); 40 | * Download the [Zend Framework](http://framework.zend.com/) library; 41 | * Download the [Scripto](https://github.com/chnm/Scripto) library, set the 42 | configuration, and use the API to build your documentary transcription 43 | application. 44 | 45 | Suggested Configuration and Setup 46 | ------------- 47 | 48 | Here's a basic configuration: 49 | 50 | ```php 51 | MEDIAWIKI_API_URL)); 72 | 73 | // Set the current document object. 74 | $doc = $scripto->getDocument($_REQUEST['documentId']); 75 | 76 | // Set the current document page. 77 | $doc->setPage($_REQUEST['pageId']); 78 | 79 | // Render the transcription or talk page using the $scripto and $doc APIs. 80 | ``` 81 | 82 | See the various implementations of Scripto for more suggestions on configuration, 83 | setup, layout, and styles. 84 | 85 | * [Omeka plugin](https://github.com/omeka/plugin-Scripto) 86 | * [WordPress plugin](https://github.com/chnm/scripto-wordpress-plugin) 87 | * [Drupal module](https://github.com/chnm/scripto-drupal-module) 88 | 89 | Advanced Usage 90 | ------------- 91 | 92 | ### Record Client IP Address 93 | 94 | Scripto does not record a client's IP address by default. All modifications to 95 | pages will be set to the IP address of the server running Scripto. To record a 96 | client's IP address, you'll need to add the following code to MediaWiki's 97 | LocalSettings.php: 98 | 99 | ``` 100 | $wgSquidServersNoPurge = array('127.0.0.1'); 101 | ``` 102 | 103 | Where '127.0.0.1' is the IP address of the server running Scripto. 
104 | 105 | ### Base64 Decoding 106 | 107 | Scripto Base64 encodes document and page numbers to prevent incompatible 108 | MediaWiki title characters. Because of this, corresponding page titles in 109 | MediaWiki will be unusually named. You may place the following code in 110 | MediaWiki's LocalSettings.php to make page titles human readable: 111 | 112 | ``` 113 | // Decode the MediaWiki title from Base64. 114 | // http://www.mediawiki.org/wiki/Manual:Hooks/BeforePageDisplay 115 | $wgHooks['BeforePageDisplay'][] = 'fnScriptoDecodePageTitle'; 116 | function fnScriptoDecodePageTitle(&$out, &$sk, $prefix = '.', $delimiter = '.') 117 | { 118 | $title = strtr($out->getPageTitle(), '-_', '+/'); 119 | if ($prefix != $title[0]) { 120 | return false; 121 | } 122 | $title = array_map('base64_decode', explode($delimiter, ltrim($title, $prefix))); 123 | $title = 'Document ' . $title[0] . '; Page ' . $title[1]; 124 | $out->setPageTitle($title); 125 | return false; 126 | } 127 | ``` 128 | 129 | Changelog 130 | ------------- 131 | 132 | * 1.1 133 | * Add option to retain specified HTML attributes. 134 | * 1.1.1 135 | * Fix watch and unwatch pages. 136 | * 1.1.2 137 | * The /e modifier is deprecated in PHP 5.5.0 and removed in 7.0.0. Use 138 | preg_replace_callback() instead. 139 | -------------------------------------------------------------------------------- /lib/Scripto.php: -------------------------------------------------------------------------------- 1 | 58 | *
  • $mediawiki['api_url']: required; the MediaWiki API URL
  • 59 | *
  • $mediawiki['pass_cookies']: optional; pass cookies to the web 60 | * browser via API client
  • $mediawiki['cookie_prefix']: optional; set the cookie prefix 61 | *
  • 62 | * 63 | */ 64 | public function __construct(Scripto_Adapter_Interface $adapter, $mediawiki) 65 | { 66 | // Set the adapter. 67 | $this->_adapter = $adapter; 68 | 69 | // Set the MediaWiki service. 70 | if ($mediawiki instanceof Scripto_Service_MediaWiki) { 71 | $this->_mediawiki = $mediawiki; 72 | } else if (is_array($mediawiki) && array_key_exists('api_url', $mediawiki)) { 73 | if (!isset($mediawiki['pass_cookies'])) { 74 | $mediawiki['pass_cookies'] = true; 75 | } 76 | if (!isset($mediawiki['cookie_prefix'])) { 77 | $mediawiki['cookie_prefix'] = null; 78 | } 79 | 80 | $this->_mediawiki = new Scripto_Service_MediaWiki($mediawiki['api_url'], 81 | (bool) $mediawiki['pass_cookies'], 82 | $mediawiki['cookie_prefix']); 83 | } else { 84 | throw new Scripto_Exception('The provided mediawiki parameter is invalid.'); 85 | } 86 | 87 | // Set the user information. 88 | $this->setUserInfo(); 89 | } 90 | 91 | /** 92 | * Provide a transparent interface for calling custom adapter methods. 93 | * 94 | * This makes it possible to call custom adapter methods (those not required 95 | * by Scripto_Adapter_Interface) directly from the Scripto object. 96 | * 97 | * @see Scripto_Adapter_Interface 98 | * @param string $name 99 | * @param array $args 100 | * @return mixed 101 | */ 102 | public function __call($name, $args) 103 | { 104 | if (!method_exists($this->_adapter, $name)) { 105 | require_once 'Scripto/Adapter/Exception.php'; 106 | throw new Scripto_Adapter_Exception('The provided adapter method "' . $name . '" does not exist.'); 107 | } 108 | return call_user_func_array(array($this->_adapter, $name), $args); 109 | } 110 | 111 | /** 112 | * Check whether the specified document exists in the external system. 113 | * 114 | * @uses Scripto_Adapter_Interface::documentExists() 115 | * @param string|int $id The unique document identifier. 116 | * @return bool 117 | */ 118 | public function documentExists($id) 119 | { 120 | // Query the adapter whether the document exists. 
121 | if ($this->_adapter->documentExists($id)) { 122 | return true; 123 | } 124 | return false; 125 | } 126 | 127 | /** 128 | * Get a Scripto_Document object. 129 | * 130 | * @see Scripto_Document 131 | * @param string|int $id The unique document identifier. 132 | * @return Scripto_Document 133 | */ 134 | public function getDocument($id) 135 | { 136 | return new Scripto_Document($id, $this->_adapter, $this->_mediawiki); 137 | } 138 | 139 | /** 140 | * Login via the MediaWiki service. 141 | * 142 | * It is possible to restrict account creation in MediaWiki. 143 | * @link http://www.mediawiki.org/wiki/Manual:Preventing_access#Restrict_account_creation 144 | * 145 | * @uses Scripto_Service_MediaWiki::login() 146 | * @param string $username The MediaWiki user's username. 147 | * @param string $password The MediaWiki user's password. 148 | */ 149 | public function login($username, $password) 150 | { 151 | $this->_mediawiki->login($username, $password); 152 | $this->setUserInfo(); 153 | } 154 | 155 | /** 156 | * Logout via the MediaWiki service. 157 | * 158 | * @uses Scripto_Service_MediaWiki::logout() 159 | */ 160 | public function logout() 161 | { 162 | $this->_mediawiki->logout(); 163 | $this->setUserInfo(); 164 | } 165 | 166 | /** 167 | * Determine if the current user is logged in. 168 | * 169 | * @return bool 170 | */ 171 | public function isLoggedIn() 172 | { 173 | // Check against the user ID. An anonymous user has an ID of 0. 174 | return (bool) $this->_userInfo['query']['userinfo']['id']; 175 | } 176 | 177 | /** 178 | * Determine if the current user can export transcriptions to the external 179 | * system. 180 | * 181 | * @param array $groups The MediaWiki groups allowed to export. 
182 | * @return bool 183 | */ 184 | public function canExport(array $groups = array('sysop', 'bureaucrat')) 185 | { 186 | foreach ($groups as $group) { 187 | if (in_array($group, $this->_userInfo['query']['userinfo']['groups'])) { 188 | return true; 189 | } 190 | } 191 | return false; 192 | } 193 | 194 | /** 195 | * Determine if the current user can protect MediaWiki pages. 196 | * 197 | * @return bool 198 | */ 199 | public function canProtect() 200 | { 201 | // Users with protect rights can protect pages. 202 | if (in_array('protect', $this->_userInfo['query']['userinfo']['rights'])) { 203 | return true; 204 | } 205 | return false; 206 | } 207 | 208 | /** 209 | * Set the current user's information. 210 | * 211 | * Under normal circumstances calling this method directly is unnecessary, 212 | * but is helpful when authenticating after construction and when a login is 213 | * not called, like when hijacking cookies for command line authentication. 214 | * 215 | * @uses Scripto_Service_MediaWiki::getUserInfo() 216 | */ 217 | public function setUserInfo() 218 | { 219 | $this->_userInfo = $this->_mediawiki->getUserInfo('groups|rights'); 220 | } 221 | 222 | /** 223 | * Return the name of the current user. 224 | * 225 | * @return string 226 | */ 227 | public function getUserName() 228 | { 229 | return $this->_userInfo['query']['userinfo']['name']; 230 | } 231 | 232 | /** 233 | * Get the current user's most recently contributed document pages. 234 | * 235 | * @uses Scripto_Service_MediaWiki::getUserContributions() 236 | * @param int $limit The number of document pages to return. 
237 | * @return array 238 | */ 239 | public function getUserDocumentPages($limit = 10) 240 | { 241 | $limit = (int) $limit; 242 | $userDocumentPages = array(); 243 | $documentTitles = array(); 244 | $start = null; 245 | 246 | // Namespaces to get: ns_index => ns_name 247 | // See http://www.mediawiki.org/wiki/Manual:Namespace#Built-in_namespaces 248 | $namespaces = array('0' => 'Main', '1' => 'Talk'); 249 | 250 | do { 251 | $response = $this->_mediawiki->getUserContributions( 252 | $this->_userInfo['query']['userinfo']['name'], 253 | array('ucstart' => $start, 254 | 'ucnamespace' => implode('|', array_keys($namespaces)), 255 | 'uclimit' => 100) 256 | ); 257 | foreach ($response['query']['usercontribs'] as $value) { 258 | 259 | // Filter out duplicate pages. 260 | if (array_key_exists($value['pageid'], $userDocumentPages)) { 261 | continue; 262 | } 263 | 264 | // Extract the title, removing the namespace if any. 265 | $title = preg_replace('/^(.+:)?(.+)$/', '$2', $value['title']); 266 | 267 | // Preempt further processing on contributions with an invalid 268 | // prefix. 269 | if (Scripto_Document::BASE_TITLE_PREFIX != $title[0]) { 270 | continue; 271 | } 272 | 273 | // Set the document ID and page ID. 274 | $documentIds = Scripto_Document::decodeBaseTitle($title); 275 | 276 | // Filter out contributions that are not valid document pages. 277 | if (!$this->_adapter->documentPageExists($documentIds[0], $documentIds[1])) { 278 | continue; 279 | } 280 | 281 | // Set the document title and document page name. Reduce calls 282 | // to the adapter by caching each document title, and checking 283 | // if they exist. 
284 | if (array_key_exists($documentIds[0], $documentTitles)) { 285 | $documentTitle = $documentTitles[$documentIds[0]]; 286 | } else { 287 | $documentTitle = $this->_adapter->getDocumentTitle($documentIds[0]); 288 | $documentTitles[$documentIds[0]] = $documentTitle; 289 | } 290 | 291 | // Duplicate pages have already been filtered out, so there is 292 | // no need to cache document page names. 293 | $documentPageName = $this->_adapter->getDocumentPageName($documentIds[0], $documentIds[1]); 294 | 295 | // Build the user document pages, newest properties first. 296 | $userDocumentPages[$value['pageid']] = array( 297 | 'revision_id' => $value['revid'], 298 | 'namespace_index' => $value['ns'], 299 | 'namespace_name' => $namespaces[$value['ns']], 300 | 'mediawiki_title' => $value['title'], 301 | 'timestamp' => $value['timestamp'], 302 | 'comment' => $value['comment'], 303 | 'size' => $value['size'], 304 | 'document_id' => $documentIds[0], 305 | 'document_page_id' => $documentIds[1], 306 | 'document_title' => $documentTitle, 307 | 'document_page_name' => $documentPageName, 308 | ); 309 | 310 | // Break out of the loops if limit has been reached. 311 | if ($limit == count($userDocumentPages)) { 312 | break 2; 313 | } 314 | } 315 | 316 | // Set the query continue, if any. 317 | if (isset($response['query-continue'])) { 318 | $start = $response['query-continue']['usercontribs']['ucstart']; 319 | } else { 320 | $start = null; 321 | } 322 | 323 | } while ($start); 324 | 325 | return $userDocumentPages; 326 | } 327 | 328 | /** 329 | * Get the recent changes. 330 | * 331 | * @link http://www.mediawiki.org/wiki/Manual:Namespace#Built-in_namespaces 332 | * @uses Scripto_Service_MediaWiki::getRecentChanges() 333 | * @param int $limit The number of recent changes to return. 
334 | * @return array 335 | */ 336 | public function getRecentChanges($limit = 10) 337 | { 338 | $start = null; 339 | $recentChanges = array(); 340 | $documentTitles = array(); 341 | $documentPageNames = array(); 342 | 343 | // Namespaces to get: ns_index => ns_name 344 | // See http://www.mediawiki.org/wiki/Manual:Namespace#Built-in_namespaces 345 | $namespaces = array('0' => 'Main', '1' => 'Talk'); 346 | 347 | do { 348 | $response = $this->_mediawiki->getRecentChanges( 349 | array('rcprop' => 'user|comment|timestamp|title|ids|sizes|loginfo|flags', 350 | 'rclimit' => '100', 351 | 'rcnamespace' => implode('|', array_keys($namespaces)), 352 | 'rcstart' => $start) 353 | ); 354 | 355 | foreach ($response['query']['recentchanges'] as $value) { 356 | 357 | // Extract the title, removing the namespace if any. 358 | $title = preg_replace('/^(.+:)?(.+)$/', '$2', $value['title']); 359 | 360 | // Preempt further processing on contributions with an invalid 361 | // prefix. 362 | if (Scripto_Document::BASE_TITLE_PREFIX != $title[0]) { 363 | continue; 364 | } 365 | 366 | // Set the document ID and page ID. 367 | $documentIds = Scripto_Document::decodeBaseTitle($title); 368 | 369 | // Set the document title and document page name. Reduce calls 370 | // to the adapter by caching each document title and page name, 371 | // and checking if they exist. 372 | $cachedDocument = array_key_exists($documentIds[0], $documentTitles); 373 | $cachedDocumentPage = array_key_exists($documentIds[1], $documentPageNames); 374 | 375 | // The document title and page name have been cached. 376 | if ($cachedDocument && $cachedDocumentPage) { 377 | $documentTitle = $documentTitles[$documentIds[0]]; 378 | $documentPageName = $documentPageNames[$documentIds[1]]; 379 | 380 | // The document title has been cached, but not the page name. 381 | } else if ($cachedDocument && !$cachedDocumentPage) { 382 | // Filter out invalid document pages. 
383 | if (!$this->_adapter->documentPageExists($documentIds[0], $documentIds[1])) { 384 | continue; 385 | } 386 | $documentTitle = $documentTitles[$documentIds[0]]; 387 | $documentPageName = $this->_adapter->getDocumentPageName($documentIds[0], $documentIds[1]); 388 | $documentPageNames[$documentIds[1]] = $documentPageName; 389 | 390 | // The document title and page name have not been cached. 391 | } else { 392 | // Filter out invalid document pages. 393 | if (!$this->_adapter->documentPageExists($documentIds[0], $documentIds[1])) { 394 | continue; 395 | } 396 | $documentTitle = $this->_adapter->getDocumentTitle($documentIds[0]); 397 | $documentTitles[$documentIds[0]] = $documentTitle; 398 | $documentPageName = $this->_adapter->getDocumentPageName($documentIds[0], $documentIds[1]); 399 | $documentPageNames[$documentIds[1]] = $documentPageName; 400 | } 401 | 402 | $logAction = isset($value['logaction']) ? $value['logaction']: null; 403 | $action = self::getChangeAction(array('comment' => $value['comment'], 404 | 'log_action' => $logAction)); 405 | 406 | $recentChanges[] = array( 407 | 'type' => $value['type'], 408 | 'namespace_index' => $value['ns'], 409 | 'namespace_name' => $namespaces[$value['ns']], 410 | 'mediawiki_title' => $value['title'], 411 | 'rcid' => $value['rcid'], 412 | 'page_id' => $value['pageid'], 413 | 'revision_id' => $value['revid'], 414 | 'old_revision_id' => $value['old_revid'], 415 | 'user' => $value['user'], 416 | 'old_length' => $value['oldlen'], 417 | 'new_length' => $value['newlen'], 418 | 'timestamp' => $value['timestamp'], 419 | 'comment' => $value['comment'], 420 | 'action' => $action, 421 | 'log_id' => isset($value['logid']) ? $value['logid']: null, 422 | 'log_type' => isset($value['logtype']) ? $value['logtype']: null, 423 | 'log_action' => $logAction, 424 | 'new' => isset($value['new']) ? true: false, 425 | 'minor' => isset($value['minor']) ? 
true: false, 426 | 'document_id' => $documentIds[0], 427 | 'document_page_id' => $documentIds[1], 428 | 'document_title' => $documentTitle, 429 | 'document_page_name' => $documentPageName, 430 | ); 431 | 432 | // Break out of the loops if limit has been reached. 433 | if ($limit == count($recentChanges)) { 434 | break 2; 435 | } 436 | } 437 | 438 | // Set the query continue, if any. 439 | if (isset($response['query-continue'])) { 440 | $start = $response['query-continue']['recentchanges']['rcstart']; 441 | } else { 442 | $start = null; 443 | } 444 | 445 | } while ($start); 446 | 447 | return $recentChanges; 448 | } 449 | 450 | /** 451 | * Get the current user's watchlist. 452 | * 453 | * @link http://www.mediawiki.org/wiki/API:Watchlist 454 | * @uses Scripto_Service_MediaWiki::getWatchlist() 455 | * @param int $limit The number of recent changes to return. 456 | * @return array 457 | */ 458 | public function getWatchlist($limit = 10) 459 | { 460 | $start = null; 461 | $watchlist = array(); 462 | $documentTitles = array(); 463 | $documentPageNames = array(); 464 | 465 | // Namespaces to get: ns_index => ns_name 466 | // See http://www.mediawiki.org/wiki/Manual:Namespace#Built-in_namespaces 467 | $namespaces = array('0' => 'Main', '1' => 'Talk'); 468 | 469 | do { 470 | $response = $this->_mediawiki->getWatchlist( 471 | array('wlprop' => 'user|comment|timestamp|title|ids|sizes|flags', 472 | 'wllimit' => '100', 473 | 'wlallrev' => true, 474 | 'wlnamespace' => implode('|', array_keys($namespaces)), 475 | 'wlstart' => $start) 476 | ); 477 | 478 | foreach ($response['query']['watchlist'] as $value) { 479 | 480 | // Extract the title, removing the namespace if any. 481 | $title = preg_replace('/^(.+:)?(.+)$/', '$2', $value['title']); 482 | 483 | // Preempt further processing on contributions with an invalid 484 | // prefix. 485 | if (Scripto_Document::BASE_TITLE_PREFIX != $title[0]) { 486 | continue; 487 | } 488 | 489 | // Set the document ID and page ID. 
490 | $documentIds = Scripto_Document::decodeBaseTitle($title); 491 | 492 | // Set the document title and document page name. Reduce calls 493 | // to the adapter by caching each document title and page name, 494 | // and checking if they exist. 495 | $cachedDocument = array_key_exists($documentIds[0], $documentTitles); 496 | $cachedDocumentPage = array_key_exists($documentIds[1], $documentPageNames); 497 | 498 | // The document title and page name have been cached. 499 | if ($cachedDocument && $cachedDocumentPage) { 500 | $documentTitle = $documentTitles[$documentIds[0]]; 501 | $documentPageName = $documentPageNames[$documentIds[1]]; 502 | 503 | // The document title has been cached, but not the page name. 504 | } else if ($cachedDocument && !$cachedDocumentPage) { 505 | // Filter out invalid document pages. 506 | if (!$this->_adapter->documentPageExists($documentIds[0], $documentIds[1])) { 507 | continue; 508 | } 509 | $documentTitle = $documentTitles[$documentIds[0]]; 510 | $documentPageName = $this->_adapter->getDocumentPageName($documentIds[0], $documentIds[1]); 511 | $documentPageNames[$documentIds[1]] = $documentPageName; 512 | 513 | // The document title and page name have not been cached. 514 | } else { 515 | // Filter out invalid document pages. 
516 | if (!$this->_adapter->documentPageExists($documentIds[0], $documentIds[1])) { 517 | continue; 518 | } 519 | $documentTitle = $this->_adapter->getDocumentTitle($documentIds[0]); 520 | $documentTitles[$documentIds[0]] = $documentTitle; 521 | $documentPageName = $this->_adapter->getDocumentPageName($documentIds[0], $documentIds[1]); 522 | $documentPageNames[$documentIds[1]] = $documentPageName; 523 | } 524 | 525 | $action = self::getChangeAction(array('comment' => $value['comment'], 526 | 'revision_id' => $value['revid'])); 527 | 528 | $watchlist[] = array( 529 | 'namespace_index' => $value['ns'], 530 | 'namespace_name' => $namespaces[$value['ns']], 531 | 'mediawiki_title' => $value['title'], 532 | 'page_id' => $value['pageid'], 533 | 'revision_id' => $value['revid'], 534 | 'user' => $value['user'], 535 | 'old_length' => $value['oldlen'], 536 | 'new_length' => $value['newlen'], 537 | 'timestamp' => $value['timestamp'], 538 | 'comment' => $value['comment'], 539 | 'action' => $action, 540 | 'new' => isset($value['new']) ? true: false, 541 | 'minor' => isset($value['minor']) ? true: false, 542 | 'anonymous' => isset($value['anon']) ? true: false, 543 | 'document_id' => $documentIds[0], 544 | 'document_page_id' => $documentIds[1], 545 | 'document_title' => $documentTitle, 546 | 'document_page_name' => $documentPageName, 547 | ); 548 | 549 | // Break out of the loops if limit has been reached. 550 | if ($limit == count($watchlist)) { 551 | break 2; 552 | } 553 | } 554 | 555 | // Set the query continue, if any. 556 | if (isset($response['query-continue'])) { 557 | $start = $response['query-continue']['watchlist']['wlstart']; 558 | } else { 559 | $start = null; 560 | } 561 | 562 | } while ($start); 563 | 564 | return $watchlist; 565 | } 566 | 567 | /** 568 | * Get all documents from MediaWiki that have at least one page with text. 
569 | * 570 | * @uses Scripto_Service_MediaWiki::getAllPages() 571 | * @return array An array following this format: 572 | * 573 | * array( 574 | * {document ID} => array( 575 | * ['mediawiki_titles'] => array( 576 | * {page ID} => {mediawiki title}, 577 | * {...} 578 | * ), 579 | * ['document_title'] => {document title} 580 | * ), 581 | * {...} 582 | * ) 583 | * 584 | */ 585 | public function getAllDocuments() 586 | { 587 | $from = null; 588 | $documentTitles = array(); 589 | $allDocuments = array(); 590 | do { 591 | $response = $this->_mediawiki->getAllPages( 592 | array('aplimit' => 500, 593 | 'apminsize' => 1, 594 | 'apprefix' => Scripto_Document::BASE_TITLE_PREFIX, 595 | 'apfrom' => $from) 596 | ); 597 | 598 | foreach ($response['query']['allpages'] as $value) { 599 | 600 | // Set the document ID and page ID. 601 | $documentIds = Scripto_Document::decodeBaseTitle($value['title']); 602 | 603 | // Set the page and continue if the document was already set. 604 | if (array_key_exists($documentIds[0], $documentTitles)) { 605 | $allDocuments[$documentIds[0]]['mediawiki_titles'][$documentIds[1]] = $value['title']; 606 | continue; 607 | 608 | // Set the document. Before getting the title, filter out pages 609 | // that are not valid documents. 610 | } else { 611 | if (!$this->_adapter->documentExists($documentIds[0])) { 612 | continue; 613 | } 614 | $documentTitle = $this->_adapter->getDocumentTitle($documentIds[0]); 615 | $documentTitles[$documentIds[0]] = $documentTitle; 616 | } 617 | 618 | $allDocuments[$documentIds[0]] = array( 619 | 'mediawiki_titles' => array($documentIds[1] => $value['title']), 620 | 'document_title' => $documentTitle, 621 | ); 622 | } 623 | 624 | // Set the query continue, if any. 
625 | if (isset($response['query-continue'])) { 626 | $from = $response['query-continue']['allpages']['apfrom']; 627 | } else { 628 | $from = null; 629 | } 630 | 631 | } while ($from); 632 | 633 | return $allDocuments; 634 | } 635 | 636 | /** 637 | * Get the difference between two page revisions. 638 | * 639 | * @uses Scripto_Service_MediaWiki::getRevisionDiff() 640 | * @param int $fromRevisionId The revision ID from which to diff. 641 | * @param int|string $toRevisionId The revision to which to diff. Use the 642 | * revision ID, "prev", "next", or "cur". 643 | * @return string An HTML table without the wrapping tag containing 644 | * difference markup, pre-formatted by MediaWiki. It is the responsibility 645 | * of implementers to wrap the result with table tags. 646 | */ 647 | public function getRevisionDiff($fromRevisionId, $toRevisionId = 'prev') 648 | { 649 | return $this->_mediawiki->getRevisionDiff($fromRevisionId, $toRevisionId); 650 | } 651 | 652 | /** 653 | * Get properties of the specified page revision. 654 | * 655 | * @uses Scripto_Service_MediaWiki::getRevisions() 656 | * @param int $revisionId The ID of the rpage evision. 657 | * @return array 658 | */ 659 | public function getRevision($revisionId) 660 | { 661 | // Get the revision properties. 662 | $response = $this->_mediawiki->getRevisions( 663 | null, 664 | array('revids' => $revisionId, 665 | 'rvprop' => 'ids|flags|timestamp|user|comment|size|content') 666 | ); 667 | $page = current($response['query']['pages']); 668 | 669 | // Parse the wikitext into HTML. 670 | $response = $this->_mediawiki->parse( 671 | array('text' => '__NOEDITSECTION__' . 
$page['revisions'][0]['*']) 672 | ); 673 | 674 | $action = self::getChangeAction(array('comment' => $page['revisions'][0]['comment'])); 675 | 676 | $revision = array('revision_id' => $page['revisions'][0]['revid'], 677 | 'parent_id' => $page['revisions'][0]['parentid'], 678 | 'user' => $page['revisions'][0]['user'], 679 | 'timestamp' => $page['revisions'][0]['timestamp'], 680 | 'comment' => $page['revisions'][0]['comment'], 681 | 'size' => $page['revisions'][0]['size'], 682 | 'action' => $action, 683 | 'wikitext' => $page['revisions'][0]['*'], 684 | 'html' => $response['parse']['text']['*']); 685 | return $revision; 686 | } 687 | 688 | /** 689 | * Infer a change action verb from hints containted in various responses. 690 | * 691 | * @param array $hints Keyed hints from which to infer an change action: 692 | * 697 | * @return string 698 | */ 699 | static public function getChangeAction(array $hints = array()) 700 | { 701 | $action = ''; 702 | 703 | // Recent changes returns log_action=protect|unprotect with no comment. 704 | if (array_key_exists('log_action', $hints)) { 705 | $logActions = array('protect' => 'protected', 'unprotect' => 'unprotected'); 706 | if (array_key_exists($hints['log_action'], $logActions)) { 707 | return $logActions[$hints['log_action']]; 708 | } 709 | } 710 | 711 | // Infer from comment and revision_id. 712 | if (array_key_exists('comment', $hints)) { 713 | $commentActions = array('Replaced', 'Unprotected', 'Protected', 'Created'); 714 | $actionPattern = '/^(' . implode('|', $commentActions) . ').+$/s'; 715 | if (preg_match($actionPattern, $hints['comment'])) { 716 | $action = preg_replace_callback($actionPattern, function ($matches) { 717 | return strtolower($matches[1]); 718 | }, $hints['comment']); 719 | } else { 720 | // Watchlist returns revision_id=0 when the action is protect 721 | // or unprotect. 
722 | if (array_key_exists('revision_id', $hints) && 0 == $hints['revision_id']) { 723 | $action = 'un/protected'; 724 | } else { 725 | $action = 'edited'; 726 | } 727 | } 728 | } 729 | 730 | return $action; 731 | } 732 | 733 | /** 734 | * Determine whether the provided MediaWiki API URL is valid. 735 | * 736 | * @uses Scripto_Service_MediaWiki::isValidApiUrl() 737 | * @param string $apiUrl The MediaWiki API URL to validate. 738 | * @return bool 739 | */ 740 | static public function isValidApiUrl($apiUrl) 741 | { 742 | return Scripto_Service_MediaWiki::isValidApiUrl($apiUrl); 743 | } 744 | 745 | /** 746 | * Remove all HTML attributes from the provided markup. 747 | * 748 | * This filter is useful after getting HTML from the MediaWiki API, which 749 | * often contains MediaWiki-specific attributes that may conflict with local 750 | * settings. 751 | * 752 | * @see http://www.php.net/manual/en/domdocument.loadhtml.php#95251 753 | * @param string $html 754 | * @param array $exceptions Do not remove these attributes. 755 | * @return string 756 | */ 757 | static public function removeHtmlAttributes($html, array $exceptions = array('href')) 758 | { 759 | // Check for an empty string. 760 | $html = trim($html); 761 | if (empty($html)) { 762 | return $html; 763 | } 764 | 765 | // Load the HTML into DOM. Must inject an XML declaration with encoding 766 | // set to UTF-8 to prevent DOMDocument from munging Unicode characters. 767 | $doc = new DOMDocument(); 768 | $doc->loadHTML('' . $html); 769 | $xpath = new DOMXPath($doc); 770 | 771 | // Iterate over and remove attributes. 772 | foreach ($xpath->evaluate('//@*') as $attribute) { 773 | // Do not remove specified attributes. 774 | if (in_array($attribute->name, $exceptions)) { 775 | continue; 776 | } 777 | $attribute->ownerElement->removeAttributeNode($attribute); 778 | } 779 | 780 | return $doc->saveHTML(); 781 | } 782 | 783 | /** 784 | * Remove all preprocessor limit reports from the provided markup. 
785 | * 786 | * This filter is useful after getting HTML from the MediaWiki API, which 787 | * always contains a preprocessor limit report within hidden tags. 788 | * 789 | * @see http://en.wikipedia.org/wiki/Wikipedia:Template_limits#How_can_you_find_out.3F 790 | * @param string $text 791 | * @return string 792 | */ 793 | static public function removeNewPPLimitReports($html) 794 | { 795 | // The "s" modifier means the "." meta-character will include newlines. 796 | // The "?" means the "+" quantifier is not greedy, thus will not remove 797 | // text between pages when importing document transcriptions. 798 | $html = preg_replace("//s", '', $html); 799 | return $html; 800 | } 801 | } 802 | -------------------------------------------------------------------------------- /lib/Scripto/Adapter/Example.php: -------------------------------------------------------------------------------- 1 | array( 31 | * 'document_title' => {documentTitle}, 32 | * 'document_pages' => array( 33 | * {pageId} => array( 34 | * 'page_name' => {pageName}, 35 | * 'page_file_url' => {pageFileUrl} 36 | * ) 37 | * ) 38 | * ) 39 | * 40 | * Other adapters will likely get relevant data using the CMS API, and not 41 | * hardcode them like this example. Be sure to URL encode the document and 42 | * page IDs when transporting over HTTP. For example: 43 | * 44 | * documentId: Request for Purchase of Liver Oil & Drum Heads 45 | * pageId: xbe/XBE02001.jpg 46 | * ?documentId=Request+for+Purchase+of+Liver+Oil+%26+Drum+Heads&pageId=xbe%2FXBE02001.jpg 47 | * 48 | * These example documents are from Center for History and New Media Papers 49 | * of the War Department and Library of Congress American Memory. 50 | * 51 | * @var array 52 | */ 53 | private $_documents = array( 54 | // Example of the preferred way to set the document and page IDs using 55 | // unique keys. 
See: http://wardepartmentpapers.org/document.php?id=16344 56 | 16344 => array( 57 | 'document_title' => 'Return of articles received and expended; work done at Springfield Massachusetts armory', 58 | 'document_pages' => array( 59 | 67799 => array( 60 | 'page_name' => 'Letter Outside', 61 | 'page_file_url' => 'http://wardepartmentpapers.org/images/medium/zto/ZTO07001.jpg' 62 | ), 63 | 67800 => array( 64 | 'page_name' => 'Letter Body', 65 | 'page_file_url' => 'http://wardepartmentpapers.org/images/medium/zto/ZTO07002.jpg' 66 | ), 67 | 67801 => array( 68 | 'page_name' => 'Worksheet 1, Outside', 69 | 'page_file_url' => 'http://wardepartmentpapers.org/images/medium/zto/ZTO07003.jpg' 70 | ), 71 | 67802 => array( 72 | 'page_name' => 'Worksheet 1, Page 1', 73 | 'page_file_url' => 'http://wardepartmentpapers.org/images/medium/zto/ZTO07004.jpg' 74 | ), 75 | 67803 => array( 76 | 'page_name' => 'Worksheet 1, Page 2', 77 | 'page_file_url' => 'http://wardepartmentpapers.org/images/medium/zto/ZTO07005.jpg' 78 | ), 79 | 67804 => array( 80 | 'page_name' => 'Worksheet 2, Outside', 81 | 'page_file_url' => 'http://wardepartmentpapers.org/images/medium/zto/ZTO07006.jpg' 82 | ), 83 | 67805 => array( 84 | 'page_name' => 'Worksheet 2, Page 1', 85 | 'page_file_url' => 'http://wardepartmentpapers.org/images/medium/zto/ZTO07007.jpg' 86 | ) 87 | ) 88 | ), 89 | // An alternate way to set the document using a document title as the 90 | // document ID and the file path as the page ID. See: http://books.google.com/books?id=eAuOQMmGEYIC&lpg=PA515&ots=PtWRBKDZbf&pg=PA515 91 | // %5BFacsimile%20of%5D%20letter%20to%20Messrs.%20O.%20P.%20Hall%20et%20al%20from%20Lincoln. 92 | '[Facsimile of] letter to Messrs. O. P. Hall et al from Lincoln.' => array( 93 | 'document_title' => '[Facsimile of] letter to Messrs. O. P. 
Hall et al from Lincoln.', 94 | 'document_pages' => array( 95 | // rbc%2Flprbscsm%2Fscsm0455%2F001r.jpg 96 | 'rbc/lprbscsm/scsm0455/001r.jpg' => array( 97 | 'page_name' => '001r', 98 | 'page_file_url' => 'http://memory.loc.gov/service/rbc/lprbscsm/scsm0455/001r.jpg' 99 | ), 100 | 'rbc/lprbscsm/scsm0455/002r.jpg' => array( 101 | 'page_name' => '002r', 102 | 'page_file_url' => 'http://memory.loc.gov/service/rbc/lprbscsm/scsm0455/002r.jpg' 103 | ), 104 | 'rbc/lprbscsm/scsm0455/003r.jpg' => array( 105 | 'page_name' => '003r', 106 | 'page_file_url' => 'http://memory.loc.gov/service/rbc/lprbscsm/scsm0455/003r.jpg' 107 | ), 108 | 'rbc/lprbscsm/scsm0455/004r.jpg' => array( 109 | 'page_name' => '004r', 110 | 'page_file_url' => 'http://memory.loc.gov/service/rbc/lprbscsm/scsm0455/004r.jpg' 111 | ) 112 | ) 113 | ) 114 | ); 115 | 116 | public function documentExists($documentId) 117 | { 118 | return array_key_exists($documentId, $this->_documents); 119 | } 120 | 121 | public function documentPageExists($documentId, $pageId) 122 | { 123 | if (!array_key_exists($documentId, $this->_documents)) { 124 | return false; 125 | } 126 | return array_key_exists($pageId, $this->_documents[$documentId]['document_pages']); 127 | } 128 | 129 | public function getDocumentPages($documentId) 130 | { 131 | if (!array_key_exists($documentId, $this->_documents)) { 132 | throw new Scripto_Adapter_Exception('Document does not exist.'); 133 | } 134 | $pages = array(); 135 | foreach ($this->_documents[$documentId]['document_pages'] as $pageId => $page) { 136 | $pages[$pageId] = $page['page_name']; 137 | } 138 | return $pages; 139 | } 140 | 141 | public function getDocumentPageFileUrl($documentId, $pageId) 142 | { 143 | if (!array_key_exists($documentId, $this->_documents)) { 144 | throw new Scripto_Adapter_Exception('Document does not exist.'); 145 | } 146 | if (!array_key_exists($pageId, $this->_documents[$documentId]['document_pages'])) { 147 | throw new Scripto_Adapter_Exception('Document page does 
not exist.'); 148 | } 149 | return $this->_documents[$documentId]['document_pages'][$pageId]['page_file_url']; 150 | } 151 | 152 | public function getDocumentFirstPageId($documentId) 153 | { 154 | if (!array_key_exists($documentId, $this->_documents)) { 155 | throw new Scripto_Adapter_Exception('Document does not exist.'); 156 | } 157 | reset($this->_documents[$documentId]['document_pages']); 158 | return key($this->_documents[$documentId]['document_pages']); 159 | } 160 | 161 | public function getDocumentTitle($documentId) 162 | { 163 | if (!array_key_exists($documentId, $this->_documents)) { 164 | throw new Scripto_Adapter_Exception('Document does not exist.'); 165 | } 166 | return $this->_documents[$documentId]['document_title']; 167 | } 168 | 169 | public function getDocumentPageName($documentId, $pageId) 170 | { 171 | if (!array_key_exists($documentId, $this->_documents)) { 172 | throw new Scripto_Adapter_Exception('Document does not exist.'); 173 | } 174 | if (!array_key_exists($pageId, $this->_documents[$documentId]['document_pages'])) { 175 | throw new Scripto_Adapter_Exception('Document page does not exist.'); 176 | } 177 | return $this->_documents[$documentId]['document_pages'][$pageId]['page_name']; 178 | } 179 | 180 | public function documentTranscriptionIsImported($documentId) 181 | { 182 | return false; 183 | } 184 | 185 | public function documentPageTranscriptionIsImported($documentId, $pageId) 186 | { 187 | return false; 188 | } 189 | 190 | public function importDocumentPageTranscription($documentId, $pageId, $text) 191 | { 192 | return false; 193 | } 194 | 195 | public function importDocumentTranscription($documentId, $text) 196 | { 197 | return false; 198 | } 199 | } 200 | -------------------------------------------------------------------------------- /lib/Scripto/Adapter/Exception.php: -------------------------------------------------------------------------------- 1 | [pageName], [...]) 54 | * 55 | * Example return values: 56 | * array(2011 => 
'Title Page', 57 | * 1999 => 'Page 1', 58 | * 4345 => 'Page 2') 59 | * 60 | * array('page_1' => 1, 61 | * 'page_2' => 2, 62 | * 'page_3' => 3) 63 | * 64 | * @param int|string $documentId The unique document ID 65 | * @return array An array containing page identifiers as keys and page names 66 | * as values, in sequential page order. 67 | */ 68 | public function getDocumentPages($documentId); 69 | 70 | /** 71 | * Get the URL of the specified document page file. 72 | * 73 | * @param int|string $documentId The unique document ID 74 | * @param int|string $pageId The unique page ID 75 | * @return string The page file URL 76 | */ 77 | public function getDocumentPageFileUrl($documentId, $pageId); 78 | 79 | /** 80 | * Get the first page of the document. 81 | * 82 | * @param int|string $documentId The document ID 83 | * @return int|string 84 | */ 85 | public function getDocumentFirstPageId($documentId); 86 | 87 | /** 88 | * Get the title of the document. 89 | * 90 | * @param int|string $documentId The document ID 91 | * @return string 92 | */ 93 | public function getDocumentTitle($documentId); 94 | 95 | /** 96 | * Get the name of the document page. 97 | * 98 | * @param int|string $documentId The document ID 99 | * @param int|string $pageId The unique page ID 100 | * @return string 101 | */ 102 | public function getDocumentPageName($documentId, $pageId); 103 | 104 | /** 105 | * Indicate whether the document transcription has been imported. 106 | * 107 | * @param int|string $documentId The document ID 108 | * @return bool True: has been imported; false: has not been imported 109 | */ 110 | public function documentTranscriptionIsImported($documentId); 111 | 112 | /** 113 | * Indicate whether the document page transcription has been imported. 
114 | * 115 | * @param int|string $documentId The document ID 116 | * @param int|string $pageId The page ID 117 | */ 118 | public function documentPageTranscriptionIsImported($documentId, $pageId); 119 | 120 | /** 121 | * Import a document page's transcription into the external system. 122 | * 123 | * @param int|string $documentId The document ID 124 | * @param int|string $pageId The page ID 125 | * @param string $text The text to import 126 | * @return bool True: success; false: fail 127 | */ 128 | public function importDocumentPageTranscription($documentId, $pageId, $text); 129 | 130 | /** 131 | * Import an entire document's transcription into the external system. 132 | * 133 | * @param int|string $documentId The document ID 134 | * @param string $text The text to import 135 | * @return bool True: success; false: fail 136 | */ 137 | public function importDocumentTranscription($documentId, $text); 138 | } 139 | -------------------------------------------------------------------------------- /lib/Scripto/Document.php: -------------------------------------------------------------------------------- 1 | documentExists($id)) { 108 | throw new Scripto_Exception("The specified document does not exist: {$id}"); 109 | } 110 | 111 | $this->_id = $id; 112 | $this->_adapter = $adapter; 113 | $this->_mediawiki = $mediawiki; 114 | $this->_title = $this->_adapter->getDocumentTitle($id); 115 | } 116 | 117 | /** 118 | * Set the current document page. 119 | * 120 | * Sets the current page ID, the base title used by MediaWiki, and 121 | * information about the MediaWiki transcription and talk pages. 122 | * 123 | * @param string|null $pageId The unique page identifier. 124 | */ 125 | public function setPage($pageId) 126 | { 127 | // Set to the first page if the provided page is NULL or FALSE. 128 | if (null === $pageId || false === $pageId) { 129 | $pageId = $this->getFirstPageId(); 130 | } 131 | 132 | // Check if the page exists.
133 | if (!$this->_adapter->documentPageExists($this->_id, $pageId)) { 134 | throw new Scripto_Exception("The specified page does not exist: $pageId"); 135 | } 136 | 137 | // Mint the page title used by MediaWiki. 138 | $baseTitle = self::encodeBaseTitle($this->_id, $pageId); 139 | 140 | // Check that the newly minted base title (not the previously set one) is within the byte limit. 141 | if (self::TITLE_BYTE_LIMIT < strlen($baseTitle)) { 142 | throw new Scripto_Exception('The document ID and/or page ID are too long to set the provided page.'); 143 | } 144 | 145 | // Set information about the transcription and talk pages. 146 | $this->_transcriptionPageInfo = $this->_getPageInfo($baseTitle); 147 | $this->_talkPageInfo = $this->_getPageInfo('Talk:' . $baseTitle); 148 | 149 | $this->_pageId = $pageId; 150 | $this->_pageName = $this->_adapter->getDocumentPageName($this->_id, $pageId); 151 | $this->_baseTitle = $baseTitle; 152 | } 153 | 154 | /** 155 | * Get this document's ID. 156 | * 157 | * @return string|int 158 | */ 159 | public function getId() 160 | { 161 | return $this->_id; 162 | } 163 | 164 | /** 165 | * Get this document's title, as provided by the adapter when the document was constructed. 166 | */ 167 | public function getTitle() 168 | { 169 | return $this->_title; 170 | } 171 | 172 | /** 173 | * Get this document page's name, as provided by the adapter when the page was set. 174 | */ 175 | public function getPageName() 176 | { 177 | return $this->_pageName; 178 | } 179 | 180 | /** 181 | * Get this document's current page ID. 182 | * 183 | * @return string|int 184 | */ 185 | public function getPageId() 186 | { 187 | return $this->_pageId; 188 | } 189 | 190 | /** 191 | * Get this document's current base title. 192 | * 193 | * @return string 194 | */ 195 | public function getBaseTitle() 196 | { 197 | if (is_null($this->_pageId)) { 198 | throw new Scripto_Exception('The document page must be set before getting the base title.'); 199 | } 200 | return $this->_baseTitle; 201 | } 202 | 203 | /** 204 | * Get information about the current MediaWiki transcription page.
205 | * 206 | * @return array 207 | */ 208 | public function getTranscriptionPageInfo() 209 | { 210 | if (is_null($this->_pageId)) { 211 | throw new Scripto_Exception('The document page must be set before getting information about the transcription page.'); 212 | } 213 | return $this->_transcriptionPageInfo; 214 | } 215 | 216 | /** 217 | * Get information about the current MediaWiki talk page. 218 | * 219 | * @return array 220 | */ 221 | public function getTalkPageInfo() 222 | { 223 | if (is_null($this->_pageId)) { 224 | throw new Scripto_Exception('The document page must be set before getting information about the talk page.'); 225 | } 226 | return $this->_talkPageInfo; 227 | } 228 | 229 | /** 230 | * Get all of this document's pages from the adapter. 231 | * 232 | * @uses Scripto_Adapter_Interface::getDocumentPages() 233 | * @return array Page IDs as keys and page names as values, as returned by the adapter (cast to array). 234 | */ 235 | public function getPages() 236 | { 237 | return (array) $this->_adapter->getDocumentPages($this->_id); 238 | } 239 | 240 | /** 241 | * Get this document's first page ID from the adapter. 242 | * 243 | * @uses Scripto_Adapter_Interface::getDocumentFirstPageId() 244 | * @return int|string 245 | */ 246 | public function getFirstPageId() 247 | { 248 | return $this->_adapter->getDocumentFirstPageId($this->_id); 249 | } 250 | 251 | /** 252 | * Get this document's current page file URL from the adapter. 253 | * 254 | * @uses Scripto_Adapter_Interface::getDocumentPageFileUrl() 255 | * @return string 256 | */ 257 | public function getPageFileUrl() 258 | { 259 | if (is_null($this->_pageId)) { 260 | throw new Scripto_Exception('The document page must be set before getting the page file URL.'); 261 | } 262 | return $this->_adapter->getDocumentPageFileUrl($this->_id, $this->_pageId); 263 | } 264 | 265 | /** 266 | * Get the MediaWiki URL for the current transcription page.
267 | * 268 | * @return string 269 | */ 270 | public function getTranscriptionPageMediawikiUrl() 271 | { 272 | if (is_null($this->_pageId)) { 273 | throw new Scripto_Exception('The document page must be set before getting the transcription page MediaWiki URL.'); 274 | } 275 | return $this->_getPageMediawikiUrl($this->_baseTitle); 276 | } 277 | 278 | /** 279 | * Get the MediaWiki URL for the current talk page. 280 | * 281 | * @return string 282 | */ 283 | public function getTalkPageMediawikiUrl() 284 | { 285 | if (is_null($this->_pageId)) { 286 | throw new Scripto_Exception('The document page must be set before getting the talk page MediaWiki URL.'); 287 | } 288 | return $this->_getPageMediawikiUrl('Talk:' . $this->_baseTitle); 289 | } 290 | 291 | /** 292 | * Get the MediaWiki transcription page wikitext for the current page. 293 | * 294 | * @uses Scripto_Service_MediaWiki::getLatestRevisionWikitext() 295 | * @return string The transcription wikitext. 296 | */ 297 | public function getTranscriptionPageWikitext() 298 | { 299 | if (is_null($this->_pageId)) { 300 | throw new Scripto_Exception('The document page must be set before getting the transcription page wikitext.'); 301 | } 302 | return $this->_mediawiki->getLatestRevisionWikitext($this->_baseTitle); 303 | } 304 | 305 | /** 306 | * Get the MediaWiki talk page wikitext for the current page. 307 | * 308 | * @uses Scripto_Service_MediaWiki::getLatestRevisionWikitext() 309 | * @return string The talk wikitext. 310 | */ 311 | public function getTalkPageWikitext() 312 | { 313 | if (is_null($this->_pageId)) { 314 | throw new Scripto_Exception('The document page must be set before getting the talk page wikitext.'); 315 | } 316 | return $this->_mediawiki->getLatestRevisionWikitext('Talk:' . $this->_baseTitle); 317 | } 318 | 319 | /** 320 | * Get the MediaWiki transcription page HTML for the current page. 
321 | * 322 | * @uses Scripto_Service_MediaWiki::getLatestRevisionHtml() 323 | * @return string The transcription HTML. 324 | */ 325 | public function getTranscriptionPageHtml() 326 | { 327 | if (is_null($this->_pageId)) { 328 | throw new Scripto_Exception('The document page must be set before getting the transcription page HTML.'); 329 | } 330 | return $this->_mediawiki->getLatestRevisionHtml($this->_baseTitle); 331 | } 332 | 333 | /** 334 | * Get the MediaWiki talk page HTML for the current page. 335 | * 336 | * @uses Scripto_Service_MediaWiki::getLatestRevisionHtml() 337 | * @return string The talk HTML. 338 | */ 339 | public function getTalkPageHtml() 340 | { 341 | if (is_null($this->_pageId)) { 342 | throw new Scripto_Exception('The document page must be set before getting the talk page HTML.'); 343 | } 344 | return $this->_mediawiki->getLatestRevisionHtml('Talk:' . $this->_baseTitle); 345 | } 346 | 347 | /** 348 | * Get the MediaWiki transcription page plain text for the current page. 349 | * 350 | * @uses Scripto_Service_MediaWiki::getLatestRevisionHtml() 351 | * @return string The transcription page plain text. 352 | */ 353 | public function getTranscriptionPagePlainText() 354 | { 355 | if (is_null($this->_pageId)) { 356 | throw new Scripto_Exception('The document page must be set before getting the transcription page plain text.'); 357 | } 358 | return html_entity_decode(strip_tags($this->_mediawiki->getLatestRevisionHtml($this->_baseTitle))); 359 | } 360 | 361 | /** 362 | * Get the MediaWiki talk plain text for the current page. 363 | * 364 | * @uses Scripto_Service_MediaWiki::getLatestRevisionHtml() 365 | * @return string The talk plain text. 366 | */ 367 | public function getTalkPagePlainText() 368 | { 369 | if (is_null($this->_pageId)) { 370 | throw new Scripto_Exception('The document page must be set before getting the talk page plain text.'); 371 | } 372 | return html_entity_decode(strip_tags($this->_mediawiki->getLatestRevisionHtml('Talk:' . 
$this->_baseTitle))); 373 | } 374 | 375 | /** 376 | * Get the MediaWiki transcription page revision history for the current page. 377 | * 378 | * @param int $limit The number of revisions to return. 379 | * @param int $startRevisionId The revision ID from which to start. 380 | * @return array 381 | */ 382 | public function getTranscriptionPageHistory($limit = 10, $startRevisionId = null) 383 | { 384 | if (is_null($this->_pageId)) { 385 | throw new Scripto_Exception('The document page must be set before getting the transcription page history.'); 386 | } 387 | return $this->_getPageHistory($this->_baseTitle, $limit, $startRevisionId); 388 | } 389 | 390 | /** 391 | * Get the MediaWiki talk page revision history for the current page. 392 | * 393 | * @param int $limit The number of revisions to return. 394 | * @param int $startRevisionId The revision ID from which to start. 395 | * @return array 396 | */ 397 | public function getTalkPageHistory($limit = 10, $startRevisionId = null) 398 | { 399 | if (is_null($this->_pageId)) { 400 | throw new Scripto_Exception('The document page must be set before getting the talk page history.'); 401 | } 402 | return $this->_getPageHistory('Talk:' . $this->_baseTitle, $limit, $startRevisionId); 403 | } 404 | 405 | /** 406 | * Determine if the current user can edit the MediaWiki transcription page. 407 | * 408 | * @return bool 409 | */ 410 | public function canEditTranscriptionPage() 411 | { 412 | if (is_null($this->_pageId)) { 413 | throw new Scripto_Exception('The document page must be set before determining whether the user can edit the transcription page.'); 414 | } 415 | return $this->_canEdit($this->_transcriptionPageInfo['protections']); 416 | } 417 | 418 | /** 419 | * Determine if the current user can edit the MediaWiki talk page. 
420 | * 421 | * @return bool 422 | */ 423 | public function canEditTalkPage() 424 | { 425 | if (is_null($this->_pageId)) { 426 | throw new Scripto_Exception('The document page must be set before determining whether the user can edit the talk page.'); 427 | } 428 | return $this->_canEdit($this->_talkPageInfo['protections']); 429 | } 430 | 431 | /** 432 | * Edit the MediaWiki transcription page for the current document. 433 | * 434 | * @uses Scripto_Service_MediaWiki::edit() 435 | * @param string $text The wikitext of the transcription. 436 | */ 437 | public function editTranscriptionPage($text) 438 | { 439 | if (is_null($this->_pageId)) { 440 | throw new Scripto_Exception('The document page must be set before editing the transcription page.'); 441 | } 442 | $this->_mediawiki->edit($this->_baseTitle, 443 | $text, 444 | $this->_transcriptionPageInfo['edit_token']); 445 | } 446 | 447 | /** 448 | * Edit the MediaWiki talk page for the current document. 449 | * 450 | * @uses Scripto_Service_MediaWiki::edit() 451 | * @param string $text The wikitext of the transcription. 452 | */ 453 | public function editTalkPage($text) 454 | { 455 | if (is_null($this->_pageId)) { 456 | throw new Scripto_Exception('The document page must be set before editing the talk page.'); 457 | } 458 | $this->_mediawiki->edit('Talk:' . $this->_baseTitle, 459 | $text, 460 | $this->_talkPageInfo['edit_token']); 461 | } 462 | 463 | /** 464 | * Protect the current transcription page. 465 | */ 466 | public function protectTranscriptionPage() 467 | { 468 | if (is_null($this->_pageId)) { 469 | throw new Scripto_Exception('The document page must be set before protecting the transcription page.'); 470 | } 471 | $this->_protectPage($this->_baseTitle, $this->_transcriptionPageInfo['protect_token']); 472 | 473 | // Update information about this page. 474 | $this->_transcriptionPageInfo = $this->_getPageInfo($this->_baseTitle); 475 | } 476 | 477 | /** 478 | * Protect the current talk page. 
479 | */ 480 | public function protectTalkPage() 481 | { 482 | if (is_null($this->_pageId)) { 483 | throw new Scripto_Exception('The document page must be set before protecting the talk page.'); 484 | } 485 | $this->_protectPage('Talk:' . $this->_baseTitle, $this->_talkPageInfo['protect_token']); 486 | 487 | // Update information about this page. 488 | $this->_talkPageInfo = $this->_getPageInfo('Talk:' . $this->_baseTitle); 489 | } 490 | 491 | /** 492 | * Unprotect the current transcription page. 493 | */ 494 | public function unprotectTranscriptionPage() 495 | { 496 | if (is_null($this->_pageId)) { 497 | throw new Scripto_Exception('The document page must be set before unprotecting the transcription page.'); 498 | } 499 | $this->_unprotectPage($this->_baseTitle, $this->_transcriptionPageInfo['protect_token']); 500 | 501 | // Update information about this page. 502 | $this->_transcriptionPageInfo = $this->_getPageInfo($this->_baseTitle); 503 | } 504 | 505 | /** 506 | * Unprotect the current talk page. 507 | */ 508 | public function unprotectTalkPage() 509 | { 510 | if (is_null($this->_pageId)) { 511 | throw new Scripto_Exception('The document page must be set before unprotecting the talk page.'); 512 | } 513 | $this->_unprotectPage('Talk:' . $this->_baseTitle, $this->_talkPageInfo['protect_token']); 514 | 515 | // Update information about this page. 516 | $this->_talkPageInfo = $this->_getPageInfo('Talk:' . $this->_baseTitle); 517 | } 518 | 519 | /** 520 | * Watch the current page. 521 | * 522 | * Watching a transcription page implies watching its talk page. 523 | * 524 | * @uses Scripto_Service_MediaWiki::watch() 525 | */ 526 | public function watchPage() 527 | { 528 | if (is_null($this->_pageId)) { 529 | throw new Scripto_Exception('The document page must be set before watching the page.'); 530 | } 531 | $this->_mediawiki->watch($this->_baseTitle); 532 | } 533 | 534 | /** 535 | * Unwatch the current page. 
536 | * 537 | * Unwatching a transcription page implies unwatching its talk page. 538 | * 539 | * @uses Scripto_Service_MediaWiki::watch() 540 | */ 541 | public function unwatchPage() 542 | { 543 | if (is_null($this->_pageId)) { 544 | throw new Scripto_Exception('The document page must be set before unwatching the page.'); 545 | } 546 | $this->_mediawiki->watch($this->_baseTitle, null, array('unwatch' => true)); 547 | } 548 | 549 | /** 550 | * Determine whether the current transcription page is edit protected. 551 | * 552 | * @return bool 553 | */ 554 | public function isProtectedTranscriptionPage() 555 | { 556 | if (is_null($this->_pageId)) { 557 | throw new Scripto_Exception('The document page must be set before determining whether the transcription page is protected.'); 558 | } 559 | return $this->_isProtectedPage($this->_transcriptionPageInfo['protections']); 560 | } 561 | 562 | /** 563 | * Determine whether the current talk page is edit protected. 564 | * 565 | * @return bool 566 | */ 567 | public function isProtectedTalkPage() 568 | { 569 | if (is_null($this->_pageId)) { 570 | throw new Scripto_Exception('The document page must be set before determining whether the talk page is protected.'); 571 | } 572 | return $this->_isProtectedPage($this->_talkPageInfo['protections']); 573 | } 574 | 575 | /** 576 | * Determine whether the current user is watching the current page. 577 | * 578 | * @return bool 579 | */ 580 | public function isWatchedPage() 581 | { 582 | if (is_null($this->_pageId)) { 583 | throw new Scripto_Exception('The document page must be set before determining whether the current user is watching the page.'); 584 | } 585 | return $this->_transcriptionPageInfo['watched']; 586 | } 587 | 588 | /** 589 | * Determine whether all of this document's transcription pages were already 590 | * exported to the external system. 
591 | * 592 | * @uses Scripto_Adapter_Interface::documentTranscriptionIsImported() 593 | * @return bool 594 | */ 595 | public function isExported() 596 | { 597 | return $this->_adapter->documentTranscriptionIsImported($this->_id); 598 | } 599 | 600 | /** 601 | * Determine whether the current transcription page was already exported to 602 | * the external system. 603 | * 604 | * @uses Scripto_Adapter_Interface::documentPageTranscriptionIsImported() 605 | * @return bool 606 | */ 607 | public function isExportedPage() 608 | { 609 | if (is_null($this->_pageId)) { 610 | throw new Scripto_Exception('The document page must be set before determining whether it is imported.'); 611 | } 612 | return $this->_adapter->documentPageTranscriptionIsImported($this->_id, $this->_pageId); 613 | } 614 | 615 | /** 616 | * Export the document page transcription to the external system by calling 617 | * the adapter. 618 | * 619 | * @uses Scripto_Adapter_Interface::importDocumentPageTranscription() 620 | * @param string $type The type of text to set, valid options are 621 | * plain_text, html, and wikitext. 622 | */ 623 | public function exportPage($type = 'plain_text') 624 | { 625 | switch ($type) { 626 | case 'plain_text': 627 | $text = $this->getTranscriptionPagePlainText(); 628 | break; 629 | case 'html': 630 | $text = $this->getTranscriptionPageHtml(); 631 | break; 632 | case 'wikitext': 633 | $text = $this->getTranscriptionPageWikitext(); 634 | break; 635 | default: 636 | throw new Scripto_Exception('The provided import type is invalid.'); 637 | } 638 | $this->_adapter->importDocumentPageTranscription($this->_id, 639 | $this->_pageId, 640 | trim($text)); 641 | } 642 | 643 | /** 644 | * Export the entire document transcription to the external system by 645 | * calling the adapter. 646 | * 647 | * @uses Scripto_Adapter_Interface::importDocumentTranscription() 648 | * @param string $type The type of text to set, valid options are 649 | * plain_text, html, and wikitext. 
650 | * @param string $pageDelimiter The delimiter used to stitch pages together. 651 | */ 652 | public function export($type = 'plain_text', $pageDelimiter = "\n") 653 | { 654 | $text = array(); 655 | foreach ($this->getPages() as $pageId => $pageName) { 656 | $baseTitle = self::encodeBaseTitle($this->_id, $pageId); 657 | switch ($type) { 658 | case 'plain_text': 659 | $text[] = html_entity_decode(strip_tags($this->_mediawiki->getLatestRevisionHtml($baseTitle))); 660 | break; 661 | case 'html': 662 | $text[] = $this->_mediawiki->getLatestRevisionHtml($baseTitle); 663 | break; 664 | case 'wikitext': 665 | $text[] = $this->_mediawiki->getLatestRevisionWikitext($baseTitle); 666 | break; 667 | default: 668 | throw new Scripto_Exception('The provided import type is invalid.'); 669 | } 670 | } 671 | $text = implode($pageDelimiter, array_map('trim', $text)); 672 | $this->_adapter->importDocumentTranscription($this->_id, trim($text)); 673 | } 674 | 675 | /** 676 | * Determine if the current user can edit the specified MediaWiki page. 677 | * 678 | * @uses Scripto_Service_MediaWiki::getUserInfo() 679 | * @param array $pageProtections 680 | * @return bool 681 | */ 682 | protected function _canEdit(array $pageProtections) 683 | { 684 | $userInfo = $this->_mediawiki->getUserInfo('rights'); 685 | 686 | // Users without edit rights cannot edit pages. 687 | if (!in_array('edit', $userInfo['query']['userinfo']['rights'])) { 688 | return false; 689 | } 690 | 691 | // Users with edit rights can edit unprotected pages. 692 | if (empty($pageProtections)) { 693 | return true; 694 | } 695 | 696 | // Iterate the page protections. 697 | foreach ($pageProtections as $pageProtection) { 698 | 699 | // The page is edit-protected. 700 | if ('edit' == $pageProtection['type']) { 701 | 702 | // Users with edit and protect rights can edit protected pages. 
703 | if (in_array('protect', $userInfo['query']['userinfo']['rights'])) { 704 | return true; 705 | 706 | // Users with edit but without protect rights cannot edit 707 | // protected pages. 708 | } else { 709 | return false; 710 | } 711 | } 712 | } 713 | 714 | // Users with edit rights can edit pages that are not edit-protected. 715 | return true; 716 | } 717 | 718 | /** 719 | * Determine whether the provided protections contain an edit protection. 720 | * 721 | * @param array $pageProtections The page protections from the page info: 722 | * {@link Scripto_Document::$_transcriptionPageInfo} or 723 | * {@link Scripto_Document::$_talkPageInfo}. 724 | * @return bool 725 | */ 726 | protected function _isProtectedPage(array $pageProtections) 727 | { 728 | // There are no protections. 729 | if (empty($pageProtections)) { 730 | return false; 731 | } 732 | 733 | // Iterate the page protections. 734 | foreach ($pageProtections as $pageProtection) { 735 | // The page is edit protected. 736 | if ('edit' == $pageProtection['type'] || 'create' == $pageProtection['type']) { 737 | return true; 738 | } 739 | } 740 | 741 | // There are no edit protections. 742 | return false; 743 | } 744 | 745 | /** 746 | * Protect the specified page. 747 | * 748 | * @uses Scripto_Service_MediaWiki::protect() 749 | * @param string $title 750 | * @param string $protectToken 751 | */ 752 | protected function _protectPage($title, $protectToken) 753 | { 754 | if ($this->_mediawiki->pageCreated($title)) { 755 | $protections = 'edit=sysop'; 756 | } else { 757 | $protections = 'create=sysop'; 758 | } 759 | $this->_mediawiki->protect($title, $protections, $protectToken); 760 | } 761 | 762 | /** 763 | * Unprotect the specified page. 
*
     * @uses Scripto_Service_MediaWiki::protect()
     * @param string $title
     * @param string $protectToken
     */
    protected function _unprotectPage($title, $protectToken)
    {
        if ($this->_mediawiki->pageCreated($title)) {
            $protections = 'edit=all';
        } else {
            $protections = 'create=all';
        }
        $this->_mediawiki->protect($title, $protections, $protectToken);
    }

    /**
     * Get the MediaWiki URL for the specified page.
     *
     * The URL is the site's "server" value followed by its "articlepath" with
     * the "$1" placeholder replaced by the title. The title is inserted as
     * given; it is not URL-encoded here.
     *
     * @uses Scripto_Service_MediaWiki::getSiteInfo()
     * @param string $title
     * @return string
     */
    protected function _getPageMediawikiUrl($title)
    {
        $siteInfo = $this->_mediawiki->getSiteInfo();
        return $siteInfo['query']['general']['server']
             . str_replace('$1', $title, $siteInfo['query']['general']['articlepath']);
    }

    /**
     * Get information for the specified page.
     *
     * Requests the page's protection, talk/subject IDs, URLs, watched state
     * and the edit/move/delete/protect tokens, then maps the first page of the
     * API response onto Scripto's own key names. Values the API did not return
     * are null; "watched", "redirect" and "new" are booleans that reflect
     * whether the corresponding key is present in the response.
     *
     * @uses Scripto_Service_MediaWiki::getInfo()
     * @param string $title
     * @return array
     */
    protected function _getPageInfo($title)
    {
        $params = array('inprop'  => 'protection|talkid|subjectid|url|watched',
                        'intoken' => 'edit|move|delete|protect');
        $response = $this->_mediawiki->getInfo($title, $params);
        $page = current($response['query']['pages']);

        // Scripto key => MediaWiki API key. The order is the order of the
        // returned array.
        $valueKeys = array('page_id'            => 'pageid',
                           'namespace_index'    => 'ns',
                           'mediawiki_title'    => 'title',
                           'last_revision_id'   => 'lastrevid',
                           'counter'            => 'counter',
                           'length'             => 'length',
                           'start_timestamp'    => 'starttimestamp',
                           'edit_token'         => 'edittoken',
                           'move_token'         => 'movetoken',
                           'delete_token'       => 'deletetoken',
                           'protect_token'      => 'protecttoken',
                           'protections'        => 'protection',
                           'talk_id'            => 'talkid',
                           'mediawiki_full_url' => 'fullurl',
                           'mediawiki_edit_url' => 'editurl');

        $pageInfo = array();
        foreach ($valueKeys as $infoKey => $apiKey) {
            $pageInfo[$infoKey] = isset($page[$apiKey]) ? $page[$apiKey] : null;
        }

        // These are flags: only the presence of the key matters.
        foreach (array('watched', 'redirect', 'new') as $flag) {
            $pageInfo[$flag] = isset($page[$flag]);
        }

        return $pageInfo;
    }

    /**
     * Get the revisions for the specified page.
     *
     * Revisions are fetched in batches of up to 100, following the API's
     * "query-continue" marker until there are no more revisions or $limit
     * revisions have been collected. Fields that the API omits from a
     * revision (for example a hidden user or comment) are returned as null.
     *
     * @uses Scripto_Service_MediaWiki::getRevisions()
     * @uses Scripto::getChangeAction()
     * @param string $title
     * @param int $limit The maximum number of revisions to return.
     * @param int $startRevisionId The revision ID to start listing from.
     * @return array An array of revisions, each with the keys revision_id,
     * parent_id, user, timestamp, comment, size and action. Empty when the
     * page has not been created or has no revisions to list.
     */
    protected function _getPageHistory($title, $limit = 10, $startRevisionId = null)
    {
        $revisions = array();
        do {
            $response = $this->_mediawiki->getRevisions(
                $title,
                array('rvstartid' => $startRevisionId,
                      'rvlimit'   => 100,
                      'rvprop'    => 'ids|flags|timestamp|user|comment|size')
            );
            $page = current($response['query']['pages']);

            // Return if the page has not been created, or if the response
            // carries no revisions to iterate (e.g. an invalid title).
            if (!is_array($page)
                || array_key_exists('missing', $page)
                || empty($page['revisions'])
            ) {
                return $revisions;
            }

            foreach ($page['revisions'] as $revision) {

                // Not every field is guaranteed to be present on a revision,
                // so read each one defensively instead of raising a notice.
                $comment = isset($revision['comment']) ? $revision['comment'] : null;

                $action = Scripto::getChangeAction(array('comment' => $comment));

                // Build the revisions.
                $revisions[] = array(
                    'revision_id' => isset($revision['revid']) ? $revision['revid'] : null,
                    'parent_id'   => isset($revision['parentid']) ? $revision['parentid'] : null,
                    'user'        => isset($revision['user']) ? $revision['user'] : null,
                    'timestamp'   => isset($revision['timestamp']) ? $revision['timestamp'] : null,
                    'comment'     => $comment,
                    'size'        => isset($revision['size']) ? $revision['size'] : null,
                    'action'      => $action,
                );

                // Break out of the loops if limit has been reached.
                if ($limit == count($revisions)) {
                    break 2;
                }
            }

            // Set the query continue, if any.
            if (isset($response['query-continue'])) {
                $startRevisionId = $response['query-continue']['revisions']['rvstartid'];
            } else {
                $startRevisionId = null;
            }

        } while ($startRevisionId);

        return $revisions;
    }

    /**
     * Encode a base title that enables fail-safe document page transport
     * between the external system, Scripto, and MediaWiki.
     *
     * The base title is the base MediaWiki page title that corresponds to the
     * document page. Encoding is necessary to allow all Unicode characters in
     * document and page IDs, even those not allowed in URL syntax and MediaWiki
     * naming conventions. Encoding in Base64 allows the title to be decoded.
     *
     * The base title has four parts:
     *
      * <ol>
     *     <li>A title prefix to keep MediaWiki from capitalizing the first
     *     character</li>
     *     <li>A URL-safe Base64 encoded document ID</li>
     *     <li>A delimiter between the encoded document ID and page ID</li>
     *     <li>A URL-safe Base64 encoded page ID</li>
     * </ol>
     *
     * @link http://en.wikipedia.org/wiki/Base64#URL_applications
     * @link http://en.wikipedia.org/wiki/Wikipedia:Naming_conventions_%28technical_restrictions%29
     * @param string|int $documentId The document ID
     * @param string|int $pageId The page ID
     * @return string The encoded base title
     */
    static public function encodeBaseTitle($documentId, $pageId)
    {
        return self::BASE_TITLE_PREFIX
             . Scripto_Document::base64UrlEncode($documentId)
             . self::BASE_TITLE_DELIMITER
             . Scripto_Document::base64UrlEncode($pageId);
    }

    /**
     * Decode the base title.
     *
     * This is the inverse of {@link encodeBaseTitle()}: the title prefix is
     * removed, the remainder is split on the delimiter, and every part is
     * URL-safe Base64 decoded.
     *
     * @param string|int $baseTitle
     * @return array An array containing the document ID and page ID
     */
    static public function decodeBaseTitle($baseTitle)
    {
        $baseTitle = (string) $baseTitle;
        $prefix = (string) self::BASE_TITLE_PREFIX;

        // First remove the title prefix. Remove it as an exact leading
        // string: ltrim() treats its second argument as a set of characters
        // and would also strip any leading characters of the encoded
        // document ID that happen to occur in the prefix.
        if ('' !== $prefix && 0 === strpos($baseTitle, $prefix)) {
            $baseTitle = (string) substr($baseTitle, strlen($prefix));
        }

        // Create an array containing the document ID and page ID.
        $parts = explode(self::BASE_TITLE_DELIMITER, $baseTitle);

        // URL-safe Base64 decode the array and return it.
        return array_map('Scripto_Document::base64UrlDecode', $parts);
    }

    /**
     * Encode a string to URL-safe Base64.
     *
     * The standard Base64 output has its trailing "=" padding removed and its
     * "+" and "/" characters replaced by "-" and "_".
     *
     * @link http://en.wikipedia.org/wiki/Base64#URL_applications
     * @param string $str
     * @return string
     */
    static public function base64UrlEncode($str)
    {
        return strtr(rtrim(base64_encode($str), '='), '+/', '-_');
    }

    /**
     * Decode a string from a URL-safe Base64.
948 | * 949 | * @param string $str 950 | * @return string 951 | */ 952 | static public function base64UrlDecode($str) 953 | { 954 | return base64_decode(strtr($str, '-_', '+/')); 955 | } 956 | } 957 | -------------------------------------------------------------------------------- /lib/Scripto/Exception.php: -------------------------------------------------------------------------------- 1 | array( 55 | 'text', 'title', 'page', 'prop', 'pst', 'uselang' 56 | ), 57 | 'edit' => array( 58 | 'title', 'section', 'text', 'token', 'summary', 'minor', 'notminor', 59 | 'bot', 'basetimestamp', 'starttimestamp', 'recreate', 'createonly', 60 | 'nocreate', 'watchlist', 'md5', 'captchaid', 'captchaword', 'undo', 61 | 'undoafter' 62 | ), 63 | 'protect' => array( 64 | 'title', 'token', 'protections', 'expiry', 'reason', 'cascade' 65 | ), 66 | 'watch' => array( 67 | 'title', 'unwatch', 'token' 68 | ), 69 | 'query' => array( 70 | // title specifications 71 | 'titles', 'revids', 'pageids', 72 | // submodules 73 | 'meta', 'prop', 'list', 74 | // meta submodule 75 | 'siprop', 'sifilteriw', 'sishowalldb', 'sinumberingroup', 76 | 'uiprop', 77 | // prop submodule 78 | 'inprop', 'intoken', 'indexpageids', 'incontinue', 79 | 'rvprop', 'rvcontinue', 'rvlimit', 'rvstartid', 'rvendid', 80 | 'rvstart', 'rvend', 'rvdir', 'rvuser', 'rvexcludeuser', 81 | 'rvexpandtemplates', 'rvgeneratexml', 'rvsection', 'rvtoken', 82 | 'rvdiffto', 'rvdifftotext', 83 | // list submodule 84 | 'ucprop', 'ucuser', 'ucuserprefix', 'ucstart', 'ucend', 85 | 'uccontinue', 'ucdir', 'uclimit', 'ucnamespace', 'ucshow', 86 | 'rcprop', 'rcstart', 'rcend', 'rcdir', 'rclimit', 'rcnamespace', 87 | 'rcuser', 'rcexcludeuser', 'rctype', 'rcshow', 88 | 'wlprop', 'wlstart', 'wlend', 'wldir', 'wllimit', 'wlnamespace', 89 | 'wluser', 'wlexcludeuser', 'wlowner', 'wltoken', 'wlallrev', 90 | 'wlshow', 91 | 'aplimit', 'apminsize', 'apmaxsize', 'apprefix', 'apfrom', 92 | 'apnamespace', 'apfilterredir', 'apfilterlanglinks', 'apprtype', 93 | 
'apprlevel', 'apdir',
        ),
        'login' => array(
            'lgname', 'lgpassword', 'lgtoken'
        ),
        'logout' => array()
    );

    /**
     * Set up the MediaWiki API client.
     *
     * Configures the shared HTTP client for the given API URL, adds an
     * X-Forwarded-For header when the client and server addresses are known,
     * and, when cookies are passed through, copies the namespaced MediaWiki
     * cookies found in the browser request into the HTTP client's cookie jar.
     *
     * @link http://www.mediawiki.org/wiki/API:Main_page
     * @param string $apiUrl The URL to the MediaWiki API.
     * @param bool $passCookies Pass cookies to the web browser.
     * @param string $cookiePrefix The MediaWiki cookie prefix. When null, the
     * prefix stored in the browser during login is used, if there is one.
     */
    public function __construct($apiUrl, $passCookies = true, $cookiePrefix = null)
    {
        $this->_passCookies = (bool) $passCookies;

        // An explicit prefix wins; otherwise fall back to the prefix that was
        // stored in the browser during login.
        $storedPrefixName = self::COOKIE_NS . 'cookieprefix';
        if (null !== $cookiePrefix) {
            $this->_cookiePrefix = $cookiePrefix;
        } elseif (isset($_COOKIE[$storedPrefixName])) {
            $this->_cookiePrefix = $_COOKIE[$storedPrefixName];
        }

        // Point the HTTP client at the MediaWiki API and give it a cookie jar.
        self::getHttpClient()->setUri($apiUrl)
                             ->setConfig(array('keepalive' => true))
                             ->setCookieJar();

        // Forward the client address when both addresses are available.
        if (isset($_SERVER['REMOTE_ADDR'], $_SERVER['SERVER_ADDR'])) {
            $forwardedFor = $_SERVER['REMOTE_ADDR'] . ', ' . $_SERVER['SERVER_ADDR'];
            self::getHttpClient()->setHeaders('X-Forwarded-For', $forwardedFor);
        }

        // Without cookie passing or a known prefix there is no browser state
        // to restore.
        if (!($this->_passCookies && $this->_cookiePrefix)) {
            return;
        }

        // Copy the MediaWiki authentication cookies from the browser into the
        // HTTP client cookie jar so state is kept between browser requests.
        require_once 'Zend/Http/Cookie.php';
        foreach ($this->_cookieSuffixes as $cookieSuffix) {
            $browserName = self::COOKIE_NS . $this->_cookiePrefix . $cookieSuffix;
            if (!array_key_exists($browserName, $_COOKIE)) {
                continue;
            }
            $cookie = new Zend_Http_Cookie($this->_cookiePrefix . $cookieSuffix,
                                           $_COOKIE[$browserName],
                                           self::getHttpClient()->getUri()->getHost());
            self::getHttpClient()->getCookieJar()->addCookie($cookie);
        }
    }

    /**
     * Query information about the current user.
     *
     * @link http://www.mediawiki.org/wiki/API:Meta#userinfo_.2F_ui
     * @param string $uiprop The user information properties to request.
     * @return array The decoded API response.
     */
    public function getUserInfo($uiprop = '')
    {
        return $this->query(array('meta'   => 'userinfo',
                                  'uiprop' => $uiprop));
    }

    /**
     * Query overall site information.
     *
     * @link http://www.mediawiki.org/wiki/API:Meta#siteinfo_.2F_si
     * @param string $siprop The site information properties to request.
     * @return array The decoded API response.
     */
    public function getSiteInfo($siprop = 'general')
    {
        return $this->query(array('meta'   => 'siteinfo',
                                  'siprop' => $siprop));
    }

    /**
     * Query the list of contributions made by a given user.
     *
     * @link http://www.mediawiki.org/wiki/API:Usercontribs
     * @param string $ucuser The user whose contributions are listed.
     * @param array $params Additional query parameters; "ucuser" and "list"
     * are overwritten.
     * @return array The decoded API response.
     */
    public function getUserContributions($ucuser, array $params = array())
    {
        $query = $params;
        $query['ucuser'] = $ucuser;
        $query['list'] = 'usercontribs';
        return $this->query($query);
    }

    /**
     * Query all recent changes to the wiki.
     *
     * @link http://www.mediawiki.org/wiki/API:Recentchanges
     * @param array $params Additional query parameters; "list" is overwritten.
     * @return array The decoded API response.
     */
    public function getRecentChanges(array $params = array())
    {
        $query = $params;
        $query['list'] = 'recentchanges';
        return $this->query($query);
    }

    /**
     * Gets a list of pages on the current user's watchlist.
*
     * @link http://www.mediawiki.org/wiki/API:Watchlist
     * @param array $params
     * @return array
     */
    public function getWatchlist(array $params = array())
    {
        $query = $params;
        $query['list'] = 'watchlist';
        return $this->query($query);
    }

    /**
     * Query a list of pages.
     *
     * @link http://www.mediawiki.org/wiki/API:Allpages
     * @param array $params Additional query parameters; "list" is overwritten.
     * @return array The decoded API response.
     */
    public function getAllPages(array $params = array())
    {
        $query = $params;
        $query['list'] = 'allpages';
        return $this->query($query);
    }

    /**
     * Query basic page information.
     *
     * @link http://www.mediawiki.org/wiki/API:Properties#info_.2F_in
     * @param string $titles The page title(s) to look up.
     * @param array $params Additional query parameters; "titles" and "prop"
     * are overwritten.
     * @return array The decoded API response.
     */
    public function getInfo($titles, array $params = array())
    {
        $query = $params;
        $query['titles'] = $titles;
        $query['prop'] = 'info';
        return $this->query($query);
    }

    /**
     * Query the revisions of a given page.
     *
     * @link http://www.mediawiki.org/wiki/API:Properties#revisions_.2F_rv
     * @param string $titles The page title(s) to look up.
     * @param array $params Additional query parameters; "titles" and "prop"
     * are overwritten.
     * @return array The decoded API response.
     */
    public function getRevisions($titles, array $params = array())
    {
        $query = $params;
        $query['titles'] = $titles;
        $query['prop'] = 'revisions';
        return $this->query($query);
    }

    /**
     * Get the HTML of a specified revision of a given page.
     *
     * The revision's wikitext is fetched first and then parsed into HTML with
     * section edit links suppressed.
     *
     * @param int $revisionId
     * @return string
     */
    public function getRevisionHtml($revisionId)
    {
        // Get the revision wikitext.
        $revisionQuery = array('revids' => $revisionId,
                               'rvprop' => 'content');
        $response = $this->getRevisions(null, $revisionQuery);
        $page = current($response['query']['pages']);
        $wikitext = $page['revisions'][0]['*'];

        // Parse the wikitext into HTML.
        $parsed = $this->parse(array('text' => '__NOEDITSECTION__' . $wikitext));
        return $parsed['parse']['text']['*'];
    }

    /**
     * Get the difference between two revisions.
     *
     * @param int $fromRevisionId The revision ID to diff.
     * @param int|string $toRevisionId The revision to diff to: use the
     * revision ID, prev, next, or cur.
     * @return string The API returns preformatted table rows without a wrapping
     * <table>. Presumably this is so implementers can wrap a custom <table>.
     */
    public function getRevisionDiff($fromRevisionId, $toRevisionId = 'prev')
    {
        $diffQuery = array('revids'   => $fromRevisionId,
                           'rvdiffto' => $toRevisionId);
        $response = $this->getRevisions(null, $diffQuery);
        $page = current($response['query']['pages']);
        return $page['revisions'][0]['diff']['*'];
    }

    /**
     * Get the edit token for a given page.
     *
     * @link http://www.mediawiki.org/wiki/API:Edit#Token
     * @param string $title
     * @return string|null The token, or null when the response has none.
     */
    public function getEditToken($title)
    {
        $response = $this->getInfo($title, array('intoken' => 'edit'));
        $page = current($response['query']['pages']);
        return isset($page['edittoken']) ? $page['edittoken'] : null;
    }

    /**
     * Get the protect token for a given page.
     *
     * @link http://www.mediawiki.org/wiki/API:Protect#Token
     * @param string $title
     * @return string|null The token, or null when the response has none.
     */
    public function getProtectToken($title)
    {
        $response = $this->getInfo($title, array('intoken' => 'protect'));
        $page = current($response['query']['pages']);
        return isset($page['protecttoken']) ? $page['protecttoken'] : null;
    }

    /**
     * Get the watch token for a given page.
     *
     * @link http://www.mediawiki.org/wiki/API:Watch#Token
     * @param string $title
     * @return string|null The token, or null when the response has none.
     */
    public function getWatchToken($title)
    {
        $response = $this->getInfo($title, array('intoken' => 'watch'));
        $page = current($response['query']['pages']);
        return isset($page['watchtoken']) ? $page['watchtoken'] : null;
    }

    /**
     * Gets the protections for a given page.
*
     * @link http://www.mediawiki.org/wiki/API:Properties#info_.2F_in
     * @param string $title
     * @return array The page protections; empty when the response has none.
     */
    public function getPageProtections($title)
    {
        $response = $this->getInfo($title, array('inprop' => 'protection'));
        $page = current($response['query']['pages']);

        // Honor the documented array return even when the key is absent.
        return isset($page['protection']) ? $page['protection'] : array();
    }

    /**
     * Gets the wikitext of the latest revision of a given page.
     *
     * @link http://www.mediawiki.org/wiki/API:Properties#revisions_.2F_rv
     * @param string $title
     * @return string|null The wikitext, or null when the page has no revision
     * content in the response.
     */
    public function getLatestRevisionWikitext($title)
    {
        $response = $this->getRevisions($title, array('rvprop'  => 'content',
                                                      'rvlimit' => '1'));
        $page = current($response['query']['pages']);

        // Return the wikitext only if the page already exists.
        $wikitext = null;
        if (isset($page['revisions'][0]['*'])) {
            $wikitext = $page['revisions'][0]['*'];
        }
        return $wikitext;
    }

    /**
     * Gets the HTML of the latest revision of a given page.
     *
     * @link http://www.mediawiki.org/wiki/API:Parsing_wikitext#parse
     * @param string $title
     * @return string|null The HTML, or null when the page does not appear to
     * exist.
     */
    public function getLatestRevisionHtml($title)
    {
        // To exclude [edit] links in the parsed wikitext, we must use the
        // following hack.
        $response = $this->parse(array('text' => '__NOEDITSECTION__{{:' . $title . '}}'));

        // Return the text only if the page already exists. Otherwise, the
        // returned HTML is a link to the document's MediaWiki edit page. The
        // only indicator I found in the response XML is the "exists" attribute
        // in the templates node; but this may not be adequate.
        $html = null;
        if (isset($response['parse']['templates'][0]['exists'])) {
            $html = $response['parse']['text']['*'];
        }
        return $html;
    }

    /**
     * Get the HTML preview of the given text.
     *
     * @link http://www.mediawiki.org/wiki/API:Parsing_wikitext#parse
     * @param string $text The wikitext to parse.
     * @return string
     */
    public function getPreview($text)
    {
        $response = $this->parse(array('text' => '__NOEDITSECTION__' . $text));
        return $response['parse']['text']['*'];
    }

    /**
     * Returns whether a given page is created.
     *
     * @link http://www.mediawiki.org/wiki/API:Query#Missing_and_invalid_titles
     * @param string $title
     * @return bool False when the API marks the title as missing or invalid.
     */
    public function pageCreated($title)
    {
        $response = $this->query(array('titles' => $title));
        $page = current($response['query']['pages']);
        if (isset($page['missing']) || isset($page['invalid'])) {
            return false;
        }
        return true;
    }

    /**
     * Returns parsed wikitext.
     *
     * @link http://www.mediawiki.org/wiki/API:Parsing_wikitext#parse
     * @param array $params
     * @return array
     */
    public function parse(array $params = array())
    {
        return $this->_request('parse', $params);
    }

    /**
     * Returns data.
     *
     * @link http://www.mediawiki.org/wiki/API:Query
     * @param array $params
     * @return array
     */
    public function query(array $params = array())
    {
        return $this->_request('query', $params);
    }

    /**
     * Watch or unwatch pages.
     *
     * @link http://www.mediawiki.org/wiki/API:Watch
     * @param string $title
     * @param string|null $watchtoken The watch token; fetched when null.
     * @param array $params Additional parameters; "title" and "token" are
     * overwritten.
     * @return array
     */
    public function watch($title, $watchtoken = null, array $params = array())
    {
        // Get the watch token if not passed.
        if (is_null($watchtoken)) {
            $watchtoken = $this->getWatchToken($title);
        }
        $params['title'] = $title;
        $params['token'] = $watchtoken;
        return $this->_request('watch', $params);
    }

    /**
     * Applies protections to a given page.
     *
     * @link http://www.mediawiki.org/wiki/API:Protect
     * @param string $title
     * @param string $protections
     * @param string|null $protecttoken The protect token; fetched when null.
     * @param array $params Additional parameters; "title", "protections" and
     * "token" are overwritten.
     * @return array
     */
    public function protect($title,
                            $protections,
                            $protecttoken = null,
                            array $params = array())
    {
        // Get the protect token if not passed.
        if (is_null($protecttoken)) {
            $protecttoken = $this->getProtectToken($title);
        }

        // Apply protections.
        $params['title']       = $title;
        $params['protections'] = $protections;
        $params['token']       = $protecttoken;

        return $this->_request('protect', $params);
    }

    /**
     * Create or edit a given page.
     *
     * @link http://www.mediawiki.org/wiki/API:Edit
     * @link http://www.mediawiki.org/wiki/Manual:Preventing_access#Restrict_editing_of_all_pages
     * @param string $title
     * @param string $text
     * @param string|null $edittoken The edit token; fetched when null.
     * @param array $params Additional parameters; "title", "text", "token" and
     * "basetimestamp" are overwritten.
     * @return array
     */
    public function edit($title,
                         $text,
                         $edittoken = null,
                         array $params = array())
    {
        // Get the edit token if not passed.
        if (is_null($edittoken)) {
            $edittoken = $this->getEditToken($title);
        }

        // Protect against edit conflicts by getting the timestamp of the last
        // revision.
        $response = $this->getRevisions($title);
        $page = current($response['query']['pages']);

        $basetimestamp = null;
        if (isset($page['revisions'])) {
            $basetimestamp = $page['revisions'][0]['timestamp'];
        }

        // Edit the page.
        $params['title']         = $title;
        $params['text']          = $text;
        $params['token']         = $edittoken;
        $params['basetimestamp'] = $basetimestamp;

        return $this->_request('edit', $params);
    }

    /**
     * Login to MediaWiki.
     *
     * On success, and when cookies are passed through, the MediaWiki cookie
     * prefix and every cookie in the HTTP client's cookie jar are persisted in
     * the browser under the Scripto cookie namespace.
     *
     * @link http://www.mediawiki.org/wiki/API:Login
     * @param string $lgname
     * @param string $lgpassword
     * @throws Scripto_Service_Exception When the login is not successful.
     */
    public function login($lgname, $lgpassword)
    {
        // Log in or get the login token.
        $params = array('lgname' => $lgname, 'lgpassword' => $lgpassword);
        $response = $this->_request('login', $params);

        // Confirm the login token.
        if ('NeedToken' == $response['login']['result']) {
            $params['lgtoken'] = $response['login']['token'];
            $response = $this->_request('login', $params);
        }

        // Process a successful login.
        if ('Success' == $response['login']['result']) {
            if ($this->_passCookies) {
                $cookiePrefix = isset($response['login']['cookieprefix'])
                              ? $response['login']['cookieprefix']
                              : $this->_cookiePrefix;
                // Persist the MediaWiki cookie prefix in the browser. Set to
                // expire in 30 days, the same as MediaWiki cookies.
                setcookie(self::COOKIE_NS . 'cookieprefix',
                          $cookiePrefix,
                          time() + 60 * 60 * 24 * 30,
                          '/');

                // Persist MediaWiki authentication cookies in the browser.
                // The browser name is the namespace plus the jar cookie's own
                // name. The previous code also concatenated the undefined
                // property $this->cookiePrefix, which evaluated to an empty
                // string and raised a notice; the resulting names are
                // unchanged. NOTE(review): the constructor reads these cookies
                // back as COOKIE_NS . prefix . suffix, so this presumes the jar
                // names already carry the MediaWiki prefix -- confirm.
                foreach (self::getHttpClient()->getCookieJar()->getAllCookies() as $cookie) {
                    setcookie(self::COOKIE_NS . $cookie->getName(),
                              $cookie->getValue(),
                              $cookie->getExpiryTime(),
                              '/');
                }
            }
            return;
        }

        // Process an unsuccessful login.
        $errors = array('NoName'          => 'Username is empty.',
                        'Illegal'         => 'Username is illegal.',
                        'NotExists'       => 'Username is not found.',
                        'EmptyPass'       => 'Password is empty.',
                        'WrongPass'       => 'Password is incorrect.',
                        'WrongPluginPass' => 'Password is incorrect (via plugin)',
                        'CreateBlocked'   => 'IP address is blocked for account creation.',
                        'Throttled'       => 'Login attempt limit surpassed.',
                        'Blocked'         => 'User is blocked.');
        $error = $response['login']['result'];
        if (array_key_exists($error, $errors)) {
            throw new Scripto_Service_Exception($errors[$error]);
        }
        throw new Scripto_Service_Exception('Unknown login error: ' . $response['login']['result']);
    }

    /**
     * Logout of MediaWiki.
     *
     * Logs out through the API, empties the HTTP client's cookie jar and,
     * when cookies are passed through and a cookie prefix is known, expires
     * the namespaced MediaWiki cookies in the browser.
     *
     * @link http://www.mediawiki.org/wiki/API:Logout
     */
    public function logout()
    {
        // Log out.
        $this->_request('logout');

        // Reset the cookie jar.
        self::getHttpClient()->getCookieJar()->reset();

        if ($this->_passCookies && $this->_cookiePrefix) {
            // Delete the MediaWiki authentication cookies from the browser.
            setcookie(self::COOKIE_NS . 'cookieprefix', false, 0, '/');
            foreach ($this->_cookieSuffixes as $cookieSuffix) {
                $cookieName = self::COOKIE_NS . $this->_cookiePrefix . $cookieSuffix;
                if (array_key_exists($cookieName, $_COOKIE)) {
                    setcookie($cookieName, false, 0, '/');
                }
            }
        }
    }

    /**
     * Makes a MediaWiki API request and returns the response.
     *
     * Only parameters listed for the action in $_actions are sent; any other
     * parameter is silently dropped. The request is always a POST with
     * format=json.
     *
     * @param string $action
     * @param array $params
     * @return array The decoded JSON response.
     * @throws Scripto_Service_Exception When the action is not supported, when
     * the response is not a JSON array/object, or when the API reports an error.
     */
    protected function _request($action, array $params = array())
    {
        // Check if this action is a valid MediaWiki API action.
        if (!array_key_exists($action, $this->_actions)) {
            throw new Scripto_Service_Exception('Invalid MediaWiki API action.');
        }

        // Set valid parameters for this action. Compare strictly: a loose
        // comparison lets an integer key such as 0 match any parameter name.
        foreach ($params as $paramName => $paramValue) {
            if (in_array($paramName, $this->_actions[$action], true)) {
                self::getHttpClient()->setParameterPost($paramName, $paramValue);
            }
        }

        // Set default parameters.
        self::getHttpClient()->setParameterPost('format', 'json')
                             ->setParameterPost('action', $action);

        // Get the response body and reset the request.
        $body = self::getHttpClient()->request('POST')->getBody();
        self::getHttpClient()->resetParameters();

        // Parse the response body, throwing errors when encountered.
        $response = json_decode($body, true);
        if (!is_array($response)) {
            // A body that is not JSON (e.g. an HTML error page) would
            // otherwise surface as null and break every caller that indexes
            // into the response.
            throw new Scripto_Service_Exception('Invalid MediaWiki API response.');
        }
        if (isset($response['error'])) {
            throw new Scripto_Service_Exception($response['error']['info']);
        }
        return $response;
    }

    /**
     * Determine whether the provided MediaWiki API URL is valid.
     *
     * @param string $apiUrl
     * @return bool
     */
    static public function isValidApiUrl($apiUrl)
    {
        // Check for valid API URL string.
        if (!Zend_Uri::check($apiUrl) || !preg_match('#/api\.php$#', $apiUrl)) {
            return false;
        }

        try {
            // Ping the API endpoint for a valid response.
            $body = self::getHttpClient()->setUri($apiUrl)
                                         ->setParameterPost('action', 'query')
                                         ->setParameterPost('meta', 'siteinfo')
                                         ->setParameterPost('format', 'json')
                                         ->request('POST')->getBody();
        // Prevent "Unable to Connect" errors.
698 | } catch (Zend_Http_Client_Exception $e) { 699 | return false; 700 | } 701 | self::getHttpClient()->resetParameters(true); 702 | 703 | $response = json_decode($body, true); 704 | if (!is_array($response) || !isset($response['query']['general'])) { 705 | return false; 706 | } 707 | 708 | return true; 709 | } 710 | } 711 | -------------------------------------------------------------------------------- /tests/README.markdown: -------------------------------------------------------------------------------- 1 | Testing Scripto 2 | ============= 3 | 4 | Scripto uses the SimpleTest PHP testing framework. By running these tests, you 5 | can: 6 | 7 | * Test your external system's adapter for expected results; 8 | * Test the your MediaWiki instance via Scripto's MediaWiki API client; 9 | * Test the Scripto_Document base class. 10 | 11 | Installation 12 | ------------- 13 | 14 | * Download the [SimpleTest](http://www.simpletest.org/) framework; 15 | * Copy config.php.changeme to config.php: 16 | 17 | On the command line: 18 | 19 | $ cd /path/to/scripto/tests/ 20 | $ cp config.php.changeme config.php 21 | 22 | * Set the configuration in config.php: 23 | 24 | You can use the following document IDs to test Scripto's Example adapter: 25 | 26 | // Test document ID. 27 | define('TEST_DOCUMENT_ID', '16344'); 28 | 29 | or: 30 | 31 | // Test document ID. 32 | define('TEST_DOCUMENT_ID', '[Facsimile of] letter to Messrs. O. P. 
Hall et al from Lincoln.'); 33 | 34 | Running the Tests 35 | ------------- 36 | 37 | On the command line: 38 | 39 | $ cd /path/to/scripto/tests/ 40 | $ php all_tests.php 41 | 42 | In the browser: 43 | 44 | * Make sure the Scripto tests directory is available to your web server; 45 | * Go to http://your-domain/tests/all_tests.php 46 | -------------------------------------------------------------------------------- /tests/adapter_test.php: -------------------------------------------------------------------------------- 1 | _testAdapterFilename = TEST_ADAPTER_FILENAME; 28 | $this->_testAdapterClassName = TEST_ADAPTER_CLASS_NAME; 29 | $this->_testDocumentId = TEST_DOCUMENT_ID; 30 | } 31 | 32 | public function testAdapterIsValid() 33 | { 34 | // Assert adapter file exists. 35 | $this->assertTrue(file_exists($this->_testAdapterFilename), 'Example adapter file does not exist'); 36 | 37 | // Assert adapter file is instance of Scripto_Adapter_Interface. 38 | require_once $this->_testAdapterFilename; 39 | $adapter = new $this->_testAdapterClassName; 40 | $this->assertIsA($adapter, 'Scripto_Adapter_Interface', 'Example adapter is not an instance of Scripto_Adapter_Interface'); 41 | 42 | $this->_testAdapter = $adapter; 43 | } 44 | 45 | public function testDocumentIsValid() 46 | { 47 | // Assert document ID is valid and exists. 48 | $this->assertTrue((is_int($this->_testDocumentId) || is_string($this->_testDocumentId)), 'Document ID must be int or string (' . gettype($this->_testDocumentId) . ' given)'); 49 | $this->assertTrue($this->_testAdapter->documentExists($this->_testDocumentId), "Document ID \"{$this->_testDocumentId}\" does not exist"); 50 | 51 | // Assert document title exists. 52 | $documentTitle = $this->_testAdapter->getDocumentTitle($this->_testDocumentId); 53 | $this->assertIsA($documentTitle, 'string', 'Document title must be a string (' . gettype($documentTitle) . 
' given)'); 54 | } 55 | 56 | public function testDocumentPagesAreValid() 57 | { 58 | // Assert valid document pages format. 59 | $documentPages = $this->_testAdapter->getDocumentPages($this->_testDocumentId); 60 | $this->assertIsA($documentPages, 'array', 'Document pages must be an array (' . gettype($documentPages) . ' given)'); 61 | $this->assertTrue(count($documentPages), 'Document pages must not be empty'); 62 | 63 | // Assert document first page is valid and exists. 64 | $documentFirstPageId = $this->_testAdapter->getDocumentFirstPageId($this->_testDocumentId); 65 | $this->assertTrue((is_int($documentFirstPageId) || is_string($documentFirstPageId)), 'Document first page ID must be int or string (' . gettype($documentFirstPageId) . ' given)'); 66 | $this->assertTrue(array_key_exists($documentFirstPageId, $documentPages), "Document first page ID \"$documentFirstPageId\" does not exist"); 67 | 68 | // Iterate all document pages. 69 | foreach ($documentPages as $pageId => $pageName) { 70 | 71 | // Assert document page exists. 72 | $documentPageExists = $this->_testAdapter->documentPageExists($this->_testDocumentId, $pageId); 73 | $this->assertIdentical($documentPageExists, true, "Document page ID \"$pageId\" does not exist"); 74 | 75 | // Assert document page name exists. 76 | $documentPageName = $this->_testAdapter->getDocumentPageName($this->_testDocumentId, $pageId); 77 | $this->assertIsA($documentPageName, 'string', "Document page name for page ID \"$pageId\" must be a string (" . gettype($documentPageName) . " given)"); 78 | 79 | // Assert document page URL is valid. There's no consistent, 80 | // reliable, and lightweight way to validate a URL, even with 81 | // regular expressions, so just check to see if it returns a string. 82 | $documentPageImageUrl = $this->_testAdapter->getDocumentPageFileUrl($this->_testDocumentId, $pageId); 83 | $this->assertIsA($documentPageImageUrl, 'string', "Document page image URL for page ID \"$pageId\" must be a string (" . 
gettype($documentPageImageUrl) . " given)"); 84 | } 85 | } 86 | 87 | public function testImportTranscriptions() 88 | { 89 | // Must install a parallel external system to test imports. This may be 90 | // too involved to be feasible for most people. 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /tests/all_tests.php: -------------------------------------------------------------------------------- 1 | TestSuite('All tests'); 10 | $path = dirname(__FILE__) . DIRECTORY_SEPARATOR; 11 | $this->addFile($path . 'adapter_test.php'); 12 | $this->addFile($path . 'mediawiki_test.php'); 13 | $this->addFile($path . 'scripto_test.php'); 14 | $this->addFile($path . 'document_test.php'); 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /tests/config.php.changeme: -------------------------------------------------------------------------------- 1 | _testDocumentId = TEST_DOCUMENT_ID; 21 | 22 | require_once TEST_ADAPTER_FILENAME; 23 | require_once 'Scripto/Service/MediaWiki.php'; 24 | require_once 'Scripto/Document.php'; 25 | 26 | // Instantiate the Scripto_Document object and set it. 27 | $testAdapterClassName = TEST_ADAPTER_CLASS_NAME; 28 | $this->_testDocument = new Scripto_Document( 29 | $this->_testDocumentId, 30 | new $testAdapterClassName, 31 | new Scripto_Service_MediaWiki(TEST_MEDIAWIKI_API_URL, false) 32 | ); 33 | } 34 | 35 | public function testGetId() 36 | { 37 | $this->assertEqual($this->_testDocumentId, $this->_testDocument->getId()); 38 | } 39 | 40 | public function testGetTitle() 41 | { 42 | $this->assertIsA($this->_testDocument->getTitle(), 'string'); 43 | } 44 | 45 | /** 46 | * Set the page for subsequent tests. 47 | */ 48 | public function testPageIsValid() 49 | { 50 | // Assert a page has not been set yet. 
51 | $this->assertNull($this->_testDocument->getPageId(), 'The document page ID was prematurely set'); 52 | 53 | // Assert a page can be set (in this case, the first page). 54 | $this->_testDocument->setPage(null); 55 | $this->assertNotNull($this->_testDocument->getPageId(), 'The document page ID was not set'); 56 | 57 | // Assert that decoding the base title works. 58 | $baseTitle = Scripto_Document::encodeBaseTitle($this->_testDocument->getId(), $this->_testDocument->getPageId()); 59 | $decodedBaseTitle = Scripto_Document::decodeBaseTitle($baseTitle); 60 | 61 | $this->assertEqual($decodedBaseTitle[0], $this->_testDocumentId, 'Something went wrong during base title encoding/decoding. Document ID does not match'); 62 | $this->assertEqual($decodedBaseTitle[1], $this->_testDocument->getPageId(), 'Something went wrong during base title encoding/decoding. Page ID does not match'); 63 | } 64 | 65 | public function testGetPageName() 66 | { 67 | $this->assertIsA($this->_testDocument->getPageName(), 'string'); 68 | } 69 | 70 | public function testGetBaseTitle() 71 | { 72 | $this->assertIsA($this->_testDocument->getBaseTitle(), 'string'); 73 | } 74 | 75 | public function testGetPages() 76 | { 77 | $this->assertIsA($this->_testDocument->getPages(), 'array'); 78 | } 79 | 80 | public function testGetFirstPageId() 81 | { 82 | $firstPageId = $this->_testDocument->getFirstPageId(); 83 | $this->assertTrue((is_int($firstPageId) || is_string($firstPageId))); 84 | } 85 | 86 | public function testGetPageFileUrl() 87 | { 88 | $this->assertIsA($this->_testDocument->getPageFileUrl(), 'string'); 89 | } 90 | 91 | public function testGetTranscriptionPageMediawikiUrl() 92 | { 93 | $this->assertIsA($this->_testDocument->getTranscriptionPageMediawikiUrl(), 'string'); 94 | } 95 | 96 | public function testGetTalkPageMediawikiUrl() 97 | { 98 | $this->assertIsA($this->_testDocument->getTalkPageMediawikiUrl(), 'string'); 99 | } 100 | } 101 | 
-------------------------------------------------------------------------------- /tests/mediawiki_test.php: -------------------------------------------------------------------------------- 1 | Insert non-formatted text here 25 | ---- 26 | Strike-through text 27 |
    28 | Superscript text 29 | Subscript text 30 | Small Text 31 |
    32 | Block quote 33 |
    34 | {| class=\"wikitable\" 35 | |- 36 | ! header 1 37 | ! header 2 38 | ! header 3 39 | |- 40 | | row 1, cell 1 41 | | row 1, cell 2 42 | | row 1, cell 3 43 | |- 44 | | row 2, cell 1 45 | | row 2, cell 2 46 | | row 2, cell 3 47 | |}"; 48 | 49 | /** 50 | * When getting a preview and page HTML, MediaWiki returns an HTML comment 51 | * containing a dynamic "NewPP limit report." Here, this is removed prior to 52 | * asserting valid get responses. 53 | */ 54 | const TEST_EXPECTED_HTML = '

    Bold text 55 | Italic text 56 | link title 57 |

    58 |

    Headline text

    59 |

    Insert non-formatted text here 60 |

    61 |
    62 |

    Strike-through text 63 |
    64 | Superscript text 65 | Subscript text 66 | Small Text 67 |

    68 |
    69 | Block quote 70 |
    71 |
    72 | 73 | 74 | 76 | 78 | 80 | 81 | 83 | 85 | 87 | 88 | 90 | 92 |
    header 1 75 | header 2 77 | header 3 79 |
    row 1, cell 1 82 | row 1, cell 2 84 | row 1, cell 3 86 |
    row 2, cell 1 89 | row 2, cell 2 91 | row 2, cell 3 93 |
    94 | 95 | 96 | '; 97 | 98 | const TEST_EXPECTED_PREVIEW = ''; 99 | 100 | private $_testMediawiki; 101 | private $_testEditCredentials; 102 | 103 | /** 104 | * Use __construct() instead of setUp() because it's unnecessary to set up 105 | * the test case before every test method. 106 | */ 107 | public function __construct() 108 | { 109 | parent::__construct(); 110 | 111 | // Do not pass cookies to a browser when testing. 112 | require_once 'Scripto/Service/MediaWiki.php'; 113 | $this->_testMediawiki = new Scripto_Service_MediaWiki(TEST_MEDIAWIKI_API_URL, false); 114 | } 115 | 116 | public function testCredentials() 117 | { 118 | // Test login and logout if username and password are provided. 119 | if (TEST_MEDIAWIKI_USERNAME && TEST_MEDIAWIKI_PASSWORD) { 120 | 121 | // Assert login works. Throws an error if login is unsuccessful. 122 | $this->_testMediawiki->login(TEST_MEDIAWIKI_USERNAME, TEST_MEDIAWIKI_PASSWORD); 123 | 124 | // Assert logout works. 125 | $this->_testMediawiki->logout(); 126 | $userInfo = $this->_testMediawiki->getUserInfo(); 127 | $this->assertTrue(isset($userInfo['query']['userinfo']['anon']), 'Logout unsuccessful'); 128 | } 129 | } 130 | 131 | public function testEditPage() 132 | { 133 | // Assert the test page's preview is valid. Remove dynamic HTML comments. 134 | $testPagePreview = $this->_testMediawiki->getPreview(self::TEST_WIKITEXT); 135 | $this->assertEqual(self::TEST_EXPECTED_HTML, $this->_removeHtmlComments($testPagePreview), 'The test page preview HTML is invalid'); 136 | 137 | // Clear the page before testing edit page. Resetting the database or 138 | // deleting the page is preferable, but resetting is too involved and 139 | // Scripto_Service_MediaWiki does not implement a delete page feature 140 | // because deleting requires special (sysops) permissions. 
141 | $this->_testMediawiki->edit(self::TEST_TITLE, '.'); 142 | $text = $this->_testMediawiki->getLatestRevisionWikitext(self::TEST_TITLE); 143 | $this->assertEqual('.', $text, 'Clearing the test page did not work'); 144 | 145 | // Edit the page with test text. 146 | $this->_testMediawiki->edit(self::TEST_TITLE, self::TEST_WIKITEXT); 147 | 148 | // Assert the test page's Wikitext is valid. 149 | $textPageWikitext = $this->_testMediawiki->getLatestRevisionWikitext(self::TEST_TITLE); 150 | $this->assertEqual(self::TEST_WIKITEXT, $textPageWikitext, 'Editing the test page with test wikitext did not work '); 151 | 152 | // Assert the test page's HTML is valid. Remove dynamic HTML comments. 153 | $testPageHtml = $this->_testMediawiki->getLatestRevisionHtml(self::TEST_TITLE); 154 | $this->assertEqual(self::TEST_EXPECTED_HTML, $this->_removeHtmlComments($testPageHtml), 'The test page HTML is invalid'); 155 | 156 | } 157 | 158 | private function _removeHtmlComments($text) 159 | { 160 | // Must include "s" modifier so "." matches new lines; the lazy ".*?" stops at the first "-->" so each HTML comment is removed on its own. 161 | return preg_replace('/<!--.*?-->/s', '', $text); 162 | } 163 | } -------------------------------------------------------------------------------- /tests/scripto_test.php: -------------------------------------------------------------------------------- 1 | _testMediawikiUsername = TEST_MEDIAWIKI_USERNAME; 23 | $this->_testMediawikiPassword = TEST_MEDIAWIKI_PASSWORD; 24 | $this->_testDocumentId = TEST_DOCUMENT_ID; 25 | 26 | require_once TEST_ADAPTER_FILENAME; 27 | require_once 'Scripto/Service/MediaWiki.php'; 28 | require_once 'Scripto.php'; 29 | 30 | // Instantiate the Scripto object and set it. 
31 | $testAdapterClassName = TEST_ADAPTER_CLASS_NAME; 32 | $this->_testScripto = new Scripto( 33 | new $testAdapterClassName, 34 | new Scripto_Service_MediaWiki(TEST_MEDIAWIKI_API_URL, false) 35 | ); 36 | } 37 | 38 | public function testDocumentExists() 39 | { 40 | $this->assertIsA($this->_testScripto->documentExists($this->_testDocumentId), 'bool'); 41 | } 42 | 43 | public function testGetDocument() 44 | { 45 | $this->assertIsA($this->_testScripto->getDocument($this->_testDocumentId), 'Scripto_Document'); 46 | } 47 | 48 | public function testLogin() 49 | { 50 | if ($this->_testMediawikiUsername && $this->_testMediawikiPassword) { 51 | $this->_testScripto->login($this->_testMediawikiUsername, $this->_testMediawikiPassword); 52 | $this->assertTrue($this->_testScripto->isLoggedIn()); 53 | } 54 | } 55 | 56 | public function testCanExport() 57 | { 58 | $this->assertIsA($this->_testScripto->canExport(), 'bool'); 59 | } 60 | 61 | public function testCanProtect() 62 | { 63 | $this->assertIsA($this->_testScripto->canProtect(), 'bool'); 64 | } 65 | 66 | public function testGetUserName() 67 | { 68 | $this->assertIsA($this->_testScripto->getUserName(), 'string'); 69 | } 70 | 71 | public function testGetUserDocumentPages() 72 | { 73 | $this->assertIsA($this->_testScripto->getUserDocumentPages(), 'array'); 74 | } 75 | 76 | public function testGetRecentChanges() 77 | { 78 | $this->assertIsA($this->_testScripto->getRecentChanges(), 'array'); 79 | } 80 | 81 | public function testGetWatchlist() 82 | { 83 | if ($this->_testScripto->isLoggedIn()) { 84 | $this->assertIsA($this->_testScripto->getWatchlist(), 'array'); 85 | } 86 | } 87 | 88 | public function testGetAllDocuments() 89 | { 90 | $this->assertIsA($this->_testScripto->getAllDocuments(), 'array'); 91 | } 92 | 93 | public function testLogout() 94 | { 95 | if ($this->_testScripto->isLoggedIn()) { 96 | $this->_testScripto->logout(); 97 | $this->assertFalse($this->_testScripto->isLoggedIn()); 98 | } 99 | } 100 | } 101 | 
--------------------------------------------------------------------------------