├── README.md
├── lib
├── Scripto.php
└── Scripto
│ ├── Adapter
│ ├── Example.php
│ ├── Exception.php
│ └── Interface.php
│ ├── Document.php
│ ├── Exception.php
│ └── Service
│ ├── Exception.php
│ └── MediaWiki.php
└── tests
├── README.markdown
├── adapter_test.php
├── all_tests.php
├── config.php.changeme
├── document_test.php
├── mediawiki_test.php
└── scripto_test.php
/README.md:
--------------------------------------------------------------------------------
1 | Scripto
2 | =============
3 |
4 | © 2010-2012, Center for History and New Media
5 | License: [GNU GPL v3](http://www.gnu.org/licenses/gpl-3.0.txt)
6 |
7 | Scripto is an open source documentary transcription tool written in PHP. It
8 | features a lightweight library that interfaces MediaWiki and potentially any
9 | content management system that serves transcribable resources, including text,
10 | still image, moving image, and audio files.
11 |
12 | Scripto is not a content management system. Scripto is not a graphical user
13 | interface. Scripto is a software library powered by wiki technology that
14 | developers can use to integrate a custom transcription GUI into an existing CMS.
15 | You provide the CMS and GUI; Scripto provides the engine for crowdsourcing the
16 | transcription of your content.
17 |
18 | Why MediaWiki?
19 | -------------
20 |
21 | MediaWiki is a good choice for the transcription database for several reasons:
22 |
23 | * It is the most popular wiki application and has a sizable and active developer community;
24 | * It offers helpful features, such as talk pages, version history, and user administration;
25 | * [Wiki markup](http://en.wikipedia.org/wiki/Help:Wiki_markup) is easy to learn;
26 | * It comes with a powerful, fully-featured [API](http://www.mediawiki.org/wiki/API).
27 |
28 | Requirements
29 | -------------
30 |
31 | * PHP 5.2.4+
32 | * Zend Framework 1.10+
33 | * MediaWiki 1.15.4+
34 | * Custom adapter interface to (and possibly an API for) the external CMS
35 |
36 | Installation
37 | -------------
38 |
39 | * Download and install [MediaWiki](http://www.mediawiki.org/wiki/MediaWiki);
40 | * Download the [Zend Framework](http://framework.zend.com/) library;
41 | * Download the [Scripto](https://github.com/chnm/Scripto) library, set the
42 | configuration, and use the API to build your documentary transcription
43 | application.
44 |
45 | Suggested Configuration and Setup
46 | -------------
47 |
48 | Here's a basic configuration:
49 |
50 | ```php
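51 | <?php
52 | // URL to the MediaWiki installation API.
53 | define('MEDIAWIKI_API_URL', 'http://example.com/w/api.php');
54 |
55 | // Make sure the Zend Framework and Scripto libraries are on the include
56 | // path, then set the Scripto object by passing your custom adapter object
57 | // (My_Custom_Adapter is a placeholder name) and the MediaWiki configuration.
58 | require_once 'Scripto.php';
59 | $scripto = new Scripto(new My_Custom_Adapter(),
60 | array('api_url' => MEDIAWIKI_API_URL));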
72 |
73 | // Set the current document object.
74 | $doc = $scripto->getDocument($_REQUEST['documentId']);
75 |
76 | // Set the current document page.
77 | $doc->setPage($_REQUEST['pageId']);
78 |
79 | // Render the transcription or talk page using the $scripto and $doc APIs.
80 | ```
81 |
82 | See the various implementations of Scripto for more suggestions on configuration,
83 | setup, layout, and styles.
84 |
85 | * [Omeka plugin](https://github.com/omeka/plugin-Scripto)
86 | * [WordPress plugin](https://github.com/chnm/scripto-wordpress-plugin)
87 | * [Drupal module](https://github.com/chnm/scripto-drupal-module)
88 |
89 | Advanced Usage
90 | -------------
91 |
92 | ### Record Client IP Address
93 |
94 | Scripto does not record a client's IP address by default. All modifications to
95 | pages will be attributed to the IP address of the server running Scripto. To record a
96 | client's IP address, you'll need to add the following code to MediaWiki's
97 | LocalSettings.php:
98 |
99 | ```
100 | $wgSquidServersNoPurge = array('127.0.0.1');
101 | ```
102 |
103 | Where '127.0.0.1' is the IP address of the server running Scripto.
104 |
105 | ### Base64 Decoding
106 |
107 | Scripto Base64 encodes document and page numbers to prevent incompatible
108 | MediaWiki title characters. Because of this, corresponding page titles in
109 | MediaWiki will be unusually named. You may place the following code in
110 | MediaWiki's LocalSettings.php to make page titles human readable:
111 |
112 | ```
113 | // Decode the MediaWiki title from Base64.
114 | // http://www.mediawiki.org/wiki/Manual:Hooks/BeforePageDisplay
115 | $wgHooks['BeforePageDisplay'][] = 'fnScriptoDecodePageTitle';
/**
 * Replace a Base64-encoded Scripto page title with a human-readable one.
 *
 * Titles that do not start with $prefix, or that do not contain both a
 * document part and a page part, are left untouched.
 *
 * @param object $out The output object; must provide getPageTitle() and
 * setPageTitle().
 * @param object $sk Passed by the hook; not used here.
 * @param string $prefix The character that marks a Scripto base title.
 * @param string $delimiter The character separating the encoded document ID
 * from the encoded page ID.
 * @return bool Always true. Returning false from a MediaWiki hook handler
 * prevents handlers registered after this one from running.
 */
function fnScriptoDecodePageTitle(&$out, &$sk, $prefix = '.', $delimiter = '.')
{
    // Map the URL-safe Base64 characters back to the standard alphabet.
    $title = strtr($out->getPageTitle(), '-_', '+/');

    // An empty title has no first character to compare against the prefix.
    if ('' === $title || $prefix != $title[0]) {
        return true;
    }

    $parts = array_map('base64_decode', explode($delimiter, ltrim($title, $prefix)));

    // Both a document ID and a page ID are needed to build the new title.
    if (count($parts) < 2) {
        return true;
    }

    $out->setPageTitle('Document ' . $parts[0] . '; Page ' . $parts[1]);
    return true;
}
127 | ```
128 |
129 | Changelog
130 | -------------
131 |
132 | * 1.1
133 | * Add option to retain specified HTML attributes.
134 | * 1.1.1
135 | * Fix watch and unwatch pages.
136 | * 1.1.2
137 | * The /e modifier is deprecated in PHP 5.5.0 and removed in 7.0.0. Use
138 | preg_replace_callback() instead.
139 |
--------------------------------------------------------------------------------
/lib/Scripto.php:
--------------------------------------------------------------------------------
1 |
58 | * $mediawiki['api_url']: required; the MediaWiki API URL
59 | * $mediawiki['pass_cookies']: optional; pass cookies to the web
60 | * browser via API client
61 | * $mediawiki['cookie_prefix']: optional; set the cookie prefix
62 | *
63 | */
public function __construct(Scripto_Adapter_Interface $adapter, $mediawiki)
{
    $this->_adapter = $adapter;

    if ($mediawiki instanceof Scripto_Service_MediaWiki) {
        // A ready-made service object is used as is.
        $this->_mediawiki = $mediawiki;
    } else {
        // Otherwise a configuration array with at least an API URL is needed.
        if (!is_array($mediawiki) || !array_key_exists('api_url', $mediawiki)) {
            throw new Scripto_Exception('The provided mediawiki parameter is invalid.');
        }

        // Cookies are passed unless the caller says otherwise; there is no
        // cookie prefix unless the caller provides one.
        $passCookies = isset($mediawiki['pass_cookies']) ? (bool) $mediawiki['pass_cookies'] : true;
        $cookiePrefix = isset($mediawiki['cookie_prefix']) ? $mediawiki['cookie_prefix'] : null;

        $this->_mediawiki = new Scripto_Service_MediaWiki(
            $mediawiki['api_url'],
            $passCookies,
            $cookiePrefix
        );
    }

    // Load the current user's information from the service.
    $this->setUserInfo();
}
90 |
/**
 * Forward calls to undefined methods to the adapter.
 *
 * Adapter methods that are not part of Scripto_Adapter_Interface can be
 * called directly on the Scripto object this way.
 *
 * @see Scripto_Adapter_Interface
 * @param string $name The name of the adapter method.
 * @param array $args The arguments to pass to the adapter method.
 * @return mixed Whatever the adapter method returns.
 * @throws Scripto_Adapter_Exception If the adapter has no such method.
 */
public function __call($name, $args)
{
    if (method_exists($this->_adapter, $name)) {
        return call_user_func_array(array($this->_adapter, $name), $args);
    }

    require_once 'Scripto/Adapter/Exception.php';
    throw new Scripto_Adapter_Exception('The provided adapter method "' . $name . '" does not exist.');
}
110 |
/**
 * Ask the adapter whether a document exists in the external system.
 *
 * @uses Scripto_Adapter_Interface::documentExists()
 * @param string|int $id The unique document identifier.
 * @return bool True if the adapter reports a truthy result, false otherwise.
 */
public function documentExists($id)
{
    // Normalize the adapter's answer to a strict boolean.
    return (bool) $this->_adapter->documentExists($id);
}
126 |
/**
 * Build a Scripto_Document for the given document ID.
 *
 * The document is given this object's adapter and MediaWiki service.
 *
 * @see Scripto_Document
 * @param string|int $id The unique document identifier.
 * @return Scripto_Document
 */
public function getDocument($id)
{
    $document = new Scripto_Document($id, $this->_adapter, $this->_mediawiki);
    return $document;
}
138 |
/**
 * Log in through the MediaWiki service, then reload the user information.
 *
 * Note that account creation can be restricted in MediaWiki.
 * @link http://www.mediawiki.org/wiki/Manual:Preventing_access#Restrict_account_creation
 *
 * @uses Scripto_Service_MediaWiki::login()
 * @param string $username The MediaWiki user's username.
 * @param string $password The MediaWiki user's password.
 */
public function login($username, $password)
{
    $this->_mediawiki->login($username, $password);

    // The cached user information is stale after a login attempt.
    $this->setUserInfo();
}
154 |
/**
 * Log out through the MediaWiki service, then reload the user information.
 *
 * @uses Scripto_Service_MediaWiki::logout()
 */
public function logout()
{
    $this->_mediawiki->logout();

    // The cached user information is stale after a logout.
    $this->setUserInfo();
}
165 |
/**
 * Tell whether the current user is logged in.
 *
 * @return bool False for an anonymous user (user ID 0), true otherwise.
 */
public function isLoggedIn()
{
    $userId = $this->_userInfo['query']['userinfo']['id'];
    return (bool) $userId;
}
176 |
/**
 * Tell whether the current user may export transcriptions to the external
 * system.
 *
 * The user may export when they belong to at least one of the given groups.
 *
 * @param array $groups The MediaWiki groups allowed to export.
 * @return bool
 */
public function canExport(array $groups = array('sysop', 'bureaucrat'))
{
    $allowed = false;
    foreach ($groups as $candidate) {
        if (in_array($candidate, $this->_userInfo['query']['userinfo']['groups'])) {
            $allowed = true;
            break;
        }
    }
    return $allowed;
}
193 |
/**
 * Tell whether the current user may protect MediaWiki pages.
 *
 * @return bool True if the user's rights include "protect".
 */
public function canProtect()
{
    return in_array('protect', $this->_userInfo['query']['userinfo']['rights']);
}
207 |
/**
 * Load the current user's information from the MediaWiki service.
 *
 * The constructor, login() and logout() already call this. Calling it
 * directly is only useful when authentication happens after construction
 * without a login() call, e.g. when hijacking cookies for command line
 * authentication.
 *
 * @uses Scripto_Service_MediaWiki::getUserInfo()
 */
public function setUserInfo()
{
    // Ask for the user's groups and rights along with the basic information.
    $userInfo = $this->_mediawiki->getUserInfo('groups|rights');
    $this->_userInfo = $userInfo;
}
221 |
/**
 * Get the current user's name from the loaded user information.
 *
 * @return string
 */
public function getUserName()
{
    $userInfo = $this->_userInfo['query']['userinfo'];
    return $userInfo['name'];
}
231 |
/**
 * Get the document pages the current user most recently contributed to.
 *
 * Contributions are fetched from MediaWiki in batches of 100 until $limit
 * distinct, valid document pages have been collected or no contributions
 * remain. Each page appears once, described by its newest contribution.
 *
 * @uses Scripto_Service_MediaWiki::getUserContributions()
 * @param int $limit The number of document pages to return.
 * @return array Keyed by MediaWiki page ID. Each value holds revision_id,
 * namespace_index, namespace_name, mediawiki_title, timestamp, comment, size,
 * document_id, document_page_id, document_title and document_page_name.
 */
public function getUserDocumentPages($limit = 10)
{
    $limit = (int) $limit;

    // Namespaces to get: ns_index => ns_name
    // See http://www.mediawiki.org/wiki/Manual:Namespace#Built-in_namespaces
    $namespaceNames = array('0' => 'Main', '1' => 'Talk');
    $namespaceFilter = implode('|', array_keys($namespaceNames));

    $pages = array();
    $titleCache = array();
    $continueFrom = null;

    do {
        $response = $this->_mediawiki->getUserContributions(
            $this->_userInfo['query']['userinfo']['name'],
            array('ucstart' => $continueFrom,
                  'ucnamespace' => $namespaceFilter,
                  'uclimit' => 100)
        );

        foreach ($response['query']['usercontribs'] as $contribution) {

            // A page that was already collected is skipped: contributions
            // arrive newest first, so the stored entry is the newest one.
            $pageKey = $contribution['pageid'];
            if (array_key_exists($pageKey, $pages)) {
                continue;
            }

            // Strip the namespace, if any, from the title.
            $baseTitle = preg_replace('/^(.+:)?(.+)$/', '$2', $contribution['title']);

            // Titles without the Scripto prefix are not document pages.
            if (Scripto_Document::BASE_TITLE_PREFIX != $baseTitle[0]) {
                continue;
            }

            $ids = Scripto_Document::decodeBaseTitle($baseTitle);
            $documentId = $ids[0];
            $pageId = $ids[1];

            // Skip contributions the adapter does not know as document pages.
            if (!$this->_adapter->documentPageExists($documentId, $pageId)) {
                continue;
            }

            // Ask the adapter for each document title only once.
            if (!array_key_exists($documentId, $titleCache)) {
                $titleCache[$documentId] = $this->_adapter->getDocumentTitle($documentId);
            }

            // Page names need no cache because every page is handled once.
            $pageName = $this->_adapter->getDocumentPageName($documentId, $pageId);

            $pages[$pageKey] = array(
                'revision_id' => $contribution['revid'],
                'namespace_index' => $contribution['ns'],
                'namespace_name' => $namespaceNames[$contribution['ns']],
                'mediawiki_title' => $contribution['title'],
                'timestamp' => $contribution['timestamp'],
                'comment' => $contribution['comment'],
                'size' => $contribution['size'],
                'document_id' => $documentId,
                'document_page_id' => $pageId,
                'document_title' => $titleCache[$documentId],
                'document_page_name' => $pageName,
            );

            // Stop fetching as soon as the limit is reached.
            if ($limit == count($pages)) {
                break 2;
            }
        }

        // Continue from where this batch ended, if there is more.
        $continueFrom = isset($response['query-continue'])
            ? $response['query-continue']['usercontribs']['ucstart']
            : null;

    } while ($continueFrom);

    return $pages;
}
327 |
/**
 * Get the recent changes to document pages.
 *
 * Changes are fetched from MediaWiki in batches of 100 until $limit changes
 * to valid document pages have been collected or no changes remain.
 *
 * @link http://www.mediawiki.org/wiki/Manual:Namespace#Built-in_namespaces
 * @uses Scripto_Service_MediaWiki::getRecentChanges()
 * @param int $limit The number of recent changes to return.
 * @return array A list of changes. Each change holds type, namespace_index,
 * namespace_name, mediawiki_title, rcid, page_id, revision_id,
 * old_revision_id, user, old_length, new_length, timestamp, comment, action,
 * log_id, log_type, log_action, new, minor, document_id, document_page_id,
 * document_title and document_page_name.
 */
public function getRecentChanges($limit = 10)
{
    $start = null;
    $recentChanges = array();

    // Document titles, cached by document ID.
    $documentTitles = array();

    // Page names, cached by document ID and then page ID. Page IDs are only
    // unique within a document, so the page ID alone cannot be the key.
    $documentPageNames = array();

    // Namespaces to get: ns_index => ns_name
    // See http://www.mediawiki.org/wiki/Manual:Namespace#Built-in_namespaces
    $namespaces = array('0' => 'Main', '1' => 'Talk');

    do {
        $response = $this->_mediawiki->getRecentChanges(
            array('rcprop' => 'user|comment|timestamp|title|ids|sizes|loginfo|flags',
                  'rclimit' => '100',
                  'rcnamespace' => implode('|', array_keys($namespaces)),
                  'rcstart' => $start)
        );

        foreach ($response['query']['recentchanges'] as $value) {

            // Extract the title, removing the namespace if any.
            $title = preg_replace('/^(.+:)?(.+)$/', '$2', $value['title']);

            // Preempt further processing on changes with an invalid prefix.
            if (Scripto_Document::BASE_TITLE_PREFIX != $title[0]) {
                continue;
            }

            // Set the document ID and page ID.
            $documentIds = Scripto_Document::decodeBaseTitle($title);
            $documentId = $documentIds[0];
            $pageId = $documentIds[1];

            // Reduce calls to the adapter by caching each document title and
            // page name. A cached page name implies that the page was already
            // validated and that its document title is cached as well.
            $cachedDocumentPage = isset($documentPageNames[$documentId])
                && array_key_exists($pageId, $documentPageNames[$documentId]);

            if ($cachedDocumentPage) {
                $documentTitle = $documentTitles[$documentId];
                $documentPageName = $documentPageNames[$documentId][$pageId];
            } else {
                // Filter out invalid document pages.
                if (!$this->_adapter->documentPageExists($documentId, $pageId)) {
                    continue;
                }

                if (array_key_exists($documentId, $documentTitles)) {
                    $documentTitle = $documentTitles[$documentId];
                } else {
                    $documentTitle = $this->_adapter->getDocumentTitle($documentId);
                    $documentTitles[$documentId] = $documentTitle;
                }

                $documentPageName = $this->_adapter->getDocumentPageName($documentId, $pageId);
                $documentPageNames[$documentId][$pageId] = $documentPageName;
            }

            $logAction = isset($value['logaction']) ? $value['logaction']: null;
            $action = self::getChangeAction(array('comment' => $value['comment'],
                                                  'log_action' => $logAction));

            $recentChanges[] = array(
                'type' => $value['type'],
                'namespace_index' => $value['ns'],
                'namespace_name' => $namespaces[$value['ns']],
                'mediawiki_title' => $value['title'],
                'rcid' => $value['rcid'],
                'page_id' => $value['pageid'],
                'revision_id' => $value['revid'],
                'old_revision_id' => $value['old_revid'],
                'user' => $value['user'],
                'old_length' => $value['oldlen'],
                'new_length' => $value['newlen'],
                'timestamp' => $value['timestamp'],
                'comment' => $value['comment'],
                'action' => $action,
                'log_id' => isset($value['logid']) ? $value['logid']: null,
                'log_type' => isset($value['logtype']) ? $value['logtype']: null,
                'log_action' => $logAction,
                'new' => isset($value['new']) ? true: false,
                'minor' => isset($value['minor']) ? true: false,
                'document_id' => $documentId,
                'document_page_id' => $pageId,
                'document_title' => $documentTitle,
                'document_page_name' => $documentPageName,
            );

            // Break out of the loops if limit has been reached.
            if ($limit == count($recentChanges)) {
                break 2;
            }
        }

        // Set the query continue, if any.
        if (isset($response['query-continue'])) {
            $start = $response['query-continue']['recentchanges']['rcstart'];
        } else {
            $start = null;
        }

    } while ($start);

    return $recentChanges;
}
449 |
/**
 * Get the current user's watchlist, restricted to document pages.
 *
 * Watchlist entries are fetched from MediaWiki in batches of 100 until
 * $limit entries for valid document pages have been collected or no entries
 * remain.
 *
 * @link http://www.mediawiki.org/wiki/API:Watchlist
 * @uses Scripto_Service_MediaWiki::getWatchlist()
 * @param int $limit The number of watchlist entries to return.
 * @return array A list of entries. Each entry holds namespace_index,
 * namespace_name, mediawiki_title, page_id, revision_id, user, old_length,
 * new_length, timestamp, comment, action, new, minor, anonymous,
 * document_id, document_page_id, document_title and document_page_name.
 */
public function getWatchlist($limit = 10)
{
    $start = null;
    $watchlist = array();

    // Document titles, cached by document ID.
    $documentTitles = array();

    // Page names, cached by document ID and then page ID. Page IDs are only
    // unique within a document, so the page ID alone cannot be the key.
    $documentPageNames = array();

    // Namespaces to get: ns_index => ns_name
    // See http://www.mediawiki.org/wiki/Manual:Namespace#Built-in_namespaces
    $namespaces = array('0' => 'Main', '1' => 'Talk');

    do {
        $response = $this->_mediawiki->getWatchlist(
            array('wlprop' => 'user|comment|timestamp|title|ids|sizes|flags',
                  'wllimit' => '100',
                  'wlallrev' => true,
                  'wlnamespace' => implode('|', array_keys($namespaces)),
                  'wlstart' => $start)
        );

        foreach ($response['query']['watchlist'] as $value) {

            // Extract the title, removing the namespace if any.
            $title = preg_replace('/^(.+:)?(.+)$/', '$2', $value['title']);

            // Preempt further processing on entries with an invalid prefix.
            if (Scripto_Document::BASE_TITLE_PREFIX != $title[0]) {
                continue;
            }

            // Set the document ID and page ID.
            $documentIds = Scripto_Document::decodeBaseTitle($title);
            $documentId = $documentIds[0];
            $pageId = $documentIds[1];

            // Reduce calls to the adapter by caching each document title and
            // page name. A cached page name implies that the page was already
            // validated and that its document title is cached as well.
            $cachedDocumentPage = isset($documentPageNames[$documentId])
                && array_key_exists($pageId, $documentPageNames[$documentId]);

            if ($cachedDocumentPage) {
                $documentTitle = $documentTitles[$documentId];
                $documentPageName = $documentPageNames[$documentId][$pageId];
            } else {
                // Filter out invalid document pages.
                if (!$this->_adapter->documentPageExists($documentId, $pageId)) {
                    continue;
                }

                if (array_key_exists($documentId, $documentTitles)) {
                    $documentTitle = $documentTitles[$documentId];
                } else {
                    $documentTitle = $this->_adapter->getDocumentTitle($documentId);
                    $documentTitles[$documentId] = $documentTitle;
                }

                $documentPageName = $this->_adapter->getDocumentPageName($documentId, $pageId);
                $documentPageNames[$documentId][$pageId] = $documentPageName;
            }

            $action = self::getChangeAction(array('comment' => $value['comment'],
                                                  'revision_id' => $value['revid']));

            $watchlist[] = array(
                'namespace_index' => $value['ns'],
                'namespace_name' => $namespaces[$value['ns']],
                'mediawiki_title' => $value['title'],
                'page_id' => $value['pageid'],
                'revision_id' => $value['revid'],
                'user' => $value['user'],
                'old_length' => $value['oldlen'],
                'new_length' => $value['newlen'],
                'timestamp' => $value['timestamp'],
                'comment' => $value['comment'],
                'action' => $action,
                'new' => isset($value['new']) ? true: false,
                'minor' => isset($value['minor']) ? true: false,
                'anonymous' => isset($value['anon']) ? true: false,
                'document_id' => $documentId,
                'document_page_id' => $pageId,
                'document_title' => $documentTitle,
                'document_page_name' => $documentPageName,
            );

            // Break out of the loops if limit has been reached.
            if ($limit == count($watchlist)) {
                break 2;
            }
        }

        // Set the query continue, if any.
        if (isset($response['query-continue'])) {
            $start = $response['query-continue']['watchlist']['wlstart'];
        } else {
            $start = null;
        }

    } while ($start);

    return $watchlist;
}
566 |
/**
 * Get all documents from MediaWiki that have at least one page with text.
 *
 * Pages are fetched in batches of 500, restricted to titles that start with
 * the Scripto base title prefix and have a size of at least 1. Pages whose
 * document the adapter does not know are left out.
 *
 * @uses Scripto_Service_MediaWiki::getAllPages()
 * @return array An array following this format:
 *
 * array(
 *     {document ID} => array(
 *         ['mediawiki_titles'] => array(
 *             {page ID} => {mediawiki title},
 *             {...}
 *         ),
 *         ['document_title'] => {document title}
 *     ),
 *     {...}
 * )
 *
 */
public function getAllDocuments()
{
    $documents = array();
    $knownTitles = array();
    $continueFrom = null;

    do {
        $response = $this->_mediawiki->getAllPages(
            array('aplimit' => 500,
                  'apminsize' => 1,
                  'apprefix' => Scripto_Document::BASE_TITLE_PREFIX,
                  'apfrom' => $continueFrom)
        );

        foreach ($response['query']['allpages'] as $page) {

            $ids = Scripto_Document::decodeBaseTitle($page['title']);
            $documentId = $ids[0];
            $pageId = $ids[1];

            // First page seen for this document: validate the document and
            // create its entry before any page is attached to it.
            if (!array_key_exists($documentId, $knownTitles)) {
                if (!$this->_adapter->documentExists($documentId)) {
                    continue;
                }
                $knownTitles[$documentId] = $this->_adapter->getDocumentTitle($documentId);
                $documents[$documentId] = array(
                    'mediawiki_titles' => array(),
                    'document_title' => $knownTitles[$documentId],
                );
            }

            $documents[$documentId]['mediawiki_titles'][$pageId] = $page['title'];
        }

        // Continue from where this batch ended, if there is more.
        $continueFrom = isset($response['query-continue'])
            ? $response['query-continue']['allpages']['apfrom']
            : null;

    } while ($continueFrom);

    return $documents;
}
635 |
/**
 * Get the difference between two page revisions from the MediaWiki service.
 *
 * @uses Scripto_Service_MediaWiki::getRevisionDiff()
 * @param int $fromRevisionId The revision ID from which to diff.
 * @param int|string $toRevisionId The revision to which to diff. Use the
 * revision ID, "prev", "next", or "cur".
 * @return string An HTML table without the wrapping tag containing
 * difference markup, pre-formatted by MediaWiki. It is the responsibility
 * of implementers to wrap the result with table tags.
 */
public function getRevisionDiff($fromRevisionId, $toRevisionId = 'prev')
{
    $diff = $this->_mediawiki->getRevisionDiff($fromRevisionId, $toRevisionId);
    return $diff;
}
651 |
/**
 * Get properties of the specified page revision.
 *
 * The revision is fetched from MediaWiki with its wikitext, and the wikitext
 * is then parsed into HTML with section edit links suppressed.
 *
 * @uses Scripto_Service_MediaWiki::getRevisions()
 * @param int $revisionId The ID of the page revision.
 * @return array Holds revision_id, parent_id, user, timestamp, comment,
 * size, action, wikitext and html.
 */
public function getRevision($revisionId)
{
    // Fetch the revision properties, including the content.
    $response = $this->_mediawiki->getRevisions(
        null,
        array('revids' => $revisionId,
              'rvprop' => 'ids|flags|timestamp|user|comment|size|content')
    );
    $page = current($response['query']['pages']);
    $properties = $page['revisions'][0];

    // Parse the wikitext into HTML.
    $parsed = $this->_mediawiki->parse(
        array('text' => '__NOEDITSECTION__' . $properties['*'])
    );

    return array(
        'revision_id' => $properties['revid'],
        'parent_id' => $properties['parentid'],
        'user' => $properties['user'],
        'timestamp' => $properties['timestamp'],
        'comment' => $properties['comment'],
        'size' => $properties['size'],
        'action' => self::getChangeAction(array('comment' => $properties['comment'])),
        'wikitext' => $properties['*'],
        'html' => $parsed['parse']['text']['*'],
    );
}
687 |
/**
 * Infer a change action verb from hints contained in various responses.
 *
 * A log action of "protect" or "unprotect" wins over everything else. After
 * that, a comment starting with "Replaced", "Unprotected", "Protected" or
 * "Created" followed by more text gives the lower-cased verb. Any other
 * comment gives "un/protected" when the revision ID hint is 0 and "edited"
 * otherwise. Without a comment hint the result is an empty string.
 *
 * @param array $hints Keyed hints from which to infer a change action:
 *
 * - comment
 * - log_action
 * - revision_id
 *
 * @return string
 */
static public function getChangeAction(array $hints = array())
{
    // Recent changes returns log_action=protect|unprotect with no comment.
    if (array_key_exists('log_action', $hints)) {
        $verbsByLogAction = array('protect' => 'protected', 'unprotect' => 'unprotected');
        if (array_key_exists($hints['log_action'], $verbsByLogAction)) {
            return $verbsByLogAction[$hints['log_action']];
        }
    }

    // Nothing else can be inferred without a comment.
    if (!array_key_exists('comment', $hints)) {
        return '';
    }

    // The comment names the action when it starts with a known verb.
    $knownVerbs = array('Replaced', 'Unprotected', 'Protected', 'Created');
    $verbPattern = '/^(' . implode('|', $knownVerbs) . ').+$/s';
    if (preg_match($verbPattern, $hints['comment'], $found)) {
        return strtolower($found[1]);
    }

    // Watchlist returns revision_id=0 when the action is protect or
    // unprotect.
    $withoutRevision = array_key_exists('revision_id', $hints) && 0 == $hints['revision_id'];
    return $withoutRevision ? 'un/protected' : 'edited';
}
732 |
/**
 * Tell whether the provided MediaWiki API URL is valid.
 *
 * The check itself is done by the MediaWiki service class.
 *
 * @uses Scripto_Service_MediaWiki::isValidApiUrl()
 * @param string $apiUrl The MediaWiki API URL to validate.
 * @return bool
 */
static public function isValidApiUrl($apiUrl)
{
    $isValid = Scripto_Service_MediaWiki::isValidApiUrl($apiUrl);
    return $isValid;
}
744 |
/**
 * Remove all HTML attributes from the provided markup.
 *
 * This filter is useful after getting HTML from the MediaWiki API, which
 * often contains MediaWiki-specific attributes that may conflict with local
 * settings.
 *
 * The markup is loaded into a DOMDocument and serialized again, so the
 * result is what DOMDocument::saveHTML() returns for the whole document.
 *
 * @see http://www.php.net/manual/en/domdocument.loadhtml.php#95251
 * @param string $html The markup to filter. It is expected to be UTF-8.
 * @param array $exceptions Do not remove these attributes.
 * @return string The filtered markup, or the trimmed input if it is empty.
 */
static public function removeHtmlAttributes($html, array $exceptions = array('href'))
{
    // Check for an empty string.
    $html = trim($html);
    if (empty($html)) {
        return $html;
    }

    // Load the HTML into DOM. Must inject an XML declaration with encoding
    // set to UTF-8 to prevent DOMDocument from munging Unicode characters.
    $doc = new DOMDocument();
    $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
    $xpath = new DOMXPath($doc);

    // Iterate over and remove attributes.
    foreach ($xpath->evaluate('//@*') as $attribute) {
        // Do not remove specified attributes.
        if (in_array($attribute->name, $exceptions)) {
            continue;
        }
        $attribute->ownerElement->removeAttributeNode($attribute);
    }

    return $doc->saveHTML();
}
782 |
/**
 * Remove all preprocessor limit reports from the provided markup.
 *
 * This filter is useful after getting HTML from the MediaWiki API, which
 * always contains a preprocessor limit report within hidden tags. A report
 * is an HTML comment whose text starts with "NewPP limit report".
 *
 * @see http://en.wikipedia.org/wiki/Wikipedia:Template_limits#How_can_you_find_out.3F
 * @param string $html The markup to filter.
 * @return string The markup without preprocessor limit report comments.
 */
static public function removeNewPPLimitReports($html)
{
    // The "s" modifier means the "." meta-character will include newlines.
    // The "?" means the "+" quantifier is not greedy, thus will not remove
    // text between pages when importing document transcriptions.
    $html = preg_replace('/<!--\s*NewPP limit report.+?-->/s', '', $html);
    return $html;
}
801 | }
802 |
--------------------------------------------------------------------------------
/lib/Scripto/Adapter/Example.php:
--------------------------------------------------------------------------------
/**
 * Hardcoded example documents, keyed by document ID.
 *
 * Every entry uses the following format:
 *
 * {documentId} => array(
 *     'document_title' => {documentTitle},
 *     'document_pages' => array(
 *         {pageId} => array(
 *             'page_name' => {pageName},
 *             'page_file_url' => {pageFileUrl}
 *         )
 *     )
 * )
 *
 * Other adapters will likely get relevant data using the CMS API, and not
 * hardcode them like this example. Be sure to URL encode the document and
 * page IDs when transporting over HTTP. For example:
 *
 * documentId: Request for Purchase of Liver Oil & Drum Heads
 * pageId: xbe/XBE02001.jpg
 * ?documentId=Request+for+Purchase+of+Liver+Oil+%26+Drum+Heads&pageId=xbe%2FXBE02001.jpg
 *
 * These example documents are from Center for History and New Media Papers
 * of the War Department and Library of Congress American Memory.
 *
 * @var array
 */
private $_documents = array(

    // Preferred form: unique keys serve as the document and page IDs.
    // See: http://wardepartmentpapers.org/document.php?id=16344
    16344 => array(
        'document_title' => 'Return of articles received and expended; work done at Springfield Massachusetts armory',
        'document_pages' => array(
            67799 => array(
                'page_name'     => 'Letter Outside',
                'page_file_url' => 'http://wardepartmentpapers.org/images/medium/zto/ZTO07001.jpg'
            ),
            67800 => array(
                'page_name'     => 'Letter Body',
                'page_file_url' => 'http://wardepartmentpapers.org/images/medium/zto/ZTO07002.jpg'
            ),
            67801 => array(
                'page_name'     => 'Worksheet 1, Outside',
                'page_file_url' => 'http://wardepartmentpapers.org/images/medium/zto/ZTO07003.jpg'
            ),
            67802 => array(
                'page_name'     => 'Worksheet 1, Page 1',
                'page_file_url' => 'http://wardepartmentpapers.org/images/medium/zto/ZTO07004.jpg'
            ),
            67803 => array(
                'page_name'     => 'Worksheet 1, Page 2',
                'page_file_url' => 'http://wardepartmentpapers.org/images/medium/zto/ZTO07005.jpg'
            ),
            67804 => array(
                'page_name'     => 'Worksheet 2, Outside',
                'page_file_url' => 'http://wardepartmentpapers.org/images/medium/zto/ZTO07006.jpg'
            ),
            67805 => array(
                'page_name'     => 'Worksheet 2, Page 1',
                'page_file_url' => 'http://wardepartmentpapers.org/images/medium/zto/ZTO07007.jpg'
            )
        )
    ),

    // Alternate form: the document title is the document ID and the file
    // path is the page ID.
    // See: http://books.google.com/books?id=eAuOQMmGEYIC&lpg=PA515&ots=PtWRBKDZbf&pg=PA515
    // URL-encoded document ID:
    // %5BFacsimile%20of%5D%20letter%20to%20Messrs.%20O.%20P.%20Hall%20et%20al%20from%20Lincoln.
    '[Facsimile of] letter to Messrs. O. P. Hall et al from Lincoln.' => array(
        'document_title' => '[Facsimile of] letter to Messrs. O. P. Hall et al from Lincoln.',
        'document_pages' => array(
            // URL-encoded page ID: rbc%2Flprbscsm%2Fscsm0455%2F001r.jpg
            'rbc/lprbscsm/scsm0455/001r.jpg' => array(
                'page_name'     => '001r',
                'page_file_url' => 'http://memory.loc.gov/service/rbc/lprbscsm/scsm0455/001r.jpg'
            ),
            'rbc/lprbscsm/scsm0455/002r.jpg' => array(
                'page_name'     => '002r',
                'page_file_url' => 'http://memory.loc.gov/service/rbc/lprbscsm/scsm0455/002r.jpg'
            ),
            'rbc/lprbscsm/scsm0455/003r.jpg' => array(
                'page_name'     => '003r',
                'page_file_url' => 'http://memory.loc.gov/service/rbc/lprbscsm/scsm0455/003r.jpg'
            ),
            'rbc/lprbscsm/scsm0455/004r.jpg' => array(
                'page_name'     => '004r',
                'page_file_url' => 'http://memory.loc.gov/service/rbc/lprbscsm/scsm0455/004r.jpg'
            )
        )
    )
);
115 |
/**
 * Indicate whether the hardcoded example set contains the document.
 *
 * @param int|string $documentId The unique document ID
 * @return bool True: the document exists; false: it does not
 */
public function documentExists($documentId)
{
    $isKnown = array_key_exists($documentId, $this->_documents);
    return $isKnown;
}
120 |
/**
 * Indicate whether the page exists within the specified document.
 *
 * @param int|string $documentId The unique document ID
 * @param int|string $pageId The unique page ID
 * @return bool False when either the document or the page is unknown
 */
public function documentPageExists($documentId, $pageId)
{
    // The page lookup is only reached once the document is known to exist.
    return array_key_exists($documentId, $this->_documents)
        && array_key_exists($pageId, $this->_documents[$documentId]['document_pages']);
}
128 |
/**
 * Get the pages of the specified document.
 *
 * @param int|string $documentId The unique document ID
 * @return array Page names keyed by page ID, in the order they are defined
 * @throws Scripto_Adapter_Exception If the document does not exist
 */
public function getDocumentPages($documentId)
{
    if (!array_key_exists($documentId, $this->_documents)) {
        throw new Scripto_Adapter_Exception('Document does not exist.');
    }

    $documentPages = $this->_documents[$documentId]['document_pages'];
    $pageNames = array();
    foreach (array_keys($documentPages) as $id) {
        $pageNames[$id] = $documentPages[$id]['page_name'];
    }
    return $pageNames;
}
140 |
/**
 * Get the file URL of the specified document page.
 *
 * @param int|string $documentId The unique document ID
 * @param int|string $pageId The unique page ID
 * @return string The page file URL
 * @throws Scripto_Adapter_Exception If the document or the page does not exist
 */
public function getDocumentPageFileUrl($documentId, $pageId)
{
    if (!array_key_exists($documentId, $this->_documents)) {
        throw new Scripto_Adapter_Exception('Document does not exist.');
    }

    $documentPages = $this->_documents[$documentId]['document_pages'];
    if (!array_key_exists($pageId, $documentPages)) {
        throw new Scripto_Adapter_Exception('Document page does not exist.');
    }

    return $documentPages[$pageId]['page_file_url'];
}
151 |
/**
 * Get the ID of the first page defined for the specified document.
 *
 * @param int|string $documentId The unique document ID
 * @return int|string|null The first page key; null when no pages are defined
 * @throws Scripto_Adapter_Exception If the document does not exist
 */
public function getDocumentFirstPageId($documentId)
{
    if (!array_key_exists($documentId, $this->_documents)) {
        throw new Scripto_Adapter_Exception('Document does not exist.');
    }

    // Rewind a local copy of the pages and read the key at its first position.
    $documentPages = $this->_documents[$documentId]['document_pages'];
    reset($documentPages);
    return key($documentPages);
}
160 |
/**
 * Get the title of the specified document.
 *
 * @param int|string $documentId The unique document ID
 * @return string The document title
 * @throws Scripto_Adapter_Exception If the document does not exist
 */
public function getDocumentTitle($documentId)
{
    if (!array_key_exists($documentId, $this->_documents)) {
        throw new Scripto_Adapter_Exception('Document does not exist.');
    }

    $document = $this->_documents[$documentId];
    return $document['document_title'];
}
168 |
/**
 * Get the name of the specified document page.
 *
 * @param int|string $documentId The unique document ID
 * @param int|string $pageId The unique page ID
 * @return string The page name
 * @throws Scripto_Adapter_Exception If the document or the page does not exist
 */
public function getDocumentPageName($documentId, $pageId)
{
    if (!array_key_exists($documentId, $this->_documents)) {
        throw new Scripto_Adapter_Exception('Document does not exist.');
    }

    $documentPages = $this->_documents[$documentId]['document_pages'];
    if (!array_key_exists($pageId, $documentPages)) {
        throw new Scripto_Adapter_Exception('Document page does not exist.');
    }

    return $documentPages[$pageId]['page_name'];
}
179 |
/**
 * Indicate whether the document transcription has been imported.
 *
 * This example adapter always answers false.
 *
 * @param int|string $documentId The unique document ID (unused)
 * @return bool Always false
 */
public function documentTranscriptionIsImported($documentId)
{
    return false;
}
184 |
/**
 * Indicate whether the document page transcription has been imported.
 *
 * This example adapter always answers false.
 *
 * @param int|string $documentId The unique document ID (unused)
 * @param int|string $pageId The unique page ID (unused)
 * @return bool Always false
 */
public function documentPageTranscriptionIsImported($documentId, $pageId)
{
    return false;
}
189 |
/**
 * Import a document page's transcription.
 *
 * This example adapter performs no import and always answers false.
 *
 * @param int|string $documentId The unique document ID (unused)
 * @param int|string $pageId The unique page ID (unused)
 * @param string $text The text to import (unused)
 * @return bool Always false
 */
public function importDocumentPageTranscription($documentId, $pageId, $text)
{
    return false;
}
194 |
/**
 * Import an entire document's transcription.
 *
 * This example adapter performs no import and always answers false.
 *
 * @param int|string $documentId The unique document ID (unused)
 * @param string $text The text to import (unused)
 * @return bool Always false
 */
public function importDocumentTranscription($documentId, $text)
{
    return false;
}
199 | }
200 |
--------------------------------------------------------------------------------
/lib/Scripto/Adapter/Exception.php:
--------------------------------------------------------------------------------
/**
 * Get all the pages belonging to the document.
 *
 * Expected return format: array([pageId] => [pageName], [...])
 *
 * Example return values:
 * array(2011 => 'Title Page',
 *       1999 => 'Page 1',
 *       4345 => 'Page 2')
 *
 * array('page_1' => 1,
 *       'page_2' => 2,
 *       'page_3' => 3)
 *
 * @param int|string $documentId The unique document ID
 * @return array An array containing page identifiers as keys and page names
 *               as values, in sequential page order.
 */
public function getDocumentPages($documentId);
69 |
/**
 * Get the URL of the specified document page file.
 *
 * @param int|string $documentId The unique document ID
 * @param int|string $pageId The unique page ID
 * @return string The page file URL
 */
public function getDocumentPageFileUrl($documentId, $pageId);
78 |
/**
 * Get the ID of the first page of the document.
 *
 * @param int|string $documentId The unique document ID
 * @return int|string The unique page ID of the first page
 */
public function getDocumentFirstPageId($documentId);
86 |
/**
 * Get the title of the document.
 *
 * @param int|string $documentId The unique document ID
 * @return string The document title
 */
public function getDocumentTitle($documentId);
94 |
/**
 * Get the name of the document page.
 *
 * @param int|string $documentId The unique document ID
 * @param int|string $pageId The unique page ID
 * @return string The page name
 */
public function getDocumentPageName($documentId, $pageId);
103 |
/**
 * Indicate whether the document transcription has been imported.
 *
 * @param int|string $documentId The unique document ID
 * @return bool True: has been imported; false: has not been imported
 */
public function documentTranscriptionIsImported($documentId);
111 |
/**
 * Indicate whether the document page transcription has been imported.
 *
 * @param int|string $documentId The unique document ID
 * @param int|string $pageId The unique page ID
 * @return bool True: has been imported; false: has not been imported
 */
public function documentPageTranscriptionIsImported($documentId, $pageId);
119 |
/**
 * Import a document page's transcription into the external system.
 *
 * @param int|string $documentId The unique document ID
 * @param int|string $pageId The unique page ID
 * @param string $text The text to import
 * @return bool True: success; false: fail
 */
public function importDocumentPageTranscription($documentId, $pageId, $text);
129 |
/**
 * Import an entire document's transcription into the external system.
 *
 * @param int|string $documentId The unique document ID
 * @param string $text The text to import
 * @return bool True: success; false: fail
 */
public function importDocumentTranscription($documentId, $text);
138 | }
139 |
--------------------------------------------------------------------------------
/lib/Scripto/Document.php:
--------------------------------------------------------------------------------
1 | documentExists($id)) {
108 | throw new Scripto_Exception("The specified document does not exist: {$this->_id}");
109 | }
110 |
111 | $this->_id = $id;
112 | $this->_adapter = $adapter;
113 | $this->_mediawiki = $mediawiki;
114 | $this->_title = $this->_adapter->getDocumentTitle($id);
115 | }
116 |
/**
 * Set the current document page.
 *
 * Sets the current page ID, the page name, the base title used by
 * MediaWiki, and information about the MediaWiki transcription and talk
 * pages.
 *
 * @param string|int|null|false $pageId The unique page identifier. NULL or
 * FALSE selects the document's first page.
 * @throws Scripto_Exception If the page does not exist, or if the base title
 * minted for it is longer than TITLE_BYTE_LIMIT bytes.
 */
public function setPage($pageId)
{
    // Set to the first page if the provided page is NULL or FALSE.
    if (null === $pageId || false === $pageId) {
        $pageId = $this->getFirstPageId();
    }

    // Check if the page exists.
    if (!$this->_adapter->documentPageExists($this->_id, $pageId)) {
        throw new Scripto_Exception("The specified page does not exist: $pageId");
    }

    // Mint the page title used by MediaWiki.
    $baseTitle = self::encodeBaseTitle($this->_id, $pageId);

    // Check if the base title is under the maximum byte length. The newly
    // minted title must be measured: $this->_baseTitle still holds the
    // title of the previously set page (or nothing at all on first use).
    if (self::TITLE_BYTE_LIMIT < strlen($baseTitle)) {
        throw new Scripto_Exception('The document ID and/or page ID are too long to set the provided page.');
    }

    // Set information about the transcription and talk pages.
    $this->_transcriptionPageInfo = $this->_getPageInfo($baseTitle);
    $this->_talkPageInfo = $this->_getPageInfo('Talk:' . $baseTitle);

    // Only commit the page state once every check and lookup has succeeded.
    $this->_pageId = $pageId;
    $this->_pageName = $this->_adapter->getDocumentPageName($this->_id, $pageId);
    $this->_baseTitle = $baseTitle;
}
153 |
/**
 * Get the ID this document was constructed with.
 *
 * @return string|int The document ID
 */
public function getId()
{
    $documentId = $this->_id;
    return $documentId;
}
163 |
/**
 * Get the stored title of this document.
 *
 * @return string The document title
 */
public function getTitle()
{
    $documentTitle = $this->_title;
    return $documentTitle;
}
171 |
/**
 * Get the name of the current document page.
 *
 * @return string|null The page name, as stored by setPage()
 */
public function getPageName()
{
    $currentPageName = $this->_pageName;
    return $currentPageName;
}
179 |
/**
 * Get the ID of the current document page.
 *
 * @return string|int|null The page ID, as stored by setPage()
 */
public function getPageId()
{
    $currentPageId = $this->_pageId;
    return $currentPageId;
}
189 |
/**
 * Get the base title of the current page.
 *
 * @return string The base title
 * @throws Scripto_Exception If no page has been set
 */
public function getBaseTitle()
{
    if (null === $this->_pageId) {
        throw new Scripto_Exception('The document page must be set before getting the base title.');
    }

    $baseTitle = $this->_baseTitle;
    return $baseTitle;
}
202 |
/**
 * Get the stored information about the current MediaWiki transcription page.
 *
 * @return array The transcription page information
 * @throws Scripto_Exception If no page has been set
 */
public function getTranscriptionPageInfo()
{
    if (null === $this->_pageId) {
        throw new Scripto_Exception('The document page must be set before getting information about the transcription page.');
    }

    $pageInfo = $this->_transcriptionPageInfo;
    return $pageInfo;
}
215 |
/**
 * Get the stored information about the current MediaWiki talk page.
 *
 * @return array The talk page information
 * @throws Scripto_Exception If no page has been set
 */
public function getTalkPageInfo()
{
    if (null === $this->_pageId) {
        throw new Scripto_Exception('The document page must be set before getting information about the talk page.');
    }

    $pageInfo = $this->_talkPageInfo;
    return $pageInfo;
}
228 |
/**
 * Ask the adapter for all pages of this document.
 *
 * @uses Scripto_Adapter_Interface::getDocumentPages()
 * @return array The adapter's answer, cast to an array
 */
public function getPages()
{
    $pages = $this->_adapter->getDocumentPages($this->_id);
    return (array) $pages;
}
239 |
/**
 * Ask the adapter for the ID of this document's first page.
 *
 * @uses Scripto_Adapter_Interface::getDocumentFirstPageId()
 * @return int|string The first page ID, exactly as the adapter returns it
 */
public function getFirstPageId()
{
    $firstPageId = $this->_adapter->getDocumentFirstPageId($this->_id);
    return $firstPageId;
}
250 |
/**
 * Ask the adapter for the file URL of the current page.
 *
 * @uses Scripto_Adapter_Interface::getDocumentPageFileUrl()
 * @return string The page file URL
 * @throws Scripto_Exception If no page has been set
 */
public function getPageFileUrl()
{
    if (null === $this->_pageId) {
        throw new Scripto_Exception('The document page must be set before getting the page file URL.');
    }

    $fileUrl = $this->_adapter->getDocumentPageFileUrl($this->_id, $this->_pageId);
    return $fileUrl;
}
264 |
/**
 * Build the MediaWiki URL of the current transcription page.
 *
 * @return string The transcription page URL
 * @throws Scripto_Exception If no page has been set
 */
public function getTranscriptionPageMediawikiUrl()
{
    if (null === $this->_pageId) {
        throw new Scripto_Exception('The document page must be set before getting the transcription page MediaWiki URL.');
    }

    $transcriptionTitle = $this->_baseTitle;
    return $this->_getPageMediawikiUrl($transcriptionTitle);
}
277 |
/**
 * Build the MediaWiki URL of the current talk page.
 *
 * @return string The talk page URL
 * @throws Scripto_Exception If no page has been set
 */
public function getTalkPageMediawikiUrl()
{
    if (null === $this->_pageId) {
        throw new Scripto_Exception('The document page must be set before getting the talk page MediaWiki URL.');
    }

    // The talk page title is the base title with a "Talk:" prefix.
    $talkTitle = 'Talk:' . $this->_baseTitle;
    return $this->_getPageMediawikiUrl($talkTitle);
}
290 |
/**
 * Fetch the latest wikitext of the current transcription page.
 *
 * @uses Scripto_Service_MediaWiki::getLatestRevisionWikitext()
 * @return string The transcription wikitext.
 * @throws Scripto_Exception If no page has been set
 */
public function getTranscriptionPageWikitext()
{
    if (null === $this->_pageId) {
        throw new Scripto_Exception('The document page must be set before getting the transcription page wikitext.');
    }

    $wikitext = $this->_mediawiki->getLatestRevisionWikitext($this->_baseTitle);
    return $wikitext;
}
304 |
/**
 * Fetch the latest wikitext of the current talk page.
 *
 * @uses Scripto_Service_MediaWiki::getLatestRevisionWikitext()
 * @return string The talk wikitext.
 * @throws Scripto_Exception If no page has been set
 */
public function getTalkPageWikitext()
{
    if (null === $this->_pageId) {
        throw new Scripto_Exception('The document page must be set before getting the talk page wikitext.');
    }

    $talkTitle = 'Talk:' . $this->_baseTitle;
    return $this->_mediawiki->getLatestRevisionWikitext($talkTitle);
}
318 |
/**
 * Fetch the latest HTML of the current transcription page.
 *
 * @uses Scripto_Service_MediaWiki::getLatestRevisionHtml()
 * @return string The transcription HTML.
 * @throws Scripto_Exception If no page has been set
 */
public function getTranscriptionPageHtml()
{
    if (null === $this->_pageId) {
        throw new Scripto_Exception('The document page must be set before getting the transcription page HTML.');
    }

    $html = $this->_mediawiki->getLatestRevisionHtml($this->_baseTitle);
    return $html;
}
332 |
/**
 * Fetch the latest HTML of the current talk page.
 *
 * @uses Scripto_Service_MediaWiki::getLatestRevisionHtml()
 * @return string The talk HTML.
 * @throws Scripto_Exception If no page has been set
 */
public function getTalkPageHtml()
{
    if (null === $this->_pageId) {
        throw new Scripto_Exception('The document page must be set before getting the talk page HTML.');
    }

    $talkTitle = 'Talk:' . $this->_baseTitle;
    return $this->_mediawiki->getLatestRevisionHtml($talkTitle);
}
346 |
/**
 * Get the current transcription page as plain text.
 *
 * The latest HTML is fetched from MediaWiki, its tags are stripped, and the
 * remaining HTML entities are decoded.
 *
 * @uses Scripto_Service_MediaWiki::getLatestRevisionHtml()
 * @return string The transcription page plain text.
 * @throws Scripto_Exception If no page has been set
 */
public function getTranscriptionPagePlainText()
{
    if (null === $this->_pageId) {
        throw new Scripto_Exception('The document page must be set before getting the transcription page plain text.');
    }

    $html = $this->_mediawiki->getLatestRevisionHtml($this->_baseTitle);
    $withoutTags = strip_tags($html);
    return html_entity_decode($withoutTags);
}
360 |
/**
 * Get the current talk page as plain text.
 *
 * The latest HTML is fetched from MediaWiki, its tags are stripped, and the
 * remaining HTML entities are decoded.
 *
 * @uses Scripto_Service_MediaWiki::getLatestRevisionHtml()
 * @return string The talk plain text.
 * @throws Scripto_Exception If no page has been set
 */
public function getTalkPagePlainText()
{
    if (null === $this->_pageId) {
        throw new Scripto_Exception('The document page must be set before getting the talk page plain text.');
    }

    $html = $this->_mediawiki->getLatestRevisionHtml('Talk:' . $this->_baseTitle);
    $withoutTags = strip_tags($html);
    return html_entity_decode($withoutTags);
}
374 |
/**
 * Get the revision history of the current transcription page.
 *
 * @param int $limit The number of revisions to return.
 * @param int $startRevisionId The revision ID from which to start.
 * @return array
 * @throws Scripto_Exception If no page has been set
 */
public function getTranscriptionPageHistory($limit = 10, $startRevisionId = null)
{
    if (null === $this->_pageId) {
        throw new Scripto_Exception('The document page must be set before getting the transcription page history.');
    }

    $transcriptionTitle = $this->_baseTitle;
    return $this->_getPageHistory($transcriptionTitle, $limit, $startRevisionId);
}
389 |
/**
 * Get the revision history of the current talk page.
 *
 * @param int $limit The number of revisions to return.
 * @param int $startRevisionId The revision ID from which to start.
 * @return array
 * @throws Scripto_Exception If no page has been set
 */
public function getTalkPageHistory($limit = 10, $startRevisionId = null)
{
    if (null === $this->_pageId) {
        throw new Scripto_Exception('The document page must be set before getting the talk page history.');
    }

    $talkTitle = 'Talk:' . $this->_baseTitle;
    return $this->_getPageHistory($talkTitle, $limit, $startRevisionId);
}
404 |
/**
 * Determine if the current user can edit the MediaWiki transcription page.
 *
 * The decision is made from the protections stored for that page.
 *
 * @return bool
 * @throws Scripto_Exception If no page has been set
 */
public function canEditTranscriptionPage()
{
    if (null === $this->_pageId) {
        throw new Scripto_Exception('The document page must be set before determining whether the user can edit the transcription page.');
    }

    $protections = $this->_transcriptionPageInfo['protections'];
    return $this->_canEdit($protections);
}
417 |
/**
 * Determine if the current user can edit the MediaWiki talk page.
 *
 * The decision is made from the protections stored for that page.
 *
 * @return bool
 * @throws Scripto_Exception If no page has been set
 */
public function canEditTalkPage()
{
    if (null === $this->_pageId) {
        throw new Scripto_Exception('The document page must be set before determining whether the user can edit the talk page.');
    }

    $protections = $this->_talkPageInfo['protections'];
    return $this->_canEdit($protections);
}
430 |
/**
 * Edit the MediaWiki transcription page of the current document page.
 *
 * The edit is submitted with the edit token stored for that page.
 *
 * @uses Scripto_Service_MediaWiki::edit()
 * @param string $text The wikitext of the transcription.
 * @throws Scripto_Exception If no page has been set
 */
public function editTranscriptionPage($text)
{
    if (null === $this->_pageId) {
        throw new Scripto_Exception('The document page must be set before editing the transcription page.');
    }

    $editToken = $this->_transcriptionPageInfo['edit_token'];
    $this->_mediawiki->edit($this->_baseTitle, $text, $editToken);
}
446 |
/**
 * Edit the MediaWiki talk page of the current document page.
 *
 * The edit is submitted with the edit token stored for that page.
 *
 * @uses Scripto_Service_MediaWiki::edit()
 * @param string $text The wikitext of the talk page.
 * @throws Scripto_Exception If no page has been set
 */
public function editTalkPage($text)
{
    if (null === $this->_pageId) {
        throw new Scripto_Exception('The document page must be set before editing the talk page.');
    }

    $talkTitle = 'Talk:' . $this->_baseTitle;
    $editToken = $this->_talkPageInfo['edit_token'];
    $this->_mediawiki->edit($talkTitle, $text, $editToken);
}
462 |
/**
 * Protect the current transcription page, then refresh its stored
 * information.
 *
 * @throws Scripto_Exception If no page has been set
 */
public function protectTranscriptionPage()
{
    if (null === $this->_pageId) {
        throw new Scripto_Exception('The document page must be set before protecting the transcription page.');
    }

    $title = $this->_baseTitle;
    $this->_protectPage($title, $this->_transcriptionPageInfo['protect_token']);

    // Reload the page information so it reflects the change.
    $this->_transcriptionPageInfo = $this->_getPageInfo($title);
}
476 |
/**
 * Protect the current talk page, then refresh its stored information.
 *
 * @throws Scripto_Exception If no page has been set
 */
public function protectTalkPage()
{
    if (null === $this->_pageId) {
        throw new Scripto_Exception('The document page must be set before protecting the talk page.');
    }

    $talkTitle = 'Talk:' . $this->_baseTitle;
    $this->_protectPage($talkTitle, $this->_talkPageInfo['protect_token']);

    // Reload the page information so it reflects the change.
    $this->_talkPageInfo = $this->_getPageInfo($talkTitle);
}
490 |
/**
 * Unprotect the current transcription page, then refresh its stored
 * information.
 *
 * @throws Scripto_Exception If no page has been set
 */
public function unprotectTranscriptionPage()
{
    if (null === $this->_pageId) {
        throw new Scripto_Exception('The document page must be set before unprotecting the transcription page.');
    }

    $title = $this->_baseTitle;
    $this->_unprotectPage($title, $this->_transcriptionPageInfo['protect_token']);

    // Reload the page information so it reflects the change.
    $this->_transcriptionPageInfo = $this->_getPageInfo($title);
}
504 |
/**
 * Unprotect the current talk page, then refresh its stored information.
 *
 * @throws Scripto_Exception If no page has been set
 */
public function unprotectTalkPage()
{
    if (null === $this->_pageId) {
        throw new Scripto_Exception('The document page must be set before unprotecting the talk page.');
    }

    $talkTitle = 'Talk:' . $this->_baseTitle;
    $this->_unprotectPage($talkTitle, $this->_talkPageInfo['protect_token']);

    // Reload the page information so it reflects the change.
    $this->_talkPageInfo = $this->_getPageInfo($talkTitle);
}
518 |
/**
 * Add the current page to the watchlist.
 *
 * Watching a transcription page implies watching its talk page.
 *
 * @uses Scripto_Service_MediaWiki::watch()
 * @throws Scripto_Exception If no page has been set
 */
public function watchPage()
{
    if (null === $this->_pageId) {
        throw new Scripto_Exception('The document page must be set before watching the page.');
    }

    $title = $this->_baseTitle;
    $this->_mediawiki->watch($title);
}
533 |
/**
 * Remove the current page from the watchlist.
 *
 * Unwatching a transcription page implies unwatching its talk page.
 *
 * @uses Scripto_Service_MediaWiki::watch()
 * @throws Scripto_Exception If no page has been set
 */
public function unwatchPage()
{
    if (null === $this->_pageId) {
        throw new Scripto_Exception('The document page must be set before unwatching the page.');
    }

    // The same service call is used for unwatching, flagged via its options.
    $options = array('unwatch' => true);
    $this->_mediawiki->watch($this->_baseTitle, null, $options);
}
548 |
/**
 * Determine whether the current transcription page is edit protected.
 *
 * The decision is made from the protections stored for that page.
 *
 * @return bool
 * @throws Scripto_Exception If no page has been set
 */
public function isProtectedTranscriptionPage()
{
    if (null === $this->_pageId) {
        throw new Scripto_Exception('The document page must be set before determining whether the transcription page is protected.');
    }

    $protections = $this->_transcriptionPageInfo['protections'];
    return $this->_isProtectedPage($protections);
}
561 |
/**
 * Determine whether the current talk page is edit protected.
 *
 * The decision is made from the protections stored for that page.
 *
 * @return bool
 * @throws Scripto_Exception If no page has been set
 */
public function isProtectedTalkPage()
{
    if (null === $this->_pageId) {
        throw new Scripto_Exception('The document page must be set before determining whether the talk page is protected.');
    }

    $protections = $this->_talkPageInfo['protections'];
    return $this->_isProtectedPage($protections);
}
574 |
/**
 * Determine whether the current user is watching the current page.
 *
 * Reads the "watched" flag stored with the transcription page information.
 *
 * @return bool
 * @throws Scripto_Exception If no page has been set
 */
public function isWatchedPage()
{
    if (null === $this->_pageId) {
        throw new Scripto_Exception('The document page must be set before determining whether the current user is watching the page.');
    }

    $isWatched = $this->_transcriptionPageInfo['watched'];
    return $isWatched;
}
587 |
/**
 * Determine whether this document's transcription was already exported to
 * the external system, according to the adapter.
 *
 * @uses Scripto_Adapter_Interface::documentTranscriptionIsImported()
 * @return bool
 */
public function isExported()
{
    $isImported = $this->_adapter->documentTranscriptionIsImported($this->_id);
    return $isImported;
}
599 |
/**
 * Determine whether the current transcription page was already exported to
 * the external system, according to the adapter.
 *
 * @uses Scripto_Adapter_Interface::documentPageTranscriptionIsImported()
 * @return bool
 * @throws Scripto_Exception If no page has been set
 */
public function isExportedPage()
{
    if (null === $this->_pageId) {
        throw new Scripto_Exception('The document page must be set before determining whether it is imported.');
    }

    $isImported = $this->_adapter->documentPageTranscriptionIsImported($this->_id, $this->_pageId);
    return $isImported;
}
614 |
/**
 * Export the current page's transcription to the external system by handing
 * it to the adapter.
 *
 * The text is trimmed before it is passed on.
 *
 * @uses Scripto_Adapter_Interface::importDocumentPageTranscription()
 * @param string $type The type of text to set, valid options are
 * plain_text, html, and wikitext.
 * @throws Scripto_Exception If the type is not one of the valid options
 */
public function exportPage($type = 'plain_text')
{
    // Loose comparisons mirror the matching rules of a switch statement.
    if ('plain_text' == $type) {
        $pageText = $this->getTranscriptionPagePlainText();
    } elseif ('html' == $type) {
        $pageText = $this->getTranscriptionPageHtml();
    } elseif ('wikitext' == $type) {
        $pageText = $this->getTranscriptionPageWikitext();
    } else {
        throw new Scripto_Exception('The provided import type is invalid.');
    }

    $this->_adapter->importDocumentPageTranscription(
        $this->_id,
        $this->_pageId,
        trim($pageText)
    );
}
642 |
/**
 * Export the entire document transcription to the external system by
 * handing it to the adapter.
 *
 * The latest text of every page is fetched in page order, trimmed, joined
 * with the page delimiter, and trimmed once more as a whole.
 *
 * @uses Scripto_Adapter_Interface::importDocumentTranscription()
 * @param string $type The type of text to set, valid options are
 * plain_text, html, and wikitext.
 * @param string $pageDelimiter The delimiter used to stitch pages together.
 * @throws Scripto_Exception If the type is not one of the valid options
 */
public function export($type = 'plain_text', $pageDelimiter = "\n")
{
    $pageTexts = array();
    foreach (array_keys($this->getPages()) as $pageId) {
        $baseTitle = self::encodeBaseTitle($this->_id, $pageId);

        // Loose comparisons mirror the matching rules of a switch statement.
        if ('plain_text' == $type) {
            $pageText = html_entity_decode(strip_tags($this->_mediawiki->getLatestRevisionHtml($baseTitle)));
        } elseif ('html' == $type) {
            $pageText = $this->_mediawiki->getLatestRevisionHtml($baseTitle);
        } elseif ('wikitext' == $type) {
            $pageText = $this->_mediawiki->getLatestRevisionWikitext($baseTitle);
        } else {
            throw new Scripto_Exception('The provided import type is invalid.');
        }

        $pageTexts[] = trim($pageText);
    }

    $documentText = trim(implode($pageDelimiter, $pageTexts));
    $this->_adapter->importDocumentTranscription($this->_id, $documentText);
}
674 |
/**
 * Determine if the current user can edit a MediaWiki page that carries the
 * provided protections.
 *
 * Without the "edit" right the answer is always false. With it, the answer
 * is true unless the page has an edit protection, in which case the user
 * must hold the "protect" right as well.
 *
 * @uses Scripto_Service_MediaWiki::getUserInfo()
 * @param array $pageProtections
 * @return bool
 */
protected function _canEdit(array $pageProtections)
{
    $userInfo = $this->_mediawiki->getUserInfo('rights');
    $rights = $userInfo['query']['userinfo']['rights'];

    // Users without edit rights cannot edit pages.
    if (!in_array('edit', $rights)) {
        return false;
    }

    // The first edit protection found decides: only users who also hold
    // protect rights can edit an edit-protected page.
    foreach ($pageProtections as $protection) {
        if ('edit' == $protection['type']) {
            return in_array('protect', $rights);
        }
    }

    // Unprotected pages, and pages without an edit protection, are editable.
    return true;
}
717 |
/**
 * Determine whether the provided protections contain an edit or a create
 * protection.
 *
 * @param array $pageProtections The page protections from the page info:
 * {@link Scripto_Document::$_transcriptionPageInfo} or
 * {@link Scripto_Document::$_talkPageInfo}.
 * @return bool
 */
protected function _isProtectedPage(array $pageProtections)
{
    // An empty list simply skips the loop and falls through to false.
    foreach ($pageProtections as $protection) {
        $type = $protection['type'];
        if ('edit' == $type || 'create' == $type) {
            return true;
        }
    }

    // Neither an edit nor a create protection was found.
    return false;
}
744 |
/**
 * Protect the specified page.
 *
 * A page that has already been created is protected against editing; one
 * that has not is protected against creation. Both are restricted to sysop.
 *
 * @uses Scripto_Service_MediaWiki::protect()
 * @param string $title
 * @param string $protectToken
 */
protected function _protectPage($title, $protectToken)
{
    $protections = $this->_mediawiki->pageCreated($title)
        ? 'edit=sysop'
        : 'create=sysop';
    $this->_mediawiki->protect($title, $protections, $protectToken);
}
761 |
/**
 * Unprotect the specified page.
 *
 * A page that has already been created has its edit restriction opened to
 * all; one that has not has its create restriction opened to all.
 *
 * @uses Scripto_Service_MediaWiki::protect()
 * @param string $title
 * @param string $protectToken
 */
protected function _unprotectPage($title, $protectToken)
{
    $protections = $this->_mediawiki->pageCreated($title)
        ? 'edit=all'
        : 'create=all';
    $this->_mediawiki->protect($title, $protections, $protectToken);
}
778 |
/**
 * Build the MediaWiki URL for the specified page.
 *
 * The URL is the site's server followed by its article path, with the "$1"
 * placeholder replaced by the title.
 *
 * @uses Scripto_Service_MediaWiki::getSiteInfo()
 * @param string $title
 * @return string
 */
protected function _getPageMediawikiUrl($title)
{
    $siteInfo = $this->_mediawiki->getSiteInfo();
    $general = $siteInfo['query']['general'];

    $articlePath = str_replace('$1', $title, $general['articlepath']);
    return $general['server'] . $articlePath;
}
792 |
/**
 * Get information for the specified page.
 *
 * Requests the page's info from MediaWiki and normalizes the first page of
 * the response: value entries fall back to NULL when absent, and the
 * "watched", "redirect" and "new" entries become booleans that tell whether
 * the corresponding key is set in the response.
 *
 * @uses Scripto_Service_MediaWiki::getInfo()
 * @param string $title
 * @return array
 */
protected function _getPageInfo($title)
{
    $params = array(
        'inprop'  => 'protection|talkid|subjectid|url|watched',
        'intoken' => 'edit|move|delete|protect'
    );
    $response = $this->_mediawiki->getInfo($title, $params);
    $page = current($response['query']['pages']);

    // Result key => response key, in the order the result is built.
    $valueKeys = array(
        'page_id'            => 'pageid',
        'namespace_index'    => 'ns',
        'mediawiki_title'    => 'title',
        'last_revision_id'   => 'lastrevid',
        'counter'            => 'counter',
        'length'             => 'length',
        'start_timestamp'    => 'starttimestamp',
        'edit_token'         => 'edittoken',
        'move_token'         => 'movetoken',
        'delete_token'       => 'deletetoken',
        'protect_token'      => 'protecttoken',
        'protections'        => 'protection',
        'talk_id'            => 'talkid',
        'mediawiki_full_url' => 'fullurl',
        'mediawiki_edit_url' => 'editurl'
    );

    $pageInfo = array();
    foreach ($valueKeys as $infoKey => $responseKey) {
        $pageInfo[$infoKey] = isset($page[$responseKey]) ? $page[$responseKey] : null;
    }

    // Flags are reported by key presence alone.
    foreach (array('watched', 'redirect', 'new') as $flag) {
        $pageInfo[$flag] = isset($page[$flag]);
    }

    return $pageInfo;
}
826 |
/**
 * Collect the revision history of the specified page.
 *
 * Revisions are requested from MediaWiki in batches of 100, following the
 * "query-continue" marker until it is absent or $limit revisions have been
 * gathered. A page that has not been created stops the collection and
 * returns whatever has been gathered so far.
 *
 * @uses Scripto_Service_MediaWiki::getRevisions()
 * @param string $title
 * @param int $limit The number of revisions at which to stop
 * @param int $startRevisionId The revision ID to start from
 * @return array
 */
protected function _getPageHistory($title, $limit = 10, $startRevisionId = null)
{
    $history = array();
    $nextStartId = $startRevisionId;

    do {
        $query = array(
            'rvstartid' => $nextStartId,
            'rvlimit'   => 100,
            'rvprop'    => 'ids|flags|timestamp|user|comment|size',
        );
        $response = $this->_mediawiki->getRevisions($title, $query);
        $page = current($response['query']['pages']);

        // Nothing more to collect when the page has not been created.
        if (array_key_exists('missing', $page)) {
            return $history;
        }

        foreach ($page['revisions'] as $revision) {
            // The change action is derived from the revision comment.
            $action = Scripto::getChangeAction(array('comment' => $revision['comment']));

            $history[] = array(
                'revision_id' => $revision['revid'],
                'parent_id'   => $revision['parentid'],
                'user'        => $revision['user'],
                'timestamp'   => $revision['timestamp'],
                'comment'     => $revision['comment'],
                'size'        => $revision['size'],
                'action'      => $action,
            );

            // Stop everything as soon as the limit has been reached.
            if (count($history) == $limit) {
                return $history;
            }
        }

        // Continue from where MediaWiki says the next batch begins, if any.
        $nextStartId = isset($response['query-continue'])
                     ? $response['query-continue']['revisions']['rvstartid']
                     : null;

    } while ($nextStartId);

    return $history;
}
885 |
/**
 * Encode a base title that enables fail-safe document page transport
 * between the external system, Scripto, and MediaWiki.
 *
 * The base title is the base MediaWiki page title that corresponds to the
 * document page. Encoding allows any Unicode character in document and page
 * IDs, including those not allowed by URL syntax or MediaWiki naming
 * conventions. Because Base64 is reversible, the title can be decoded again.
 *
 * The base title is assembled from four parts, in this order:
 *
 * - A title prefix to keep MediaWiki from capitalizing the first character
 * - The URL-safe Base64 encoded document ID
 * - A delimiter between the encoded document ID and page ID
 * - The URL-safe Base64 encoded page ID
 *
 * @link http://en.wikipedia.org/wiki/Base64#URL_applications
 * @link http://en.wikipedia.org/wiki/Wikipedia:Naming_conventions_%28technical_restrictions%29
 * @param string|int $documentId The document ID
 * @param string|int $pageId The page ID
 * @return string The encoded base title
 */
public static function encodeBaseTitle($documentId, $pageId)
{
    $encodedDocumentId = Scripto_Document::base64UrlEncode($documentId);
    $encodedPageId = Scripto_Document::base64UrlEncode($pageId);

    return self::BASE_TITLE_PREFIX
         . $encodedDocumentId
         . self::BASE_TITLE_DELIMITER
         . $encodedPageId;
}
917 |
/**
 * Decode the base title.
 *
 * Reverses encodeBaseTitle(): removes the title prefix, splits the remainder
 * on the delimiter, and URL-safe Base64 decodes every part.
 *
 * @param string|int $baseTitle
 * @return array An array containing the document ID and page ID
 */
public static function decodeBaseTitle($baseTitle)
{
    // Remove the title prefix exactly once, and only if it is present.
    // ltrim() is not suitable here: it treats its second argument as a set
    // of characters and would keep stripping any leading character found in
    // the prefix, which can eat into the encoded title itself.
    $prefix = self::BASE_TITLE_PREFIX;
    $prefixLength = strlen($prefix);
    if ($prefixLength > 0 && 0 === strncmp($baseTitle, $prefix, $prefixLength)) {
        // Cast because substr() may return false for an empty remainder.
        $baseTitle = (string) substr($baseTitle, $prefixLength);
    }

    // Create an array containing the encoded document ID and page ID.
    $parts = explode(self::BASE_TITLE_DELIMITER, $baseTitle);

    // URL-safe Base64 decode every part and return the result.
    return array_map('Scripto_Document::base64UrlDecode', $parts);
}
933 |
/**
 * Encode a string to URL-safe Base64.
 *
 * Standard Base64 output has its trailing "=" padding removed and its "+"
 * and "/" characters replaced by "-" and "_".
 *
 * @link http://en.wikipedia.org/wiki/Base64#URL_applications
 * @param string $str
 * @return string
 */
public static function base64UrlEncode($str)
{
    $standard = base64_encode($str);
    $unpadded = rtrim($standard, '=');
    return strtr($unpadded, '+/', '-_');
}
945 |
/**
 * Decode a string from URL-safe Base64.
 *
 * The "-" and "_" characters are mapped back to "+" and "/" before the
 * string is handed to the standard Base64 decoder.
 *
 * @param string $str
 * @return string
 */
public static function base64UrlDecode($str)
{
    $standard = strtr($str, '-_', '+/');
    return base64_decode($standard);
}
956 | }
957 |
--------------------------------------------------------------------------------
/lib/Scripto/Exception.php:
--------------------------------------------------------------------------------
1 | array(
55 | 'text', 'title', 'page', 'prop', 'pst', 'uselang'
56 | ),
57 | 'edit' => array(
58 | 'title', 'section', 'text', 'token', 'summary', 'minor', 'notminor',
59 | 'bot', 'basetimestamp', 'starttimestamp', 'recreate', 'createonly',
60 | 'nocreate', 'watchlist', 'md5', 'captchaid', 'captchaword', 'undo',
61 | 'undoafter'
62 | ),
63 | 'protect' => array(
64 | 'title', 'token', 'protections', 'expiry', 'reason', 'cascade'
65 | ),
66 | 'watch' => array(
67 | 'title', 'unwatch', 'token'
68 | ),
69 | 'query' => array(
70 | // title specifications
71 | 'titles', 'revids', 'pageids',
72 | // submodules
73 | 'meta', 'prop', 'list',
74 | // meta submodule
75 | 'siprop', 'sifilteriw', 'sishowalldb', 'sinumberingroup',
76 | 'uiprop',
77 | // prop submodule
78 | 'inprop', 'intoken', 'indexpageids', 'incontinue',
79 | 'rvprop', 'rvcontinue', 'rvlimit', 'rvstartid', 'rvendid',
80 | 'rvstart', 'rvend', 'rvdir', 'rvuser', 'rvexcludeuser',
81 | 'rvexpandtemplates', 'rvgeneratexml', 'rvsection', 'rvtoken',
82 | 'rvdiffto', 'rvdifftotext',
83 | // list submodule
84 | 'ucprop', 'ucuser', 'ucuserprefix', 'ucstart', 'ucend',
85 | 'uccontinue', 'ucdir', 'uclimit', 'ucnamespace', 'ucshow',
86 | 'rcprop', 'rcstart', 'rcend', 'rcdir', 'rclimit', 'rcnamespace',
87 | 'rcuser', 'rcexcludeuser', 'rctype', 'rcshow',
88 | 'wlprop', 'wlstart', 'wlend', 'wldir', 'wllimit', 'wlnamespace',
89 | 'wluser', 'wlexcludeuser', 'wlowner', 'wltoken', 'wlallrev',
90 | 'wlshow',
91 | 'aplimit', 'apminsize', 'apmaxsize', 'apprefix', 'apfrom',
92 | 'apnamespace', 'apfilterredir', 'apfilterlanglinks', 'apprtype',
93 | 'apprlevel', 'apdir',
94 | ),
95 | 'login' => array(
96 | 'lgname', 'lgpassword', 'lgtoken'
97 | ),
98 | 'logout' => array()
99 | );
100 |
/**
 * Constructs the MediaWiki API client.
 *
 * Configures the HTTP client for the given API URL and, when cookie passing
 * is enabled and a cookie prefix is known, copies the MediaWiki
 * authentication cookies sent by the browser into the client's cookie jar.
 *
 * @link http://www.mediawiki.org/wiki/API:Main_page
 * @param string $apiUrl The URL to the MediaWiki API.
 * @param bool $passCookies Pass cookies to the web browser.
 * @param string $cookiePrefix
 */
public function __construct($apiUrl, $passCookies = true, $cookiePrefix = null)
{
    $this->_passCookies = (bool) $passCookies;

    // An explicitly passed prefix takes precedence; otherwise reuse the
    // prefix stored in the browser during a previous login, if any.
    $prefixCookieName = self::COOKIE_NS . 'cookieprefix';
    if (null !== $cookiePrefix) {
        $this->_cookiePrefix = $cookiePrefix;
    } elseif (isset($_COOKIE[$prefixCookieName])) {
        $this->_cookiePrefix = $_COOKIE[$prefixCookieName];
    }

    // Point the HTTP client at the MediaWiki API.
    self::getHttpClient()->setUri($apiUrl)
                         ->setConfig(array('keepalive' => true))
                         ->setCookieJar();

    // Add the X-Forwarded-For header when both addresses are known.
    if (isset($_SERVER['REMOTE_ADDR'], $_SERVER['SERVER_ADDR'])) {
        $forwardedFor = $_SERVER['REMOTE_ADDR'] . ', ' . $_SERVER['SERVER_ADDR'];
        self::getHttpClient()->setHeaders('X-Forwarded-For', $forwardedFor);
    }

    // Without cookie passing or a known prefix there is no browser state to
    // carry over into the HTTP client.
    if (!$this->_passCookies || !$this->_cookiePrefix) {
        return;
    }

    // Copy the authentication cookies from the browser into the cookie jar
    // so that state is maintained between browser requests.
    require_once 'Zend/Http/Cookie.php';
    foreach ($this->_cookieSuffixes as $cookieSuffix) {
        $browserCookieName = self::COOKIE_NS . $this->_cookiePrefix . $cookieSuffix;
        if (!array_key_exists($browserCookieName, $_COOKIE)) {
            continue;
        }
        $cookie = new Zend_Http_Cookie(
            $this->_cookiePrefix . $cookieSuffix,
            $_COOKIE[$browserCookieName],
            self::getHttpClient()->getUri()->getHost()
        );
        self::getHttpClient()->getCookieJar()->addCookie($cookie);
    }
}
148 |
/**
 * Queries the "userinfo" meta module for the current user.
 *
 * @link http://www.mediawiki.org/wiki/API:Meta#userinfo_.2F_ui
 * @param string $uiprop The user properties to request
 * @return array The API response
 */
public function getUserInfo($uiprop = '')
{
    return $this->query(array(
        'meta'   => 'userinfo',
        'uiprop' => $uiprop,
    ));
}
162 |
/**
 * Queries the "siteinfo" meta module for overall site information.
 *
 * @link http://www.mediawiki.org/wiki/API:Meta#siteinfo_.2F_si
 * @param string $siprop The site properties to request
 * @return array The API response
 */
public function getSiteInfo($siprop = 'general')
{
    return $this->query(array(
        'meta'   => 'siteinfo',
        'siprop' => $siprop,
    ));
}
176 |
/**
 * Queries the list of contributions made by a given user.
 *
 * The "ucuser" and "list" entries of $params are always overwritten.
 *
 * @link http://www.mediawiki.org/wiki/API:Usercontribs
 * @param string $ucuser
 * @param array $params Additional query parameters
 * @return array The API response
 */
public function getUserContributions($ucuser, array $params = array())
{
    $query = $params;
    $query['ucuser'] = $ucuser;
    $query['list'] = 'usercontribs';

    return $this->query($query);
}
191 |
/**
 * Queries the list of recent changes to the wiki.
 *
 * The "list" entry of $params is always overwritten.
 *
 * @link http://www.mediawiki.org/wiki/API:Recentchanges
 * @param array $params Additional query parameters
 * @return array The API response
 */
public function getRecentChanges(array $params = array())
{
    $query = $params;
    $query['list'] = 'recentchanges';

    return $this->query($query);
}
204 |
/**
 * Queries the list of pages on the current user's watchlist.
 *
 * The "list" entry of $params is always overwritten.
 *
 * @link http://www.mediawiki.org/wiki/API:Watchlist
 * @param array $params Additional query parameters
 * @return array The API response
 */
public function getWatchlist(array $params = array())
{
    $query = $params;
    $query['list'] = 'watchlist';

    return $this->query($query);
}
217 |
/**
 * Queries the list of pages.
 *
 * The "list" entry of $params is always overwritten.
 *
 * @link http://www.mediawiki.org/wiki/API:Allpages
 * @param array $params Additional query parameters
 * @return array The API response
 */
public function getAllPages(array $params = array())
{
    $query = $params;
    $query['list'] = 'allpages';

    return $this->query($query);
}
230 |
/**
 * Queries basic page information.
 *
 * The "titles" and "prop" entries of $params are always overwritten.
 *
 * @link http://www.mediawiki.org/wiki/API:Properties#info_.2F_in
 * @param string $titles
 * @param array $params Additional query parameters
 * @return array The API response
 */
public function getInfo($titles, array $params = array())
{
    $query = $params;
    $query['titles'] = $titles;
    $query['prop'] = 'info';

    return $this->query($query);
}
245 |
/**
 * Queries the revisions of a given page.
 *
 * The "titles" and "prop" entries of $params are always overwritten.
 *
 * @link http://www.mediawiki.org/wiki/API:Properties#revisions_.2F_rv
 * @param string $titles
 * @param array $params Additional query parameters
 * @return array The API response
 */
public function getRevisions($titles, array $params = array())
{
    $query = $params;
    $query['titles'] = $titles;
    $query['prop'] = 'revisions';

    return $this->query($query);
}
260 |
/**
 * Gets the HTML of a specified revision.
 *
 * The revision's wikitext is fetched first and then parsed into HTML, with
 * "__NOEDITSECTION__" prepended to the text that is parsed.
 *
 * @param int $revisionId
 * @return string
 */
public function getRevisionHtml($revisionId)
{
    // Fetch the wikitext of the revision.
    $revisionQuery = array('revids' => $revisionId, 'rvprop' => 'content');
    $revisionResponse = $this->getRevisions(null, $revisionQuery);
    $page = current($revisionResponse['query']['pages']);
    $wikitext = $page['revisions'][0]['*'];

    // Let MediaWiki turn the wikitext into HTML.
    $parseResponse = $this->parse(array('text' => '__NOEDITSECTION__' . $wikitext));
    return $parseResponse['parse']['text']['*'];
}
280 |
/**
 * Gets the difference between two revisions.
 *
 * @param int $fromRevisionId The revision ID to diff.
 * @param int|string $toRevisionId The revision to diff to: use the revision
 * ID, prev, next, or cur.
 * @return string The API returns preformatted table rows without a wrapping
 * table element, presumably so that implementers can wrap them in a custom
 * table.
 */
public function getRevisionDiff($fromRevisionId, $toRevisionId = 'prev')
{
    $diffQuery = array(
        'revids'   => $fromRevisionId,
        'rvdiffto' => $toRevisionId,
    );
    $response = $this->getRevisions(null, $diffQuery);
    $page = current($response['query']['pages']);
    $revision = $page['revisions'][0];

    return $revision['diff']['*'];
}
297 |
/**
 * Gets the edit token for a given page.
 *
 * @link http://www.mediawiki.org/wiki/API:Edit#Token
 * @param string $title
 * @return string|null The token, or null if the response contains none
 */
public function getEditToken($title)
{
    $response = $this->getInfo($title, array('intoken' => 'edit'));
    $page = current($response['query']['pages']);

    return isset($page['edittoken']) ? $page['edittoken'] : null;
}
316 |
/**
 * Gets the protect token for a given page.
 *
 * @link http://www.mediawiki.org/wiki/API:Protect#Token
 * @param string $title
 * @return string|null The token, or null if the response contains none
 */
public function getProtectToken($title)
{
    $response = $this->getInfo($title, array('intoken' => 'protect'));
    $page = current($response['query']['pages']);

    return isset($page['protecttoken']) ? $page['protecttoken'] : null;
}
335 |
/**
 * Gets the watch token for a given page.
 *
 * @link http://www.mediawiki.org/wiki/API:Watch#Token
 * @param string $title
 * @return string|null The token, or null if the response contains none
 */
public function getWatchToken($title)
{
    $response = $this->getInfo($title, array('intoken' => 'watch'));
    $page = current($response['query']['pages']);

    return isset($page['watchtoken']) ? $page['watchtoken'] : null;
}
354 |
/**
 * Gets the protections for a given page.
 *
 * Returns the "protection" entry of the first page in the info response.
 *
 * @link http://www.mediawiki.org/wiki/API:Properties#info_.2F_in
 * @param string $title
 * @return array
 */
public function getPageProtections($title)
{
    $infoQuery = array('inprop' => 'protection');
    $response = $this->getInfo($title, $infoQuery);
    $firstPage = current($response['query']['pages']);

    return $firstPage['protection'];
}
368 |
/**
 * Gets the wikitext of the latest revision of a given page.
 *
 * @link http://www.mediawiki.org/wiki/API:Properties#revisions_.2F_rv
 * @param string $title
 * @return string|null The wikitext, or null when the response carries no
 * revision content (the page does not exist yet)
 */
public function getLatestRevisionWikitext($title)
{
    $revisionQuery = array('rvprop' => 'content', 'rvlimit' => '1');
    $response = $this->getRevisions($title, $revisionQuery);
    $page = current($response['query']['pages']);

    // Only an existing page has revision content to return.
    if (!isset($page['revisions'][0]['*'])) {
        return null;
    }
    return $page['revisions'][0]['*'];
}
389 |
/**
 * Gets the HTML of the latest revision of a given page.
 *
 * The page is parsed by transcluding it ("{{:title}}") behind a
 * "__NOEDITSECTION__" switch, a hack that excludes [edit] links from the
 * parsed wikitext.
 *
 * @link http://www.mediawiki.org/wiki/API:Parsing_wikitext#parse
 * @param string $title
 * @return string|null The HTML, or null when the page is not reported as
 * existing
 */
public function getLatestRevisionHtml($title)
{
    $transclusion = '__NOEDITSECTION__{{:' . $title . '}}';
    $response = $this->parse(array('text' => $transclusion));

    // For a page that does not exist, the returned HTML is merely a link to
    // the document's MediaWiki edit page. The only indicator found in the
    // response is the "exists" attribute of the first template; this may
    // not be adequate.
    if (!isset($response['parse']['templates'][0]['exists'])) {
        return null;
    }
    return $response['parse']['text']['*'];
}
413 |
/**
 * Gets the HTML preview of the given wikitext.
 *
 * "__NOEDITSECTION__" is prepended to the text before it is parsed.
 *
 * @link http://www.mediawiki.org/wiki/API:Parsing_wikitext#parse
 * @param string $text
 * @return string
 */
public function getPreview($text)
{
    $parseParams = array('text' => '__NOEDITSECTION__' . $text);
    $parsed = $this->parse($parseParams);

    return $parsed['parse']['text']['*'];
}
426 |
/**
 * Returns whether a given page is created.
 *
 * A page counts as created when the query response flags it neither as
 * "missing" nor as "invalid".
 *
 * @link http://www.mediawiki.org/wiki/API:Query#Missing_and_invalid_titles
 * @param string $title
 * @return bool
 */
public function pageCreated($title)
{
    $response = $this->query(array('titles' => $title));
    $page = current($response['query']['pages']);

    return !isset($page['missing']) && !isset($page['invalid']);
}
443 |
/**
 * Sends a "parse" request to the MediaWiki API and returns the response.
 *
 * @link http://www.mediawiki.org/wiki/API:Parsing_wikitext#parse
 * @param array $params
 * @return array
 */
public function parse(array $params = array())
{
    $response = $this->_request('parse', $params);
    return $response;
}
455 |
/**
 * Sends a "query" request to the MediaWiki API and returns the response.
 *
 * @link http://www.mediawiki.org/wiki/API:Query
 * @param array $params
 * @return array
 */
public function query(array $params = array())
{
    $response = $this->_request('query', $params);
    return $response;
}
467 |
/**
 * Watch or unwatch pages.
 *
 * The "title" and "token" entries of $params are always overwritten.
 *
 * @link http://www.mediawiki.org/wiki/API:Watch
 * @param string $title
 * @param string|null $watchtoken Fetched for the page when null
 * @param array $params Additional request parameters
 * @return array
 */
public function watch($title, $watchtoken = null, array $params = array())
{
    // Fall back to requesting a token when the caller did not supply one.
    if (null === $watchtoken) {
        $watchtoken = $this->getWatchToken($title);
    }

    $request = $params;
    $request['title'] = $title;
    $request['token'] = $watchtoken;

    return $this->_request('watch', $request);
}
486 |
/**
 * Applies protections to a given page.
 *
 * The "title", "protections", and "token" entries of $params are always
 * overwritten.
 *
 * @link http://www.mediawiki.org/wiki/API:Protect
 * @param string $title
 * @param string $protections
 * @param string|null $protecttoken Fetched for the page when null
 * @param array $params Additional request parameters
 * @return array
 */
public function protect($title,
                        $protections,
                        $protecttoken = null,
                        array $params = array())
{
    // Fall back to requesting a token when the caller did not supply one.
    if (null === $protecttoken) {
        $protecttoken = $this->getProtectToken($title);
    }

    $request = $params;
    $request['title'] = $title;
    $request['protections'] = $protections;
    $request['token'] = $protecttoken;

    return $this->_request('protect', $request);
}
514 |
/**
 * Create or edit a given page.
 *
 * The timestamp of the page's last revision, when there is one, is sent as
 * the base timestamp to protect against edit conflicts. The "title",
 * "text", "token", and "basetimestamp" entries of $params are always
 * overwritten.
 *
 * @link http://www.mediawiki.org/wiki/API:Edit
 * @link http://www.mediawiki.org/wiki/Manual:Preventing_access#Restrict_editing_of_all_pages
 * @param string $title
 * @param string $text
 * @param string|null $edittoken Fetched for the page when null
 * @param array $params Additional request parameters
 * @return array
 */
public function edit($title,
                     $text,
                     $edittoken = null,
                     array $params = array())
{
    // Fall back to requesting a token when the caller did not supply one.
    if (null === $edittoken) {
        $edittoken = $this->getEditToken($title);
    }

    // Look up the last revision; a page without revisions has no base
    // timestamp.
    $revisionResponse = $this->getRevisions($title);
    $page = current($revisionResponse['query']['pages']);
    $basetimestamp = isset($page['revisions'])
                   ? $page['revisions'][0]['timestamp']
                   : null;

    $request = $params;
    $request['title'] = $title;
    $request['text'] = $text;
    $request['token'] = $edittoken;
    $request['basetimestamp'] = $basetimestamp;

    return $this->_request('edit', $request);
}
554 |
/**
 * Login to MediaWiki.
 *
 * Performs the login request, confirming it with the returned token when
 * MediaWiki asks for one. On success, and when cookie passing is enabled,
 * the MediaWiki cookie prefix and all cookies in the HTTP client's cookie
 * jar are persisted in the browser under the Scripto cookie namespace.
 *
 * @link http://www.mediawiki.org/wiki/API:Login
 * @param string $lgname
 * @param string $lgpassword
 * @throws Scripto_Service_Exception When the login does not succeed
 */
public function login($lgname, $lgpassword)
{
    // Log in or get the login token.
    $params = array('lgname' => $lgname, 'lgpassword' => $lgpassword);
    $response = $this->_request('login', $params);

    // Confirm the login token.
    if ('NeedToken' == $response['login']['result']) {
        $params['lgtoken'] = $response['login']['token'];
        $response = $this->_request('login', $params);
    }

    // Process a successful login.
    if ('Success' == $response['login']['result']) {
        if ($this->_passCookies) {
            $cookiePrefix = isset($response['login']['cookieprefix'])
                          ? $response['login']['cookieprefix']
                          : $this->_cookiePrefix;
            // Persist the MediaWiki cookie prefix in the browser. Set to
            // expire in 30 days, the same as MediaWiki cookies.
            setcookie(self::COOKIE_NS . 'cookieprefix',
                      $cookiePrefix,
                      time() + 60 * 60 * 24 * 30,
                      '/');

            // Persist MediaWiki authentication cookies in the browser. The
            // browser cookie is named with the namespace followed by the
            // jar cookie's own name; this is the naming the constructor
            // expects when it reads the cookies back. No further prefix is
            // inserted: the previous code read an undefined property here,
            // which contributed nothing to the name.
            foreach (self::getHttpClient()->getCookieJar()->getAllCookies() as $cookie) {
                setcookie(self::COOKIE_NS . $cookie->getName(),
                          $cookie->getValue(),
                          $cookie->getExpiryTime(),
                          '/');
            }
        }
        return;
    }

    // Process an unsuccessful login.
    $errors = array('NoName'          => 'Username is empty.',
                    'Illegal'         => 'Username is illegal.',
                    'NotExists'       => 'Username is not found.',
                    'EmptyPass'       => 'Password is empty.',
                    'WrongPass'       => 'Password is incorrect.',
                    'WrongPluginPass' => 'Password is incorrect (via plugin)',
                    'CreateBlocked'   => 'IP address is blocked for account creation.',
                    'Throttled'       => 'Login attempt limit surpassed.',
                    'Blocked'         => 'User is blocked.');
    $error = $response['login']['result'];
    if (array_key_exists($error, $errors)) {
        throw new Scripto_Service_Exception($errors[$error]);
    }
    throw new Scripto_Service_Exception('Unknown login error: ' . $response['login']['result']);
}
614 |
/**
 * Logout of MediaWiki.
 *
 * Sends the logout request, empties the HTTP client's cookie jar and, when
 * cookie passing is enabled and a cookie prefix is known, deletes the
 * MediaWiki authentication cookies from the browser.
 *
 * @link http://www.mediawiki.org/wiki/API:Logout
 */
public function logout()
{
    $this->_request('logout');

    // Forget every cookie held by the HTTP client.
    self::getHttpClient()->getCookieJar()->reset();

    // Nothing was persisted in the browser under these conditions.
    if (!$this->_passCookies || !$this->_cookiePrefix) {
        return;
    }

    // Delete the stored cookie prefix, then every authentication cookie
    // that the browser actually sent.
    setcookie(self::COOKIE_NS . 'cookieprefix', false, 0, '/');
    foreach ($this->_cookieSuffixes as $cookieSuffix) {
        $browserCookieName = self::COOKIE_NS . $this->_cookiePrefix . $cookieSuffix;
        if (!array_key_exists($browserCookieName, $_COOKIE)) {
            continue;
        }
        setcookie($browserCookieName, false, 0, '/');
    }
}
639 |
/**
 * Makes a MediaWiki API request and returns the response.
 *
 * Only parameters listed for the action in $this->_actions are sent; any
 * other entry of $params is silently dropped. The request is always made
 * as a POST in JSON format.
 *
 * @param string $action
 * @param array $params
 * @return array The decoded JSON response
 * @throws Scripto_Service_Exception When the action is not a known API
 * action or the response contains an error
 */
protected function _request($action, array $params = array())
{
    // Check if this action is a valid MediaWiki API action.
    if (!array_key_exists($action, $this->_actions)) {
        throw new Scripto_Service_Exception('Invalid MediaWiki API action.');
    }

    // Set valid parameters for this action. The comparison is strict so
    // that an integer key cannot loosely match an allowed parameter name.
    $allowedParams = $this->_actions[$action];
    foreach ($params as $paramName => $paramValue) {
        if (in_array($paramName, $allowedParams, true)) {
            self::getHttpClient()->setParameterPost($paramName, $paramValue);
        }
    }

    // Set default parameters.
    self::getHttpClient()->setParameterPost('format', 'json')
                         ->setParameterPost('action', $action);

    // Get the response body and reset the request. The HTTP client keeps
    // its state between calls, so the parameters are also reset when the
    // request fails; otherwise they would leak into the next request.
    try {
        $body = self::getHttpClient()->request('POST')->getBody();
    } catch (Exception $e) {
        self::getHttpClient()->resetParameters();
        throw $e;
    }
    self::getHttpClient()->resetParameters();

    // Parse the response body, throwing errors when encountered.
    $response = json_decode($body, true);
    if (isset($response['error'])) {
        throw new Scripto_Service_Exception($response['error']['info']);
    }
    return $response;
}
676 |
/**
 * Determine whether the provided MediaWiki API URL is valid.
 *
 * The URL must be a well-formed URI ending in "/api.php", and the endpoint
 * must answer a "siteinfo" query with a JSON response that contains general
 * site information.
 *
 * @param string $apiUrl
 * @return bool
 */
public static function isValidApiUrl($apiUrl)
{
    // Check for valid API URL string.
    if (!Zend_Uri::check($apiUrl) || !preg_match('#/api\.php$#', $apiUrl)) {
        return false;
    }

    try {
        // Ping the API endpoint for a valid response.
        $body = self::getHttpClient()->setUri($apiUrl)
                                     ->setParameterPost('action', 'query')
                                     ->setParameterPost('meta', 'siteinfo')
                                     ->setParameterPost('format', 'json')
                                     ->request('POST')->getBody();
    } catch (Zend_Http_Client_Exception $e) {
        // Prevent "Unable to Connect" errors. The probe parameters are
        // cleared here as well so that they do not linger on the HTTP
        // client after a failed attempt.
        self::getHttpClient()->resetParameters(true);
        return false;
    }
    self::getHttpClient()->resetParameters(true);

    // A valid endpoint answers with JSON holding general site information.
    $response = json_decode($body, true);
    if (!is_array($response) || !isset($response['query']['general'])) {
        return false;
    }

    return true;
}
710 | }
711 |
--------------------------------------------------------------------------------
/tests/README.markdown:
--------------------------------------------------------------------------------
1 | Testing Scripto
2 | =============
3 |
4 | Scripto uses the SimpleTest PHP testing framework. By running these tests, you
5 | can:
6 |
7 | * Test your external system's adapter for expected results;
8 | * Test your MediaWiki instance via Scripto's MediaWiki API client;
9 | * Test the Scripto_Document base class.
10 |
11 | Installation
12 | -------------
13 |
14 | * Download the [SimpleTest](http://www.simpletest.org/) framework;
15 | * Copy config.php.changeme to config.php:
16 |
17 | On the command line:
18 |
19 | $ cd /path/to/scripto/tests/
20 | $ cp config.php.changeme config.php
21 |
22 | * Set the configuration in config.php:
23 |
24 | You can use the following document IDs to test Scripto's Example adapter:
25 |
26 | // Test document ID.
27 | define('TEST_DOCUMENT_ID', '16344');
28 |
29 | or:
30 |
31 | // Test document ID.
32 | define('TEST_DOCUMENT_ID', '[Facsimile of] letter to Messrs. O. P. Hall et al from Lincoln.');
33 |
34 | Running the Tests
35 | -------------
36 |
37 | On the command line:
38 |
39 | $ cd /path/to/scripto/tests/
40 | $ php all_tests.php
41 |
42 | In the browser:
43 |
44 | * Make sure the Scripto tests directory is available to your web server;
45 | * Go to http://your-domain/tests/all_tests.php
46 |
--------------------------------------------------------------------------------
/tests/adapter_test.php:
--------------------------------------------------------------------------------
1 | _testAdapterFilename = TEST_ADAPTER_FILENAME;
28 | $this->_testAdapterClassName = TEST_ADAPTER_CLASS_NAME;
29 | $this->_testDocumentId = TEST_DOCUMENT_ID;
30 | }
31 |
public function testAdapterIsValid()
{
    // The adapter file must exist before it can be loaded.
    $adapterFilename = $this->_testAdapterFilename;
    $this->assertTrue(file_exists($adapterFilename), 'Example adapter file does not exist');

    // Load the adapter and make sure it implements the adapter interface.
    require_once $adapterFilename;
    $adapterClassName = $this->_testAdapterClassName;
    $adapter = new $adapterClassName;
    $this->assertIsA($adapter, 'Scripto_Adapter_Interface', 'Example adapter is not an instance of Scripto_Adapter_Interface');

    // Keep the adapter for the tests that follow.
    $this->_testAdapter = $adapter;
}
44 |
public function testDocumentIsValid()
{
    $adapter = $this->_testAdapter;
    $documentId = $this->_testDocumentId;

    // The document ID must be an int or a string, and the adapter must know
    // the document.
    $hasValidType = is_int($documentId) || is_string($documentId);
    $this->assertTrue($hasValidType, 'Document ID must be int or string (' . gettype($documentId) . ' given)');
    $this->assertTrue($adapter->documentExists($documentId), "Document ID \"{$this->_testDocumentId}\" does not exist");

    // The document title must be a string.
    $title = $adapter->getDocumentTitle($documentId);
    $this->assertIsA($title, 'string', 'Document title must be a string (' . gettype($title) . ' given)');
}
55 |
public function testDocumentPagesAreValid()
{
    $adapter = $this->_testAdapter;
    $documentId = $this->_testDocumentId;

    // The document pages must be a non-empty array.
    $pages = $adapter->getDocumentPages($documentId);
    $this->assertIsA($pages, 'array', 'Document pages must be an array (' . gettype($pages) . ' given)');
    $this->assertTrue(count($pages), 'Document pages must not be empty');

    // The first page ID must be an int or a string and be one of the pages.
    $documentFirstPageId = $adapter->getDocumentFirstPageId($documentId);
    $hasValidType = is_int($documentFirstPageId) || is_string($documentFirstPageId);
    $this->assertTrue($hasValidType, 'Document first page ID must be int or string (' . gettype($documentFirstPageId) . ' given)');
    $this->assertTrue(array_key_exists($documentFirstPageId, $pages), "Document first page ID \"$documentFirstPageId\" does not exist");

    // Check every page of the document.
    foreach ($pages as $pageId => $pageName) {

        // The adapter must report the page as existing.
        $pageExists = $adapter->documentPageExists($documentId, $pageId);
        $this->assertIdentical($pageExists, true, "Document page ID \"$pageId\" does not exist");

        // The page name must be a string.
        $name = $adapter->getDocumentPageName($documentId, $pageId);
        $this->assertIsA($name, 'string', "Document page name for page ID \"$pageId\" must be a string (" . gettype($name) . " given)");

        // There's no consistent, reliable, and lightweight way to validate
        // a URL, even with regular expressions, so only check that the page
        // file URL is a string.
        $fileUrl = $adapter->getDocumentPageFileUrl($documentId, $pageId);
        $this->assertIsA($fileUrl, 'string', "Document page image URL for page ID \"$pageId\" must be a string (" . gettype($fileUrl) . " given)");
    }
}
86 |
public function testImportTranscriptions()
{
    // Intentionally empty: testing imports requires a parallel external
    // system to be installed, which may be too involved to be feasible for
    // most people.
}
92 | }
93 |
--------------------------------------------------------------------------------
/tests/all_tests.php:
--------------------------------------------------------------------------------
1 | TestSuite('All tests');
10 | $path = dirname(__FILE__) . DIRECTORY_SEPARATOR;
11 | $this->addFile($path . 'adapter_test.php');
12 | $this->addFile($path . 'mediawiki_test.php');
13 | $this->addFile($path . 'scripto_test.php');
14 | $this->addFile($path . 'document_test.php');
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/tests/config.php.changeme:
--------------------------------------------------------------------------------
1 | _testDocumentId = TEST_DOCUMENT_ID;
21 |
22 | require_once TEST_ADAPTER_FILENAME;
23 | require_once 'Scripto/Service/MediaWiki.php';
24 | require_once 'Scripto/Document.php';
25 |
26 | // Instantiate the Scripto_Document object and set it.
27 | $testAdapterClassName = TEST_ADAPTER_CLASS_NAME;
28 | $this->_testDocument = new Scripto_Document(
29 | $this->_testDocumentId,
30 | new $testAdapterClassName,
31 | new Scripto_Service_MediaWiki(TEST_MEDIAWIKI_API_URL, false)
32 | );
33 | }
34 |
/**
 * getId() must return a value equal to the test document ID.
 */
public function testGetId()
{
    $reportedId = $this->_testDocument->getId();
    $this->assertEqual($this->_testDocumentId, $reportedId);
}
39 |
/**
 * getTitle() must return a string.
 */
public function testGetTitle()
{
    $title = $this->_testDocument->getTitle();
    $this->assertIsA($title, 'string');
}
44 |
/**
 * Set the document page used by the subsequent tests and verify that a
 * base title survives an encode/decode round trip.
 */
public function testPageIsValid()
{
    $document = $this->_testDocument;

    // No page ID may be reported before a page has been set.
    $this->assertNull($document->getPageId(), 'The document page ID was prematurely set');

    // Setting the page must work (in this case, the first page).
    $document->setPage(null);
    $this->assertNotNull($document->getPageId(), 'The document page ID was not set');

    // Encode the base title from the document and page IDs, then decode it.
    $encodedBaseTitle = Scripto_Document::encodeBaseTitle($document->getId(), $document->getPageId());
    $decoded = Scripto_Document::decodeBaseTitle($encodedBaseTitle);

    // Index 0 must hold the document ID and index 1 the page ID.
    $this->assertEqual($decoded[0], $this->_testDocumentId, 'Something went wrong during base title encoding/decoding. Document ID does not match');
    $this->assertEqual($decoded[1], $document->getPageId(), 'Something went wrong during base title encoding/decoding. Page ID does not match');
}
64 |
/**
 * getPageName() must return a string.
 *
 * NOTE(review): presumably relies on the page set in testPageIsValid();
 * confirm against Scripto_Document.
 */
public function testGetPageName()
{
    $pageName = $this->_testDocument->getPageName();
    $this->assertIsA($pageName, 'string');
}
69 |
/**
 * getBaseTitle() must return a string.
 */
public function testGetBaseTitle()
{
    $baseTitle = $this->_testDocument->getBaseTitle();
    $this->assertIsA($baseTitle, 'string');
}
74 |
/**
 * getPages() must return an array.
 */
public function testGetPages()
{
    $pages = $this->_testDocument->getPages();
    $this->assertIsA($pages, 'array');
}
79 |
/**
 * getFirstPageId() must return either an integer or a string.
 */
public function testGetFirstPageId()
{
    $id = $this->_testDocument->getFirstPageId();
    $isIntOrString = is_int($id) || is_string($id);
    $this->assertTrue($isIntOrString);
}
85 |
/**
 * getPageFileUrl() must return a string.
 */
public function testGetPageFileUrl()
{
    $fileUrl = $this->_testDocument->getPageFileUrl();
    $this->assertIsA($fileUrl, 'string');
}
90 |
/**
 * getTranscriptionPageMediawikiUrl() must return a string.
 */
public function testGetTranscriptionPageMediawikiUrl()
{
    $transcriptionUrl = $this->_testDocument->getTranscriptionPageMediawikiUrl();
    $this->assertIsA($transcriptionUrl, 'string');
}
95 |
/**
 * getTalkPageMediawikiUrl() must return a string.
 */
public function testGetTalkPageMediawikiUrl()
{
    $talkUrl = $this->_testDocument->getTalkPageMediawikiUrl();
    $this->assertIsA($talkUrl, 'string');
}
100 | }
101 |
--------------------------------------------------------------------------------
/tests/mediawiki_test.php:
--------------------------------------------------------------------------------
1 | Insert non-formatted text here
25 | ----
26 | Strike-through text
27 |
28 | Superscript text
29 | Subscript text
30 | Small Text
31 |
32 | Block quote
33 |
34 | {| class=\"wikitable\"
35 | |-
36 | ! header 1
37 | ! header 2
38 | ! header 3
39 | |-
40 | | row 1, cell 1
41 | | row 1, cell 2
42 | | row 1, cell 3
43 | |-
44 | | row 2, cell 1
45 | | row 2, cell 2
46 | | row 2, cell 3
47 | |}";
48 |
49 | /**
50 | * When getting a preview and page HTML, MediaWiki returns an HTML comment
51 | * containing a dynamic "NewPP limit report." Here, this is removed prior to
52 | * asserting valid get responses.
53 | */
54 | const TEST_EXPECTED_HTML = 'Bold text
55 | Italic text
56 | link title
57 |
58 | Headline text
59 | Insert non-formatted text here
60 |
61 |
62 | Strike-through text
63 |
64 | Superscript text
65 | Subscript text
66 | Small Text
67 |
68 |
69 | Block quote
70 |
71 |
72 |
73 |
74 | header 1
75 | |
76 | header 2
77 | |
78 | header 3
79 | |
80 |
81 | row 1, cell 1
82 | |
83 | row 1, cell 2
84 | |
85 | row 1, cell 3
86 | |
87 |
88 | row 2, cell 1
89 | |
90 | row 2, cell 2
91 | |
92 | row 2, cell 3
93 | |
94 |
95 |
96 | ';
97 |
98 | const TEST_EXPECTED_PREVIEW = '';
99 |
100 | private $_testMediawiki;
101 | private $_testEditCredentials;
102 |
/**
 * Build the MediaWiki service object shared by every test method.
 *
 * This is done in __construct() rather than setUp() because the test case
 * does not need to be set up again before every test method.
 */
public function __construct()
{
    parent::__construct();

    require_once 'Scripto/Service/MediaWiki.php';

    // Cookies must not be passed to a browser when testing, hence the
    // false second argument.
    $passCookies = false;
    $this->_testMediawiki = new Scripto_Service_MediaWiki(TEST_MEDIAWIKI_API_URL, $passCookies);
}
115 |
/**
 * Log in and out of MediaWiki with the configured test credentials.
 *
 * Does nothing unless both TEST_MEDIAWIKI_USERNAME and
 * TEST_MEDIAWIKI_PASSWORD are set.
 */
public function testCredentials()
{
    // Login and logout can only be tested with a username and password.
    if (!TEST_MEDIAWIKI_USERNAME || !TEST_MEDIAWIKI_PASSWORD) {
        return;
    }

    $wiki = $this->_testMediawiki;

    // There is no explicit assertion for login: an unsuccessful login
    // throws an error.
    $wiki->login(TEST_MEDIAWIKI_USERNAME, TEST_MEDIAWIKI_PASSWORD);

    // After logging out the user info must carry the "anon" entry.
    $wiki->logout();
    $userInfo = $wiki->getUserInfo();
    $isAnonymous = isset($userInfo['query']['userinfo']['anon']);
    $this->assertTrue($isAnonymous, 'Logout unsuccessful');
}
130 |
/**
 * Exercise previewing, editing and reading back the test page.
 */
public function testEditPage()
{
    $wiki = $this->_testMediawiki;

    // The preview of the test wikitext must match the expected HTML once
    // dynamic HTML comments have been removed.
    $previewHtml = $this->_removeHtmlComments($wiki->getPreview(self::TEST_WIKITEXT));
    $this->assertEqual(self::TEST_EXPECTED_HTML, $previewHtml, 'The test page preview HTML is invalid');

    // Overwrite the page with a placeholder before testing the real edit.
    // Resetting the database or deleting the page would be preferable, but
    // resetting is too involved and Scripto_Service_MediaWiki does not
    // implement a delete page feature because deleting requires special
    // (sysops) permissions.
    $placeholder = '.';
    $wiki->edit(self::TEST_TITLE, $placeholder);
    $clearedWikitext = $wiki->getLatestRevisionWikitext(self::TEST_TITLE);
    $this->assertEqual($placeholder, $clearedWikitext, 'Clearing the test page did not work');

    // Save the test wikitext, then read the latest revision back.
    $wiki->edit(self::TEST_TITLE, self::TEST_WIKITEXT);
    $savedWikitext = $wiki->getLatestRevisionWikitext(self::TEST_TITLE);
    $this->assertEqual(self::TEST_WIKITEXT, $savedWikitext, 'Editing the test page with test wikitext did not work ');

    // The latest revision's HTML must also match the expected HTML once
    // dynamic HTML comments have been removed.
    $renderedHtml = $this->_removeHtmlComments($wiki->getLatestRevisionHtml(self::TEST_TITLE));
    $this->assertEqual(self::TEST_EXPECTED_HTML, $renderedHtml, 'The test page HTML is invalid');
}
157 |
/**
 * Strip every HTML comment from the given markup.
 *
 * The tests use this to drop the dynamic HTML comments that MediaWiki
 * adds to preview and page HTML before comparing against the expected HTML.
 *
 * @param string $text Markup that may contain HTML comments.
 * @return string The markup with every "<!-- ... -->" sequence removed.
 */
private function _removeHtmlComments($text)
{
    // Must include the "s" modifier so "." matches new lines and multi-line
    // comments are removed. The lazy ".*?" stops at the first "-->" so that
    // text between two separate comments is kept.
    return preg_replace('/<!--.*?-->/s', '', $text);
}
163 | }
--------------------------------------------------------------------------------
/tests/scripto_test.php:
--------------------------------------------------------------------------------
1 | _testMediawikiUsername = TEST_MEDIAWIKI_USERNAME;
23 | $this->_testMediawikiPassword = TEST_MEDIAWIKI_PASSWORD;
24 | $this->_testDocumentId = TEST_DOCUMENT_ID;
25 |
26 | require_once TEST_ADAPTER_FILENAME;
27 | require_once 'Scripto/Service/MediaWiki.php';
28 | require_once 'Scripto.php';
29 |
30 | // Instantiate the Scripto object and set it.
31 | $testAdapterClassName = TEST_ADAPTER_CLASS_NAME;
32 | $this->_testScripto = new Scripto(
33 | new $testAdapterClassName,
34 | new Scripto_Service_MediaWiki(TEST_MEDIAWIKI_API_URL, false)
35 | );
36 | }
37 |
/**
 * documentExists() must return a boolean for the test document ID.
 */
public function testDocumentExists()
{
    $exists = $this->_testScripto->documentExists($this->_testDocumentId);
    $this->assertIsA($exists, 'bool');
}
42 |
/**
 * getDocument() must return a Scripto_Document for the test document ID.
 */
public function testGetDocument()
{
    $document = $this->_testScripto->getDocument($this->_testDocumentId);
    $this->assertIsA($document, 'Scripto_Document');
}
47 |
/**
 * Log in with the configured MediaWiki credentials and assert the session
 * is reported as logged in.
 *
 * Does nothing unless both a username and a password are configured.
 */
public function testLogin()
{
    $username = $this->_testMediawikiUsername;
    $password = $this->_testMediawikiPassword;

    if (!$username || !$password) {
        return;
    }

    $this->_testScripto->login($username, $password);
    $loggedIn = $this->_testScripto->isLoggedIn();
    $this->assertTrue($loggedIn);
}
55 |
/**
 * canExport() must return a boolean.
 */
public function testCanExport()
{
    $canExport = $this->_testScripto->canExport();
    $this->assertIsA($canExport, 'bool');
}
60 |
/**
 * canProtect() must return a boolean.
 */
public function testCanProtect()
{
    $canProtect = $this->_testScripto->canProtect();
    $this->assertIsA($canProtect, 'bool');
}
65 |
/**
 * getUserName() must return a string.
 */
public function testGetUserName()
{
    $userName = $this->_testScripto->getUserName();
    $this->assertIsA($userName, 'string');
}
70 |
/**
 * getUserDocumentPages() must return an array.
 */
public function testGetUserDocumentPages()
{
    $userDocumentPages = $this->_testScripto->getUserDocumentPages();
    $this->assertIsA($userDocumentPages, 'array');
}
75 |
/**
 * getRecentChanges() must return an array.
 */
public function testGetRecentChanges()
{
    $recentChanges = $this->_testScripto->getRecentChanges();
    $this->assertIsA($recentChanges, 'array');
}
80 |
/**
 * getWatchlist() must return an array.
 *
 * Only checked while logged in; otherwise the test makes no assertion.
 */
public function testGetWatchlist()
{
    if (!$this->_testScripto->isLoggedIn()) {
        return;
    }

    $watchlist = $this->_testScripto->getWatchlist();
    $this->assertIsA($watchlist, 'array');
}
87 |
/**
 * getAllDocuments() must return an array.
 */
public function testGetAllDocuments()
{
    $allDocuments = $this->_testScripto->getAllDocuments();
    $this->assertIsA($allDocuments, 'array');
}
92 |
/**
 * Log out and assert the session is no longer reported as logged in.
 *
 * Only runs while logged in; otherwise the test makes no assertion.
 */
public function testLogout()
{
    $scripto = $this->_testScripto;

    if (!$scripto->isLoggedIn()) {
        return;
    }

    $scripto->logout();
    $stillLoggedIn = $scripto->isLoggedIn();
    $this->assertFalse($stillLoggedIn);
}
100 | }
101 |
--------------------------------------------------------------------------------