├── .env.example
├── .gitignore
├── Makefile
├── README.md
├── app
    ├── console
    └── export_users
├── composer.json
├── composer.lock
├── data
    └── missed.yml
├── lib
    ├── loader.php
    └── mediawiki.php
├── reports
    ├── 1_nginx.map
    ├── 2_nginx_almost_same_casing.map
    ├── 3_nginx_almost_same_1.map
    ├── 3_nginx_almost_same_2.map
    ├── 4_nginx_redirects_spaces.map
    ├── directly_on_root.txt
    ├── hundred_revs.txt
    ├── location_spaghetti.txt
    ├── location_spaghetti_duplicated.txt
    ├── nginx_redirects.map
    ├── numbers.txt
    ├── redirects.txt
    ├── redirects_sanity.txt
    ├── summary.yml
    ├── summary_meta.yml
    ├── summary_wpd.yml
    ├── translations.txt
    ├── url_all.txt
    ├── url_parts.txt
    └── url_parts_variants.txt
└── src
    └── WebPlatform
        └── Importer
            ├── Commands
                ├── AbstractImporterCommand.php
                ├── CacheWarmerCommand.php
                ├── RefreshPagesCommand.php
                ├── RunCommand.php
                └── SummaryCommand.php
            ├── Converter
                ├── HtmlToMarkdown.php
                └── MediaWikiToHtml.php
            ├── Filter
                └── TitleFilter.php
            ├── GitPhp
                ├── CommitCommandBuilder.php
                └── GitRepository.php
            ├── Helpers
                └── MediaWikiHelper.php
            └── Model
                ├── HtmlRevision.php
                ├── MarkdownRevision.php
                └── MediaWikiDocument.php


/.env.example:
--------------------------------------------------------------------------------
 1 | # Set to your own, or use https://github.com/wikimedia/mediawiki-vagrant
 2 | MEDIAWIKI_API_ORIGIN="https://docs.webplatform.org"
 3 | COMMITER_ANONYMOUS_DOMAIN="docs.webplatform.org"
 4 | 
 5 | # Make sure this fits with your own MediaWiki instance
 6 | MEDIAWIKI_USERID="10080"
 7 | MEDIAWIKI_USERNAME="Renoirb"
 8 | MEDIAWIKI_WIKINAME="wpwiki"
 9 | 
10 | # Your session cookie value, according to example values shown here, the cookie
11 | # value to seek for is "...; wpwiki_session=foo; ...;", and set like this;
12 | MEDIAWIKI_SESSION="foo"
13 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | data/
 2 | vendor/
 3 | phpunit.xml
 4 | bin/
 5 | src/WebPlatform/ContentConverter/
 6 | out/
 7 | *.sublime*
 8 | errors/
 9 | .env
10 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL := bash
 2 | 
 3 | dump:
 4 | 	@if [[ -f ../mediawiki/maintenance/dumpBackup.php ]]; then\
 5 | 		php ../mediawiki/maintenance/dumpBackup.php --full --filter=namespace:0,108 > data/dumps/main_full.xml;\
 6 | 		php ../mediawiki/maintenance/dumpBackup.php --full --filter=namespace:3000 > data/dumps/wpd_full.xml;\
 7 | 		php ../mediawiki/maintenance/dumpBackup.php --current --filter=namespace:0 > data/dumps/main.xml;\
 8 | 		php ../mediawiki/maintenance/dumpBackup.php --current --filter=namespace:3000 > data/dumps/wpd.xml;\
 9 | 		php ../mediawiki/maintenance/dumpBackup.php --current --filter=namespace:4 > data/dumps/project.xml;\
10 | 		php ../mediawiki/maintenance/dumpBackup.php --current --filter=namespace:2,200 > data/dumps/user.xml;\
11 | 		php ../mediawiki/maintenance/dumpBackup.php --current --filter=namespace:3020 > data/dumps/meta.xml;\
12 | 		app/export_users > data/users.json;\
13 | 	fi
14 | 


--------------------------------------------------------------------------------
/app/console:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env php
 2 | <?php
 3 | 
 4 | /**
 5 |  * WebPlatform MediaWiki Conversion.
 6 |  **/
 7 | 
 8 | require_once __DIR__ . '/../vendor/autoload.php';
 9 | 
10 | use Symfony\Component\Console\Application;
11 | 
12 | $console = new Application();
13 | require_once __DIR__ . '/../lib/loader.php';
14 | $console->run();
15 | 


--------------------------------------------------------------------------------
/app/export_users:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env php
 2 | <?php
 3 | 
 4 | /**
 5 |  * WebPlatform Content Converter.
 6 |  *
 7 |  * export_users
 8 |  *
 9 |  * Helper script, make sure you run this script in the same directory
10 |  * you run your MediaWiki installation.
11 |  *
12 |  * To use it, you have to use a terminal and send the script output
13 |  * into a file name so you can use it with this project.
14 |  *
15 |  * Use:
16 |  *
17 |  *   - Copy contents of this file in your mediawiki/ folder, where you can see the "maintenance/" folder
18 |  *   - In a terminal, on a machine that has PHP cli installed, execute;
19 |  *
20 |  *         php app/export_users > data/users.json
21 |  *
22 |  * @author Renoir Boulanger <hello@renoirboulanger.com>
23 |  */
24 | 
25 | /**
26 |  * You can also adjust the path yourself.
27 |  */
28 | $cli = realpath(__DIR__ . '/../../mediawiki/maintenance/commandLine.inc');
29 | 
30 | if (!file_exists($cli)) {
31 |     throw new \Exception('Could not find MediaWiki code checkout in parent directory');
32 | }
33 | require $cli;
34 | 
35 | /**
36 |  * Export all user data into a big JSON string.
37 |  *
38 |  * Will be an array of objects, looking like this;
39 |  *
40 |  * {"1": {
41 |  *  "user_email":"public-webplatform@w3.org",
42 |  *  "user_id":"1",
43 |  *  "user_name":"WikiSysop",
44 |  *  "user_real_name":"",
45 |  *  "user_email_authenticated": null
46 |  * },
47 |  * "21": {
48 |  *  "user_email":"foo@example.org",
49 |  *  "user_id":"21",
50 |  *  "user_name":"Foo",
51 |  *  "user_real_name":"Foo Bar-Baz",
52 |  *  "user_email_authenticated": true
53 |  * }}
54 |  **/
55 | 
56 | // ref: https://www.mediawiki.org/wiki/Manual:Database_access
57 | $dbr = wfGetDB(DB_SLAVE);
58 | 
59 | // ref: https://www.mediawiki.org/wiki/Manual:User_table
60 | $id_list = $dbr->select('user', array('user_email', 'user_id','user_name','user_real_name','user_email_authenticated'));
61 | 
62 | $out = array();
63 | foreach ($id_list as $user_data) {
64 |     $data = (array) $user_data;
65 |     if (!in_array($data['user_id'], $out)) {
66 |         $out[$data['user_id']] = $data;
67 |     } else {
68 |         throw new Exception('Duplicate email adress found!');
69 |     }
70 | }
71 | 
72 | echo json_encode($out);
73 | 


--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "name": "webplatform/mediawiki-conversion",
 3 |     "description": "Convert MediaWiki XML backup into structured raw text file tree",
 4 |     "type": "project",
 5 |     "keywords": ["mediawiki","backupDump","exporter","migration"],
 6 |     "license": "MIT",
 7 |     "require": {
 8 |         "webplatform/content-converter": "~1.2",
 9 |         "prewk/xml-string-streamer": "^0.7.1",
10 |         "ryakad/pandoc-php": "~1.0",
11 |         "glicer/simply-html": "~1.0",
12 |         "symfony/filesystem": "~2.7",
13 |         "vlucas/phpdotenv": "~2.0",
14 |         "symfony/console": "~2.7",
15 |         "bit3/git-php": "~1.0"
16 |     },
17 |     "require-dev": {
18 |         "doctrine/annotations": "~1.2",
19 |         "fabpot/php-cs-fixer": "^1.9",
20 |         "phpunit/phpunit": "~4.7"
21 |     },
22 |     "authors": [
23 |         {
24 |             "name": "Renoir Boulanger",
25 |             "email": "hello@renoirboulanger.com"
26 |         }
27 |     ],
28 |     "repositories": [
29 |         {
30 |             "type": "git",
31 |             "url": "https://github.com/webplatform/content-converter.git"
32 |         },
33 |         {
34 |             "type": "git",
35 |             "url": "https://github.com/webplatform/mediawiki-conversion.git"
36 |         }
37 |     ],
38 |     "autoload": {
39 |         "psr-0": {
40 |             "WebPlatform\\Importer\\": "src/",
41 |             "WebPlatform\\ContentConverter\\": "src/WebPlatform/ContentConverter/lib/"
42 |         },
43 |         "files": [
44 |             "lib/mediawiki.php"
45 |         ]
46 |     },
47 |     "config": {
48 |         "bin-dir": "bin"
49 |     }
50 | }
51 | 


--------------------------------------------------------------------------------
/data/missed.yml:
--------------------------------------------------------------------------------
 1 | #
 2 | # missed entries to retry
 3 | #
 4 | # To use, use --missed argument (e.g. `app/console mediawiki:run 3 --missed`)
 5 | #
 6 | # Each entry is relative to the out/ directory. Its assumed that what is not in
 7 | # MediaWiki’s main content namespace (e.g. Meta:Main_Page) will be in a separate git
 8 | # repository but will end up accessible as a sub folder (e.g. Meta/Main_Page).
 9 | #
10 | # https://docs.webplatform.org/w/index.php?action=edit&title=WPD:Getting_Started/examples
11 | #
12 | missed:
13 |   - css/fr
14 |   - Beginners/ja
15 |   - javascript/RegExp
16 |   - Main_Page/zh-hant
17 |   - concepts/es
18 |   - concepts/accessibility/ja
19 |   - concepts/accessibility/es
20 |   - tutorials/using_css_background_images/ja
21 |   - Main_Page/chs
22 |   - svg/attributes/clip-rule
23 |   - css/cssom/CSSImportRule
24 |   - css/cssom/properties
25 |   - css/properties/background-position
26 |   - html/elements/a
27 |   - apis/indexeddb/IDBKeyRange/upperBound
28 |   - tutorials/content-security-policy
29 |   - css/properties/background-position
30 |   - tutorials/mobile_mobifying
31 |   - html/elements/table
32 |   - Beginners/the_beginning
33 |   - glossary/main
34 |   - en
35 |   - css
36 |   - tutorials/css_transitions
37 |   - css/properties/border-image-outset
38 |   - css/cssom/properties/pixelWidth
39 |   - concepts/accessibility
40 |   - dom/DOMTokenList/length
41 |   - dom/DomTokenList/item
42 |   - dom/DataTransfer/files
43 |   - apis/appcache/ApplicationCache/status
44 |   - tutorials/table_styling_basics
45 |   - apis/audio-video/TimeRanges/start
46 |   - dom/HTMLCanvasElement
47 |   - dom/HTMLDataElement
48 |   - dom/HTMLMediaElement
49 |   - dom/HTMLTrackElement
50 |   - tutorials/eventsource_basics
51 |   - tutorials/styling_xml_with_css
52 |   - tutorials/debugging_css
53 |   - tutorials/javascript_statements
54 |   - concepts/programming/about_javascript
55 |   - Accessibility_basics
56 |   - Meta/HTML/Elements/spacer
57 |   - Meta/Editors_Guide
58 |   - Meta/Editors_Guide/content
59 |   - Meta/svg_test
60 |   - Meta/web_platform_wednesday/past_reports
61 |   - WPD/Community/Meetings/General/Earlier
62 |   - WPD/Community/Task_Force
63 |   - WPD/Getting_Started/examples
64 |   - apis/appcache/ApplicationCache
65 |   - concepts/Internet_and_Web/mime_types
66 |   - css/properties/border-radius
67 |   - css/properties/font-size
68 |   - css/selectors/pseudo-classes/not
69 |   - dom/NamedNodeMap/removeNamedItem
70 |   - dom/HTMLLabelElement
71 |   - css/media_queries/media_groups/visual
72 |   - dom/interface
73 |   - tutorials/forms_html5forms
74 |   - guides/html_forms_basics
75 |   - guides/html_links
76 |   - html/attributes/cellPadding
77 |   - html/elements
78 |   - html/elements/input/type/file
79 |   - html/tutorials
80 |   - svg/properties/cx_SVGRadialGradientElement
81 |   - svg/tutorials/smarter_svg_filters
82 |   - tutorials/html5_form_features
83 | 


--------------------------------------------------------------------------------
/lib/loader.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | /**
 4 |  * WebPlatform MediaWiki Conversion.
 5 |  **/
 6 | 
 7 | use WebPlatform\Importer\Commands\RefreshPagesCommand;
 8 | use WebPlatform\Importer\Commands\CacheWarmerCommand;
 9 | use WebPlatform\Importer\Commands\SummaryCommand;
10 | use WebPlatform\Importer\Commands\RunCommand;
11 | use Symfony\Component\Console\Application;
12 | use Dotenv\Dotenv;
13 | 
14 | $dotenv = new Dotenv(BASE_DIR);
15 | $dotenv->load();
16 | $dotenv->required(['MEDIAWIKI_API_ORIGIN', 'COMMITER_ANONYMOUS_DOMAIN']);
17 | 
18 | /**
19 |  * Poor man project loader so we dont need
20 |  * config files for such a small project
21 |  **/
22 | 
23 | if ($console instanceof Application) {
24 | 
25 |     // Load all commands here directly
26 |     $console->add(new RefreshPagesCommand());
27 |     $console->add(new CacheWarmerCommand());
28 |     $console->add(new SummaryCommand());
29 |     $console->add(new RunCommand());
30 | 
31 | } else {
32 |     throw new \Exception('Did you require lib/loader.php AFTER bootstrapping the application?');
33 | }
34 | 


--------------------------------------------------------------------------------
/lib/mediawiki.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | /**
 4 |  * WebPlatform MediaWiki Conversion.
 5 |  *
 6 |  * Autoloaded by composer
 7 |  **/
 8 | 
 9 | // set to run indefinitely if needed
10 | set_time_limit(0);
11 | ini_set('memory_limit', '4024M');
12 | 
13 | // Because we never know if the shell environment we
14 | // run the importer will have LC_ set to an UTF-8
15 | // friendly encoding.
16 | mb_internal_encoding("UTF-8");
17 | 
18 | /* Optional. It’s better to do it in the php.ini file */
19 | date_default_timezone_set('America/Montreal');
20 | 
21 | $_SERVER['REMOTE_ADDR']='127.0.0.1';
22 | 
23 | $wd = realpath(__DIR__ . '/..');
24 | define('BASE_DIR', $wd);
25 | define('DATA_DIR', $wd.'/data');
26 | define('GIT_OUTPUT_DIR', $wd.'/out');
27 | 


--------------------------------------------------------------------------------
/reports/1_nginx.map:
--------------------------------------------------------------------------------
  1 | rewrite (?i)^/css/atrules/@charset$ /css/atrules/charset break;
  2 | rewrite (?i)^/css/atrules/@font-face$ /css/atrules/font-face break;
  3 | rewrite (?i)^/css/atrules/@import$ /css/atrules/import break;
  4 | rewrite (?i)^/css/atrules/@keyframes$ /css/atrules/keyframes break;
  5 | rewrite (?i)^/css/atrules/@media$ /css/atrules/media break;
  6 | rewrite (?i)^/css/atrules/@namespace$ /css/atrules/namespace break;
  7 | rewrite (?i)^/css/atrules/@page$ /css/atrules/page break;
  8 | rewrite (?i)^/css/atrules/@region$ /css/atrules/region break;
  9 | rewrite (?i)^/css/atrules/@supports$ /css/atrules/supports break;
 10 | rewrite (?i)^/css/atrules/@viewport$ /css/atrules/viewport break;
 11 | rewrite (?i)^/css/functions/matrix\(\)$ /css/functions/matrix break;
 12 | rewrite (?i)^/css/functions/rotate\(\)$ /css/functions/rotate break;
 13 | rewrite (?i)^/css/functions/rotate3d\(\)$ /css/functions/rotate3d break;
 14 | rewrite (?i)^/css/functions/rotateX\(\)$ /css/functions/rotateX break;
 15 | rewrite (?i)^/css/functions/rotateY\(\)$ /css/functions/rotateY break;
 16 | rewrite (?i)^/css/functions/rotateZ\(\)$ /css/functions/rotateZ break;
 17 | rewrite (?i)^/css/functions/scale\(\)$ /css/functions/scale break;
 18 | rewrite (?i)^/css/functions/scale3d\(\)$ /css/functions/scale3d break;
 19 | rewrite (?i)^/css/functions/scaleX\(\)$ /css/functions/scaleX break;
 20 | rewrite (?i)^/css/functions/scaleY\(\)$ /css/functions/scaleY break;
 21 | rewrite (?i)^/css/functions/scaleZ\(\)$ /css/functions/scaleZ break;
 22 | rewrite (?i)^/css/functions/skew\(\)$ /css/functions/skew break;
 23 | rewrite (?i)^/css/functions/skewX\(\)$ /css/functions/skewX break;
 24 | rewrite (?i)^/css/functions/skewY\(\)$ /css/functions/skewY break;
 25 | rewrite (?i)^/css/functions/translate\(\)$ /css/functions/translate break;
 26 | rewrite (?i)^/css/functions/translate3d\(\)$ /css/functions/translate3d break;
 27 | rewrite (?i)^/css/functions/translateX\(\)$ /css/functions/translateX break;
 28 | rewrite (?i)^/css/functions/translateY\(\)$ /css/functions/translateY break;
 29 | rewrite (?i)^/css/functions/translateZ\(\)$ /css/functions/translateZ break;
 30 | rewrite (?i)^/css/functions/url\(\)$ /css/functions/url break;
 31 | rewrite (?i)^/css/selectors/pseudo-classes/\:-ms-input-placeholder$ /css/selectors/pseudo-classes/-ms-input-placeholder break;
 32 | rewrite (?i)^/css/selectors/pseudo-classes/\:active$ /css/selectors/pseudo-classes/active break;
 33 | rewrite (?i)^/css/selectors/pseudo-classes/\:checked$ /css/selectors/pseudo-classes/checked break;
 34 | rewrite (?i)^/css/selectors/pseudo-classes/\:disabled$ /css/selectors/pseudo-classes/disabled break;
 35 | rewrite (?i)^/css/selectors/pseudo-classes/\:empty$ /css/selectors/pseudo-classes/empty break;
 36 | rewrite (?i)^/css/selectors/pseudo-classes/\:enabled$ /css/selectors/pseudo-classes/enabled break;
 37 | rewrite (?i)^/css/selectors/pseudo-classes/\:first-child$ /css/selectors/pseudo-classes/first-child break;
 38 | rewrite (?i)^/css/selectors/pseudo-classes/\:first-of-type$ /css/selectors/pseudo-classes/first-of-type break;
 39 | rewrite (?i)^/css/selectors/pseudo-classes/\:focus$ /css/selectors/pseudo-classes/focus break;
 40 | rewrite (?i)^/css/selectors/pseudo-classes/\:hover$ /css/selectors/pseudo-classes/hover break;
 41 | rewrite (?i)^/css/selectors/pseudo-classes/\:in-range$ /css/selectors/pseudo-classes/in-range break;
 42 | rewrite (?i)^/css/selectors/pseudo-classes/\:indeterminate$ /css/selectors/pseudo-classes/indeterminate break;
 43 | rewrite (?i)^/css/selectors/pseudo-classes/\:invalid$ /css/selectors/pseudo-classes/invalid break;
 44 | rewrite (?i)^/css/selectors/pseudo-classes/\:lang\(c\)$ /css/selectors/pseudo-classes/lang break;
 45 | rewrite (?i)^/css/selectors/pseudo-classes/\:last-child$ /css/selectors/pseudo-classes/last-child break;
 46 | rewrite (?i)^/css/selectors/pseudo-classes/\:last-of-type$ /css/selectors/pseudo-classes/last-of-type break;
 47 | rewrite (?i)^/css/selectors/pseudo-classes/\:link$ /css/selectors/pseudo-classes/link break;
 48 | rewrite (?i)^/css/selectors/pseudo-classes/\:not$ /css/selectors/pseudo-classes/not break;
 49 | rewrite (?i)^/css/selectors/pseudo-classes/\:nth-child\(n\)$ /css/selectors/pseudo-classes/nth-child break;
 50 | rewrite (?i)^/css/selectors/pseudo-classes/\:nth-last-child\(n\)$ /css/selectors/pseudo-classes/nth-last-child break;
 51 | rewrite (?i)^/css/selectors/pseudo-classes/\:nth-last-of-type\(n\)$ /css/selectors/pseudo-classes/nth-last-of-type break;
 52 | rewrite (?i)^/css/selectors/pseudo-classes/\:nth-of-type\(n\)$ /css/selectors/pseudo-classes/nth-of-type break;
 53 | rewrite (?i)^/css/selectors/pseudo-classes/\:only-child$ /css/selectors/pseudo-classes/only-child break;
 54 | rewrite (?i)^/css/selectors/pseudo-classes/\:only-of-type$ /css/selectors/pseudo-classes/only-of-type break;
 55 | rewrite (?i)^/css/selectors/pseudo-classes/\:optional$ /css/selectors/pseudo-classes/optional break;
 56 | rewrite (?i)^/css/selectors/pseudo-classes/\:required$ /css/selectors/pseudo-classes/required break;
 57 | rewrite (?i)^/css/selectors/pseudo-classes/\:root$ /css/selectors/pseudo-classes/root break;
 58 | rewrite (?i)^/css/selectors/pseudo-classes/\:target$ /css/selectors/pseudo-classes/target break;
 59 | rewrite (?i)^/css/selectors/pseudo-classes/\:valid$ /css/selectors/pseudo-classes/valid break;
 60 | rewrite (?i)^/css/selectors/pseudo-classes/\:visited$ /css/selectors/pseudo-classes/visited break;
 61 | rewrite (?i)^/css/selectors/pseudo-elements/\:\:after$ /css/selectors/pseudo-elements/after break;
 62 | rewrite (?i)^/css/selectors/pseudo-elements/\:\:before$ /css/selectors/pseudo-elements/before break;
 63 | rewrite (?i)^/css/selectors/pseudo-elements/\:\:first-letter$ /css/selectors/pseudo-elements/first-letter break;
 64 | rewrite (?i)^/css/selectors/pseudo-elements/\:\:first-line$ /css/selectors/pseudo-elements/first-line break;
 65 | rewrite (?i)^/css/selectors/pseudo-elements/\:\:region$ /css/selectors/pseudo-elements/region break;
 66 | rewrite (?i)^/css/selectors/pseudo-elements/\:\:selection$ /css/selectors/pseudo-elements/selection break;
 67 | rewrite (?i)^/css/syntax/!important$ /css/syntax/important break;
 68 | rewrite (?i)^/html/attributes/max\(HTMLProgressElement\)$ /html/attributes/maxHTMLProgressElement break;
 69 | rewrite (?i)^/html/elements/!DOCTYPE$ /html/elements/DOCTYPE break;
 70 | rewrite (?i)^/html/elements/!DOCTYPE/ja$ /html/elements/DOCTYPE/ja break;
 71 | rewrite (?i)^/Accept$ /http/headers/Accept break;
 72 | rewrite (?i)^/apis/css-regions/NamedFlow/regionlayoutupdate$ /apis/css-regions/NamedFlow/regionfragmentchange break;
 73 | rewrite (?i)^/apis/indexeddb/IDBCuror/update$ /apis/indexeddb/IDBCursor/update break;
 74 | rewrite (?i)^/apis/pointerevents/PointerEvent$ /dom/objects/PointerEvent break;
 75 | rewrite (?i)^/apis/timing/methods/requestAnimationFrame$ /dom/Window/requestAnimationFrame break;
 76 | rewrite (?i)^/apis/timing/properties/domContentLoadedEventEnd$ /apis/navigation_timing/PerformanceTiming/domContentLoadedEventEnd break;
 77 | rewrite (?i)^/apis/webrtc/objects/MediaStream/properties/videoTracks$ /apis/webrtc/MediaStream/videoTracks break;
 78 | rewrite (?i)^/apis/webrtc/objects/MediaStreamTrack$ /apis/webrtc/MediaStreamTrack break;
 79 | rewrite (?i)^/apis/webrtc/objects/MediaStreamTrack/events/ended$ /apis/webrtc/MediaStreamTrack/ended break;
 80 | rewrite (?i)^/apis/webrtc/objects/MediaStreamTrack/events/muted$ /apis/webrtc/MediaStreamTrack/muted break;
 81 | rewrite (?i)^/apis/webrtc/objects/MediaStreamTrack/events/unmuted$ /apis/webrtc/MediaStreamTrack/unmuted break;
 82 | rewrite (?i)^/apis/webrtc/objects/MediaStreamTrack/properties/enabled$ /apis/webrtc/MediaStreamTrack/enabled break;
 83 | rewrite (?i)^/apis/webrtc/objects/MediaStreamTrack/properties/kind$ /apis/webrtc/MediaStreamTrack/kind break;
 84 | rewrite (?i)^/apis/webrtc/objects/MediaStreamTrack/properties/label$ /apis/webrtc/MediaStreamTrack/label break;
 85 | rewrite (?i)^/apis/webrtc/objects/MediaStreamTrack/properties/onended$ /apis/webrtc/MediaStreamTrack/onended break;
 86 | rewrite (?i)^/apis/webrtc/objects/MediaStreamTrack/properties/onmute$ /apis/webrtc/MediaStreamTrack/onmute break;
 87 | rewrite (?i)^/apis/webrtc/objects/MediaStreamTrack/properties/readyState$ /apis/webrtc/MediaStreamTrack/readyState break;
 88 | rewrite (?i)^/apis/webrtc/objects/MediaStreamTrackList$ /apis/webrtc/MediaStreamTrackList break;
 89 | rewrite (?i)^/apis/webrtc/objects/MediaStreamTrackList/events/addtrack$ /apis/webrtc/MediaStreamTrackList/addtrack break;
 90 | rewrite (?i)^/apis/webrtc/objects/MediaStreamTrackList/events/removetrack$ /apis/webrtc/MediaStreamTrackList/removetrack break;
 91 | rewrite (?i)^/apis/webrtc/objects/MediaStreamTrackList/methods/add$ /apis/webrtc/MediaStreamTrackList/add break;
 92 | rewrite (?i)^/apis/webrtc/objects/MediaStreamTrackList/methods/item$ /apis/webrtc/MediaStreamTrackList/item break;
 93 | rewrite (?i)^/apis/webrtc/objects/MediaStreamTrackList/methods/remove$ /apis/webrtc/MediaStreamTrackList/remove break;
 94 | rewrite (?i)^/apis/webrtc/objects/MediaStreamTrackList/properties/length$ /apis/webrtc/MediaStreamTrackList/length break;
 95 | rewrite (?i)^/apis/webrtc/objects/MediaStreamTrackList/properties/onaddtrack$ /apis/webrtc/MediaStreamTrackList/onaddtrack break;
 96 | rewrite (?i)^/apis/webrtc/objects/MediaStreamTrackList/properties/onremovetrack$ /apis/webrtc/MediaStreamTrackList/onremovetrack break;
 97 | rewrite (?i)^/apis/websockets/CloseEvent$ /apis/websocket/CloseEvent break;
 98 | rewrite (?i)^/apis/websockets/CloseEvent/code$ /apis/websocket/CloseEvent/code break;
 99 | rewrite (?i)^/apis/websockets/CloseEvent/reason$ /apis/websocket/CloseEvent/reason break;
100 | rewrite (?i)^/apis/websockets/CloseEvent/wasClean$ /apis/websocket/CloseEvent/wasClean break;
101 | rewrite (?i)^/apis/websockets/MessageEvent$ /apis/websocket/MessageEvent break;
102 | rewrite (?i)^/apis/websockets/MessageEvent/data$ /apis/websocket/MessageEvent/data break;
103 | rewrite (?i)^/A(\ |_)Quick(\ |_)Start(\ |_)to(\ |_)getting(\ |_)started(\ |_)contributing(\ |_)to(\ |_)Web(\ |_)Platform.org$ /WPD/Quick_Start break;
104 | rewrite (?i)^/bdi$ /html/elements/bdi break;
105 | rewrite (?i)^/border-image$ /css/properties/border-image break;
106 | rewrite (?i)^/canvas$ /html/elements/canvas break;
107 | rewrite (?i)^/concepts/localisation$ /concepts/internationalization break;
108 | rewrite (?i)^/concepts/PointerEvents$ /concepts/Pointer_Events break;
109 | rewrite (?i)^/concepts/proprietary-ie-techniques$ /concepts/proprietary_internet_explorer_techniques break;
110 | rewrite (?i)^/cross-fade$ /css/functions/cross-fade break;
111 | rewrite (?i)^/css/atrules/-ms-viewport$ /css/atrules/viewport break;
112 | rewrite (?i)^/css/cross-fade$ /css/functions/cross-fade break;
113 | rewrite (?i)^/css/cssom/CSSRegionStyleRule$ /OLDcss/cssom/CSSRegionStyleRule break;
114 | rewrite (?i)^/css/cssom/CSSRule/CSSRule$ /css/cssom/CSSRule break;
115 | rewrite (?i)^/css/cssom/methods/removeImport$ /css/cssom/styleSheet/removeImport break;
116 | rewrite (?i)^/css/cssom/methods/removeProperty$ /css/cssom/CSSStyleDeclaration/removeProperty break;
117 | rewrite (?i)^/css/cssom/methods/removeRule$ /css/cssom/styleSheet/removeRule break;
118 | rewrite (?i)^/css/cssom/methods/setProperty$ /css/cssom/CSSStyleDeclaration/setProperty break;
119 | rewrite (?i)^/css/cssom/NamedFlow$ /apis/css-regions/NamedFlow break;
120 | rewrite (?i)^/css/cssom/NamedFlow/firstEmptyRegionIndex$ /apis/css-regions/NamedFlow/firstEmptyRegionIndex break;
121 | rewrite (?i)^/css/cssom/NamedFlow/getContent$ /apis/css-regions/NamedFlow/getContent break;
122 | rewrite (?i)^/css/cssom/NamedFlow/getNamedFlows$ /apis/css-regions/NamedFlow/getNamedFlows break;
123 | rewrite (?i)^/css/cssom/NamedFlow/getRegions$ /apis/css-regions/NamedFlow/getRegions break;
124 | rewrite (?i)^/css/cssom/NamedFlow/getRegionsByContent$ /apis/css-regions/NamedFlow/getRegionsByContent break;
125 | rewrite (?i)^/css/cssom/NamedFlow/name$ /apis/css-regions/NamedFlow/name break;
126 | rewrite (?i)^/css/cssom/NamedFlow/overset$ /apis/css-regions/NamedFlow/overset break;
127 | rewrite (?i)^/css/cssom/NamedFlow/regionlayoutupdate$ /apis/css-regions/NamedFlow/regionlayoutupdate break;
128 | rewrite (?i)^/css/cssom/Region$ /apis/css-regions/Region break;
129 | rewrite (?i)^/css/cssom/Region/getComputedRegionStyle$ /apis/css-regions/Region/getComputedRegionStyle break;
130 | rewrite (?i)^/css/cssom/Region/getRegionFlowRanges$ /apis/css-regions/Region/getRegionFlowRanges break;
131 | rewrite (?i)^/css/cssom/Region/regionOverset$ /apis/css-regions/Region/regionOverset break;
132 | rewrite (?i)^/css/flexbox$ /css/properties/flex break;
133 | rewrite (?i)^/css/functions/transition-timing-function$ /css/properties/transition-timing-function break;
134 | rewrite (?i)^/css/linear-gradient$ /css/functions/linear-gradient break;
135 | rewrite (?i)^/css/properties/-ms-block-progression$ /css/properties/block-progression break;
136 | rewrite (?i)^/css/properties/-ms-box-align$ /css/properties/box-align break;
137 | rewrite (?i)^/css/properties/-ms-box-direction$ /css/properties/box-direction break;
138 | rewrite (?i)^/css/properties/-ms-box-flex$ /css/properties/box-flex break;
139 | rewrite (?i)^/css/properties/-ms-box-line-progression$ /css/properties/box-line-progression break;
140 | rewrite (?i)^/css/properties/-ms-box-lines$ /css/properties/box-lines break;
141 | rewrite (?i)^/css/properties/-ms-box-ordinal-group$ /css/properties/box-ordinal-group break;
142 | rewrite (?i)^/css/properties/-ms-box-pack$ /css/properties/box-pack break;
143 | rewrite (?i)^/css/properties/-ms-content-zoom-factor$ /css/properties/msContentZoomFactor break;
144 | rewrite (?i)^/css/properties/-ms-flex-align$ /css/properties/flex-align break;
145 | rewrite (?i)^/css/properties/-ms-flex-item-align$ /css/properties/flex-item-align break;
146 | rewrite (?i)^/css/properties/-ms-flex-line-pack$ /css/properties/flex-line-pack break;
147 | rewrite (?i)^/css/properties/-ms-flex-order$ /css/properties/flex-order break;
148 | rewrite (?i)^/css/properties/-ms-flex-pack$ /css/properties/justify-content break;
149 | rewrite (?i)^/css/properties/-ms-linear-gradient$ /css/functions/linear-gradient break;
150 | rewrite (?i)^/css/properties/animation/animation$ /css/properties/animation break;
151 | rewrite (?i)^/css/properties/box-align$ /css/properties/align-items break;
152 | rewrite (?i)^/css/properties/flex-align$ /css/properties/align-items break;
153 | rewrite (?i)^/css/properties/flex-item-align$ /css/properties/align-self break;
154 | rewrite (?i)^/css/properties/flex-line-pack$ /css/properties/align-content break;
155 | rewrite (?i)^/css/properties/flex-order$ /css/properties/order break;
156 | rewrite (?i)^/css/properties/flex-pack$ /css/properties/justify-content break;
157 | rewrite (?i)^/css/properties/foo$ /css/properties/grid-row break;
158 | rewrite (?i)^/css/properties/mask-border-image$ /css/properties/mask-border break;
159 | rewrite (?i)^/css/properties/mask-box-image$ /css/properties/mask-border-image break;
160 | rewrite (?i)^/css/properties/mask-box-image-outset$ /css/properties/mask-border-outset break;
161 | rewrite (?i)^/css/properties/mask-box-image-repeat$ /css/properties/mask-border-repeat break;
162 | rewrite (?i)^/css/properties/mask-box-image-slice$ /css/properties/mask-border-slice break;
163 | rewrite (?i)^/css/properties/mask-box-image-source$ /css/properties/mask-border-source break;
164 | rewrite (?i)^/css/properties/mask-box-image-width$ /css/properties/mask-border-width break;
165 | rewrite (?i)^/css/properties/mask-source-type$ /css/properties/mask-mode break;
166 | rewrite (?i)^/css/properties/ms-block-progression$ /css/properties/-ms-block-progression break;
167 | rewrite (?i)^/css/properties/ms-box-align$ /css/properties/-ms-box-align break;
168 | rewrite (?i)^/css/properties/ms-box-direction$ /css/properties/-ms-box-direction break;
169 | rewrite (?i)^/css/properties/ms-box-flex$ /css/properties/-ms-box-flex break;
170 | rewrite (?i)^/css/properties/ms-box-line-progression$ /css/properties/-ms-box-line-progression break;
171 | rewrite (?i)^/css/properties/ms-box-lines$ /css/properties/-ms-box-lines break;
172 | rewrite (?i)^/css/properties/ms-box-ordinal-group$ /css/properties/-ms-box-ordinal-group break;
173 | rewrite (?i)^/css/properties/ms-box-orient$ /css/properties/-ms-box-orient break;
174 | rewrite (?i)^/css/properties/ms-box-pack$ /css/properties/-ms-box-pack break;
175 | rewrite (?i)^/css/properties/ms-content-zoom-chaining$ /css/properties/-ms-content-zoom-chaining break;
176 | rewrite (?i)^/css/properties/ms-content-zoom-factor$ /css/properties/msContentZoomFactor break;
177 | rewrite (?i)^/css/properties/ms-content-zoom-limit$ /css/properties/-ms-content-zoom-limit break;
178 | rewrite (?i)^/css/properties/ms-content-zoom-limit-max$ /css/properties/-ms-content-zoom-limit-max break;
179 | rewrite (?i)^/css/properties/ms-content-zoom-limit-min$ /css/properties/-ms-content-zoom-limit-min break;
180 | rewrite (?i)^/css/properties/ms-content-zoom-snap$ /css/properties/-ms-content-zoom-snap break;
181 | rewrite (?i)^/css/properties/ms-content-zoom-snap-points$ /css/properties/-ms-content-zoom-snap-points break;
182 | rewrite (?i)^/css/properties/ms-content-zoom-snap-type$ /css/properties/-ms-content-zoom-snap-type break;
183 | rewrite (?i)^/css/properties/ms-content-zooming$ /css/properties/-ms-content-zooming break;
184 | rewrite (?i)^/css/properties/ms-flex$ /css/properties/-ms-flex break;
185 | rewrite (?i)^/css/properties/ms-flex-align$ /css/properties/-ms-flex-align break;
186 | rewrite (?i)^/css/properties/ms-flex-direction$ /css/properties/-ms-flex-direction break;
187 | rewrite (?i)^/css/properties/ms-flex-flow$ /css/properties/-ms-flex-flow break;
188 | rewrite (?i)^/css/properties/ms-flex-item-align$ /css/properties/-ms-flex-item-align break;
189 | rewrite (?i)^/css/properties/ms-flex-line-pack$ /css/properties/-ms-flex-line-pack break;
190 | rewrite (?i)^/css/properties/ms-flex-order$ /css/properties/-ms-flex-order break;
191 | rewrite (?i)^/css/properties/ms-flex-pack$ /css/properties/-ms-flex-pack break;
192 | rewrite (?i)^/css/properties/ms-flex-wrap$ /css/properties/-ms-flex-wrap break;
193 | rewrite (?i)^/css/properties/ms-grid-column$ /css/properties/grid-column break;
194 | rewrite (?i)^/css/properties/ms-grid-column-span$ /css/properties/grid-column-span break;
195 | rewrite (?i)^/css/properties/ms-hyphenate-limit-lines$ /css/properties/hyphenate-limit-lines break;
196 | rewrite (?i)^/css/properties/ms-hyphens$ /css/properties/hyphens break;
197 | rewrite (?i)^/css/properties/ms-touch-action$ /css/properties/-ms-touch-action break;
198 | rewrite (?i)^/css/properties/region-break-after$ /css/properties/break-after break;
199 | rewrite (?i)^/css/properties/region-break-before$ /css/properties/break-before break;
200 | rewrite (?i)^/css/properties/region-break-inside$ /css/properties/break-inside break;
201 | rewrite (?i)^/css/properties/region-overflow$ /css/properties/region-fragment break;
202 | rewrite (?i)^/css/properties/region-verflow$ /css/properties/region-overflow break;
203 | rewrite (?i)^/css/radial-gradient$ /css/functions/radial-gradient break;
204 | rewrite (?i)^/css/repeating-linear-gradient$ /css/functions/repeating-linear-gradient break;
205 | rewrite (?i)^/css/repeating-radial-gradient$ /css/functions/repeating-radial-gradient break;
206 | rewrite (?i)^/css/selectors/-ms-scrollbar-shadow-color$ /css/properties/-ms-scrollbar-shadow-color break;
207 | rewrite (?i)^/css/selectors/border-image$ /css/properties/border-image break;
208 | rewrite (?i)^/css/selectors/cursor$ /css/properties/cursor break;
209 | rewrite (?i)^/css/selectors/outline$ /css/properties/outline break;
210 | rewrite (?i)^/css/selectors/outline-color$ /css/properties/outline-color break;
211 | rewrite (?i)^/css/selectors/outline-style$ /css/properties/outline-style break;
212 | rewrite (?i)^/css/selectors/outline-width$ /css/properties/outline-width break;
213 | rewrite (?i)^/css/selectors/user-select$ /css/properties/user-select break;
214 | rewrite (?i)^/css/selectors/zoom$ /css/properties/zoom break;
215 | rewrite (?i)^/css/text/word-spacing/word-spacing$ /css/properties/word-spacing break;
216 | rewrite (?i)^/css/Training$ /css/tutorials break;
217 | rewrite (?i)^/css/transforms$ /css/properties/transform break;
218 | rewrite (?i)^/css/transforms/transform$ /css/properties/transform break;
219 | rewrite (?i)^/css/tutorials/css-regions$ /tutorials/css-regions break;
220 | rewrite (?i)^/css/units/color$ /css/data_types/color break;
221 | rewrite (?i)^/css/units/text$ /css/data_types/text break;
222 | rewrite (?i)^/data$ /html/elements/data break;
223 | rewrite (?i)^/dom/events/DOMContentLoaded$ /dom/Event/DOMContentLoaded break;
224 | rewrite (?i)^/dom/events/load$ /dom/Element/load break;
225 | rewrite (?i)^/dom/images$ /dom/Image break;
226 | rewrite (?i)^/dom/object/PointerEvent/pointercancel$ /dom/objects/PointerEvent/pointercancel break;
227 | rewrite (?i)^/html/elements/applets$ /html/elements/applet break;
228 | rewrite (?i)^/html/elements/comment$ /html/elements/html_comment_data-type break;
229 | rewrite (?i)^/html/elements/dl-test$ /html/elements/dl break;
230 | rewrite (?i)^/http$ /concepts/protocols/http break;
231 | rewrite (?i)^/ImageCapture$ /apis/image_capture/ImageCapture break;
232 | rewrite (?i)^/js/objects/parseFloat$ /javascript/functions/parseFloat break;
233 | rewrite (?i)^/js/objects/parseInt$ /javascript/functions/parseInt break;
234 | rewrite (?i)^/keygen$ /html/elements/keygen break;
235 | rewrite (?i)^/mask-composite$ /css/properties/mask-composite break;
236 | rewrite (?i)^/rb$ /html/elements/rb break;
237 | rewrite (?i)^/rp$ /html/elements/rp break;
238 | rewrite (?i)^/rtc$ /html/elements/rtc break;
239 | rewrite (?i)^/svg/elements/feColorMix$ /svg/elements/feColorMatrix break;
240 | rewrite (?i)^/svg/elements/feFuncGelement$ /svg/elements/feFuncG break;
241 | rewrite (?i)^/svg/elements/patterrn$ /svg/elements/pattern break;
242 | rewrite (?i)^/tutorial/JavaScript-Statements$ /tutorials/javascript_statements break;
243 | rewrite (?i)^/tutorials/google-chrome-frame$ /tutorials/google_chrome_frame break;
244 | rewrite (?i)^/tutorials/JavaScript-About$ /tutorials/javascript_about break;
245 | rewrite (?i)^/tutorials/Mobile$ /tutorials/mobile_optimization_best_practices break;


--------------------------------------------------------------------------------
/reports/2_nginx_almost_same_casing.map:
--------------------------------------------------------------------------------
 1 | rewrite (?i)^/apis/indexedDB$ /apis/indexeddb break;
 2 | rewrite (?i)^/beginners$ /Beginners break;
 3 | rewrite (?i)^/beginners/advanced$ /Beginners/advanced break;
 4 | rewrite (?i)^/beginners/css$ /Beginners/css break;
 5 | rewrite (?i)^/beginners/es$ /Beginners/es break;
 6 | rewrite (?i)^/beginners/glossary$ /Beginners/glossary break;
 7 | rewrite (?i)^/beginners/html$ /Beginners/html break;
 8 | rewrite (?i)^/beginners/ja$ /Beginners/ja break;
 9 | rewrite (?i)^/beginners/javascript$ /Beginners/javascript break;
10 | rewrite (?i)^/beginners/ko$ /Beginners/ko break;
11 | rewrite (?i)^/beginners/planning$ /Beginners/planning break;
12 | rewrite (?i)^/beginners/programming$ /Beginners/programming break;
13 | rewrite (?i)^/beginners/pt-br$ /Beginners/pt-br break;
14 | rewrite (?i)^/beginners/tr$ /Beginners/tr break;
15 | rewrite (?i)^/beginners/zh$ /Beginners/zh break;
16 | rewrite (?i)^/concepts/accessibility/POUR$ /concepts/accessibility/pour break;
17 | rewrite (?i)^/concepts/internet(\ |_)and(\ |_)web/About(\ |_)proxy(\ |_)based(\ |_)browsers$ /concepts/Internet_and_Web/About_proxy_based_browsers break;
18 | rewrite (?i)^/concepts/programming/javascript/LiveConnect$ /concepts/programming/javascript/liveconnect break;
19 | rewrite (?i)^/css/Mediaqueries$ /css/mediaqueries break;
20 | rewrite (?i)^/css/Namespaces$ /css/namespaces break;
21 | rewrite (?i)^/css/Properties/color$ /css/properties/color break;
22 | rewrite (?i)^/css/Properties/opacity$ /css/properties/opacity break;
23 | rewrite (?i)^/css/selectors/Type$ /css/selectors/type break;
24 | rewrite (?i)^/html/attributes/dateTime$ /html/attributes/datetime break;
25 | rewrite (?i)^/html/attributes/longDesc$ /html/attributes/longdesc break;
26 | rewrite (?i)^/html/attributes/readOnly$ /html/attributes/readonly break;
27 | rewrite (?i)^/tutorials/Closures$ /tutorials/closures break;
28 | rewrite (?i)^/tutorials/Forms$ /tutorials/forms break;
29 | rewrite (?i)^/tutorials/Location$ /tutorials/location break;
30 | rewrite (?i)^/tutorials/Offline$ /tutorials/offline break;


--------------------------------------------------------------------------------
/reports/3_nginx_almost_same_1.map:
--------------------------------------------------------------------------------
 1 | # Most likely OK to ignore, but good enough to check if adresses here works
 2 | rewrite (?i)^/CSS/Selectors/pseudo-classes/\:target$ /CSS/Selectors/pseudo-classes/target break;
 3 | rewrite (?i)^/beginners/crash(\ |_)course$ /Beginners/crash_course break;
 4 | rewrite (?i)^/beginners/the(\ |_)beginning$ /Beginners/the_beginning break;
 5 | rewrite (?i)^/concepts/Color(\ |_)theory$ /concepts/color_theory break;
 6 | rewrite (?i)^/concepts/Design(\ |_)theory$ /concepts/design_theory break;
 7 | rewrite (?i)^/concepts/Information(\ |_)architecture$ /concepts/information_architecture break;
 8 | rewrite (?i)^/concepts/internet(\ |_)and(\ |_)web$ /concepts/Internet_and_Web break;
 9 | rewrite (?i)^/concepts/internet(\ |_)and(\ |_)web/About(\ |_)HTML5(\ |_)hybrid(\ |_)applications$ /concepts/Internet_and_Web/About_HTML5_hybrid_applications break;
10 | rewrite (?i)^/concepts/Internet(\ |_)and(\ |_)Web/About(\ |_)proxy(\ |_)based(\ |_)browsers$ /concepts/Internet_and_Web/proxy_based_browsers break;
11 | rewrite (?i)^/concepts/internet(\ |_)and(\ |_)web/how(\ |_)browsers(\ |_)work$ /concepts/Internet_and_Web/how_browsers_work break;
12 | rewrite (?i)^/concepts/Internet(\ |_)and(\ |_)web/how(\ |_)does(\ |_)the(\ |_)internet(\ |_)work$ /concepts/Internet_and_Web/How_does_the_Internet_Work break;
13 | rewrite (?i)^/concepts/internet(\ |_)and(\ |_)web/how(\ |_)does(\ |_)the(\ |_)internet(\ |_)work$ /concepts/Internet_and_Web/How_does_the_Internet_Work break;
14 | rewrite (?i)^/concepts/internet(\ |_)and(\ |_)web/How(\ |_)does(\ |_)the(\ |_)Internet(\ |_)Work$ /concepts/Internet_and_Web/How_does_the_Internet_Work break;
15 | rewrite (?i)^/concepts/internet(\ |_)and(\ |_)web/how(\ |_)does(\ |_)the(\ |_)internet(\ |_)work/es$ /concepts/Internet_and_Web/How_does_the_Internet_Work/es break;
16 | rewrite (?i)^/concepts/internet(\ |_)and(\ |_)web/how(\ |_)does(\ |_)the(\ |_)internet(\ |_)work/ja$ /concepts/Internet_and_Web/How_does_the_Internet_Work/ja break;
17 | rewrite (?i)^/concepts/internet(\ |_)and(\ |_)web/html5(\ |_)hybrid(\ |_)applications$ /concepts/Internet_and_Web/html5_hybrid_applications break;
18 | rewrite (?i)^/concepts/internet(\ |_)and(\ |_)web/html5(\ |_)hybrid(\ |_)applications/concepts/mobile(\ |_)tools$ /concepts/Internet_and_Web/html5_hybrid_applications/concepts/mobile_tools break;
19 | rewrite (?i)^/concepts/internet(\ |_)and(\ |_)web/mime(\ |_)types$ /concepts/Internet_and_Web/mime_types break;
20 | rewrite (?i)^/concepts/internet(\ |_)and(\ |_)web/OAuth$ /concepts/Internet_and_Web/OAuth break;
21 | rewrite (?i)^/concepts/internet(\ |_)and(\ |_)web/proxy(\ |_)based(\ |_)browsers$ /concepts/Internet_and_Web/proxy_based_browsers break;
22 | rewrite (?i)^/concepts/Internet(\ |_)and(\ |_)Web/the(\ |_)history(\ |_)of(\ |_)the(\ |_)web$ /concepts/Internet_and_Web/The_History_of_the_Web break;
23 | rewrite (?i)^/concepts/Internet(\ |_)and(\ |_)web/the(\ |_)history(\ |_)of(\ |_)the(\ |_)web$ /concepts/Internet_and_Web/The_History_of_the_Web break;
24 | rewrite (?i)^/concepts/internet(\ |_)and(\ |_)web/the(\ |_)history(\ |_)of(\ |_)the(\ |_)web$ /concepts/Internet_and_Web/The_History_of_the_Web break;
25 | rewrite (?i)^/concepts/internet(\ |_)and(\ |_)web/The(\ |_)History(\ |_)of(\ |_)the(\ |_)Web$ /concepts/Internet_and_Web/The_History_of_the_Web break;
26 | rewrite (?i)^/concepts/internet(\ |_)and(\ |_)web/the(\ |_)history(\ |_)of(\ |_)the(\ |_)web/es$ /concepts/Internet_and_Web/The_History_of_the_Web/es break;
27 | rewrite (?i)^/concepts/internet(\ |_)and(\ |_)web/the(\ |_)history(\ |_)of(\ |_)the(\ |_)web/ja$ /concepts/Internet_and_Web/The_History_of_the_Web/ja break;
28 | rewrite (?i)^/concepts/internet(\ |_)and(\ |_)web/the(\ |_)history(\ |_)of(\ |_)the(\ |_)web/tr$ /concepts/Internet_and_Web/The_History_of_the_Web/tr break;
29 | rewrite (?i)^/concepts/Internet(\ |_)and(\ |_)web/the(\ |_)web(\ |_)standards(\ |_)model$ /concepts/Internet_and_Web/the_web_standards_model break;
30 | rewrite (?i)^/concepts/Internet(\ |_)and(\ |_)Web/the(\ |_)web(\ |_)standards(\ |_)model$ /concepts/Internet_and_Web/The_Web_Standards_Model break;
31 | rewrite (?i)^/concepts/internet(\ |_)and(\ |_)web/The(\ |_)Web(\ |_)Standards(\ |_)Model$ /concepts/Internet_and_Web/The_Web_Standards_Model break;
32 | rewrite (?i)^/concepts/internet(\ |_)and(\ |_)web/the(\ |_)web(\ |_)standards(\ |_)model$ /concepts/Internet_and_Web/the_web_standards_model break;
33 | rewrite (?i)^/concepts/internet(\ |_)and(\ |_)web/the(\ |_)web(\ |_)standards(\ |_)model/ja$ /concepts/Internet_and_Web/the_web_standards_model/ja break;
34 | rewrite (?i)^/concepts/Internet(\ |_)and(\ |_)Web/the(\ |_)web(\ |_)standards(\ |_)model/ja$ /concepts/Internet_and_Web/The_Web_Standards_Model/ja break;
35 | rewrite (?i)^/concepts/internet(\ |_)and(\ |_)web/The(\ |_)Web(\ |_)Standards(\ |_)Model/ja$ /concepts/Internet_and_Web/The_Web_Standards_Model/ja break;
36 | rewrite (?i)^/concepts/internet(\ |_)and(\ |_)web/webrtc$ /concepts/Internet_and_Web/webrtc break;
37 | rewrite (?i)^/concepts/programming/Programming(\ |_)basics$ /concepts/programming/programming_basics break;
38 | rewrite (?i)^/concepts/programming/The(\ |_)principles(\ |_)of(\ |_)unobtrusive(\ |_)JavaScript$ /concepts/programming/the_principles_of_unobtrusive_javascript break;
39 | rewrite (?i)^/concepts/programming/The(\ |_)purpose(\ |_)of(\ |_)JavaScript$ /concepts/programming/the_purpose_of_javascript break;
40 | rewrite (?i)^/concepts/programming/Variables(\ |_)in(\ |_)JavaScript$ /concepts/programming/variables_in_javascript break;
41 | rewrite (?i)^/concepts/Web(\ |_)typography$ /concepts/web_typography break;
42 | rewrite (?i)^/concepts/Wireframing(\ |_)a(\ |_)site$ /concepts/wireframing_a_site break;
43 | rewrite (?i)^/tutorials/Absolute(\ |_)and(\ |_)fixed(\ |_)positioning$ /tutorials/absolute_and_fixed_positioning break;
44 | rewrite (?i)^/tutorials/Animation(\ |_)in(\ |_)JavaScript$ /tutorials/animation_in_javascript break;
45 | rewrite (?i)^/tutorials/Animations(\ |_)in(\ |_)SVG$ /tutorials/animations_in_svg break;
46 | rewrite (?i)^/tutorials/Audio(\ |_)and(\ |_)video$ /tutorials/audio_and_video break;
47 | rewrite (?i)^/tutorials/Basic(\ |_)shapes$ /tutorials/basic_shapes break;
48 | rewrite (?i)^/tutorials/Box(\ |_)model$ /tutorials/box_model break;
49 | rewrite (?i)^/tutorials/Building(\ |_)SVG(\ |_)paths$ /tutorials/building_svg_paths break;
50 | rewrite (?i)^/tutorials/Building(\ |_)web(\ |_)apps$ /tutorials/building_web_apps break;
51 | rewrite (?i)^/tutorials/Building(\ |_)web(\ |_)games$ /tutorials/building_web_games break;
52 | rewrite (?i)^/tutorials/Creating(\ |_)and(\ |_)modifying(\ |_)HTML$ /tutorials/creating_and_modifying_html break;
53 | rewrite (?i)^/tutorials/Creating(\ |_)basic(\ |_)navigation(\ |_)menus$ /tutorials/creating_basic_navigation_menus break;
54 | rewrite (?i)^/tutorials/CSS(\ |_)text(\ |_)quick(\ |_)start$ /tutorials/css_text_quick_start break;
55 | rewrite (?i)^/tutorials/Debugging(\ |_)CSS$ /tutorials/debugging_css break;
56 | rewrite (?i)^/tutorials/Events(\ |_)in(\ |_)JavaScript$ /tutorials/events_in_javascript break;
57 | rewrite (?i)^/tutorials/External(\ |_)content(\ |_)in(\ |_)SVG$ /tutorials/external_content_in_svg break;
58 | rewrite (?i)^/tutorials/Fills(\ |_)and(\ |_)strokes(\ |_)in(\ |_)SVG$ /tutorials/fills_and_strokes_in_svg break;
59 | rewrite (?i)^/tutorials/Floats(\ |_)and(\ |_)clearing$ /tutorials/floats_and_clearing break;
60 | rewrite (?i)^/tutorials/Generated(\ |_)content$ /tutorials/generated_content break;
61 | rewrite (?i)^/tutorials/Getting(\ |_)your(\ |_)content(\ |_)online$ /tutorials/Getting_Your_Content_Online break;
62 | rewrite (?i)^/tutorials/getting(\ |_)your(\ |_)content(\ |_)online$ /tutorials/Getting_Your_Content_Online break;
63 | rewrite (?i)^/tutorials/Graceful(\ |_)degradation(\ |_)versus(\ |_)progressive(\ |_)enhancement$ /tutorials/graceful_degradation_versus_progressive_enhancement break;
64 | rewrite (?i)^/tutorials/JavaScript(\ |_)best(\ |_)practices$ /tutorials/javascript_best_practices break;
65 | rewrite (?i)^/tutorials/JavaScript(\ |_)for(\ |_)mobile/Best(\ |_)practices(\ |_)when(\ |_)writing(\ |_)for(\ |_)mobile$ /tutorials/JavaScript_for_mobile/best_practices_when_writing_for_mobile break;
66 | rewrite (?i)^/tutorials/JavaScript(\ |_)functions$ /tutorials/javascript_functions break;
67 | rewrite (?i)^/tutorials/Layout(\ |_)fundamentals$ /tutorials/layout_fundamentals break;
68 | rewrite (?i)^/tutorials/Making(\ |_)CSS(\ |_)easier(\ |_)to(\ |_)read$ /tutorials/making_css_easier_to_read break;
69 | rewrite (?i)^/tutorials/Manipulating(\ |_)CSS(\ |_)with(\ |_)JavaScript$ /tutorials/manipulating_css_with_javascript break;
70 | rewrite (?i)^/tutorials/Media(\ |_)queries$ /tutorials/media_queries break;
71 | rewrite (?i)^/tutorials/Objects(\ |_)in(\ |_)JavaScript$ /tutorials/objects_in_javascript break;
72 | rewrite (?i)^/tutorials/Optimizing(\ |_)CSS$ /tutorials/optimizing_css break;
73 | rewrite (?i)^/tutorials/Position(\ |_)and(\ |_)transformation$ /tutorials/position_and_transformation break;
74 | rewrite (?i)^/tutorials/Responsive(\ |_)layouts$ /tutorials/responsive_layouts break;
75 | rewrite (?i)^/tutorials/Scripting(\ |_)SVG$ /tutorials/scripting_svg break;
76 | rewrite (?i)^/tutorials/State(\ |_)in(\ |_)CSS$ /tutorials/state_in_css break;
77 | rewrite (?i)^/tutorials/Static(\ |_)and(\ |_)relative(\ |_)positioning$ /tutorials/static_and_relative_positioning break;
78 | rewrite (?i)^/tutorials/Styling(\ |_)lists(\ |_)and(\ |_)links$ /tutorials/styling_lists_and_links break;
79 | rewrite (?i)^/tutorials/Styling(\ |_)SVG(\ |_)with(\ |_)CSS$ /tutorials/styling_svg_with_css break;
80 | rewrite (?i)^/tutorials/SVG(\ |_)clipping(\ |_)and(\ |_)masking$ /tutorials/svg_clipping_and_masking break;
81 | rewrite (?i)^/tutorials/SVG(\ |_)filters$ /tutorials/svg_filters break;
82 | rewrite (?i)^/tutorials/SVG(\ |_)fonts$ /tutorials/svg_fonts break;
83 | rewrite (?i)^/tutorials/SVG(\ |_)gradients$ /tutorials/svg_gradients break;
84 | rewrite (?i)^/tutorials/SVG(\ |_)image(\ |_)element$ /tutorials/svg_image_element break;
85 | rewrite (?i)^/tutorials/SVG(\ |_)introduction$ /tutorials/svg_introduction break;
86 | rewrite (?i)^/tutorials/SVG(\ |_)links$ /tutorials/svg_links break;
87 | rewrite (?i)^/tutorials/SVG(\ |_)pattern(\ |_)fills$ /tutorials/svg_pattern_fills break;
88 | rewrite (?i)^/tutorials/SVG(\ |_)syntax(\ |_)and(\ |_)deployment$ /tutorials/svg_syntax_and_deployment break;
89 | rewrite (?i)^/tutorials/Table(\ |_)styling(\ |_)basics$ /tutorials/table_styling_basics break;
90 | rewrite (?i)^/tutorials/the(\ |_)basics(\ |_)of(\ |_)html$ /guides/the_basics_of_html break;
91 | rewrite (?i)^/tutorials/Traversing(\ |_)the(\ |_)DOM$ /tutorials/traversing_the_dom break;
92 | rewrite (?i)^/tutorials/Using(\ |_)text(\ |_)in(\ |_)SVG$ /tutorials/using_text_in_svg break;
93 | rewrite (?i)^/tutorials/Using(\ |_)Web(\ |_)Audio$ /tutorials/using_web_audio break;
94 | rewrite (?i)^/tutorials/Your(\ |_)first(\ |_)look(\ |_)at(\ |_)JavaScript$ /tutorials/your_first_look_at_javascript break;


--------------------------------------------------------------------------------
/reports/4_nginx_redirects_spaces.map:
--------------------------------------------------------------------------------
  1 | rewrite (?i)^/apis/xhr/methods/open(\ |_)\(XDomainRequest\)$ /apis/xhr/methods/open_XDomainRequest break;
  2 | rewrite (?i)^/apis/xhr/methods/send(\ |_)\(XDomainRequest\)$ /apis/xhr/methods/send_XDomainRequest break;
  3 | rewrite (?i)^/canvas/properties/height(\ |_)\(canvas\)$ /canvas/properties/height_canvas break;
  4 | rewrite (?i)^/html/attributes/align(\ |_)\(Table,(\ |_)iframe(\ |_)elements\)$ /html/attributes/align_Table_iframe_elements break;
  5 | rewrite (?i)^/html/attributes/align(\ |_)\(Table(\ |_)caption,(\ |_)legend(\ |_)element\)$ /html/attributes/align_Table_caption_legend_element break;
  6 | rewrite (?i)^/html/attributes/autocomplete(\ |_)\(input,(\ |_)form(\ |_)elements\)$ /html/attributes/autocomplete_input_form_elements break;
  7 | rewrite (?i)^/html/attributes/background(\ |_)\(Body(\ |_)element\)$ /html/attributes/background_Body_element break;
  8 | rewrite (?i)^/html/attributes/background(\ |_)\(Table,(\ |_)table(\ |_)cell,(\ |_)td,(\ |_)th(\ |_)elements\)$ /html/attributes/background_Table_table_cell_td_th_elements break;
  9 | rewrite (?i)^/html/attributes/border(\ |_)\(frameSet,(\ |_)iframe\)$ /html/attributes/border_frameSet_iframe break;
 10 | rewrite (?i)^/html/attributes/cols(\ |_)\(frameSet\)$ /html/attributes/cols_frameSet break;
 11 | rewrite (?i)^/html/attributes/cols(\ |_)\(TextArea(\ |_)element\)$ /html/attributes/cols_TextArea_element break;
 12 | rewrite (?i)^/html/attributes/href(\ |_)\(base\)$ /html/attributes/href_base break;
 13 | rewrite (?i)^/html/attributes/label(\ |_)\(optGroup\)$ /html/attributes/label_optGroup break;
 14 | rewrite (?i)^/html/attributes/max(\ |_)\(HTMLInputElement\)$ /html/attributes/max_HTMLInputElement break;
 15 | rewrite (?i)^/html/attributes/name(\ |_)\(frames\)$ /html/attributes/name_frames break;
 16 | rewrite (?i)^/html/attributes/name(\ |_)\(meta(\ |_)object\)$ /html/attributes/name_meta_object break;
 17 | rewrite (?i)^/html/attributes/name(\ |_)\(window\)$ /html/attributes/name_window break;
 18 | rewrite (?i)^/html/attributes/name(\ |_)param(\ |_)element\)$ /html/attributes/name_param_element break;
 19 | rewrite (?i)^/html/attributes/size(\ |_)\(control\)$ /html/attributes/size_control break;
 20 | rewrite (?i)^/html/attributes/size(\ |_)\(hr\)$ /html/attributes/size_hr break;
 21 | rewrite (?i)^/html/attributes/src(\ |_)\(iframe,(\ |_)embed,(\ |_)xml\)$ /html/attributes/src_iframe_embed_xml break;
 22 | rewrite (?i)^/html/attributes/src(\ |_)\(input,(\ |_)img\)$ /html/attributes/src_input_img break;
 23 | rewrite (?i)^/html/attributes/src(\ |_)\(script\)$ /html/attributes/src_script break;
 24 | rewrite (?i)^/html/attributes/start(\ |_)\(ol\)$ /html/attributes/start_ol break;
 25 | rewrite (?i)^/html/attributes/type(\ |_)\(button(\ |_)element\)$ /html/attributes/type_button_element break;
 26 | rewrite (?i)^/html/attributes/type(\ |_)\(param(\ |_)element\)$ /html/attributes/type_param_element break;
 27 | rewrite (?i)^/html/attributes/type(\ |_)\(script(\ |_)element\)$ /html/attributes/type_script_element break;
 28 | rewrite (?i)^/html/attributes/type(\ |_)\(select(\ |_)element\)$ /html/attributes/type_select_element break;
 29 | rewrite (?i)^/html/attributes/type(\ |_)\(textarea(\ |_)element\)$ /html/attributes/type_textarea_element break;
 30 | rewrite (?i)^/html/attributes/type(\ |_)\(ul,li,ol(\ |_)elements\)$ /html/attributes/type_ulliol_elements break;
 31 | rewrite (?i)^/html/attributes/vAlign(\ |_)\(table(\ |_)caption\)$ /html/attributes/vAlign_table_caption break;
 32 | rewrite (?i)^/html/attributes/value(\ |_)\(button(\ |_)element\)$ /html/attributes/value_button_element break;
 33 | rewrite (?i)^/html/attributes/value(\ |_)\(HTMLProgressElement\)$ /html/attributes/value_HTMLProgressElement break;
 34 | rewrite (?i)^/html/attributes/value(\ |_)\(input(\ |_)elements\)$ /html/attributes/value_input_elements break;
 35 | rewrite (?i)^/html/attributes/value(\ |_)\(li(\ |_)element\)$ /html/attributes/value_li_element break;
 36 | rewrite (?i)^/html/attributes/value(\ |_)\(param(\ |_)element\)$ /html/attributes/value_param_element break;
 37 | rewrite (?i)^/html/attributes/value(\ |_)\(select,(\ |_)option(\ |_)element\)$ /html/attributes/value_select_option_element break;
 38 | rewrite (?i)^/html/attributes/value(\ |_)\(textarea(\ |_)element\)$ /html/attributes/value_textarea_element break;
 39 | rewrite (?i)^/html/attributes/version(\ |_)\(dom/version$ /html/attributes/version_dom/version break;
 40 | rewrite (?i)^/html/attributes/width(\ |_)\(img,(\ |_)input(\ |_)elements\)$ /html/attributes/width_img_input_elements break;
 41 | rewrite (?i)^/html/attributes/width(\ |_)\(merge(\ |_)candidate\)$ /html/attributes/width_merge_candidate break;
 42 | rewrite (?i)^/Notes\:The(\ |_)future(\ |_)of(\ |_)the(\ |_)open(\ |_)web$ /NotesThe_future_of_the_open_web break;
 43 | rewrite (?i)^/svg/elements(\ |_)\(alphabetical(\ |_)order\)$ /svg/elements_alphabetical_order break;
 44 | rewrite (?i)^/svg/properties/animVal(\ |_)\(SVGAnimatedAngle\)$ /svg/properties/animVal_SVGAnimatedAngle break;
 45 | rewrite (?i)^/svg/properties/animVal(\ |_)\(SVGAnimatedBoolean\)$ /svg/properties/animVal_SVGAnimatedBoolean break;
 46 | rewrite (?i)^/svg/properties/animVal(\ |_)\(SVGAnimatedEnumeration\)$ /svg/properties/animVal_SVGAnimatedEnumeration break;
 47 | rewrite (?i)^/svg/properties/animVal(\ |_)\(SVGAnimatedInteger\)$ /svg/properties/animVal_SVGAnimatedInteger break;
 48 | rewrite (?i)^/svg/properties/animVal(\ |_)\(SVGAnimatedLength\)$ /svg/properties/animVal_SVGAnimatedLength break;
 49 | rewrite (?i)^/svg/properties/animVal(\ |_)\(SVGAnimatedLengthList\)$ /svg/properties/animVal_SVGAnimatedLengthList break;
 50 | rewrite (?i)^/svg/properties/animVal(\ |_)\(SVGAnimatedNumber\)$ /svg/properties/animVal_SVGAnimatedNumber break;
 51 | rewrite (?i)^/svg/properties/animVal(\ |_)\(SVGAnimatedNumberList\)$ /svg/properties/animVal_SVGAnimatedNumberList break;
 52 | rewrite (?i)^/svg/properties/animVal(\ |_)\(SVGAnimatedPreserveAspectRatio\)$ /svg/properties/animVal_SVGAnimatedPreserveAspectRatio break;
 53 | rewrite (?i)^/svg/properties/animVal(\ |_)\(SVGAnimatedRect\)$ /svg/properties/animVal_SVGAnimatedRect break;
 54 | rewrite (?i)^/svg/properties/animVal(\ |_)\(SVGAnimatedString\)$ /svg/properties/animVal_SVGAnimatedString break;
 55 | rewrite (?i)^/svg/properties/animVal(\ |_)\(SVGAnimatedTransformList\)$ /svg/properties/animVal_SVGAnimatedTransformList break;
 56 | rewrite (?i)^/svg/properties/baseVal(\ |_)\(SVGAnimatedAngle\)$ /svg/properties/baseVal_SVGAnimatedAngle break;
 57 | rewrite (?i)^/svg/properties/baseVal(\ |_)\(SVGAnimatedBoolean\)$ /svg/properties/baseVal_SVGAnimatedBoolean break;
 58 | rewrite (?i)^/svg/properties/baseVal(\ |_)\(SVGAnimatedEnumeration\)$ /svg/properties/baseVal_SVGAnimatedEnumeration break;
 59 | rewrite (?i)^/svg/properties/baseVal(\ |_)\(SVGAnimatedInteger\)$ /svg/properties/baseVal_SVGAnimatedInteger break;
 60 | rewrite (?i)^/svg/properties/baseVal(\ |_)\(SVGAnimatedLength\)$ /svg/properties/baseVal_SVGAnimatedLength break;
 61 | rewrite (?i)^/svg/properties/baseVal(\ |_)\(SVGAnimatedLengthList\)$ /svg/properties/baseVal_SVGAnimatedLengthList break;
 62 | rewrite (?i)^/svg/properties/baseVal(\ |_)\(SVGAnimatedNumber\)$ /svg/properties/baseVal_SVGAnimatedNumber break;
 63 | rewrite (?i)^/svg/properties/baseVal(\ |_)\(SVGAnimatedNumberList\)$ /svg/properties/baseVal_SVGAnimatedNumberList break;
 64 | rewrite (?i)^/svg/properties/baseVal(\ |_)\(SVGAnimatedPreserveAspectRatio\)$ /svg/properties/baseVal_SVGAnimatedPreserveAspectRatio break;
 65 | rewrite (?i)^/svg/properties/baseVal(\ |_)\(SVGAnimatedRect\)$ /svg/properties/baseVal_SVGAnimatedRect break;
 66 | rewrite (?i)^/svg/properties/baseVal(\ |_)\(SVGAnimatedString\)$ /svg/properties/baseVal_SVGAnimatedString break;
 67 | rewrite (?i)^/svg/properties/baseVal(\ |_)\(SVGAnimatedTransformList\)$ /svg/properties/baseVal_SVGAnimatedTransformList break;
 68 | rewrite (?i)^/svg/properties/cx(\ |_)\(SVGRadialGradientElement\)$ /svg/properties/cx_SVGRadialGradientElement break;
 69 | rewrite (?i)^/svg/properties/cy(\ |_)\(SVGRadialGradientElement\)$ /svg/properties/cy_SVGRadialGradientElement break;
 70 | rewrite (?i)^/svg/properties/r(\ |_)\(SVGRadialGradientElement\)$ /svg/properties/r_SVGRadialGradientElement break;
 71 | rewrite (?i)^/svg/properties/rx(\ |_)\(SVGEllipseElement\)$ /svg/properties/rx_SVGEllipseElement break;
 72 | rewrite (?i)^/svg/properties/rx(\ |_)\(SVGRectElement\)$ /svg/properties/rx_SVGRectElement break;
 73 | rewrite (?i)^/svg/properties/ry(\ |_)\(SVGEllipseElement\)$ /svg/properties/ry_SVGEllipseElement break;
 74 | rewrite (?i)^/svg/properties/ry(\ |_)\(SVGRectElement\)$ /svg/properties/ry_SVGRectElement break;
 75 | rewrite (?i)^/svg/properties/type(\ |_)\(SVGComponentTransferFunctionElement\)$ /svg/properties/type_SVGComponentTransferFunctionElement break;
 76 | rewrite (?i)^/svg/properties/type(\ |_)\(SVGFEColorMatrixElement\)$ /svg/properties/type_SVGFEColorMatrixElement break;
 77 | rewrite (?i)^/svg/properties/type(\ |_)\(SVGFETurbulenceElement\)$ /svg/properties/type_SVGFETurbulenceElement break;
 78 | rewrite (?i)^/svg/properties/type(\ |_)\(SVGScriptElement\)$ /svg/properties/type_SVGScriptElement break;
 79 | rewrite (?i)^/svg/properties/type(\ |_)\(SVGStyleElement\)$ /svg/properties/type_SVGStyleElement break;
 80 | rewrite (?i)^/svg/properties/type(\ |_)\(SVGTransform\)$ /svg/properties/type_SVGTransform break;
 81 | rewrite (?i)^/svg/properties/unitType(\ |_)\(SVGLength\)$ /svg/properties/unitType_SVGLength break;
 82 | rewrite (?i)^/svg/properties/x1(\ |_)\(SVGLinearGradientElement\)$ /svg/properties/x1_SVGLinearGradientElement break;
 83 | rewrite (?i)^/svg/properties/x1(\ |_)\(SVGLineElement\)$ /svg/properties/x1_SVGLineElement break;
 84 | rewrite (?i)^/svg/properties/x2(\ |_)\(SVGLinearGradientElement\)$ /svg/properties/x2_SVGLinearGradientElement break;
 85 | rewrite (?i)^/svg/properties/x2(\ |_)\(SVGLineElement\)$ /svg/properties/x2_SVGLineElement break;
 86 | rewrite (?i)^/svg/properties/y1(\ |_)\(SVGLinearGradientElement\)$ /svg/properties/y1_SVGLinearGradientElement break;
 87 | rewrite (?i)^/svg/properties/y1(\ |_)\(SVGLineElement\)$ /svg/properties/y1_SVGLineElement break;
 88 | rewrite (?i)^/svg/properties/y2(\ |_)\(SVGLinearGradientElement\)$ /svg/properties/y2_SVGLinearGradientElement break;
 89 | rewrite (?i)^/svg/properties/y2(\ |_)\(SVGLineElement\)$ /svg/properties/y2_SVGLineElement break;
 90 | rewrite (?i)^/tutorials/Raw(\ |_)WebGL(\ |_)101(\ |_)—(\ |_)Part(\ |_)4\:(\ |_)Textures$ /tutorials/Raw_WebGL_101_-_Part_4_Textures break;
 91 | rewrite (?i)^/tutorials/What(\ |_)is(\ |_)CSS\?)$ /tutorials/What_is_CSS break;
 92 | rewrite (?i)^/canvas/tutorial/Canvas(\ |_)tutorial$ /tutorials/canvas/Canvas_tutorial break;
 93 | rewrite (?i)^/canvas/tutorial/Canvas(\ |_)tutorial/Applying(\ |_)styles(\ |_)and(\ |_)colors$ /tutorials/canvas/Canvas_tutorial/Applying_styles_and_colors break;
 94 | rewrite (?i)^/canvas/tutorial/Canvas(\ |_)tutorial/Basic(\ |_)animations$ /tutorials/canvas/Canvas_tutorial/Basic_animations break;
 95 | rewrite (?i)^/canvas/tutorial/Canvas(\ |_)tutorial/Basic(\ |_)usage$ /tutorials/canvas/Canvas_tutorial/Basic_usage break;
 96 | rewrite (?i)^/canvas/tutorial/Canvas(\ |_)tutorial/Compositing$ /tutorials/canvas/Canvas_tutorial/Compositing break;
 97 | rewrite (?i)^/canvas/tutorial/Canvas(\ |_)tutorial/Drawing(\ |_)shapes$ /tutorials/canvas/Canvas_tutorial/Drawing_shapes break;
 98 | rewrite (?i)^/canvas/tutorial/Canvas(\ |_)tutorial/Transformations$ /tutorials/canvas/Canvas_tutorial/Transformations break;
 99 | rewrite (?i)^/canvas/tutorial/Canvas(\ |_)tutorial/Using(\ |_)images$ /tutorials/canvas/Canvas_tutorial/Using_images break;
100 | rewrite (?i)^/ConceptMarking(\ |_)up(\ |_)textual(\ |_)content(\ |_)in(\ |_)HTML$ /Marking_up_textual_content_in_HTML break;
101 | rewrite (?i)^/concepts/About(\ |_)mobile(\ |_)web$ /concepts/mobile_web/mobile_web break;
102 | rewrite (?i)^/concepts/an(\ |_)introduction(\ |_)to(\ |_)digital(\ |_)design$ /concepts/an_introduction_to_web_design break;
103 | rewrite (?i)^/concepts/at(\ |_)tools$ /concepts/accessibility/at_tools break;
104 | rewrite (?i)^/concepts/bringing(\ |_)accessibility(\ |_)into(\ |_)your(\ |_)organization$ /concepts/accessibility/bringing_accessibility_into_your_organization break;
105 | rewrite (?i)^/concepts/color(\ |_)schemes(\ |_)and(\ |_)mockups$ /concepts/color_theory/color_schemes_and_mockups break;
106 | rewrite (?i)^/concepts/IA/planning(\ |_)a(\ |_)website$ /concepts/information_architecture/planning_a_website break;
107 | rewrite (?i)^/concepts/Internet(\ |_)and(\ |_)Web/About(\ |_)HTML5(\ |_)hybrid(\ |_)applications$ /concepts/Internet_and_Web/html5_hybrid_applications break;
108 | rewrite (?i)^/concepts/internet(\ |_)web/mime(\ |_)types$ /concepts/Internet_and_Web/mime_types break;
109 | rewrite (?i)^/concepts/mobile(\ |_)web/About(\ |_)mobile(\ |_)friendly(\ |_)web(\ |_)design(\ |_)and(\ |_)development(\ |_)overview$ /concepts/mobile_web/mobile_friendly_web_dev_overview break;
110 | rewrite (?i)^/concepts/mobile(\ |_)web/About(\ |_)mobile(\ |_)JavaScript(\ |_)best(\ |_)practices$ /concepts/mobile_web/mobile_javascript_best_practices break;
111 | rewrite (?i)^/concepts/mobile(\ |_)web/About(\ |_)mobile(\ |_)web$ /concepts/mobile_web/mobile_web break;
112 | rewrite (?i)^/concepts/Pointer(\ |_)Events(\ |_)sandbox$ /PointerEvents break;
113 | rewrite (?i)^/concepts/Pointer(\ |_)Events(\ |_)sandbox/documentation$ /PointerEvents/documentation break;
114 | rewrite (?i)^/concepts/Pointer(\ |_)Events(\ |_)sandbox/experimental$ /PointerEvents/experimental break;
115 | rewrite (?i)^/concepts/Pointer(\ |_)Events(\ |_)sandbox/learn$ /PointerEvents/learn break;
116 | rewrite (?i)^/concepts/Pointer(\ |_)Events(\ |_)sandbox/prototypes$ /PointerEvents/prototypes break;
117 | rewrite (?i)^/concepts/Pointer(\ |_)Events(\ |_)sandbox/specifications$ /PointerEvents/specifications break;
118 | rewrite (?i)^/concepts/Pointer(\ |_)Events(\ |_)sandbox/test$ /PointerEvents/test break;
119 | rewrite (?i)^/concepts/Pointer(\ |_)Events(\ |_)sandbox/try$ /PointerEvents/try break;
120 | rewrite (?i)^/concepts/programming/programming(\ |_)basics/ja$ /concepts/programming/programming_basics/langja break;
121 | rewrite (?i)^/concepts/programming/programming(\ |_)basics/langja$ /ja/concepts/programming/programming_basics break;
122 | rewrite (?i)^/concepts/user(\ |_)experience(\ |_)design$ /concepts/ux/user_experience_design break;
123 | rewrite (?i)^/concepts/ux/user(\ |_)experience(\ |_)design(\ |_)techniques$ /concepts/ux/techniques/stakeholder_mapping break;
124 | rewrite (?i)^/concepts/what(\ |_)does(\ |_)a(\ |_)good(\ |_)web(\ |_)page(\ |_)need$ /concepts/ux/What_does_a_good_web_page_need break;
125 | rewrite (?i)^/css/color/color(\ |_)table$ /css/color/ break;
126 | rewrite (?i)^/css/data(\ |_)types/uri$ /css/data_types/url break;
127 | rewrite (?i)^/css/media(\ |_)queries/behavior$ /css/properties/behavior break;
128 | rewrite (?i)^/css/selectors/pseudo-classes/Dynamic(\ |_)pseudo-classes$ /css/selectors/pseudo-classes break;
129 | rewrite (?i)^/guides/css(\ |_)shorthand(\ |_)reference$ /guides/css_shorthand break;
130 | rewrite (?i)^/guides/html5(\ |_)form(\ |_)features$ /tutorials/html5_form_features break;
131 | rewrite (?i)^/html/attributes/BGCOLOR(\ |_)html(\ |_)attribute$ /html/attributes/BGCOLOR break;
132 | rewrite (?i)^/html/attributes/STYLE(\ |_)html(\ |_)attribute$ /html/attributes/style break;
133 | rewrite (?i)^/html/attributes/type(\ |_)type(\ |_)a(\ |_)link(\ |_)embed$ /html/attributes/type break;
134 | rewrite (?i)^/html(\ |_)text/es$ /guides/html_text/es break;
135 | rewrite (?i)^/javascript/regular(\ |_)expression/compile$ /javascript/RegExp/compile break;
136 | rewrite (?i)^/javascript/regular(\ |_)expression/exec$ /javascript/RegExp/exec break;
137 | rewrite (?i)^/javascript/regular(\ |_)expression/global$ /javascript/RegExp/global break;
138 | rewrite (?i)^/javascript/regular(\ |_)expression/ignoreCase$ /javascript/RegExp/ignoreCase break;
139 | rewrite (?i)^/javascript/regular(\ |_)expression/multiline$ /javascript/RegExp/multiline break;
140 | rewrite (?i)^/javascript/regular(\ |_)expression/source$ /javascript/RegExp/source break;
141 | rewrite (?i)^/javascript/regular(\ |_)expression/sticky$ /javascript/RegExp/sticky break;
142 | rewrite (?i)^/javascript/regular(\ |_)expression/test$ /javascript/RegExp/test break;
143 | rewrite (?i)^/javascript/regular(\ |_)expression/unicode$ /javascript/RegExp/unicode break;
144 | rewrite (?i)^/JavaScript(\ |_)animation$ /tutorials/animation_in_javascript_2 break;
145 | rewrite (?i)^/Main(\ |_)Page/zh-hans$ /Main_Page/zh break;
146 | rewrite (?i)^/Proposalstest/css(\ |_)properties(\ |_)display$ /WPD/Proposals/CSS_Property_Milestone/css_prop_enhancements/test/css_properties_display break;
147 | rewrite (?i)^/Styling(\ |_)lists(\ |_)and(\ |_)links$ /guides/Styling_lists_and_links break;
148 | rewrite (?i)^/svg/tutorials/smarter(\ |_)svg(\ |_)basics$ /svg/tutorials/smarter_svg_shapes break;
149 | rewrite (?i)^/svg/tutorials/smarter(\ |_)svg(\ |_)filter$ /svg/tutorials/smarter_svg_filters break;
150 | rewrite (?i)^/svg/tutorials/smarter(\ |_)svg(\ |_)interaction$ /Meta/svg/tutorials/smarter_svg_interaction break;
151 | rewrite (?i)^/svg/tutorials/smarter(\ |_)svg(\ |_)scope$ /svg/tutorials/smarter_svg_overview break;
152 | rewrite (?i)^/svg/tutorials/smarter(\ |_)svg(\ |_)script$ /svg/tutorials/smarter_svg_interaction break;
153 | rewrite (?i)^/sxsw(\ |_)talk(\ |_)proposal$ /WPD/sxsw_talk_proposal break;
154 | rewrite (?i)^/tutorial/html(\ |_)links$ /guides/html_lists break;
155 | rewrite (?i)^/tutorials/advanced(\ |_)selectors$ /guides/advanced_selectors_guide break;
156 | rewrite (?i)^/tutorials/anim(\ |_)animations$ /Meta/anim_animations break;
157 | rewrite (?i)^/tutorials/anim(\ |_)transforms$ /Meta/anim_transforms break;
158 | rewrite (?i)^/tutorials/Background(\ |_)images$ /tutorials/using_css_background_images break;
159 | rewrite (?i)^/tutorials/basic(\ |_)shapes$ /tutorials/basic_svg_shapes break;
160 | rewrite (?i)^/tutorials/Color(\ |_)in(\ |_)CSS$ /tutorials/setting_color_in_css break;
161 | rewrite (?i)^/tutorials/creating(\ |_)prototypes$ /concepts/creating_prototypes break;
162 | rewrite (?i)^/tutorials/CSS3(\ |_)animations$ /tutorials/css_animations break;
163 | rewrite (?i)^/tutorials/CSS3(\ |_)transforms$ /tutorials/css_transforms break;
164 | rewrite (?i)^/tutorials/CSS3(\ |_)transitions$ /tutorials/css_transitions break;
165 | rewrite (?i)^/tutorials/CSS(\ |_)border-image$ /tutorials/css_border_image break;
166 | rewrite (?i)^/tutorials/CSS(\ |_)box-shadow$ /tutorials/css_box_shadow break;
167 | rewrite (?i)^/tutorials/CSS(\ |_)gradients$ /tutorials/creating_gradients_in_css break;
168 | rewrite (?i)^/tutorials/CSS(\ |_)text(\ |_)styling(\ |_)advanced$ /guides/advanced_css_text_styling break;
169 | rewrite (?i)^/tutorials/css(\ |_)text(\ |_)styling(\ |_)fundamentals$ /guides/css_text_styling_fundamentals break;
170 | rewrite (?i)^/tutorials/custom(\ |_)filters$ /Meta/tutorials/custom_filters break;
171 | rewrite (?i)^/tutorials/doctypes(\ |_)and(\ |_)markup(\ |_)styles$ /guides/doctypes_and_markup_styles break;
172 | rewrite (?i)^/tutorials/getting(\ |_)started(\ |_)with(\ |_)css$ /guides/getting_started_with_css break;
173 | rewrite (?i)^/tutorials/how(\ |_)browsers(\ |_)work$ /concepts/Internet_and_Web/how_browsers_work break;
174 | rewrite (?i)^/tutorials/html(\ |_)forms(\ |_)basics$ /guides/html_forms_basics break;
175 | rewrite (?i)^/tutorials/html(\ |_)links$ /guides/html_links break;
176 | rewrite (?i)^/tutorials/html(\ |_)lists$ /guides/html_lists break;
177 | rewrite (?i)^/tutorials/html(\ |_)structural(\ |_)elements$ /guides/html_structural_elements break;
178 | rewrite (?i)^/tutorials/html(\ |_)tables$ /guides/html_tables break;
179 | rewrite (?i)^/tutorials/html(\ |_)text$ /guides/html_text break;
180 | rewrite (?i)^/tutorials/html(\ |_)validation$ /guides/html_validation break;
181 | rewrite (?i)^/tutorials/images(\ |_)in(\ |_)html$ /guides/images_in_html break;
182 | rewrite (?i)^/tutorials/Information(\ |_)Architecture(\ |_)-(\ |_)planning(\ |_)out(\ |_)a(\ |_)web(\ |_)site$ /tutorials/information_architecture/Planning_out_a_website break;
183 | rewrite (?i)^/tutorials/intro(\ |_)web(\ |_)audio(\ |_)api(\ |_)1$ /tutorials/audio/intro_web_audio_api_1 break;
184 | rewrite (?i)^/tutorials/intro(\ |_)web(\ |_)audio(\ |_)api(\ |_)2$ /tutorials/audio/intro_web_audio_api_2 break;
185 | rewrite (?i)^/tutorials/lesser-known(\ |_)semantic(\ |_)elements$ /guides/lesser-known_semantic_elements break;
186 | rewrite (?i)^/tutorials/List(\ |_)basics$ /tutorials/using_specific_list_styles break;
187 | rewrite (?i)^/tutorials/Manipulating(\ |_)replaced(\ |_)element(\ |_)content(\ |_)with(\ |_)object-fit$ /tutorials/object_fit break;
188 | rewrite (?i)^/tutorials/more(\ |_)about(\ |_)the(\ |_)html(\ |_)head$ /guides/more_about_the_html_head break;
189 | rewrite (?i)^/tutorials/Programming(\ |_)-(\ |_)the(\ |_)real(\ |_)basics$ /concepts/programming/programming_basics break;
190 | rewrite (?i)^/tutorials/selectors(\ |_)fundamentals$ /tutorials/using_selectors break;
191 | rewrite (?i)^/tutorials/Styling(\ |_)forms$ /guides/styling_forms_guide break;
192 | rewrite (?i)^/tutorials/Styling(\ |_)tables$ /guides/styling_tables break;
193 | rewrite (?i)^/tutorials/Styling(\ |_)XML(\ |_)data(\ |_)with(\ |_)CSS$ /tutorials/styling_xml_with_css break;
194 | rewrite (?i)^/tutorials/Targetting(\ |_)CSS(\ |_)at(\ |_)different(\ |_)media(\ |_)types$ /tutorials/targetting_css_at_different_media break;
195 | rewrite (?i)^/tutorials/the(\ |_)html(\ |_)head$ /guides/the_html_head break;
196 | rewrite (?i)^/tutorials/using(\ |_)web(\ |_)audio(\ |_)api(\ |_)pt1$ /tutorials/intro_web_audio_api_1 break;
197 | rewrite (?i)^/tutorials/what(\ |_)is(\ |_)css$ /tutorials/learning_what_css_is break;
198 | rewrite (?i)^/tutorials/why(\ |_)use(\ |_)css$ /tutorials/learning_why_we_use_css break;


--------------------------------------------------------------------------------
/reports/directly_on_root.txt:
--------------------------------------------------------------------------------
  1 | absolute unit
  2 | accessibility article ideas
  3 | Accessibility basics
  4 | Accessibility testing
  5 | after
  6 | alignment
  7 | apis
  8 | aria
  9 | ascender
 10 | baseline
 11 | Beginners
 12 | blowout
 13 | bold
 14 | brute force
 15 | cap line
 16 | centered
 17 | character
 18 | character encoding
 19 | character set
 20 | code position
 21 | community
 22 | concepts
 23 | container element
 24 | Contributor Agreement
 25 | copy
 26 | css
 27 | descender
 28 | dom
 29 | edge case
 30 | em
 31 | en
 32 | events
 33 | Floats and clearing
 34 | flush
 35 | flush left
 36 | flush right
 37 | font
 38 | functional notation
 39 | generic font family
 40 | glossary
 41 | glyph
 42 | grid
 43 | guides
 44 | gutter
 45 | hanging punctuation
 46 | html
 47 | hyphenation
 48 | italic
 49 | its
 50 | javascript
 51 | justification
 52 | justified
 53 | kerning
 54 | kiss
 55 | leading
 56 | letter spacing
 57 | ligature
 58 | lining figures
 59 | lorem ipsum
 60 | Main Page
 61 | manipulating css with javascript
 62 | mathml
 63 | mean line
 64 | monospaced font
 65 | non lining figures
 66 | Notes:The future of the open web
 67 | oblique
 68 | orphan
 69 | pagination
 70 | pica
 71 | point
 72 | PointerEvents
 73 | proportional font
 74 | ragged
 75 | recommendation
 76 | relative unit
 77 | rendering engine
 78 | river
 79 | rule
 80 | sans serif font
 81 | serif font
 82 | server-side languages
 83 | small caps
 84 | Standards
 85 | stem
 86 | Styling forms
 87 | Styling tables
 88 | subscript
 89 | superscript
 90 | svg
 91 | svg reference
 92 | svg wow demos
 93 | symbol font
 94 | taxonomy
 95 | test case
 96 | tutorials
 97 | typeface
 98 | webgl
 99 | weight
100 | whitespace
101 | widow
102 | x height
103 | xml
104 | xslt


--------------------------------------------------------------------------------
/reports/hundred_revs.txt:
--------------------------------------------------------------------------------
 1 | tutorials/Web Education Intro (105)
 2 | css/properties/font-size (156)
 3 | Main Page (107)
 4 | tutorials/css transforms (102)
 5 | Beginners (106)
 6 | html/elements/dl-old (116)
 7 | tutorials/css-regions (124)
 8 | svg/tutorials/smarter svg overview (168)
 9 | svg/tutorials/smarter svg filters (159)
10 | svg/tutorials/smarter svg animation (132)


--------------------------------------------------------------------------------
/reports/location_spaghetti_duplicated.txt:
--------------------------------------------------------------------------------
  1 | CSS/Selectors/pseudo-classes/:target:
  2 |   - new_location: "CSS/Selectors/pseudo-classes/\:target"
  3 |   - url_match_attempt: "CSS/Selectors/pseudo-classes/\:target"
  4 |   - redirect_to: "CSS/Selectors/pseudo-classes/target"
  5 | 
  6 | beginners/crash course:
  7 |   - new_location: "beginners/crash(\ |_)course"
  8 |   - url_match_attempt: "beginners/crash_course"
  9 |   - redirect_to: "Beginners/crash_course"
 10 | 
 11 | beginners/the beginning:
 12 |   - new_location: "beginners/the(\ |_)beginning"
 13 |   - url_match_attempt: "beginners/the_beginning"
 14 |   - redirect_to: "Beginners/the_beginning"
 15 | 
 16 | concepts/Color theory:
 17 |   - new_location: "concepts/Color(\ |_)theory"
 18 |   - url_match_attempt: "concepts/Color_theory"
 19 |   - redirect_to: "concepts/color_theory"
 20 | 
 21 | concepts/Design theory:
 22 |   - new_location: "concepts/Design(\ |_)theory"
 23 |   - url_match_attempt: "concepts/Design_theory"
 24 |   - redirect_to: "concepts/design_theory"
 25 | 
 26 | concepts/Information architecture:
 27 |   - new_location: "concepts/Information(\ |_)architecture"
 28 |   - url_match_attempt: "concepts/Information_architecture"
 29 |   - redirect_to: "concepts/information_architecture"
 30 | 
 31 | concepts/internet and web:
 32 |   - new_location: "concepts/internet(\ |_)and(\ |_)web"
 33 |   - url_match_attempt: "concepts/internet_and_web"
 34 |   - redirect_to: "concepts/Internet_and_Web"
 35 | 
 36 | concepts/internet and web/About HTML5 hybrid applications:
 37 |   - new_location: "concepts/internet(\ |_)and(\ |_)web/About(\ |_)HTML5(\ |_)hybrid(\ |_)applications"
 38 |   - url_match_attempt: "concepts/internet_and_web/About_HTML5_hybrid_applications"
 39 |   - redirect_to: "concepts/Internet_and_Web/About_HTML5_hybrid_applications"
 40 | 
 41 | concepts/Internet and Web/About proxy based browsers:
 42 |   - new_location: "concepts/Internet(\ |_)and(\ |_)Web/About(\ |_)proxy(\ |_)based(\ |_)browsers"
 43 |   - url_match_attempt: "concepts/Internet_and_Web/About_proxy_based_browsers"
 44 |   - redirect_to: "concepts/Internet_and_Web/proxy_based_browsers"
 45 | 
 46 | concepts/internet and web/how browsers work:
 47 |   - new_location: "concepts/internet(\ |_)and(\ |_)web/how(\ |_)browsers(\ |_)work"
 48 |   - url_match_attempt: "concepts/internet_and_web/how_browsers_work"
 49 |   - redirect_to: "concepts/Internet_and_Web/how_browsers_work"
 50 | 
 51 | concepts/internet and web/How does the Internet Work:
 52 |   - new_location: "concepts/internet(\ |_)and(\ |_)web/How(\ |_)does(\ |_)the(\ |_)Internet(\ |_)Work"
 53 |   - url_match_attempt: "concepts/internet_and_web/How_does_the_Internet_Work"
 54 |   - redirect_to: "concepts/Internet_and_Web/How_does_the_Internet_Work"
 55 | 
 56 | concepts/internet and web/how does the internet work/es:
 57 |   - new_location: "concepts/internet(\ |_)and(\ |_)web/how(\ |_)does(\ |_)the(\ |_)internet(\ |_)work/es"
 58 |   - url_match_attempt: "concepts/internet_and_web/how_does_the_internet_work/es"
 59 |   - redirect_to: "concepts/Internet_and_Web/How_does_the_Internet_Work/es"
 60 | 
 61 | concepts/internet and web/how does the internet work/ja:
 62 |   - new_location: "concepts/internet(\ |_)and(\ |_)web/how(\ |_)does(\ |_)the(\ |_)internet(\ |_)work/ja"
 63 |   - url_match_attempt: "concepts/internet_and_web/how_does_the_internet_work/ja"
 64 |   - redirect_to: "concepts/Internet_and_Web/How_does_the_Internet_Work/ja"
 65 | 
 66 | concepts/internet and web/html5 hybrid applications:
 67 |   - new_location: "concepts/internet(\ |_)and(\ |_)web/html5(\ |_)hybrid(\ |_)applications"
 68 |   - url_match_attempt: "concepts/internet_and_web/html5_hybrid_applications"
 69 |   - redirect_to: "concepts/Internet_and_Web/html5_hybrid_applications"
 70 | 
 71 | concepts/internet and web/html5 hybrid applications/concepts/mobile tools:
 72 |   - new_location: "concepts/internet(\ |_)and(\ |_)web/html5(\ |_)hybrid(\ |_)applications/concepts/mobile(\ |_)tools"
 73 |   - url_match_attempt: "concepts/internet_and_web/html5_hybrid_applications/concepts/mobile_tools"
 74 |   - redirect_to: "concepts/Internet_and_Web/html5_hybrid_applications/concepts/mobile_tools"
 75 | 
 76 | concepts/internet and web/mime types:
 77 |   - new_location: "concepts/internet(\ |_)and(\ |_)web/mime(\ |_)types"
 78 |   - url_match_attempt: "concepts/internet_and_web/mime_types"
 79 |   - redirect_to: "concepts/Internet_and_Web/mime_types"
 80 | 
 81 | concepts/internet and web/OAuth:
 82 |   - new_location: "concepts/internet(\ |_)and(\ |_)web/OAuth"
 83 |   - url_match_attempt: "concepts/internet_and_web/OAuth"
 84 |   - redirect_to: "concepts/Internet_and_Web/OAuth"
 85 | 
 86 | concepts/internet and web/proxy based browsers:
 87 |   - new_location: "concepts/internet(\ |_)and(\ |_)web/proxy(\ |_)based(\ |_)browsers"
 88 |   - url_match_attempt: "concepts/internet_and_web/proxy_based_browsers"
 89 |   - redirect_to: "concepts/Internet_and_Web/proxy_based_browsers"
 90 | 
 91 | concepts/internet and web/The History of the Web:
 92 |   - new_location: "concepts/internet(\ |_)and(\ |_)web/The(\ |_)History(\ |_)of(\ |_)the(\ |_)Web"
 93 |   - url_match_attempt: "concepts/internet_and_web/The_History_of_the_Web"
 94 |   - redirect_to: "concepts/Internet_and_Web/The_History_of_the_Web"
 95 | 
 96 | concepts/internet and web/the history of the web/es:
 97 |   - new_location: "concepts/internet(\ |_)and(\ |_)web/the(\ |_)history(\ |_)of(\ |_)the(\ |_)web/es"
 98 |   - url_match_attempt: "concepts/internet_and_web/the_history_of_the_web/es"
 99 |   - redirect_to: "concepts/Internet_and_Web/The_History_of_the_Web/es"
100 | 
101 | concepts/internet and web/the history of the web/ja:
102 |   - new_location: "concepts/internet(\ |_)and(\ |_)web/the(\ |_)history(\ |_)of(\ |_)the(\ |_)web/ja"
103 |   - url_match_attempt: "concepts/internet_and_web/the_history_of_the_web/ja"
104 |   - redirect_to: "concepts/Internet_and_Web/The_History_of_the_Web/ja"
105 | 
106 | concepts/internet and web/the history of the web/tr:
107 |   - new_location: "concepts/internet(\ |_)and(\ |_)web/the(\ |_)history(\ |_)of(\ |_)the(\ |_)web/tr"
108 |   - url_match_attempt: "concepts/internet_and_web/the_history_of_the_web/tr"
109 |   - redirect_to: "concepts/Internet_and_Web/The_History_of_the_Web/tr"
110 | 
111 | concepts/internet and web/the web standards model:
112 |   - new_location: "concepts/internet(\ |_)and(\ |_)web/the(\ |_)web(\ |_)standards(\ |_)model"
113 |   - url_match_attempt: "concepts/internet_and_web/the_web_standards_model"
114 |   - redirect_to: "concepts/Internet_and_Web/the_web_standards_model"
115 | 
116 | concepts/internet and web/The Web Standards Model/ja:
117 |   - new_location: "concepts/internet(\ |_)and(\ |_)web/The(\ |_)Web(\ |_)Standards(\ |_)Model/ja"
118 |   - url_match_attempt: "concepts/internet_and_web/The_Web_Standards_Model/ja"
119 |   - redirect_to: "concepts/Internet_and_Web/The_Web_Standards_Model/ja"
120 | 
121 | concepts/internet and web/webrtc:
122 |   - new_location: "concepts/internet(\ |_)and(\ |_)web/webrtc"
123 |   - url_match_attempt: "concepts/internet_and_web/webrtc"
124 |   - redirect_to: "concepts/Internet_and_Web/webrtc"
125 | 
126 | concepts/programming/Programming basics:
127 |   - new_location: "concepts/programming/Programming(\ |_)basics"
128 |   - url_match_attempt: "concepts/programming/Programming_basics"
129 |   - redirect_to: "concepts/programming/programming_basics"
130 | 
131 | concepts/programming/The principles of unobtrusive JavaScript:
132 |   - new_location: "concepts/programming/The(\ |_)principles(\ |_)of(\ |_)unobtrusive(\ |_)JavaScript"
133 |   - url_match_attempt: "concepts/programming/The_principles_of_unobtrusive_JavaScript"
134 |   - redirect_to: "concepts/programming/the_principles_of_unobtrusive_javascript"
135 | 
136 | concepts/programming/The purpose of JavaScript:
137 |   - new_location: "concepts/programming/The(\ |_)purpose(\ |_)of(\ |_)JavaScript"
138 |   - url_match_attempt: "concepts/programming/The_purpose_of_JavaScript"
139 |   - redirect_to: "concepts/programming/the_purpose_of_javascript"
140 | 
141 | concepts/programming/Variables in JavaScript:
142 |   - new_location: "concepts/programming/Variables(\ |_)in(\ |_)JavaScript"
143 |   - url_match_attempt: "concepts/programming/Variables_in_JavaScript"
144 |   - redirect_to: "concepts/programming/variables_in_javascript"
145 | 
146 | concepts/Web typography:
147 |   - new_location: "concepts/Web(\ |_)typography"
148 |   - url_match_attempt: "concepts/Web_typography"
149 |   - redirect_to: "concepts/web_typography"
150 | 
151 | concepts/Wireframing a site:
152 |   - new_location: "concepts/Wireframing(\ |_)a(\ |_)site"
153 |   - url_match_attempt: "concepts/Wireframing_a_site"
154 |   - redirect_to: "concepts/wireframing_a_site"
155 | 
156 | tutorials/Absolute and fixed positioning:
157 |   - new_location: "tutorials/Absolute(\ |_)and(\ |_)fixed(\ |_)positioning"
158 |   - url_match_attempt: "tutorials/Absolute_and_fixed_positioning"
159 |   - redirect_to: "tutorials/absolute_and_fixed_positioning"
160 | 
161 | tutorials/Animation in JavaScript:
162 |   - new_location: "tutorials/Animation(\ |_)in(\ |_)JavaScript"
163 |   - url_match_attempt: "tutorials/Animation_in_JavaScript"
164 |   - redirect_to: "tutorials/animation_in_javascript"
165 | 
166 | tutorials/Animations in SVG:
167 |   - new_location: "tutorials/Animations(\ |_)in(\ |_)SVG"
168 |   - url_match_attempt: "tutorials/Animations_in_SVG"
169 |   - redirect_to: "tutorials/animations_in_svg"
170 | 
171 | tutorials/Audio and video:
172 |   - new_location: "tutorials/Audio(\ |_)and(\ |_)video"
173 |   - url_match_attempt: "tutorials/Audio_and_video"
174 |   - redirect_to: "tutorials/audio_and_video"
175 | 
176 | tutorials/Basic shapes:
177 |   - new_location: "tutorials/Basic(\ |_)shapes"
178 |   - url_match_attempt: "tutorials/Basic_shapes"
179 |   - redirect_to: "tutorials/basic_shapes"
180 | 
181 | tutorials/Box model:
182 |   - new_location: "tutorials/Box(\ |_)model"
183 |   - url_match_attempt: "tutorials/Box_model"
184 |   - redirect_to: "tutorials/box_model"
185 | 
186 | tutorials/Building SVG paths:
187 |   - new_location: "tutorials/Building(\ |_)SVG(\ |_)paths"
188 |   - url_match_attempt: "tutorials/Building_SVG_paths"
189 |   - redirect_to: "tutorials/building_svg_paths"
190 | 
191 | tutorials/Building web apps:
192 |   - new_location: "tutorials/Building(\ |_)web(\ |_)apps"
193 |   - url_match_attempt: "tutorials/Building_web_apps"
194 |   - redirect_to: "tutorials/building_web_apps"
195 | 
196 | tutorials/Building web games:
197 |   - new_location: "tutorials/Building(\ |_)web(\ |_)games"
198 |   - url_match_attempt: "tutorials/Building_web_games"
199 |   - redirect_to: "tutorials/building_web_games"
200 | 
201 | tutorials/Creating and modifying HTML:
202 |   - new_location: "tutorials/Creating(\ |_)and(\ |_)modifying(\ |_)HTML"
203 |   - url_match_attempt: "tutorials/Creating_and_modifying_HTML"
204 |   - redirect_to: "tutorials/creating_and_modifying_html"
205 | 
206 | tutorials/Creating basic navigation menus:
207 |   - new_location: "tutorials/Creating(\ |_)basic(\ |_)navigation(\ |_)menus"
208 |   - url_match_attempt: "tutorials/Creating_basic_navigation_menus"
209 |   - redirect_to: "tutorials/creating_basic_navigation_menus"
210 | 
211 | tutorials/CSS text quick start:
212 |   - new_location: "tutorials/CSS(\ |_)text(\ |_)quick(\ |_)start"
213 |   - url_match_attempt: "tutorials/CSS_text_quick_start"
214 |   - redirect_to: "tutorials/css_text_quick_start"
215 | 
216 | tutorials/Debugging CSS:
217 |   - new_location: "tutorials/Debugging(\ |_)CSS"
218 |   - url_match_attempt: "tutorials/Debugging_CSS"
219 |   - redirect_to: "tutorials/debugging_css"
220 | 
221 | tutorials/Events in JavaScript:
222 |   - new_location: "tutorials/Events(\ |_)in(\ |_)JavaScript"
223 |   - url_match_attempt: "tutorials/Events_in_JavaScript"
224 |   - redirect_to: "tutorials/events_in_javascript"
225 | 
226 | tutorials/External content in SVG:
227 |   - new_location: "tutorials/External(\ |_)content(\ |_)in(\ |_)SVG"
228 |   - url_match_attempt: "tutorials/External_content_in_SVG"
229 |   - redirect_to: "tutorials/external_content_in_svg"
230 | 
231 | tutorials/Fills and strokes in SVG:
232 |   - new_location: "tutorials/Fills(\ |_)and(\ |_)strokes(\ |_)in(\ |_)SVG"
233 |   - url_match_attempt: "tutorials/Fills_and_strokes_in_SVG"
234 |   - redirect_to: "tutorials/fills_and_strokes_in_svg"
235 | 
236 | tutorials/Floats and clearing:
237 |   - new_location: "tutorials/Floats(\ |_)and(\ |_)clearing"
238 |   - url_match_attempt: "tutorials/Floats_and_clearing"
239 |   - redirect_to: "tutorials/floats_and_clearing"
240 | 
241 | tutorials/Generated content:
242 |   - new_location: "tutorials/Generated(\ |_)content"
243 |   - url_match_attempt: "tutorials/Generated_content"
244 |   - redirect_to: "tutorials/generated_content"
245 | 
246 | tutorials/getting your content online:
247 |   - new_location: "tutorials/getting(\ |_)your(\ |_)content(\ |_)online"
248 |   - url_match_attempt: "tutorials/getting_your_content_online"
249 |   - redirect_to: "tutorials/Getting_Your_Content_Online"
250 | 
251 | tutorials/Graceful degradation versus progressive enhancement:
252 |   - new_location: "tutorials/Graceful(\ |_)degradation(\ |_)versus(\ |_)progressive(\ |_)enhancement"
253 |   - url_match_attempt: "tutorials/Graceful_degradation_versus_progressive_enhancement"
254 |   - redirect_to: "tutorials/graceful_degradation_versus_progressive_enhancement"
255 | 
256 | tutorials/JavaScript best practices:
257 |   - new_location: "tutorials/JavaScript(\ |_)best(\ |_)practices"
258 |   - url_match_attempt: "tutorials/JavaScript_best_practices"
259 |   - redirect_to: "tutorials/javascript_best_practices"
260 | 
261 | tutorials/JavaScript for mobile/Best practices when writing for mobile:
262 |   - new_location: "tutorials/JavaScript(\ |_)for(\ |_)mobile/Best(\ |_)practices(\ |_)when(\ |_)writing(\ |_)for(\ |_)mobile"
263 |   - url_match_attempt: "tutorials/JavaScript_for_mobile/Best_practices_when_writing_for_mobile"
264 |   - redirect_to: "tutorials/JavaScript_for_mobile/best_practices_when_writing_for_mobile"
265 | 
266 | tutorials/JavaScript functions:
267 |   - new_location: "tutorials/JavaScript(\ |_)functions"
268 |   - url_match_attempt: "tutorials/JavaScript_functions"
269 |   - redirect_to: "tutorials/javascript_functions"
270 | 
271 | tutorials/Layout fundamentals:
272 |   - new_location: "tutorials/Layout(\ |_)fundamentals"
273 |   - url_match_attempt: "tutorials/Layout_fundamentals"
274 |   - redirect_to: "tutorials/layout_fundamentals"
275 | 
276 | tutorials/Making CSS easier to read:
277 |   - new_location: "tutorials/Making(\ |_)CSS(\ |_)easier(\ |_)to(\ |_)read"
278 |   - url_match_attempt: "tutorials/Making_CSS_easier_to_read"
279 |   - redirect_to: "tutorials/making_css_easier_to_read"
280 | 
281 | tutorials/Manipulating CSS with JavaScript:
282 |   - new_location: "tutorials/Manipulating(\ |_)CSS(\ |_)with(\ |_)JavaScript"
283 |   - url_match_attempt: "tutorials/Manipulating_CSS_with_JavaScript"
284 |   - redirect_to: "tutorials/manipulating_css_with_javascript"
285 | 
286 | tutorials/Media queries:
287 |   - new_location: "tutorials/Media(\ |_)queries"
288 |   - url_match_attempt: "tutorials/Media_queries"
289 |   - redirect_to: "tutorials/media_queries"
290 | 
291 | tutorials/Objects in JavaScript:
292 |   - new_location: "tutorials/Objects(\ |_)in(\ |_)JavaScript"
293 |   - url_match_attempt: "tutorials/Objects_in_JavaScript"
294 |   - redirect_to: "tutorials/objects_in_javascript"
295 | 
296 | tutorials/Optimizing CSS:
297 |   - new_location: "tutorials/Optimizing(\ |_)CSS"
298 |   - url_match_attempt: "tutorials/Optimizing_CSS"
299 |   - redirect_to: "tutorials/optimizing_css"
300 | 
301 | tutorials/Position and transformation:
302 |   - new_location: "tutorials/Position(\ |_)and(\ |_)transformation"
303 |   - url_match_attempt: "tutorials/Position_and_transformation"
304 |   - redirect_to: "tutorials/position_and_transformation"
305 | 
306 | tutorials/Responsive layouts:
307 |   - new_location: "tutorials/Responsive(\ |_)layouts"
308 |   - url_match_attempt: "tutorials/Responsive_layouts"
309 |   - redirect_to: "tutorials/responsive_layouts"
310 | 
311 | tutorials/Scripting SVG:
312 |   - new_location: "tutorials/Scripting(\ |_)SVG"
313 |   - url_match_attempt: "tutorials/Scripting_SVG"
314 |   - redirect_to: "tutorials/scripting_svg"
315 | 
316 | tutorials/State in CSS:
317 |   - new_location: "tutorials/State(\ |_)in(\ |_)CSS"
318 |   - url_match_attempt: "tutorials/State_in_CSS"
319 |   - redirect_to: "tutorials/state_in_css"
320 | 
321 | tutorials/Static and relative positioning:
322 |   - new_location: "tutorials/Static(\ |_)and(\ |_)relative(\ |_)positioning"
323 |   - url_match_attempt: "tutorials/Static_and_relative_positioning"
324 |   - redirect_to: "tutorials/static_and_relative_positioning"
325 | 
326 | tutorials/Styling lists and links:
327 |   - new_location: "tutorials/Styling(\ |_)lists(\ |_)and(\ |_)links"
328 |   - url_match_attempt: "tutorials/Styling_lists_and_links"
329 |   - redirect_to: "tutorials/styling_lists_and_links"
330 | 
331 | tutorials/Styling SVG with CSS:
332 |   - new_location: "tutorials/Styling(\ |_)SVG(\ |_)with(\ |_)CSS"
333 |   - url_match_attempt: "tutorials/Styling_SVG_with_CSS"
334 |   - redirect_to: "tutorials/styling_svg_with_css"
335 | 
336 | tutorials/SVG clipping and masking:
337 |   - new_location: "tutorials/SVG(\ |_)clipping(\ |_)and(\ |_)masking"
338 |   - url_match_attempt: "tutorials/SVG_clipping_and_masking"
339 |   - redirect_to: "tutorials/svg_clipping_and_masking"
340 | 
341 | tutorials/SVG filters:
342 |   - new_location: "tutorials/SVG(\ |_)filters"
343 |   - url_match_attempt: "tutorials/SVG_filters"
344 |   - redirect_to: "tutorials/svg_filters"
345 | 
346 | tutorials/SVG fonts:
347 |   - new_location: "tutorials/SVG(\ |_)fonts"
348 |   - url_match_attempt: "tutorials/SVG_fonts"
349 |   - redirect_to: "tutorials/svg_fonts"
350 | 
351 | tutorials/SVG gradients:
352 |   - new_location: "tutorials/SVG(\ |_)gradients"
353 |   - url_match_attempt: "tutorials/SVG_gradients"
354 |   - redirect_to: "tutorials/svg_gradients"
355 | 
356 | tutorials/SVG image element:
357 |   - new_location: "tutorials/SVG(\ |_)image(\ |_)element"
358 |   - url_match_attempt: "tutorials/SVG_image_element"
359 |   - redirect_to: "tutorials/svg_image_element"
360 | 
361 | tutorials/SVG introduction:
362 |   - new_location: "tutorials/SVG(\ |_)introduction"
363 |   - url_match_attempt: "tutorials/SVG_introduction"
364 |   - redirect_to: "tutorials/svg_introduction"
365 | 
366 | tutorials/SVG links:
367 |   - new_location: "tutorials/SVG(\ |_)links"
368 |   - url_match_attempt: "tutorials/SVG_links"
369 |   - redirect_to: "tutorials/svg_links"
370 | 
371 | tutorials/SVG pattern fills:
372 |   - new_location: "tutorials/SVG(\ |_)pattern(\ |_)fills"
373 |   - url_match_attempt: "tutorials/SVG_pattern_fills"
374 |   - redirect_to: "tutorials/svg_pattern_fills"
375 | 
376 | tutorials/SVG syntax and deployment:
377 |   - new_location: "tutorials/SVG(\ |_)syntax(\ |_)and(\ |_)deployment"
378 |   - url_match_attempt: "tutorials/SVG_syntax_and_deployment"
379 |   - redirect_to: "tutorials/svg_syntax_and_deployment"
380 | 
381 | tutorials/Table styling basics:
382 |   - new_location: "tutorials/Table(\ |_)styling(\ |_)basics"
383 |   - url_match_attempt: "tutorials/Table_styling_basics"
384 |   - redirect_to: "tutorials/table_styling_basics"
385 | 
386 | tutorials/the basics of html:
387 |   - new_location: "tutorials/the(\ |_)basics(\ |_)of(\ |_)html"
388 |   - url_match_attempt: "tutorials/the_basics_of_html"
389 |   - redirect_to: "guides/the_basics_of_html"
390 | 
391 | tutorials/Traversing the DOM:
392 |   - new_location: "tutorials/Traversing(\ |_)the(\ |_)DOM"
393 |   - url_match_attempt: "tutorials/Traversing_the_DOM"
394 |   - redirect_to: "tutorials/traversing_the_dom"
395 | 
396 | tutorials/Using text in SVG:
397 |   - new_location: "tutorials/Using(\ |_)text(\ |_)in(\ |_)SVG"
398 |   - url_match_attempt: "tutorials/Using_text_in_SVG"
399 |   - redirect_to: "tutorials/using_text_in_svg"
400 | 
401 | tutorials/Using Web Audio:
402 |   - new_location: "tutorials/Using(\ |_)Web(\ |_)Audio"
403 |   - url_match_attempt: "tutorials/Using_Web_Audio"
404 |   - redirect_to: "tutorials/using_web_audio"
405 | 
406 | tutorials/Your first look at JavaScript:
407 |   - new_location: "tutorials/Your(\ |_)first(\ |_)look(\ |_)at(\ |_)JavaScript"
408 |   - url_match_attempt: "tutorials/Your_first_look_at_JavaScript"
409 |   - redirect_to: "tutorials/your_first_look_at_javascript"
410 | 


--------------------------------------------------------------------------------
/reports/nginx_redirects.map:
--------------------------------------------------------------------------------
  1 | rewrite ^/css/atrules/@charset$ /css/atrules/charset permanent;
  2 | rewrite ^/css/atrules/@font-face$ /css/atrules/font-face permanent;
  3 | rewrite ^/css/atrules/@import$ /css/atrules/import permanent;
  4 | rewrite ^/css/atrules/@keyframes$ /css/atrules/keyframes permanent;
  5 | rewrite ^/css/atrules/@media$ /css/atrules/media permanent;
  6 | rewrite ^/css/atrules/@namespace$ /css/atrules/namespace permanent;
  7 | rewrite ^/css/atrules/@page$ /css/atrules/page permanent;
  8 | rewrite ^/css/atrules/@region$ /css/atrules/region permanent;
  9 | rewrite ^/css/atrules/@supports$ /css/atrules/supports permanent;
 10 | rewrite ^/css/atrules/@viewport$ /css/atrules/viewport permanent;
 11 | rewrite ^/css/functions/matrix\(\)$ /css/functions/matrix permanent;
 12 | rewrite ^/css/functions/rotate\(\)$ /css/functions/rotate permanent;
 13 | rewrite ^/css/functions/rotate3d\(\)$ /css/functions/rotate3d permanent;
 14 | rewrite ^/css/functions/rotateX\(\)$ /css/functions/rotateX permanent;
 15 | rewrite ^/css/functions/rotateY\(\)$ /css/functions/rotateY permanent;
 16 | rewrite ^/css/functions/rotateZ\(\)$ /css/functions/rotateZ permanent;
 17 | rewrite ^/css/functions/scale\(\)$ /css/functions/scale permanent;
 18 | rewrite ^/css/functions/scale3d\(\)$ /css/functions/scale3d permanent;
 19 | rewrite ^/css/functions/scaleX\(\)$ /css/functions/scaleX permanent;
 20 | rewrite ^/css/functions/scaleY\(\)$ /css/functions/scaleY permanent;
 21 | rewrite ^/css/functions/scaleZ\(\)$ /css/functions/scaleZ permanent;
 22 | rewrite ^/css/functions/skew\(\)$ /css/functions/skew permanent;
 23 | rewrite ^/css/functions/skewX\(\)$ /css/functions/skewX permanent;
 24 | rewrite ^/css/functions/skewY\(\)$ /css/functions/skewY permanent;
 25 | rewrite ^/css/functions/translate\(\)$ /css/functions/translate permanent;
 26 | rewrite ^/css/functions/translate3d\(\)$ /css/functions/translate3d permanent;
 27 | rewrite ^/css/functions/translateX\(\)$ /css/functions/translateX permanent;
 28 | rewrite ^/css/functions/translateY\(\)$ /css/functions/translateY permanent;
 29 | rewrite ^/css/functions/translateZ\(\)$ /css/functions/translateZ permanent;
 30 | rewrite ^/css/functions/url\(\)$ /css/functions/url permanent;
 31 | rewrite ^/css/selectors/pseudo-classes/\:-ms-input-placeholder$ /css/selectors/pseudo-classes/-ms-input-placeholder permanent;
 32 | rewrite ^/css/selectors/pseudo-classes/\:active$ /css/selectors/pseudo-classes/active permanent;
 33 | rewrite ^/css/selectors/pseudo-classes/\:checked$ /css/selectors/pseudo-classes/checked permanent;
 34 | rewrite ^/css/selectors/pseudo-classes/\:disabled$ /css/selectors/pseudo-classes/disabled permanent;
 35 | rewrite ^/css/selectors/pseudo-classes/\:empty$ /css/selectors/pseudo-classes/empty permanent;
 36 | rewrite ^/css/selectors/pseudo-classes/\:enabled$ /css/selectors/pseudo-classes/enabled permanent;
 37 | rewrite ^/css/selectors/pseudo-classes/\:first-child$ /css/selectors/pseudo-classes/first-child permanent;
 38 | rewrite ^/css/selectors/pseudo-classes/\:first-of-type$ /css/selectors/pseudo-classes/first-of-type permanent;
 39 | rewrite ^/css/selectors/pseudo-classes/\:focus$ /css/selectors/pseudo-classes/focus permanent;
 40 | rewrite ^/css/selectors/pseudo-classes/\:hover$ /css/selectors/pseudo-classes/hover permanent;
 41 | rewrite ^/css/selectors/pseudo-classes/\:in-range$ /css/selectors/pseudo-classes/in-range permanent;
 42 | rewrite ^/css/selectors/pseudo-classes/\:indeterminate$ /css/selectors/pseudo-classes/indeterminate permanent;
 43 | rewrite ^/css/selectors/pseudo-classes/\:invalid$ /css/selectors/pseudo-classes/invalid permanent;
 44 | rewrite ^/css/selectors/pseudo-classes/\:lang\(c\)$ /css/selectors/pseudo-classes/lang permanent;
 45 | rewrite ^/css/selectors/pseudo-classes/\:last-child$ /css/selectors/pseudo-classes/last-child permanent;
 46 | rewrite ^/css/selectors/pseudo-classes/\:last-of-type$ /css/selectors/pseudo-classes/last-of-type permanent;
 47 | rewrite ^/css/selectors/pseudo-classes/\:link$ /css/selectors/pseudo-classes/link permanent;
 48 | rewrite ^/css/selectors/pseudo-classes/\:not$ /css/selectors/pseudo-classes/not permanent;
 49 | rewrite ^/css/selectors/pseudo-classes/\:nth-child\(n\)$ /css/selectors/pseudo-classes/nth-child permanent;
 50 | rewrite ^/css/selectors/pseudo-classes/\:nth-last-child\(n\)$ /css/selectors/pseudo-classes/nth-last-child permanent;
 51 | rewrite ^/css/selectors/pseudo-classes/\:nth-last-of-type\(n\)$ /css/selectors/pseudo-classes/nth-last-of-type permanent;
 52 | rewrite ^/css/selectors/pseudo-classes/\:nth-of-type\(n\)$ /css/selectors/pseudo-classes/nth-of-type permanent;
 53 | rewrite ^/css/selectors/pseudo-classes/\:only-child$ /css/selectors/pseudo-classes/only-child permanent;
 54 | rewrite ^/css/selectors/pseudo-classes/\:only-of-type$ /css/selectors/pseudo-classes/only-of-type permanent;
 55 | rewrite ^/css/selectors/pseudo-classes/\:optional$ /css/selectors/pseudo-classes/optional permanent;
 56 | rewrite ^/css/selectors/pseudo-classes/\:required$ /css/selectors/pseudo-classes/required permanent;
 57 | rewrite ^/css/selectors/pseudo-classes/\:root$ /css/selectors/pseudo-classes/root permanent;
 58 | rewrite ^/css/selectors/pseudo-classes/\:target$ /css/selectors/pseudo-classes/target permanent;
 59 | rewrite ^/css/selectors/pseudo-classes/\:valid$ /css/selectors/pseudo-classes/valid permanent;
 60 | rewrite ^/css/selectors/pseudo-classes/\:visited$ /css/selectors/pseudo-classes/visited permanent;
 61 | rewrite ^/css/selectors/pseudo-elements/\:\:after$ /css/selectors/pseudo-elements/after permanent;
 62 | rewrite ^/css/selectors/pseudo-elements/\:\:before$ /css/selectors/pseudo-elements/before permanent;
 63 | rewrite ^/css/selectors/pseudo-elements/\:\:first-letter$ /css/selectors/pseudo-elements/first-letter permanent;
 64 | rewrite ^/css/selectors/pseudo-elements/\:\:first-line$ /css/selectors/pseudo-elements/first-line permanent;
 65 | rewrite ^/css/selectors/pseudo-elements/\:\:region$ /css/selectors/pseudo-elements/region permanent;
 66 | rewrite ^/css/selectors/pseudo-elements/\:\:selection$ /css/selectors/pseudo-elements/selection permanent;
 67 | rewrite ^/css/syntax/!important$ /css/syntax/important permanent;
 68 | rewrite ^/html/attributes/max\(HTMLProgressElement\)$ /html/attributes/maxHTMLProgressElement permanent;
 69 | rewrite ^/html/elements/!DOCTYPE$ /html/elements/DOCTYPE permanent;
 70 | rewrite ^/html/elements/!DOCTYPE/ja$ /html/elements/DOCTYPE/ja permanent;
 71 | rewrite ^/Accept$ /http/headers/Accept permanent;
 72 | rewrite ^/apis/css-regions/NamedFlow/regionlayoutupdate$ /apis/css-regions/NamedFlow/regionfragmentchange permanent;
 73 | rewrite ^/apis/indexeddb/IDBCuror/update$ /apis/indexeddb/IDBCursor/update permanent;
 74 | rewrite ^/apis/pointerevents/PointerEvent$ /dom/objects/PointerEvent permanent;
 75 | rewrite ^/apis/timing/methods/requestAnimationFrame$ /dom/Window/requestAnimationFrame permanent;
 76 | rewrite ^/apis/timing/properties/domContentLoadedEventEnd$ /apis/navigation_timing/PerformanceTiming/domContentLoadedEventEnd permanent;
 77 | rewrite ^/apis/webrtc/objects/MediaStream/properties/videoTracks$ /apis/webrtc/MediaStream/videoTracks permanent;
 78 | rewrite ^/apis/webrtc/objects/MediaStreamTrack$ /apis/webrtc/MediaStreamTrack permanent;
 79 | rewrite ^/apis/webrtc/objects/MediaStreamTrack/events/ended$ /apis/webrtc/MediaStreamTrack/ended permanent;
 80 | rewrite ^/apis/webrtc/objects/MediaStreamTrack/events/muted$ /apis/webrtc/MediaStreamTrack/muted permanent;
 81 | rewrite ^/apis/webrtc/objects/MediaStreamTrack/events/unmuted$ /apis/webrtc/MediaStreamTrack/unmuted permanent;
 82 | rewrite ^/apis/webrtc/objects/MediaStreamTrack/properties/enabled$ /apis/webrtc/MediaStreamTrack/enabled permanent;
 83 | rewrite ^/apis/webrtc/objects/MediaStreamTrack/properties/kind$ /apis/webrtc/MediaStreamTrack/kind permanent;
 84 | rewrite ^/apis/webrtc/objects/MediaStreamTrack/properties/label$ /apis/webrtc/MediaStreamTrack/label permanent;
 85 | rewrite ^/apis/webrtc/objects/MediaStreamTrack/properties/onended$ /apis/webrtc/MediaStreamTrack/onended permanent;
 86 | rewrite ^/apis/webrtc/objects/MediaStreamTrack/properties/onmute$ /apis/webrtc/MediaStreamTrack/onmute permanent;
 87 | rewrite ^/apis/webrtc/objects/MediaStreamTrack/properties/readyState$ /apis/webrtc/MediaStreamTrack/readyState permanent;
 88 | rewrite ^/apis/webrtc/objects/MediaStreamTrackList$ /apis/webrtc/MediaStreamTrackList permanent;
 89 | rewrite ^/apis/webrtc/objects/MediaStreamTrackList/events/addtrack$ /apis/webrtc/MediaStreamTrackList/addtrack permanent;
 90 | rewrite ^/apis/webrtc/objects/MediaStreamTrackList/events/removetrack$ /apis/webrtc/MediaStreamTrackList/removetrack permanent;
 91 | rewrite ^/apis/webrtc/objects/MediaStreamTrackList/methods/add$ /apis/webrtc/MediaStreamTrackList/add permanent;
 92 | rewrite ^/apis/webrtc/objects/MediaStreamTrackList/methods/item$ /apis/webrtc/MediaStreamTrackList/item permanent;
 93 | rewrite ^/apis/webrtc/objects/MediaStreamTrackList/methods/remove$ /apis/webrtc/MediaStreamTrackList/remove permanent;
 94 | rewrite ^/apis/webrtc/objects/MediaStreamTrackList/properties/length$ /apis/webrtc/MediaStreamTrackList/length permanent;
 95 | rewrite ^/apis/webrtc/objects/MediaStreamTrackList/properties/onaddtrack$ /apis/webrtc/MediaStreamTrackList/onaddtrack permanent;
 96 | rewrite ^/apis/webrtc/objects/MediaStreamTrackList/properties/onremovetrack$ /apis/webrtc/MediaStreamTrackList/onremovetrack permanent;
 97 | rewrite ^/apis/websockets/CloseEvent$ /apis/websocket/CloseEvent permanent;
 98 | rewrite ^/apis/websockets/CloseEvent/code$ /apis/websocket/CloseEvent/code permanent;
 99 | rewrite ^/apis/websockets/CloseEvent/reason$ /apis/websocket/CloseEvent/reason permanent;
100 | rewrite ^/apis/websockets/CloseEvent/wasClean$ /apis/websocket/CloseEvent/wasClean permanent;
101 | rewrite ^/apis/websockets/MessageEvent$ /apis/websocket/MessageEvent permanent;
102 | rewrite ^/apis/websockets/MessageEvent/data$ /apis/websocket/MessageEvent/data permanent;
103 | rewrite ^/A(\ |_)Quick(\ |_)Start(\ |_)to(\ |_)getting(\ |_)started(\ |_)contributing(\ |_)to(\ |_)Web(\ |_)Platform.org$ /WPD/Quick_Start permanent;
104 | rewrite ^/bdi$ /html/elements/bdi permanent;
105 | rewrite ^/border-image$ /css/properties/border-image permanent;
106 | rewrite ^/canvas$ /html/elements/canvas permanent;
107 | rewrite ^/concepts/localisation$ /concepts/internationalization permanent;
108 | rewrite ^/concepts/PointerEvents$ /concepts/Pointer_Events permanent;
109 | rewrite ^/concepts/proprietary-ie-techniques$ /concepts/proprietary_internet_explorer_techniques permanent;
110 | rewrite ^/cross-fade$ /css/functions/cross-fade permanent;
111 | rewrite ^/css/atrules/-ms-viewport$ /css/atrules/viewport permanent;
112 | rewrite ^/css/cross-fade$ /css/functions/cross-fade permanent;
113 | rewrite ^/css/cssom/CSSRegionStyleRule$ /OLDcss/cssom/CSSRegionStyleRule permanent;
114 | rewrite ^/css/cssom/CSSRule/CSSRule$ /css/cssom/CSSRule permanent;
115 | rewrite ^/css/cssom/methods/removeImport$ /css/cssom/styleSheet/removeImport permanent;
116 | rewrite ^/css/cssom/methods/removeProperty$ /css/cssom/CSSStyleDeclaration/removeProperty permanent;
117 | rewrite ^/css/cssom/methods/removeRule$ /css/cssom/styleSheet/removeRule permanent;
118 | rewrite ^/css/cssom/methods/setProperty$ /css/cssom/CSSStyleDeclaration/setProperty permanent;
119 | rewrite ^/css/cssom/NamedFlow$ /apis/css-regions/NamedFlow permanent;
120 | rewrite ^/css/cssom/NamedFlow/firstEmptyRegionIndex$ /apis/css-regions/NamedFlow/firstEmptyRegionIndex permanent;
121 | rewrite ^/css/cssom/NamedFlow/getContent$ /apis/css-regions/NamedFlow/getContent permanent;
122 | rewrite ^/css/cssom/NamedFlow/getNamedFlows$ /apis/css-regions/NamedFlow/getNamedFlows permanent;
123 | rewrite ^/css/cssom/NamedFlow/getRegions$ /apis/css-regions/NamedFlow/getRegions permanent;
124 | rewrite ^/css/cssom/NamedFlow/getRegionsByContent$ /apis/css-regions/NamedFlow/getRegionsByContent permanent;
125 | rewrite ^/css/cssom/NamedFlow/name$ /apis/css-regions/NamedFlow/name permanent;
126 | rewrite ^/css/cssom/NamedFlow/overset$ /apis/css-regions/NamedFlow/overset permanent;
127 | rewrite ^/css/cssom/NamedFlow/regionlayoutupdate$ /apis/css-regions/NamedFlow/regionlayoutupdate permanent;
128 | rewrite ^/css/cssom/Region$ /apis/css-regions/Region permanent;
129 | rewrite ^/css/cssom/Region/getComputedRegionStyle$ /apis/css-regions/Region/getComputedRegionStyle permanent;
130 | rewrite ^/css/cssom/Region/getRegionFlowRanges$ /apis/css-regions/Region/getRegionFlowRanges permanent;
131 | rewrite ^/css/cssom/Region/regionOverset$ /apis/css-regions/Region/regionOverset permanent;
132 | rewrite ^/css/flexbox$ /css/properties/flex permanent;
133 | rewrite ^/css/functions/transition-timing-function$ /css/properties/transition-timing-function permanent;
134 | rewrite ^/css/linear-gradient$ /css/functions/linear-gradient permanent;
135 | rewrite ^/css/properties/-ms-block-progression$ /css/properties/block-progression permanent;
136 | rewrite ^/css/properties/-ms-box-align$ /css/properties/box-align permanent;
137 | rewrite ^/css/properties/-ms-box-direction$ /css/properties/box-direction permanent;
138 | rewrite ^/css/properties/-ms-box-flex$ /css/properties/box-flex permanent;
139 | rewrite ^/css/properties/-ms-box-line-progression$ /css/properties/box-line-progression permanent;
140 | rewrite ^/css/properties/-ms-box-lines$ /css/properties/box-lines permanent;
141 | rewrite ^/css/properties/-ms-box-ordinal-group$ /css/properties/box-ordinal-group permanent;
142 | rewrite ^/css/properties/-ms-box-pack$ /css/properties/box-pack permanent;
143 | rewrite ^/css/properties/-ms-content-zoom-factor$ /css/properties/msContentZoomFactor permanent;
144 | rewrite ^/css/properties/-ms-flex-align$ /css/properties/flex-align permanent;
145 | rewrite ^/css/properties/-ms-flex-item-align$ /css/properties/flex-item-align permanent;
146 | rewrite ^/css/properties/-ms-flex-line-pack$ /css/properties/flex-line-pack permanent;
147 | rewrite ^/css/properties/-ms-flex-order$ /css/properties/flex-order permanent;
148 | rewrite ^/css/properties/-ms-flex-pack$ /css/properties/justify-content permanent;
149 | rewrite ^/css/properties/-ms-linear-gradient$ /css/functions/linear-gradient permanent;
150 | rewrite ^/css/properties/animation/animation$ /css/properties/animation permanent;
151 | rewrite ^/css/properties/box-align$ /css/properties/align-items permanent;
152 | rewrite ^/css/properties/flex-align$ /css/properties/align-items permanent;
153 | rewrite ^/css/properties/flex-item-align$ /css/properties/align-self permanent;
154 | rewrite ^/css/properties/flex-line-pack$ /css/properties/align-content permanent;
155 | rewrite ^/css/properties/flex-order$ /css/properties/order permanent;
156 | rewrite ^/css/properties/flex-pack$ /css/properties/justify-content permanent;
157 | rewrite ^/css/properties/foo$ /css/properties/grid-row permanent;
158 | rewrite ^/css/properties/mask-border-image$ /css/properties/mask-border permanent;
159 | rewrite ^/css/properties/mask-box-image$ /css/properties/mask-border-image permanent;
160 | rewrite ^/css/properties/mask-box-image-outset$ /css/properties/mask-border-outset permanent;
161 | rewrite ^/css/properties/mask-box-image-repeat$ /css/properties/mask-border-repeat permanent;
162 | rewrite ^/css/properties/mask-box-image-slice$ /css/properties/mask-border-slice permanent;
163 | rewrite ^/css/properties/mask-box-image-source$ /css/properties/mask-border-source permanent;
164 | rewrite ^/css/properties/mask-box-image-width$ /css/properties/mask-border-width permanent;
165 | rewrite ^/css/properties/mask-source-type$ /css/properties/mask-mode permanent;
166 | rewrite ^/css/properties/ms-block-progression$ /css/properties/-ms-block-progression permanent;
167 | rewrite ^/css/properties/ms-box-align$ /css/properties/-ms-box-align permanent;
168 | rewrite ^/css/properties/ms-box-direction$ /css/properties/-ms-box-direction permanent;
169 | rewrite ^/css/properties/ms-box-flex$ /css/properties/-ms-box-flex permanent;
170 | rewrite ^/css/properties/ms-box-line-progression$ /css/properties/-ms-box-line-progression permanent;
171 | rewrite ^/css/properties/ms-box-lines$ /css/properties/-ms-box-lines permanent;
172 | rewrite ^/css/properties/ms-box-ordinal-group$ /css/properties/-ms-box-ordinal-group permanent;
173 | rewrite ^/css/properties/ms-box-orient$ /css/properties/-ms-box-orient permanent;
174 | rewrite ^/css/properties/ms-box-pack$ /css/properties/-ms-box-pack permanent;
175 | rewrite ^/css/properties/ms-content-zoom-chaining$ /css/properties/-ms-content-zoom-chaining permanent;
176 | rewrite ^/css/properties/ms-content-zoom-factor$ /css/properties/msContentZoomFactor permanent;
177 | rewrite ^/css/properties/ms-content-zoom-limit$ /css/properties/-ms-content-zoom-limit permanent;
178 | rewrite ^/css/properties/ms-content-zoom-limit-max$ /css/properties/-ms-content-zoom-limit-max permanent;
179 | rewrite ^/css/properties/ms-content-zoom-limit-min$ /css/properties/-ms-content-zoom-limit-min permanent;
180 | rewrite ^/css/properties/ms-content-zoom-snap$ /css/properties/-ms-content-zoom-snap permanent;
181 | rewrite ^/css/properties/ms-content-zoom-snap-points$ /css/properties/-ms-content-zoom-snap-points permanent;
182 | rewrite ^/css/properties/ms-content-zoom-snap-type$ /css/properties/-ms-content-zoom-snap-type permanent;
183 | rewrite ^/css/properties/ms-content-zooming$ /css/properties/-ms-content-zooming permanent;
184 | rewrite ^/css/properties/ms-flex$ /css/properties/-ms-flex permanent;
185 | rewrite ^/css/properties/ms-flex-align$ /css/properties/-ms-flex-align permanent;
186 | rewrite ^/css/properties/ms-flex-direction$ /css/properties/-ms-flex-direction permanent;
187 | rewrite ^/css/properties/ms-flex-flow$ /css/properties/-ms-flex-flow permanent;
188 | rewrite ^/css/properties/ms-flex-item-align$ /css/properties/-ms-flex-item-align permanent;
189 | rewrite ^/css/properties/ms-flex-line-pack$ /css/properties/-ms-flex-line-pack permanent;
190 | rewrite ^/css/properties/ms-flex-order$ /css/properties/-ms-flex-order permanent;
191 | rewrite ^/css/properties/ms-flex-pack$ /css/properties/-ms-flex-pack permanent;
192 | rewrite ^/css/properties/ms-flex-wrap$ /css/properties/-ms-flex-wrap permanent;
193 | rewrite ^/css/properties/ms-grid-column$ /css/properties/grid-column permanent;
194 | rewrite ^/css/properties/ms-grid-column-span$ /css/properties/grid-column-span permanent;
195 | rewrite ^/css/properties/ms-hyphenate-limit-lines$ /css/properties/hyphenate-limit-lines permanent;
196 | rewrite ^/css/properties/ms-hyphens$ /css/properties/hyphens permanent;
197 | rewrite ^/css/properties/ms-touch-action$ /css/properties/-ms-touch-action permanent;
198 | rewrite ^/css/properties/region-break-after$ /css/properties/break-after permanent;
199 | rewrite ^/css/properties/region-break-before$ /css/properties/break-before permanent;
200 | rewrite ^/css/properties/region-break-inside$ /css/properties/break-inside permanent;
201 | rewrite ^/css/properties/region-overflow$ /css/properties/region-fragment permanent;
202 | rewrite ^/css/properties/region-verflow$ /css/properties/region-overflow permanent;
203 | rewrite ^/css/radial-gradient$ /css/functions/radial-gradient permanent;
204 | rewrite ^/css/repeating-linear-gradient$ /css/functions/repeating-linear-gradient permanent;
205 | rewrite ^/css/repeating-radial-gradient$ /css/functions/repeating-radial-gradient permanent;
206 | rewrite ^/css/selectors/-ms-scrollbar-shadow-color$ /css/properties/-ms-scrollbar-shadow-color permanent;
207 | rewrite ^/css/selectors/border-image$ /css/properties/border-image permanent;
208 | rewrite ^/css/selectors/cursor$ /css/properties/cursor permanent;
209 | rewrite ^/css/selectors/outline$ /css/properties/outline permanent;
210 | rewrite ^/css/selectors/outline-color$ /css/properties/outline-color permanent;
211 | rewrite ^/css/selectors/outline-style$ /css/properties/outline-style permanent;
212 | rewrite ^/css/selectors/outline-width$ /css/properties/outline-width permanent;
213 | rewrite ^/css/selectors/user-select$ /css/properties/user-select permanent;
214 | rewrite ^/css/selectors/zoom$ /css/properties/zoom permanent;
215 | rewrite ^/css/text/word-spacing/word-spacing$ /css/properties/word-spacing permanent;
216 | rewrite ^/css/Training$ /css/tutorials permanent;
217 | rewrite ^/css/transforms$ /css/properties/transform permanent;
218 | rewrite ^/css/transforms/transform$ /css/properties/transform permanent;
219 | rewrite ^/css/tutorials/css-regions$ /tutorials/css-regions permanent;
220 | rewrite ^/css/units/color$ /css/data_types/color permanent;
221 | rewrite ^/css/units/text$ /css/data_types/text permanent;
222 | rewrite ^/data$ /html/elements/data permanent;
223 | rewrite ^/dom/events/DOMContentLoaded$ /dom/Event/DOMContentLoaded permanent;
224 | rewrite ^/dom/events/load$ /dom/Element/load permanent;
225 | rewrite ^/dom/images$ /dom/Image permanent;
226 | rewrite ^/dom/object/PointerEvent/pointercancel$ /dom/objects/PointerEvent/pointercancel permanent;
227 | rewrite ^/html/elements/applets$ /html/elements/applet permanent;
228 | rewrite ^/html/elements/comment$ /html/elements/html_comment_data-type permanent;
229 | rewrite ^/html/elements/dl-test$ /html/elements/dl permanent;
230 | rewrite ^/http$ /concepts/protocols/http permanent;
231 | rewrite ^/ImageCapture$ /apis/image_capture/ImageCapture permanent;
232 | rewrite ^/js/objects/parseFloat$ /javascript/functions/parseFloat permanent;
233 | rewrite ^/js/objects/parseInt$ /javascript/functions/parseInt permanent;
234 | rewrite ^/keygen$ /html/elements/keygen permanent;
235 | rewrite ^/mask-composite$ /css/properties/mask-composite permanent;
236 | rewrite ^/rb$ /html/elements/rb permanent;
237 | rewrite ^/rp$ /html/elements/rp permanent;
238 | rewrite ^/rtc$ /html/elements/rtc permanent;
239 | rewrite ^/svg/elements/feColorMix$ /svg/elements/feColorMatrix permanent;
240 | rewrite ^/svg/elements/feFuncGelement$ /svg/elements/feFuncG permanent;
241 | rewrite ^/svg/elements/patterrn$ /svg/elements/pattern permanent;
242 | rewrite ^/tutorial/JavaScript-Statements$ /tutorials/javascript_statements permanent;
243 | rewrite ^/tutorials/google-chrome-frame$ /tutorials/google_chrome_frame permanent;
244 | rewrite ^/tutorials/JavaScript-About$ /tutorials/javascript_about permanent;
245 | rewrite ^/tutorials/Mobile$ /tutorials/mobile_optimization_best_practices permanent;


--------------------------------------------------------------------------------
/reports/numbers.txt:
--------------------------------------------------------------------------------
1 | Numbers:
2 |   - "iterations": 5087
3 |   - "content pages": 4670
4 |   - "redirects": 404
5 |   - "translated": 92
6 |   - "not in a directory": 104
7 |   - "redirects for URL sanity": 1002
8 |   - "edits average": 7
9 |   - "edits median": 5


--------------------------------------------------------------------------------
/reports/translations.txt:
--------------------------------------------------------------------------------
 1 | apis/es
 2 | Beginners/es
 3 | Beginners/ja
 4 | Beginners/ko
 5 | Beginners/pt-br
 6 | Beginners/tr
 7 | Beginners/zh
 8 | concepts/accessibility/es
 9 | concepts/accessibility/ja
10 | concepts/es
11 | concepts/Internet and Web/How does the Internet Work/es
12 | concepts/Internet and Web/How does the Internet Work/ja
13 | concepts/Internet and Web/the history of the web/es
14 | concepts/Internet and Web/the history of the web/ja
15 | concepts/Internet and Web/the history of the web/tr
16 | concepts/Internet and Web/The Web Standards Model/ja
17 | concepts/web design/es
18 | css/es
19 | css/fr
20 | css/ko
21 | css/properties/font-size/ko
22 | css/properties/outline/sv
23 | css/tutorials/ko
24 | css/zh
25 | dom/es
26 | guides/css text styling fundamentals/ja
27 | guides/doctypes and markup styles/ja
28 | guides/getting started with css/ja
29 | guides/html5 form features/ja
30 | guides/html forms basics/ja
31 | guides/html links/ja
32 | guides/html lists/ja
33 | guides/html structural elements/ja
34 | guides/html tables/ja
35 | guides/html text/es
36 | guides/html text/ja
37 | guides/html validation/ja
38 | guides/images in html/ja
39 | guides/lesser-known semantic elements/ja
40 | guides/the basics of html/es
41 | guides/the basics of html/ja
42 | guides/the basics of html/ko
43 | guides/the basics of html/sv
44 | guides/the html head/ja
45 | html/elements/!DOCTYPE/ja
46 | html/elements/a/ja
47 | html/elements/acronym/ja
48 | html/elements/address/ja
49 | html/elements/applet/ja
50 | html/elements/area/ja
51 | html/elements/article/ja
52 | html/elements/aside/ja
53 | html/elements/audio/ja
54 | html/elements/b/ja
55 | html/elements/base/ja
56 | html/elements/basefont/ja
57 | html/elements/bdo/ja
58 | html/elements/bgSound/ja
59 | html/elements/big/ja
60 | html/elements/blockquote/ja
61 | html/elements/body/ja
62 | html/elements/br/ja
63 | html/elements/button/ja
64 | html/es
65 | html/ja
66 | javascript/ja
67 | Main Page/de
68 | Main Page/es
69 | Main Page/fr
70 | Main Page/ja
71 | Main Page/ko
72 | Main Page/nl
73 | Main Page/pt-br
74 | Main Page/tr
75 | Main Page/zh
76 | Main Page/zh-hant
77 | svg/es
78 | tutorials/box model/ja
79 | tutorials/creating and modifying html/ja
80 | tutorials/events in javascript/ja
81 | tutorials/getting your content online/ja
82 | tutorials/javascript functions/ja
83 | tutorials/layout fundamentals/ja
84 | tutorials/making css easier to read/ja
85 | tutorials/manipulating css with javascript/ja
86 | tutorials/media queries/ja
87 | tutorials/objects in javascript/ja
88 | tutorials/traversing the dom/ja
89 | tutorials/using css background images/ja
90 | tutorials/using selectors/ja
91 | tutorials/Web Education Intro/tr
92 | tutorials/your first look at javascript/ja


--------------------------------------------------------------------------------
/reports/url_parts_variants.txt:
--------------------------------------------------------------------------------
 1 | All words that exists in an URL, and the different ways they are written (needs harmonizing!):
 2 |  - accept, Accept
 3 |  - accessibility_basics, Accessibility_basics
 4 |  - accessibility_testing, Accessibility_testing
 5 |  - animatable, Animatable
 6 |  - animation, Animation
 7 |  - canvas_tutorial, Canvas_tutorial
 8 |  - Connection, connection
 9 |  - cookie, Cookie
10 |  - css, CSS
11 |  - DataTransfer, dataTransfer
12 |  - date, Date
13 |  - doctype, DOCTYPE
14 |  - Document, document
15 |  - DOMTokenList, DomTokenList
16 |  - Element, element
17 |  - Error, error
18 |  - Event, event
19 |  - file, File
20 |  - filesystem, FileSystem
21 |  - Floats_and_clearing, floats_and_clearing
22 |  - formTarget, formtarget
23 |  - Function, function
24 |  - gamepad, Gamepad
25 |  - geolocation, Geolocation
26 |  - Getting_Your_Content_Online, getting_your_content_online
27 |  - Global, global
28 |  - History, history
29 |  - How_does_the_Internet_Work, How_does_the_Internet_work
30 |  - ID, id
31 |  - Image, image
32 |  - Implementation, implementation
33 |  - indexeddb, indexedDB
34 |  - ISO, iso
35 |  - javascript, JavaScript
36 |  - JavaScript_for_mobile, javascript_for_mobile
37 |  - json, JSON
38 |  - link, Link
39 |  - Location, location
40 |  - math, Math
41 |  - moveEnd, moveend
42 |  - moveStart, movestart
43 |  - Navigator, navigator
44 |  - Node, node
45 |  - number, Number
46 |  - oauth, OAuth
47 |  - object, Object
48 |  - online, onLine
49 |  - option, Option
50 |  - Performance, performance
51 |  - PhotoSettingsOptions, photoSettingsOptions
52 |  - pointerevents, PointerEvents
53 |  - position, Position
54 |  - q, Q
55 |  - Range, range
56 |  - readOnly, readonly
57 |  - Region, region
58 |  - selection, Selection
59 |  - selectors, Selectors
60 |  - storage, Storage
61 |  - String, string
62 |  - StyleMedia, styleMedia
63 |  - styleSheet, stylesheet
64 |  - Styling_lists_and_links, styling_lists_and_links
65 |  - Styling_tables, styling_tables
66 |  - text, Text
67 |  - tfoot, tFoot
68 |  - the_basics_of_html, The_basics_of_HTML
69 |  - The_History_of_the_Web, The_history_of_the_Web, the_history_of_the_web
70 |  - thead, tHead
71 |  - timeStamp, timestamp
72 |  - Unicode, unicode
73 |  - URL, url
74 |  - websocket, WebSocket
75 |  - what_does_a_good_web_page_need, What_does_a_good_web_page_need


--------------------------------------------------------------------------------
/src/WebPlatform/Importer/Commands/AbstractImporterCommand.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | /**
  4 |  * WebPlatform MediaWiki Conversion workbench.
  5 |  */
  6 | namespace WebPlatform\Importer\Commands;
  7 | 
  8 | use Symfony\Component\Console\Output\OutputInterface;
  9 | use Symfony\Component\Console\Input\InputInterface;
 10 | use Symfony\Component\Console\Input\InputOption;
 11 | use Symfony\Component\Console\Command\Command;
 12 | use Symfony\Component\Filesystem\Filesystem;
 13 | use WebPlatform\ContentConverter\Model\MediaWikiApiParseActionResponse;
 14 | use WebPlatform\ContentConverter\Model\MediaWikiContributor;
 15 | use WebPlatform\ContentConverter\Helpers\YamlHelper;
 16 | use WebPlatform\Importer\Helpers\MediaWikiHelper;
 17 | use WebPlatform\Importer\Model\MediaWikiDocument;
 18 | use Prewk\XmlStringStreamer;
 19 | use RuntimeException;
 20 | use Exception;
 21 | 
 22 | /**
 23 |  * Common importer command methods.
 24 |  *
 25 |  * @author Renoir Boulanger <hello@renoirboulanger.com>
 26 |  */
 27 | abstract class AbstractImporterCommand extends Command
 28 | {
 29 |     /** @var WebPlatform\ContentConverter\Helpers\ApiRequestHelperInterface Conversion helper instance */
 30 |     protected $apiHelper;
 31 | 
 32 |     /** @var WebPlatform\ContentConverter\Helpers\YamlHelper Yaml Helper instance */
 33 |     protected $yaml;
 34 | 
 35 |     /** @var Symfony\Component\Filesystem\Filesystem Symfony Filesystem handler */
 36 |     protected $filesystem;
 37 | 
 38 |     protected $users = [];
 39 | 
 40 |     protected $missed = [];
 41 | 
 42 |     protected function configure()
 43 |     {
 44 |         $helpText = 'What file to read from. Argument is relative from data/ ';
 45 |         $helpText .= 'folder from this directory (e.g. dumps/wpd_full.xml, would read from data/dumps/foo.xml)';
 46 | 
 47 |         $this->addOption('xml-source', '', InputOption::VALUE_OPTIONAL, $helpText, 'dumps/main_full.xml');
 48 |     }
 49 | 
 50 |     protected function execute(InputInterface $input, OutputInterface $output)
 51 |     {
 52 |         $this->yaml = new YamlHelper();
 53 |         $this->filesystem = new Filesystem();
 54 |         $this->initCookieString();
 55 |     }
 56 | 
 57 |     /**
 58 |      * Source XML file read stream factory.
 59 |      *
 60 |      * @param string $xmlSourcePath path where the XML file should be read from, relative to DATA_DIR
 61 |      *
 62 |      * @return Prewk\XmlStringStreamer A XML String stream
 63 |      */
 64 |     protected function sourceXmlStreamFactory($xmlSourcePath)
 65 |     {
 66 |         $file = realpath($xmlSourcePath);
 67 |         if ($file === false) {
 68 |             $message = 'Cannot run script, source XML file at "%s" could not be found';
 69 |             throw new RuntimeException(sprintf($message, $xmlSourcePath));
 70 |         }
 71 | 
 72 |         return XmlStringStreamer::createStringWalkerParser($file);
 73 |     }
 74 | 
 75 |     /**
 76 |      * Load Authors.
 77 |      *
 78 |      * Author array of MediaWikiContributor objects with $this->users[$uid],
 79 |      * where $uid is MediaWiki user_id.
 80 |      *
 81 |      * You may have to increase memory_limit value,
 82 |      * but we’ll load this only once.
 83 |      **/
 84 |     protected function loadUsers($usersSourcePath)
 85 |     {
 86 |         $file = realpath($usersSourcePath);
 87 |         if ($file === false) {
 88 |             $message = 'Cannot run script, source users file at "%s" could not be found';
 89 |             throw new RuntimeException(sprintf($message, $usersSourcePath));
 90 |         }
 91 | 
 92 |         $users_loop = json_decode(file_get_contents($file), 1);
 93 | 
 94 |         foreach ($users_loop as &$u) {
 95 |             $uid = (int) $u['user_id'];
 96 |             $this->users[$uid] = new MediaWikiContributor($u);
 97 |             unset($u); // Dont fill too much memory, if that helps.
 98 |         }
 99 |     }
100 | 
101 |     private function load($loadFilePath)
102 |     {
103 |         if (realpath($loadFilePath) === false) {
104 |             $message = 'Could not find file at %s';
105 |             throw new RuntimeException(sprintf($message, $loadFilePath));
106 |         }
107 | 
108 |         return file_get_contents($loadFilePath);
109 |     }
110 | 
111 |     protected function loadMissed($missedNormalizedTitlesSource)
112 |     {
113 |         if (realpath($missedNormalizedTitlesSource) === false) {
114 |             $message = 'Could not find missed file at %s';
115 |             throw new RuntimeException(sprintf($message, $missedNormalizedTitlesSource));
116 |         }
117 | 
118 |         $missedFileContents = file_get_contents($missedNormalizedTitlesSource);
119 | 
120 |         try {
121 |             $missed = $this->yaml->unserialize($missedFileContents);
122 |         } catch (Exception $e) {
123 |             $message = 'Could not get file %s contents to be parsed as YAML. Is it in YAML format?';
124 |             throw new Exception(sprintf($message, $missedNormalizedTitlesSource), null, $e);
125 |         }
126 | 
127 |         if (!isset($missed['missed'])) {
128 |             throw new Exception('Please ensure missed.yml has a list of titles under a "missed:" top level key');
129 |         }
130 | 
131 |         $this->missed = $missed['missed'];
132 |     }
133 | 
134 |     protected function initMediaWikiHelper($actionName)
135 |     {
136 |         /**
137 |          * Your MediaWiki API URL
138 |          *
139 |          * https://www.mediawiki.org/wiki/API:Data_formats
140 |          * https://www.mediawiki.org/wiki/API:Parsing_wikitext
141 |          **/
142 |         $apiUrl = getenv('MEDIAWIKI_API_ORIGIN').'/w/api.php?action=';
143 | 
144 |         switch ($actionName) {
145 |             case 'parse':
146 |                 $apiUrl .= 'parse&pst=1&utf8=&prop=indicators|text|templates|categories|links|displaytitle';
147 |                 $apiUrl .= '&disabletoc=true&disablepp=true&disableeditsection=true&preview=true&format=json&page=';
148 |                 break;
149 |             case 'purge':
150 |                 $apiUrl .= 'purge&title=';
151 |                 break;
152 |         }
153 |         // Let’s use the Converter makeRequest() helper.
154 |         $this->apiHelper = new MediaWikiHelper($apiUrl);
155 |     }
156 | 
157 |     protected function apiRequest($title)
158 |     {
159 |         return $this->apiHelper->makeRequest($title, $this->cookieString);
160 |     }
161 | 
162 |     protected function documentPurge(MediaWikiDocument $wikiDocument)
163 |     {
164 |         $id = $wikiDocument->getId();
165 | 
166 |         $cacheDir = sprintf('%s/.cache', GIT_OUTPUT_DIR);
167 |         $cacheFile = sprintf('%s/%d.json', $cacheDir, $id);
168 | 
169 |         if ($this->filesystem->exists($cacheFile) === true) {
170 |             $this->filesystem->remove($cacheFile);
171 |         }
172 |     }
173 | 
174 |     protected function documentFetch(MediaWikiDocument $wikiDocument)
175 |     {
176 |         $id = $wikiDocument->getId();
177 |         $title = $wikiDocument->getTitle();
178 | 
179 |         $cacheDir = sprintf('%s/.cache', GIT_OUTPUT_DIR);
180 |         $cacheFile = sprintf('%s/%d.json', $cacheDir, $id);
181 | 
182 |         if ($this->filesystem->exists($cacheFile) === false) {
183 |             if ($this->filesystem->exists($cacheDir) === false) {
184 |                 $this->filesystem->mkdir($cacheDir);
185 |             }
186 | 
187 |             $obj = $this->apiHelper->retrieve($title, $this->cookieString);
188 |             $this->filesystem->dumpFile($cacheFile, json_encode($obj));
189 |         } else {
190 |             $contents = file_get_contents($cacheFile);
191 |             $obj = new MediaWikiApiParseActionResponse($contents);
192 |             $obj->toggleFromCache();
193 |         }
194 | 
195 |         return $obj;
196 |     }
197 | 
198 |     private function initCookieString()
199 |     {
200 |         if (
201 |             isset($_ENV['MEDIAWIKI_USERID']) &&
202 |             isset($_ENV['MEDIAWIKI_USERNAME']) &&
203 |             isset($_ENV['MEDIAWIKI_SESSION']) &&
204 |             isset($_ENV['MEDIAWIKI_WIKINAME'])
205 |         ) {
206 |             $cookies['UserID'] = getenv('MEDIAWIKI_USERID');
207 |             $cookies['UserName'] = getenv('MEDIAWIKI_USERNAME');
208 |             $cookies['_session'] = getenv('MEDIAWIKI_SESSION');
209 |             $cookieString = str_replace(
210 |                 ['":"', '","', '{"', '"}'],
211 |                 ['=', ';'.getenv('MEDIAWIKI_WIKINAME'), getenv('MEDIAWIKI_WIKINAME'), ';'],
212 |                 json_encode($cookies)
213 |             );
214 |         } else {
215 |             $cookieString = null;
216 |         }
217 | 
218 |         $this->cookieString = $cookieString;
219 |     }
220 | }
221 | 


--------------------------------------------------------------------------------
/src/WebPlatform/Importer/Commands/CacheWarmerCommand.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | /**
  4 |  * WebPlatform MediaWiki Conversion workbench.
  5 |  */
  6 | namespace WebPlatform\Importer\Commands;
  7 | 
  8 | use Symfony\Component\Console\Input\InputInterface;
  9 | use Symfony\Component\Console\Output\OutputInterface;
 10 | use Symfony\Component\Console\Input\InputOption;
 11 | use WebPlatform\Importer\Model\MediaWikiDocument;
 12 | use SimpleXMLElement;
 13 | use Exception;
 14 | 
 15 | /**
 16 |  * Pre-fetch MediaWiki API output to speed up mediawiki:run 3rd pass.
 17 |  *
 18 |  * @author Renoir Boulanger <hello@renoirboulanger.com>
 19 |  */
 20 | class CacheWarmerCommand extends AbstractImporterCommand
 21 | {
 22 |     protected function configure()
 23 |     {
 24 |         $description = <<<DESCR
 25 | 
 26 |                 Walk through MediaWiki dumpBackup XML file, run each
 27 |                 document and make an API call to an instance we use
 28 |                 to migrate content out.
 29 | 
 30 |                 This script is there to speed up `mediawiki:run` at 3rd pass
 31 |                 so that it doesn’t need to make HTTP requests and work
 32 |                 only with local files.
 33 | 
 34 | DESCR;
 35 |         $this
 36 |             ->setName('mediawiki:cache-warmer')
 37 |             ->setDescription($description)
 38 |             ->setDefinition(
 39 |                 [
 40 |                     new InputOption('missed', '', InputOption::VALUE_NONE, 'Give XML node indexes of missed conversion so we can run through only them'),
 41 |                     new InputOption('max-pages', '', InputOption::VALUE_OPTIONAL, 'Do not make full run, limit to a maximum of pages', 0),
 42 |                     new InputOption('resume-at', '', InputOption::VALUE_OPTIONAL, 'Resume run at a specific XML document index number ', 0),
 43 |                 ]
 44 |             );
 45 | 
 46 |         parent::configure();
 47 |     }
 48 | 
 49 |     protected function execute(InputInterface $input, OutputInterface $output)
 50 |     {
 51 |         parent::execute($input, $output);
 52 | 
 53 |         $this->initMediaWikiHelper('parse');
 54 | 
 55 |         $xmlSource = $input->getOption('xml-source');
 56 |         $listMissed = $input->getOption('missed');
 57 | 
 58 |         $maxHops = (int) $input->getOption('max-pages');   // Maximum number of pages we go through
 59 | 
 60 |         $resumeAt = (int) $input->getOption('resume-at');
 61 | 
 62 |         $ids = [];
 63 | 
 64 |         if ($listMissed === true) {
 65 |             $this->loadMissed(DATA_DIR.'/missed.yml');
 66 |             $total = count($this->missed);
 67 |         }
 68 | 
 69 |         $output->writeln('Warming cache:');
 70 | 
 71 |         $streamer = $this->sourceXmlStreamFactory(DATA_DIR.'/'.$xmlSource);
 72 |         $counter = 0;
 73 |         while ($node = $streamer->getNode()) {
 74 |             $pageNode = new SimpleXMLElement($node);
 75 |             if (isset($pageNode->title)) {
 76 |                 ++$counter;
 77 |                 if ($maxHops > 0 && $maxHops === $counter - 1) {
 78 |                     $output->writeln(sprintf(PHP_EOL.'Reached desired maximum of %d documents', $maxHops).PHP_EOL);
 79 |                     break;
 80 |                 }
 81 | 
 82 |                 /*
 83 |                  * Handle interruption by telling where to resume work.
 84 |                  *
 85 |                  * This is useful if job stopped and you want to resume work back at a specific point.
 86 |                  */
 87 |                 if ($counter < $resumeAt) {
 88 |                     continue;
 89 |                 }
 90 | 
 91 |                 $wikiDocument = new MediaWikiDocument($pageNode);
 92 |                 $previous_location = (isset($normalized_location))?$normalized_location:'';
 93 |                 $normalized_location = $wikiDocument->getTitle();
 94 |                 $id = $wikiDocument->getId();
 95 | 
 96 |                 /**
 97 |                  * Do not make API requests to a wiki page that is known
 98 |                  * to be deleted or has a redirect.
 99 |                  */
100 |                 if ($wikiDocument->hasRedirect() === true) {
101 |                     continue;
102 |                 }
103 | 
104 |                 /**
105 |                  * This is when we want only to pass through files described in data/missed.yml
106 |                  *
107 |                  * Much useful if you want to make slow API requests and not run the import again.
108 |                  */
109 |                 if ($listMissed === true && !in_array($normalized_location, $this->missed)) {
110 |                     continue;
111 |                 }
112 | 
113 |                 /**
114 |                  * If we went thus far, we got a match. But what if we have none left. Just quit it!
115 |                  */
116 |                 if ($listMissed === true && --$total < 1) {
117 |                     break;
118 |                 }
119 | 
120 |                 if (in_array($id, array_keys($ids))) {
121 |                     $text = 'We got an unexpected situation, two wiki pages has the same id. The wiki page "%s" with id %d, has same as "%s"';
122 |                     throw new Exception(sprintf($text, $previous_location, $id, $normalized_location));
123 |                 }
124 | 
125 |                 $ids[$id] = $normalized_location;
126 | 
127 |                 $respObj = $this->documentFetch($wikiDocument);
128 |                 $isCachedAppend = ($respObj->isFromCache())?null:' (new)';
129 |                 $output->writeln(sprintf('  - %d: %s%s', $id, $normalized_location, $isCachedAppend));
130 | 
131 | 
132 |             }
133 |         }
134 |     }
135 | }
136 | 


--------------------------------------------------------------------------------
/src/WebPlatform/Importer/Commands/RefreshPagesCommand.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | /**
  4 |  * WebPlatform MediaWiki Conversion workbench.
  5 |  */
  6 | namespace WebPlatform\Importer\Commands;
  7 | 
  8 | use Symfony\Component\Console\Input\InputInterface;
  9 | use Symfony\Component\Console\Output\OutputInterface;
 10 | use Symfony\Component\Console\Input\InputOption;
 11 | use WebPlatform\Importer\Model\MediaWikiDocument;
 12 | use SimpleXMLElement;
 13 | use Exception;
 14 | 
 15 | /**
 16 |  * Send ?action=purge requests to MediaWiki.
 17 |  *
 18 |  * Purpose of this script is to mimick an authenticated user
 19 |  * to go to a page with ?action=purge to make MediaWiki clear the
 20 |  * cached generated HTML from Memcached, or what MediaWiki handles with
 21 |  * for $wgMainCacheType internally.
 22 |  *
 23 |  * I’m aware there is an API way to make requests, but this is roughly a
 24 |  * work around so we don’t have to manually open web browser tabs and click
 25 |  * "refresh" to reload cache.
 26 |  *
 27 |  * @author Renoir Boulanger <hello@renoirboulanger.com>
 28 |  */
 29 | class RefreshPagesCommand extends AbstractImporterCommand
 30 | {
 31 |     /** @var WebPlatform\ContentConverter\Converter\ConverterInterface Converter instance */
 32 |     protected $converter;
 33 | 
 34 |     protected function configure()
 35 |     {
 36 |         $description = <<<DESCR
 37 | 
 38 |                 You went through `mediawiki:run` pass 1,2,3 then realized that
 39 |                 you needed to edit pages, and now you need to clear MediaWiki cache?
 40 | 
 41 |                 Problem is that there are too many pages to go through?
 42 | 
 43 |                 That’s what this does.
 44 | 
 45 |                 This is nothing fancy, let’s emulate we’re a browser and ask as
 46 |                 an authenticated user to "refresh" the page from standard MediaWiki
 47 |                 front controller  (i.e. NOT /w/api.php).
 48 | 
 49 |                 To use:
 50 | 
 51 |                     - Login to your wiki
 52 |                     - Go to another page on the wiki while logged in
 53 |                     - In developer tools, get a to MediaWiki (e.g. /wiki/Main_Page)
 54 |                     - Get the value of cookies that ends with (e.g. wpwikiUserID, provided \$wgDBname is set to "wpwiki"):
 55 |                        - UserID
 56 |                        - UserName
 57 |                        - _session
 58 |                     - Paste the values in `.env`
 59 |                     - Use like described in `mediawiki:run`, at 3rd pass
 60 | 
 61 | DESCR;
 62 |         $this
 63 |             ->setName('mediawiki:refresh-pages')
 64 |             ->setDescription($description)
 65 |             ->setDefinition(
 66 |                 [
 67 |                     new InputOption('missed', '', InputOption::VALUE_NONE, 'Give XML node indexes of missed conversion so we can run through only them'),
 68 |                     new InputOption('max-pages', '', InputOption::VALUE_OPTIONAL, 'Do not make full run, limit to a maximum of pages', 0),
 69 |                     new InputOption('resume-at', '', InputOption::VALUE_OPTIONAL, 'Resume run at a specific XML document index number ', 0),
 70 |                 ]
 71 |             );
 72 | 
 73 |         parent::configure();
 74 |     }
 75 | 
 76 |     protected function execute(InputInterface $input, OutputInterface $output)
 77 |     {
 78 |         parent::execute($input, $output);
 79 | 
 80 |         $this->initMediaWikiHelper('purge');
 81 | 
 82 |         $xmlSource = $input->getOption('xml-source');
 83 |         $listMissed = $input->getOption('missed');
 84 | 
 85 |         $maxHops = (int) $input->getOption('max-pages');   // Maximum number of pages we go through
 86 | 
 87 |         $resumeAt = (int) $input->getOption('resume-at');
 88 | 
 89 |         $this->loadMissed(DATA_DIR.'/missed.yml');
 90 | 
 91 |         $output->writeln(sprintf('Sending purge to %s:', $this->apiHelper->getHelperEndpoint()));
 92 | 
 93 |         $streamer = $this->sourceXmlStreamFactory(DATA_DIR.'/'.$xmlSource);
 94 |         $counter = 0;
 95 |         while ($node = $streamer->getNode()) {
 96 |             $pageNode = new SimpleXMLElement($node);
 97 |             if (isset($pageNode->title)) {
 98 |                 ++$counter;
 99 |                 if ($maxHops > 0 && $maxHops === $counter - 1) {
100 |                     $output->writeln(sprintf(PHP_EOL.'Reached desired maximum of %d documents', $maxHops).PHP_EOL);
101 |                     break;
102 |                 }
103 | 
104 |                 $wikiDocument = new MediaWikiDocument($pageNode);
105 |                 $normalized_location = $wikiDocument->getName();
106 |                 $title = $wikiDocument->getTitle();
107 |                 $id = $wikiDocument->getId();
108 | 
109 |                 /**
110 |                  * Handle interruption by telling where to resume work.
111 |                  *
112 |                  * This is useful if job stopped and you want to resume work back at a specific point.
113 |                  */
114 |                 if ($counter < $resumeAt) {
115 |                     continue;
116 |                 }
117 | 
118 |                 /**
119 |                  * This is when we want only to pass through files described in data/missed.yml
120 |                  *
121 |                  * Much useful if you want to make slow API requests and not run the import again.
122 |                  */
123 |                 if ($listMissed === true && !in_array($normalized_location, $this->missed)) {
124 |                     continue;
125 |                 }
126 | 
127 |                 $this->documentPurge($wikiDocument);
128 | 
129 |                 try {
130 |                     $purgeCall = $this->apiRequest($title);
131 |                 } catch (Exception $e) {
132 |                     $message = 'Had issue with attempt to refresh page from MediaWiki for %s';
133 |                     throw new Exception(sprintf($message, $title), 0, $e);
134 |                 }
135 | 
136 | 
137 |                 if (empty($purgeCall)) {
138 |                     $message = 'Refresh call did not work, we expected a HTML and got nothing, check at %s%s gives from a web browser';
139 |                     throw new Exception(sprintf($message, $this->apiHelper->getHelperEndpoint(), $title));
140 |                 }
141 | 
142 |                 $output->writeln(sprintf(' - %d: %s', $id, $title));
143 |             }
144 |         }
145 |     }
146 | }
147 | 


--------------------------------------------------------------------------------
/src/WebPlatform/Importer/Commands/RunCommand.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | /**
  4 |  * WebPlatform MediaWiki Conversion workbench.
  5 |  */
  6 | namespace WebPlatform\Importer\Commands;
  7 | 
  8 | use Symfony\Component\Console\Input\InputInterface;
  9 | use Symfony\Component\Console\Output\OutputInterface;
 10 | use Symfony\Component\Console\Input\InputOption;
 11 | use Symfony\Component\Console\Input\InputArgument;
 12 | use Bit3\GitPhp\GitException;
 13 | use WebPlatform\ContentConverter\Persistency\GitCommitFileRevision;
 14 | use WebPlatform\Importer\Model\MediaWikiApiParseActionResponse;
 15 | use WebPlatform\Importer\Converter\HtmlToMarkdown;
 16 | use WebPlatform\Importer\Model\MediaWikiDocument;
 17 | use WebPlatform\Importer\GitPhp\GitRepository;
 18 | use WebPlatform\Importer\Filter\TitleFilter;
 19 | use WebPlatform\Importer\Model\HtmlRevision;
 20 | use SplDoublyLinkedList;
 21 | use SimpleXMLElement;
 22 | use DomainException;
 23 | use Exception;
 24 | 
 25 | /**
 26 |  * Read and create a summary from a MediaWiki dumpBackup XML file.
 27 |  *
 28 |  * @author Renoir Boulanger <hello@renoirboulanger.com>
 29 |  */
 30 | class RunCommand extends AbstractImporterCommand
 31 | {
 32 |     /** @var WebPlatform\ContentConverter\Converter\ConverterInterface Converter instance */
 33 |     protected $converter;
 34 | 
 35 |     /** @var Bit3\GitPhp\GitRepository Git Repository handler */
 36 |     protected $git;
 37 | 
 38 |     protected function configure()
 39 |     {
 40 |         $description = <<<DESCR
 41 | 
 42 |                 Walk through MediaWiki dumpBackup XML file and run through revisions
 43 |                 to convert them into static files.
 44 | 
 45 |                 Script is designed to run in three passes that has to be run in
 46 |                 this order.
 47 | 
 48 |                 1.) Handle deleted pages
 49 | 
 50 |                     When a Wiki page is moved, MediaWiki allows to leave a redirect behind.
 51 |                     The objective of this pass is to put the former content underneath all history
 52 |                     such that this pass leaves an empty output directory but with all the deleted
 53 |                     file history kept.
 54 | 
 55 | 
 56 |                 2.) Handle pages that weren’t deleted in history
 57 | 
 58 |                     Write history on top of deleted content. That way we won’t get conflicts between
 59 |                     content that got deleted from still current content.
 60 | 
 61 |                     Beware; This command can take MORE than an HOUR to complete.
 62 | 
 63 | 
 64 |                 3.) Convert content
 65 | 
 66 |                     Loop through ALL documents that still has content, take latest revision and pass it through
 67 |                     a converter.
 68 | 
 69 | DESCR;
 70 |         $this
 71 |             ->setName('mediawiki:run')
 72 |             ->setDescription($description)
 73 |             ->setDefinition(
 74 |                 [
 75 |                     new InputArgument('pass', InputArgument::REQUIRED, 'The pass number: 1,2,3', null),
 76 |                     new InputOption('missed', '', InputOption::VALUE_NONE, 'Give XML node indexes of missed conversion so we can run a 3rd pass only for them'),
 77 |                     new InputOption('max-revs', '', InputOption::VALUE_OPTIONAL, 'Do not run full import, limit it to maximum of revisions per page ', 0),
 78 |                     new InputOption('max-pages', '', InputOption::VALUE_OPTIONAL, 'Do not run  full import, limit to a maximum of pages', 0),
 79 |                     new InputOption('namespace-prefix', '', InputOption::VALUE_OPTIONAL, 'If not against main MediaWiki namespace, set prefix (e.g. Meta) so we can create a git repo with all contents on root so that we can use export as a submodule.', false),
 80 |                     new InputOption('resume-at', '', InputOption::VALUE_OPTIONAL, 'Resume run at a specific XML document index number ', 0),
 81 |                     new InputOption('only-assets', '', InputOption::VALUE_NONE, '3rd pass specific. Skip document conversion, git add only assets that are refered in documents'),
 82 |                 ]
 83 |             );
 84 | 
 85 |         parent::configure();
 86 |     }
 87 | 
 88 |     protected function execute(InputInterface $input, OutputInterface $output)
 89 |     {
 90 |         parent::execute($input, $output);
 91 | 
 92 |         $passNbr = (int) $input->getArgument('pass');
 93 | 
 94 |         $xmlSource = $input->getOption('xml-source');
 95 |         $listMissed = $input->getOption('missed');
 96 | 
 97 |         $maxHops = (int) $input->getOption('max-pages');   // Maximum number of pages we go through
 98 |         $revMaxHops = (int) $input->getOption('max-revs'); // Maximum number of revisions per page we go through
 99 |         $namespacePrefix = $input->getOption('namespace-prefix');
100 | 
101 |         $resumeAt = (int) $input->getOption('resume-at');
102 | 
103 |         $onlyAssets = $input->getOption('only-assets');
104 | 
105 |         $redirects = [];
106 |         $pages = [];
107 | 
108 |         if ($listMissed === true && $passNbr === 3) {
109 |             $this->loadMissed(DATA_DIR.'/missed.yml');
110 |         } elseif ($listMissed === true && $passNbr !== 3) {
111 |             throw new DomainException('Missed option is only supported at 3rd pass');
112 |         }
113 | 
114 |         if ($onlyAssets === true && $passNbr !== 3) {
115 |             throw new DomainException('only-assets option is only useful at 3rd pass');
116 |         }
117 | 
118 |         $repoInitialized = (realpath(GIT_OUTPUT_DIR.'/.git') === false) ? false : true;
119 |         if ($this->filesystem->exists(GIT_OUTPUT_DIR) === false) {
120 |             $this->filesystem->mkdir(GIT_OUTPUT_DIR);
121 |         }
122 |         $this->git = new GitRepository(realpath(GIT_OUTPUT_DIR));
123 |         if ($repoInitialized === false) {
124 |             $this->git->init()->execute();
125 |         }
126 | 
127 |         if ($passNbr === 3) {
128 |             // We are at conversion pass, instantiate our Converter!
129 |             // instanceof WebPlatform\ContentConverter\Converter\ConverterInterface
130 |             $this->converter = new HtmlToMarkdown();
131 |             $this->initMediaWikiHelper('parse');
132 |         } else {
133 |             $this->loadUsers(DATA_DIR.'/users.json');
134 |         }
135 | 
136 | 
137 |         $this->titleFilter = new TitleFilter();
138 | 
139 |         $streamer = $this->sourceXmlStreamFactory(DATA_DIR.'/'.$xmlSource);
140 |         $counter = 0;
141 |         while ($node = $streamer->getNode()) {
142 |             $pageNode = new SimpleXMLElement($node);
143 |             if (isset($pageNode->title)) {
144 |                 ++$counter;
145 |                 if ($maxHops > 0 && $maxHops === $counter - 1) {
146 |                     $output->writeln(sprintf('Reached desired maximum of %d documents', $maxHops).PHP_EOL);
147 |                     break;
148 |                 }
149 | 
150 |                 /*
151 |                  * Handle interruption by telling where to resume work.
152 |                  *
153 |                  * This is useful if job stopped and you want to resume work back at a specific point.
154 |                  */
155 |                 if ($counter < $resumeAt) {
156 |                     continue;
157 |                 }
158 | 
159 |                 $wikiDocument = new MediaWikiDocument($pageNode);
160 |                 $persistable = new GitCommitFileRevision($wikiDocument, 'out/', '.md');
161 | 
162 |                 $title = $wikiDocument->getTitle();
163 |                 $normalized_location = $wikiDocument->getName();
164 |                 $file_path = $this->titleFilter->filter($persistable->getName());
165 |                 $file_path = ($namespacePrefix === false) ? $file_path : str_replace(sprintf('%s/', $namespacePrefix), '', $file_path);
166 |                 $redirect_to = $this->titleFilter->filter($wikiDocument->getRedirect()); // False if not a redirect, string if it is
167 | 
168 |                 $language_code = $wikiDocument->getLanguageCode();
169 |                 $language_name = $wikiDocument->getLanguageName();
170 |                 $revs = $wikiDocument->getRevisions()->count();
171 |                 $revList = $wikiDocument->getRevisions();
172 |                 $revLast = $wikiDocument->getLatest();
173 | 
174 |                 /**
175 |                  * This is when we want only to pass through files described in data/missed.yml
176 |                  *
177 |                  * Much useful if you want to make slow API requests and not run the import again.
178 |                  */
179 |                 if ($listMissed === true && !in_array($normalized_location, $this->missed)) {
180 |                     continue;
181 |                 }
182 | 
183 |                 $output->writeln(sprintf('"%s":', $title));
184 |                 $output->writeln(sprintf('  id: %d', $wikiDocument->getId()));
185 |                 $output->writeln(sprintf('  index: %d', $counter));
186 |                 $output->writeln(sprintf('  normalized: %s', $normalized_location));
187 |                 $output->writeln(sprintf('  file: %s', $file_path));
188 | 
189 |                 if ($wikiDocument->isTranslation() === true) {
190 |                     $output->writeln(sprintf('  lang: %s (%s)', $language_code, $language_name));
191 |                 }
192 | 
193 |                 if ($wikiDocument->hasRedirect() === true) {
194 |                     $output->writeln(sprintf('  redirect_to: %s', $redirect_to));
195 |                 }
196 | 
197 |                 /*
198 |                  * Merge deleted content history under current content.
199 |                  *
200 |                  * 1st pass: Only those with redirects (i.e. deleted pages). Should leave an empty out/ directory!
201 |                  * 2nd pass: Only those without redirects (i.e. current content).
202 |                  * 3nd pass: Only for those without redirects, they are going to get the latest version passed through the convertor
203 |                  */
204 |                 if ($wikiDocument->hasRedirect() === false && $passNbr === 1) {
205 |                     // Skip all NON redirects for pass 1
206 |                     $output->writeln(sprintf('  skip: Document %s WITHOUT redirect, at pass 1 (handling redirects)', $title).PHP_EOL.PHP_EOL);
207 |                     continue;
208 |                 } elseif ($wikiDocument->hasRedirect() && $passNbr === 2) {
209 |                     // Skip all redirects for pass 2
210 |                     $output->writeln(sprintf('  skip: Document %s WITH redirect, at pass 2 (handling non redirects)', $title).PHP_EOL.PHP_EOL);
211 |                     continue;
212 |                 } elseif ($wikiDocument->hasRedirect() && $passNbr === 3) {
213 |                     // Skip all redirects for pass 2
214 |                     $output->writeln(sprintf('  skip: Document %s WITH redirect, at pass 3', $title).PHP_EOL.PHP_EOL);
215 |                     continue;
216 |                 }
217 | 
218 |                 if ($passNbr < 1 || $passNbr > 3) {
219 |                     throw new DomainException('This command has only three pases.');
220 |                 }
221 | 
222 |                 if ($passNbr === 3) {
223 |                     // Overwriting $revList for last pass we’ll
224 |                     // use for conversion.
225 |                     $revList = new SplDoublyLinkedList();
226 |                     $revList->push($revLast);
227 |                 } else {
228 |                     $output->writeln(sprintf('  revisions_count: %d', $revs));
229 |                     $output->writeln(sprintf('  revisions:'));
230 |                 }
231 | 
232 |                 /* ----------- REVISIONS --------------- **/
233 |                 $revCounter = 0;
234 |                 for ($revList->rewind(); $revList->valid(); $revList->next()) {
235 |                     ++$revCounter;
236 | 
237 |                     if ($revMaxHops > 0 && $revMaxHops === $revCounter) {
238 |                         $output->writeln(sprintf('    stop: Reached maximum %d revisions', $revMaxHops).PHP_EOL.PHP_EOL);
239 |                         break;
240 |                     }
241 | 
242 |                     $removeFile = false;
243 | 
244 |                     $wikiRevision = $revList->current();
245 | 
246 |                     /* -------------------- Author -------------------- **/
247 |                     // An edge case where MediaWiki may give author as user_id 0, even though we dont have it
248 |                     // so we’ll give the first user instead.
249 |                     $contributor_id = ($wikiRevision->getContributorId() === 0) ? 1 : $wikiRevision->getContributorId();
250 | 
251 |                     /*
252 |                      * Fix duplicates and merge them as only one.
253 |                      *
254 |                      * Please adjust to suit your own.
255 |                      *
256 |                      * Queried using jq;
257 |                      *
258 |                      *     cat data/users.json | jq '.[]|select(.user_real_name == "Renoir Boulanger")'
259 |                      *
260 |                      * #TODO: Change the hardcoded list.
261 |                      */
262 |                     if (in_array($contributor_id, [172943, 173060, 173278, 173275, 173252, 173135, 173133, 173087, 173086, 173079, 173059, 173058, 173057])) {
263 |                         $contributor_id = getenv('MEDIAWIKI_USERID');
264 |                     }
265 |                     /* -------------------- /Author -------------------- **/
266 | 
267 |                     // Lets handle conversion only at 3rd pass.
268 |                     if ($passNbr === 3) {
269 |                         try {
270 |                             /* @var MediaWikiApiParseActionResponse object to work with */
271 |                             $respObj = $this->documentFetch($wikiDocument);
272 |                         } catch (Exception $e) {
273 |                             $output->writeln(sprintf('    ERROR: %s, left a note in errors/%d.txt', $e->getMessage(), $counter));
274 |                             $this->filesystem->dumpFile(sprintf('errors/%d.txt', $counter), $e->getMessage());
275 |                             throw new Exception('Debugging why API call did not work.', 0, $e); // DEBUG
276 |                             continue;
277 |                         }
278 | 
279 |                         if ($respObj->isFromCache()) {
280 |                             // #XXX: Make sure AbstractImporterCommand has the same path as below
281 |                             $output->writeln(sprintf('  cached: %s', sprintf('out/.cache/%d.json', $wikiDocument->getId())));
282 |                         } else {
283 |                             $output->writeln('  cached: Not from cache');
284 |                         }
285 | 
286 |                         if ($respObj->isEmpty() === true) {
287 |                             $output->writeln(sprintf('  skip: Document %s is empty, maybe deleted or been emptied without a redirect left', $title).PHP_EOL.PHP_EOL);
288 |                             continue;
289 |                         }
290 | 
291 |                         $newRev = new HtmlRevision($respObj, true);
292 |                         $newRev->enableMarkdownConversion();
293 |                         $newRev->setTitle($wikiDocument->getDocumentTitle());
294 | 
295 |                         $assets = $newRev->getAssets();
296 |                         if (count($assets) >= 1) {
297 |                             $output->writeln(sprintf('  assets: %d', count($assets)));
298 |                         } else {
299 |                             $output->writeln('  assets: None');
300 |                         }
301 |                         if ($onlyAssets === true) {
302 |                             if (count($assets) >= 1) {
303 |                                 $problematicAssets = [];
304 |                                 foreach ($newRev->getAssets() as $file) {
305 |                                     try {
306 |                                         $this->git
307 |                                             ->add()
308 |                                             ->execute(preg_replace('/^\//', '', $file));
309 |                                     } catch (Exception $e) {
310 |                                         $problematicAssets[] = $file;
311 |                                     }
312 |                                 }
313 | 
314 |                                 if (count($problematicAssets) >= 1) {
315 |                                     $message = '    assets_status: NOT OK, %d problematic files, see errors/problematic_assets/%d.txt';
316 |                                     $output->writeln(sprintf($message, count($problematicAssets), $wikiDocument->getId()));
317 |                                     $this->filesystem->dumpFile(sprintf('errors/problematic_assets/%d.txt', $wikiDocument->getId()), print_r($problematicAssets, 1));
318 |                                 } else {
319 |                                     $output->writeln('  assets_status: OK, all added.');
320 |                                 }
321 |                             }
322 | 
323 |                             continue;
324 |                         } /* End $onlyAssets */
325 | 
326 |                         // NOTE:
327 |                         //
328 |                         // In HtmlRevision, if the file is empty or only has a comment, we
329 |                         // rewrite the file to contain only a title.
330 |                         //
331 |                         // We could use `$newRev->isEmpty()` here to detect the fact that its
332 |                         // empty, but we would need to refactor the logic on how to delete
333 |                         // revisions.
334 |                         //
335 |                         // Since there are not many empty files, it has been decided to leave
336 |                         // as is.
337 |                         //
338 |                         if ($newRev->isEmpty()) {
339 |                             //die('Manually delete file?');
340 |                             $wikiRevision = $this->converter->apply($newRev);
341 |                             $removeFile = true; // Won't work. But, it could be a start.
342 |                         } else {
343 |                             $wikiRevision = $this->converter->apply($newRev);
344 |                         }
345 | 
346 |                         // Most of the time, title is better written from the document itself than
347 |                         // from the URL. Let's only set title front matter attribute when we aren't a
348 |                         // translation. We'll then use instead the text in the first h1 we find
349 |                         // in the DOM.
350 |                         $metadata = $newRev->getMetadata();
351 |                         if (isset($metadata['first_title'])) {
352 |                             $wikiRevision->setTitle($metadata['first_title']);
353 |                         } else {
354 |                             $wikiRevision->setTitle($wikiDocument->getDocumentTitle());
355 |                         }
356 | 
357 |                         if ($wikiDocument->isTranslation() === true) {
358 |                             $wikiRevision->setFrontMatter(['lang' => $wikiDocument->getLanguageCode()]);
359 |                         }
360 | 
361 |                         $revision_id = $revLast->getId();
362 |                     } else {
363 |                         if (isset($this->users[$contributor_id])) {
364 |                             $contributor = clone $this->users[$contributor_id]; // We want a copy, because its specific to here only anyway.
365 |                             $wikiRevision->setContributor($contributor, false);
366 |                         } else {
367 |                             // In case we didn’t find data for $this->users[$contributor_id]
368 |                             $contributor = clone $this->users[1]; // We want a copy, because its specific to here only anyway.
369 |                             $wikiRevision->setContributor($contributor, false);
370 |                         }
371 | 
372 |                         $revision_id = $wikiRevision->getId();
373 |                         $output->writeln(sprintf('    - id: %d', $revision_id));
374 |                         $output->writeln(sprintf('      index: %d', $revCounter));
375 |                     }
376 | 
377 |                     $persistArgs = $persistable->setRevision($wikiRevision)->getArgs();
378 |                     if ($passNbr < 3) {
379 |                         foreach ($persistArgs as $argKey => $argVal) {
380 |                             if ($argKey === 'message') {
381 |                                 $argVal = mb_strimwidth($argVal, strpos($argVal, ': ') + 2, 100);
382 |                             }
383 |                             $output->writeln(sprintf('      %s: "%s"', $argKey, $argVal));
384 |                         }
385 |                     }
386 | 
387 |                     if ($passNbr < 3 && $revLast->getId() === $wikiRevision->getId() && $wikiDocument->hasRedirect()) {
388 |                         $output->writeln('      is_last_and_has_redirect: True');
389 |                         $removeFile = true;
390 |                     }
391 | 
392 |                     $persistable->setRevision($wikiRevision);
393 |                     $this->filesystem->dumpFile($file_path, (string) $persistable);
394 |                     try {
395 |                         $this->git
396 |                             ->add()
397 |                             // Make sure out/ matches what we set at GitCommitFileRevision constructor.
398 |                             ->execute(preg_replace('/^out\//', '', $file_path));
399 |                     } catch (GitException $e) {
400 |                         $message = sprintf('Could not add file "%s" with title "%s" for revision %d', $file_path, $title, $revision_id);
401 |                         throw new Exception($message, null, $e);
402 |                     }
403 | 
404 |                     if ($passNbr < 3) {
405 | 
406 |                         // We won’t expose all WebPlatform user emails to the public. Instead,
407 |                         // we’ll create a bogus email alias based on their MediaWiki username.
408 |                         $real_name = $wikiRevision->getContributor()->getRealName();
409 |                         $username = $wikiRevision->getContributor()->getName();
410 |                         $email = sprintf('%s@%s', $username, getenv('COMMITER_ANONYMOUS_DOMAIN'));
411 |                         $author_overload = sprintf('%s <%s>', $real_name, $email);
412 | 
413 |                         try {
414 |                             $this->git
415 |                                 ->commit()
416 |                                 // In order to enforce git to use the same commiter data
417 |                                 // than the author’s we had to overload CommitCommandBuilder
418 |                                 // class.
419 |                                 //
420 |                                 // In WebPlatform\Importer\GitPhp\CommitCommandBuilder, we
421 |                                 // overload [date, author] methods so we can inject the same
422 |                                 // matching GIT_COMMITTER_* values at commit time.
423 |                                 ->message($persistArgs['message'])
424 |                                 ->author('"'.$author_overload.'"')
425 |                                 ->date('"'.$persistArgs['date'].'"')
426 |                                 ->allowEmpty()
427 |                                 ->execute();
428 |                         } catch (GitException $e) {
429 |                             var_dump($this->git);
430 |                             $message = sprintf('Could not commit for revision %d', $revision_id);
431 |                             throw new Exception($message, null, $e);
432 |                         }
433 | 
434 |                         if ($removeFile === true) {
435 |                             try {
436 |                                 $this->git
437 |                                     ->rm()
438 |                                     // Make sure out/ matches what we set at GitCommitFileRevision constructor.
439 |                                     ->execute(preg_replace('/^out\//', '', $file_path));
440 |                             } catch (GitException $e) {
441 |                                 $message = sprintf('Could remove %s at revision %d', $file_path, $revision_id);
442 |                                 throw new Exception($message, null, $e);
443 |                             }
444 | 
445 |                             $this->git
446 |                                 ->commit()
447 |                                 ->message('Remove file; '.$persistArgs['message'])
448 |                                 // ... no need to worry here. We overloaded author, date
449 |                                 // remember?
450 |                                 ->author('"'.$author_overload.'"')
451 |                                 ->date('"'.$persistArgs['date'].'"')
452 |                                 ->allowEmpty()
453 |                                 ->execute();
454 | 
455 |                             $this->filesystem->remove($file_path);
456 |                         }
457 |                     } /* End of $passNubr === 3 */
458 |                 }
459 |                 /* ----------- REVISIONS --------------- **/
460 |                 $output->writeln(PHP_EOL);
461 |             }
462 |         }
463 |     }
464 | }
465 | 


--------------------------------------------------------------------------------
/src/WebPlatform/Importer/Commands/SummaryCommand.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | /**
  4 |  * WebPlatform MediaWiki Conversion workbench.
  5 |  */
  6 | namespace WebPlatform\Importer\Commands;
  7 | 
  8 | use WebPlatform\ContentConverter\Persistency\GitCommitFileRevision;
  9 | use Symfony\Component\Console\Output\OutputInterface;
 10 | use Symfony\Component\Console\Input\InputInterface;
 11 | use WebPlatform\Importer\Model\MediaWikiDocument;
 12 | use Symfony\Component\Console\Input\InputOption;
 13 | use WebPlatform\Importer\Filter\TitleFilter;
 14 | use SimpleXMLElement;
 15 | use Exception;
 16 | 
 17 | /**
 18 |  * Read and create a summary from a MediaWiki dumpBackup XML file.
 19 |  *
 20 |  * @author Renoir Boulanger <hello@renoirboulanger.com>
 21 |  */
 22 | class SummaryCommand extends AbstractImporterCommand
 23 | {
 24 |     protected function configure()
 25 |     {
 26 |         $description = <<<DESCR
 27 | 
 28 |                 Walk through MediaWiki dumpBackup XML file,
 29 |                 summarize revisions give details about the
 30 |                 wiki contents.
 31 | 
 32 |                 - List all pages
 33 |                 - Which pages are translations
 34 |                 - Which pages are redirects
 35 |                 - Number of edits ("Revision") per page
 36 |                 - Edits average and median
 37 | 
 38 | DESCR;
 39 |         $this
 40 |             ->setName('mediawiki:summary')
 41 |             ->setDescription($description)
 42 |             ->setDefinition(
 43 |                 [
 44 |                     new InputOption('missed', '', InputOption::VALUE_NONE, 'Give XML node indexes of missed conversion so we can run a 3rd pass only for them'),
 45 |                     new InputOption('max-revs', '', InputOption::VALUE_OPTIONAL, 'Do not make full run, limit it to maximum of revisions per document ', 0),
 46 |                     new InputOption('max-pages', '', InputOption::VALUE_OPTIONAL, 'Do not make full run, limit to a maximum of documents', 0),
 47 |                     new InputOption('namespace-prefix', '', InputOption::VALUE_OPTIONAL, 'If not against main MediaWiki namespace, set prefix (e.g. Meta) so we can create a git repo with all contents on root so that we can use export as a submodule.', false),
 48 |                     new InputOption('display-author', '', InputOption::VALUE_NONE, 'Display or not the author and email address (useful to hide info for public reports), defaults to false'),
 49 |                     new InputOption('indexes', '', InputOption::VALUE_NONE, 'Whether or not we display loop indexes'),
 50 |                 ]
 51 |             );
 52 | 
 53 |         parent::configure();
 54 |     }
 55 | 
 56 |     protected function execute(InputInterface $input, OutputInterface $output)
 57 |     {
 58 |         parent::execute($input, $output);
 59 | 
 60 |         $xmlSource = $input->getOption('xml-source');
 61 |         $listMissed = $input->getOption('missed');
 62 | 
 63 |         $maxHops = (int) $input->getOption('max-pages');   // Maximum number of pages we go through
 64 |         $revMaxHops = (int) $input->getOption('max-revs'); // Maximum number of revisions per page we go through
 65 |         $namespacePrefix = $input->getOption('namespace-prefix');
 66 | 
 67 |         $displayIndex = $input->getOption('indexes');
 68 |         $displayAuthor = $input->getOption('display-author');
 69 | 
 70 |         $redirects = [];
 71 |         $pages = [];
 72 |         $urlParts = [];
 73 |         $urlPartsAll = [];
 74 |         $missedIndexes = [];
 75 | 
 76 |         $urlsWithContent = [];
 77 |         $moreThanHundredRevs = [];
 78 |         $translations = [];
 79 |         $sanity_redirs = [];
 80 |         $directlyOnRoot = [];
 81 |         $rev_count = []; // So we can know what’s the average
 82 | 
 83 |         // Pages we have to make sure aren’t duplicate on the CMS prior
 84 |         // to the final migration.
 85 |         $temporary_acceptable_duplicates = [];
 86 |         //$temporary_acceptable_duplicates[] = 'css/selectors/pseudo-classes/:lang'; // DONE
 87 | 
 88 |         if ($listMissed === true) {
 89 |             $this->loadMissed(DATA_DIR.'/missed.yml');
 90 |         }
 91 | 
 92 |         $this->loadUsers(DATA_DIR.'/users.json');
 93 | 
 94 |         $this->titleFilter = new TitleFilter();
 95 | 
 96 |         $streamer = $this->sourceXmlStreamFactory(DATA_DIR.'/'.$xmlSource);
 97 |         $counter = 0;
 98 |         while ($node = $streamer->getNode()) {
 99 |             $pageNode = new SimpleXMLElement($node);
100 |             if (isset($pageNode->title)) {
101 |                 ++$counter;
102 |                 if ($maxHops > 0 && $maxHops === $counter - 1) {
103 |                     $output->writeln(sprintf('Reached desired maximum of %d documents', $maxHops).PHP_EOL);
104 |                     break;
105 |                 }
106 | 
107 |                 $wikiDocument = new MediaWikiDocument($pageNode);
108 |                 $persistable = new GitCommitFileRevision($wikiDocument, 'out/', '.md');
109 | 
110 |                 $title = $wikiDocument->getTitle();
111 |                 $normalized_location = $wikiDocument->getName();
112 |                 $file_path = $this->titleFilter->filter($persistable->getName());
113 |                 $file_path = ($namespacePrefix === false) ? $file_path : str_replace(sprintf('%s/', $namespacePrefix), '', $file_path);
114 |                 $redirect_to = $this->titleFilter->filter($wikiDocument->getRedirect()); // False if not a redirect, string if it is
115 | 
116 |                 $language_code = $wikiDocument->getLanguageCode();
117 |                 $language_name = $wikiDocument->getLanguageName();
118 |                 $revs = $wikiDocument->getRevisions()->count();
119 |                 $revList = $wikiDocument->getRevisions();
120 |                 $revLast = $wikiDocument->getLatest();
121 | 
122 |                 $output->writeln(sprintf('"%s":', $title));
123 |                 $output->writeln(sprintf('  - id: %d', $wikiDocument->getId()));
124 |                 if ($displayIndex === true) {
125 |                     $output->writeln(sprintf('  - index: %d', $counter));
126 |                 }
127 |                 $output->writeln(sprintf('  - normalized: %s', $normalized_location));
128 |                 $output->writeln(sprintf('  - file: %s', $file_path));
129 | 
130 |                 if ($wikiDocument->isTranslation() === true) {
131 |                     $output->writeln(sprintf('  - lang: %s (%s)', $language_code, $language_name));
132 |                 }
133 | 
134 |                 if ($wikiDocument->hasRedirect() === true) {
135 |                     $output->writeln(sprintf('  - redirect_to: %s', $redirect_to));
136 |                 } else {
137 |                     /**
138 |                      * Gather what we can know from the location.
139 |                      *
140 |                      * Explode all parts in two separate arrays so we’ll be able to tell
141 |                      * if we have conflicts (e.g. CSS/Selectors .. css/selectors). So we can
142 |                      * harmonize the names to have **ONLY ONE** way of writing the casing for a
143 |                      * given path.
144 |                      *
145 |                      * If you want to define how to write an URL part, refer to TitleFilter class.
146 |                      */
147 |                     $urlsWithContent[] = $title;
148 |                     foreach (explode('/', $normalized_location) as $urlDepth => $urlPart) {
149 |                         $urlPartKey = strtolower($urlPart);
150 |                         $urlParts[$urlPartKey] = $urlPart;
151 |                         $urlPartsAll[$urlPartKey][] = $urlPart;
152 |                     }
153 |                 }
154 | 
155 |                 if ($listMissed === true && in_array($normalized_location, $this->missed)) {
156 |                     $missedIndexes[$counter] = $title;
157 |                 }
158 | 
159 |                 $output->writeln(sprintf('  - revs: %d', $revs));
160 |                 $output->writeln(sprintf('  - revisions:'));
161 | 
162 |                 /* ----------- REVISION --------------- **/
163 |                 $revCounter = 0;
164 |                 for ($revList->rewind(); $revList->valid(); $revList->next()) {
165 |                     ++$revCounter;
166 | 
167 |                     if ($revMaxHops > 0 && $revMaxHops === $revCounter) {
168 |                         $output->writeln(sprintf('    - stop: Reached maximum %d revisions', $revMaxHops).PHP_EOL.PHP_EOL);
169 |                         break;
170 |                     }
171 | 
172 |                     $wikiRevision = $revList->current();
173 | 
174 |                     /* -------------------- Author -------------------- **/
175 |                     // An edge case where MediaWiki may give author as user_id 0, even though we dont have it
176 |                     // so we’ll give the first user instead.
177 |                     $contributor_id = ($wikiRevision->getContributorId() === 0) ? 1 : $wikiRevision->getContributorId();
178 | 
179 |                     /**
180 |                      * Fix duplicates and merge them as only one.
181 |                      *
182 |                      * Please adjust to suit your own.
183 |                      *
184 |                      * Queried using jq;
185 |                      *
186 |                      *     cat data/users.json | jq '.[]|select(.user_real_name == "Renoir Boulanger")'
187 |                      *
188 |                      * #TODO: Change the hardcoded list.
189 |                      */
190 |                     if (in_array($contributor_id, [172943, 173060, 173278, 173275, 173252, 173135, 173133, 173087, 173086, 173079, 173059, 173058, 173057])) {
191 |                         $contributor_id = getenv('MEDIAWIKI_USERID');
192 |                     }
193 | 
194 |                     if (isset($this->users[$contributor_id])) {
195 |                         $contributor = clone $this->users[$contributor_id]; // We want a copy, because its specific to here only anyway.
196 |                         $wikiRevision->setContributor($contributor, false);
197 |                     } else {
198 |                         // In case we didn’t find data for $this->users[$contributor_id]
199 |                         $contributor = clone $this->users[1]; // We want a copy, because its specific to here only anyway.
200 |                         $wikiRevision->setContributor($contributor, false);
201 |                     }
202 |                     /* -------------------- /Author -------------------- **/
203 | 
204 |                     $output->writeln(sprintf('    - id: %d', $wikiRevision->getId()));
205 |                     if ($displayIndex === true) {
206 |                         $output->writeln(sprintf('      index: %d', $revCounter));
207 |                     }
208 | 
209 |                     $persistArgs = $persistable->setRevision($wikiRevision)->getArgs();
210 |                     foreach ($persistArgs as $argKey => $argVal) {
211 |                         if ($argKey === 'message') {
212 |                             $argVal = trim(mb_strimwidth($argVal, strpos($argVal, ': ') + 2, 100));
213 |                         }
214 |                         if ($argKey === 'message' && empty($argVal)) {
215 |                             // Lets not pollute report with empty messages
216 |                             continue;
217 |                         }
218 |                         if ($displayAuthor === false && $argKey === 'author') {
219 |                             continue;
220 |                         }
221 |                         $output->writeln(sprintf('      %s: "%s"', $argKey, $argVal));
222 |                     }
223 | 
224 |                     if ($revLast->getId() === $wikiRevision->getId() && $wikiDocument->hasRedirect()) {
225 |                         $output->writeln('      is_last_and_has_redirect: True');
226 |                     }
227 |                 }
228 | 
229 |                 /* ----------- REVISION --------------- */
230 | 
231 |                 $rev_count[] = $revs;
232 | 
233 |                 // Which pages are directly on /wiki/foo. Are there some we
234 |                 // should move elsewhere such as the glossary items?
235 |                 if (count(explode('/', $title)) == 1 && $wikiDocument->hasRedirect() === false) {
236 |                     $directlyOnRoot[] = $title;
237 |                 }
238 | 
239 |                 if ($revs > 99) {
240 |                     $moreThanHundredRevs[] = sprintf('%s (%d)', $title, $revs);
241 |                 }
242 | 
243 |                 if ($wikiDocument->isTranslation() === true && $wikiDocument->hasRedirect() === false) {
244 |                     $translations[] = $title;
245 |                 }
246 | 
247 |                 // The ones with invalid URL characters that shouldn’t be part of
248 |                 // a page name because they may confuse with their natural use (:,(,),!,?)
249 |                 if ($title !== $normalized_location && $wikiDocument->hasRedirect() === false) {
250 |                     $sanity_redirs[$title] = $normalized_location;
251 |                 }
252 | 
253 |                 // We have a number of pages, some of them had been
254 |                 // deleted or erased with a redirect left behind.
255 |                 //
256 |                 // Since we want to write to files all pages that currently
257 |                 // has content into a filesystem, we have to generate a file
258 |                 // name that can be stored into a filesystem. We therefore have
259 |                 // to normalize the names.
260 |                 //
261 |                 // We don’t want to have two entries with the same name.
262 |                 //
263 |                 // If a redirect (i.e. an empty file) exist, let’s set keep it
264 |                 // separate from the pages that still has content.
265 |                 //
266 |                 // Sanity check;
267 |                 // 1. Get list of redirects
268 |                 // 2. Get list of pages
269 |                 //
270 |                 // If we have a page duplicate, throw an exception!
271 |                 if ($wikiDocument->hasRedirect() === true) {
272 |                     // Pages we know are redirects within MediaWiki, we won’t
273 |                     // pass them within the $pages aray because they would be
274 |                     // empty content with only a redirect anyway.
275 |                     if ($normalized_location !== $redirect_to) {
276 |                         $redirects[str_replace('_', ' ', $normalized_location)] = $redirect_to;
277 |                     }
278 |                 } elseif (!in_array($normalized_location, array_keys($pages))) {
279 |                     // Pages we know has content, lets count them!
280 |                     if ($wikiDocument->hasRedirect() === false) {
281 |                         $pages[$normalized_location] = $title;
282 |                     }
283 |                 } elseif (in_array($title, $temporary_acceptable_duplicates)) {
284 |                     // Lets not throw, we got that covered.
285 |                 } else {
286 |                     // Hopefully we should never encounter this.
287 |                     $previous = $pages[$normalized_location];
288 |                     $duplicatePagesExceptionText = 'We have duplicate entry for %s it '
289 |                                                    .'would be stored in %s which would override content of %s';
290 |                     throw new Exception(sprintf($duplicatePagesExceptionText, $title, $file_path, $previous));
291 |                 }
292 | 
293 |                 $output->writeln(PHP_EOL.PHP_EOL);
294 |             } /* End of if (isset($pageNode->title)) */
295 |         } /* End of while ($node = $streamer->getNode()) */
296 | 
297 |         /*
298 |          * Work some numbers on number of edits
299 |          *
300 |          * - Average
301 |          * - Median
302 |          */
303 |         $total_edits = 0;
304 |         sort($rev_count);
305 |         $edit_average = array_sum($rev_count) / $counter;
306 | 
307 |         // Calculate median
308 |         $value_in_middle = floor(($counter - 1) / 2);
309 |         if ($counter % 2) {
310 |             // odd number, middle is the median
311 |             $edit_median = $rev_count[$value_in_middle];
312 |         } else {
313 |             // even number, calculate avg of 2 medians
314 |             $low = $rev_count[$value_in_middle];
315 |             $high = $rev_count[$value_in_middle + 1];
316 |             $edit_median = (($low + $high) / 2);
317 |         }
318 | 
319 |         $numbers = array('Numbers:');
320 |         $numbers[] = sprintf('  - "iterations": %d', $counter);
321 |         $numbers[] = sprintf('  - "content pages": %d', count($pages));
322 |         $numbers[] = sprintf('  - "redirects": %d', count($redirects));
323 |         $numbers[] = sprintf('  - "translated": %d', count($translations));
324 |         $numbers[] = sprintf('  - "not in a directory": %d', count($directlyOnRoot));
325 |         $numbers[] = sprintf('  - "redirects for URL sanity": %d', count($sanity_redirs));
326 |         $numbers[] = sprintf('  - "edits average": %d', $edit_average);
327 |         $numbers[] = sprintf('  - "edits median": %d', $edit_median);
328 |         $this->filesystem->dumpFile('reports/numbers.txt', implode($numbers, PHP_EOL));
329 | 
330 |         $this->filesystem->dumpFile('reports/hundred_revs.txt', implode($moreThanHundredRevs, PHP_EOL));
331 | 
332 |         natcasesort($translations);
333 |         $this->filesystem->dumpFile('reports/translations.txt', implode(PHP_EOL, $translations));
334 |         natcasesort($directlyOnRoot);
335 |         $this->filesystem->dumpFile('reports/directly_on_root.txt', implode(PHP_EOL, $directlyOnRoot));
336 |         natcasesort($urlsWithContent);
337 |         $this->filesystem->dumpFile('reports/url_all.txt', implode(PHP_EOL, $urlsWithContent));
338 | 
339 |         natcasesort($urlParts);
340 |         $this->filesystem->dumpFile('reports/url_parts.txt', implode(PHP_EOL, $urlParts));
341 | 
342 |         // Creating list for https://github.com/webplatform/mediawiki-conversion/issues/2
343 |         ksort($urlPartsAll);
344 |         $urlPartsAllOut = array('All words that exists in an URL, and the different ways they are written (needs harmonizing!):');
345 |         foreach ($urlPartsAll as $urlPartsAllKey => $urlPartsAllRow) {
346 |             $urlPartsAllEntryUnique = array_unique($urlPartsAllRow);
347 |             if (count($urlPartsAllEntryUnique) > 1) {
348 |                 $urlPartsAllOut[] = sprintf(' - %s', implode(', ', $urlPartsAllEntryUnique));
349 |             }
350 |         }
351 |         $this->filesystem->dumpFile('reports/url_parts_variants.txt', implode(PHP_EOL, $urlPartsAllOut));
352 | 
353 |         ksort($redirects, SORT_NATURAL | SORT_FLAG_CASE);
354 |         ksort($sanity_redirs, SORT_NATURAL | SORT_FLAG_CASE);
355 | 
356 |         $nginx_almost_same_1 = ['# Most likely OK to ignore, but good enough to check if adresses here works'];
357 |         $nginx_almost_same_2 = ['# Most likely OK to ignore, but good enough to check if adresses here works'];
358 |         $nginx_almost_same_casing = [];
359 |         $nginx_redirects_spaces = [];
360 |         $nginx_redirects = [];
361 | 
362 |         $nginx_esc['Meta:'] = 'Meta/';
363 |         $nginx_esc['WPD:'] = 'WPD/';
364 |         $nginx_esc[':'] = '\\:';
365 |         $nginx_esc['('] = '\\(';
366 |         $nginx_esc[')'] = '\\)';
367 |         $nginx_esc['?'] = '\\?)';
368 |         $nginx_esc[' '] = '(\ |_)'; // Ordering matter, otherwise the () will be escaped and we want them here!
369 | 
370 |         $rewriteCheck[' '] = '(\ |_)'; // Ordering matter, otherwise the () will be escaped and we want them here!
371 | 
372 |         $location_spaghetti = [];
373 |         $location_spaghetti_duplicated = [];
374 |         $hopefully_not_duplicate = [];
375 | 
376 |         $prepare_nginx_redirects = array_merge($sanity_redirs, $redirects);
377 |         foreach ($prepare_nginx_redirects as $url => $redirect_to) {
378 |             // NGINX Case-insensitive redirect? Its done through (?i)! Should be documented!!!
379 |             $new_location = str_replace(array_keys($nginx_esc), $nginx_esc, $url);
380 |             $url_match_attempt = str_replace('(\ |_)', '_', $new_location);
381 |             $work_item = $url.':'.PHP_EOL.'  - new_location: "'.$new_location.'"'.PHP_EOL.'  - url_match_attempt: "'.$url_match_attempt.'"'.PHP_EOL.'  - redirect_to: "'.$redirect_to.'"'.PHP_EOL;
382 |             $duplicate = false;
383 | 
384 |             if (array_key_exists(strtolower($url), $hopefully_not_duplicate)) {
385 |                 $location_spaghetti_duplicated[strtolower($url)] = $work_item;
386 |                 $duplicate = true;
387 |             } else {
388 |                 $hopefully_not_duplicate[strtolower($url)] = $work_item;
389 |             }
390 |             $location_spaghetti[] = $work_item;
391 | 
392 |             if ($duplicate === true) {
393 |                 $nginx_almost_same_1[] = sprintf('rewrite (?i)^/%s$ /%s break;', $new_location, $redirect_to);
394 |             } elseif ($url_match_attempt === $redirect_to) {
395 |                 $nginx_almost_same_2[] = sprintf('rewrite (?i)^/%s$ /%s break;', $new_location, $redirect_to);
396 |             } elseif (strtolower($url_match_attempt) === strtolower($redirect_to)) {
397 |                 $nginx_almost_same_casing[] = sprintf('rewrite (?i)^/%s$ /%s break;', $new_location, $redirect_to);
398 |             } elseif (stripos($url, ' ') > 1) {
399 |                 $nginx_redirects_spaces[] = sprintf('rewrite (?i)^/%s$ /%s break;', $new_location, $redirect_to);
400 |             } else {
401 |                 $nginx_redirects[] = sprintf('rewrite (?i)^/%s$ /%s break;', $new_location, $redirect_to);
402 |             }
403 |         }
404 |         $this->filesystem->dumpFile('reports/location_spaghetti_duplicated.txt', implode(PHP_EOL, $location_spaghetti_duplicated));
405 |         $this->filesystem->dumpFile('reports/location_spaghetti.txt', implode(PHP_EOL, $location_spaghetti));
406 |         $this->filesystem->dumpFile('reports/4_nginx_redirects_spaces.map', implode(PHP_EOL, $nginx_redirects_spaces));
407 |         $this->filesystem->dumpFile('reports/3_nginx_almost_same_1.map', implode(PHP_EOL, $nginx_almost_same_1));
408 |         $this->filesystem->dumpFile('reports/3_nginx_almost_same_2.map', implode(PHP_EOL, $nginx_almost_same_2));
409 |         $this->filesystem->dumpFile('reports/2_nginx_almost_same_casing.map', implode(PHP_EOL, $nginx_almost_same_casing));
410 |         $this->filesystem->dumpFile('reports/1_nginx.map', implode(PHP_EOL, $nginx_redirects));
411 | 
412 |         $redirects_sanity_out = array('URLs to return new Location (from => to):');
413 |         foreach ($sanity_redirs as $title => $sanitized) {
414 |             $redirects_sanity_out[] = sprintf(' - "%s": "%s"', $title, $sanitized);
415 |         }
416 |         $this->filesystem->dumpFile('reports/redirects_sanity.txt', implode(PHP_EOL, $redirects_sanity_out));
417 | 
418 |         $redirects_out = array('Redirects (from => to):');
419 |         foreach ($redirects as $url => $redirect_to) {
420 |             $redirects_out[] = sprintf(' - "%s": "%s"', $url, $redirect_to);
421 |         }
422 |         $this->filesystem->dumpFile('reports/redirects.txt', implode(PHP_EOL, $redirects_out));
423 | 
424 |         if ($listMissed === true) {
425 |             try {
426 |                 $missed_out = $this->yaml->serialize($missedIndexes);
427 |             } catch (Exception $e) {
428 |                 $missed_out = sprintf('Could not create YAML out of missedIndexes array; Error was %s', $e->getMessage());
429 |             }
430 |             $this->filesystem->dumpFile('reports/missed_retry_argument.txt', 'app/console mediawiki:run 3 --retry='.implode(',', array_keys($missedIndexes)));
431 |             $this->filesystem->dumpFile('reports/missed_entries.yml', 'Missed:'.PHP_EOL.$missed_out);
432 |             $output->writeln('Created missed_retry_argument.txt and missed_entries.yml in reports/ you can try to recover!');
433 |         }
434 |     }
435 | }
436 | 


--------------------------------------------------------------------------------
/src/WebPlatform/Importer/Converter/HtmlToMarkdown.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | /**
  4 |  * WebPlatform MediaWiki Conversion workbench.
  5 |  */
  6 | namespace WebPlatform\Importer\Converter;
  7 | 
  8 | use WebPlatform\ContentConverter\Converter\ConverterInterface;
  9 | use WebPlatform\ContentConverter\Model\AbstractRevision;
 10 | use WebPlatform\Importer\Model\MarkdownRevision;
 11 | use WebPlatform\Importer\Model\HtmlRevision;
 12 | use Pandoc\Pandoc;
 13 | 
 14 | /**
 15 |  * HTML to Markdown converter using Markdownify PHP library.
 16 |  *
 17 |  * @author Renoir Boulanger <hello@renoirboulanger.com>
 18 |  */
 19 | class HtmlToMarkdown implements ConverterInterface
 20 | {
 21 | 
 22 |     protected $converter;
 23 | 
 24 |     protected $options = array(
 25 |         "from" => "html",
 26 |         "to" => "markdown_github+blank_before_header+blank_before_blockquote+definition_lists",
 27 |         "atx-headers" => null,
 28 |         "parse-raw" => null,
 29 |         "no-highlight" => null,
 30 |         "normalize" => null
 31 |     );
 32 | 
 33 |     public function __construct()
 34 |     {
 35 |         $this->converter = new Pandoc();
 36 | 
 37 |         /**
 38 |          * Found language code in WebPlatform code samples
 39 |          *
 40 |          *  - brush:
 41 |          *  - css
 42 |          *  - de1
 43 |          *  - glsl
 44 |          *  - html
 45 |          *  - html4strict
 46 |          *  - http
 47 |          *  - js
 48 |          *  - lang-css
 49 |          *  - lang-markup
 50 |          *  - other
 51 |          *  - php
 52 |          *  - prettyprint
 53 |          *  - python
 54 |          *  - script
 55 |          *  - xml
 56 |          *  - yaml
 57 |          *  - style=&quot;background-color:
 58 |          */
 59 |         $validLanguageCodes['html'] = ['markup', 'xhtml', 'html5', 'html4strict', 'lang-markup'];
 60 |         $validLanguageCodes['css'] = ['lang-css'];
 61 |         $validLanguageCodes['svg'] = [];
 62 |         $validLanguageCodes['xml'] = [];
 63 |         $validLanguageCodes['yaml'] = [];
 64 |         $validLanguageCodes['js'] = ['script', 'javascript'];
 65 | 
 66 |         $this->languageCodeCallback = function ($matches) use ($validLanguageCodes) {
 67 |             if (!is_array($matches) || !isset($matches[1])) {
 68 |                 return '```';
 69 |             }
 70 | 
 71 |             if (in_array($matches[1], array_keys($validLanguageCodes))) {
 72 |                 return sprintf('``` %s', $matches[1]);
 73 |             }
 74 | 
 75 |             // Some entries such as '``` {.script style="font-size: 16px;"}' has been found in $matches[0] :(
 76 |             // ... in this case, we'll change $matches[1] to have ' style="..."' removed.
 77 |             $matches[1] = substr($matches[1], 0, strpos($matches[1], ' '));
 78 |             // ... Yup. Another input has "brush: .js" at $matches[1]. Let's trim that out too.
 79 |             $matches[1] = str_replace('brush: .', '', $matches[1]);
 80 | 
 81 |             foreach ($validLanguageCodes as $kp => $possibilities) {
 82 |                 if (in_array($matches[1], $possibilities)) {
 83 |                     return sprintf('``` %s', $kp);
 84 |                 }
 85 |             }
 86 | 
 87 |             return '```';
 88 |         };
 89 | 
 90 |         return $this;
 91 |     }
 92 | 
 93 |     public function markdownify($html)
 94 |     {
 95 |         return $this->converter->runWith($html, $this->options);
 96 |     }
 97 | 
 98 |     /**
 99 |      * Apply Wikitext rewrites.
100 |      *
101 |      * @param AbstractRevision $revision Input we want to transfer into Markdown
102 |      *
103 |      * @return AbstractRevision
104 |      */
105 |     public function apply(AbstractRevision $revision)
106 |     {
107 |         if ($revision instanceof HtmlRevision) {
108 |             $wasEmpty = $revision->isEmpty();
109 | 
110 |             // Since MediaWikiApiParseActionResponse
111 |             // implements \JsonSerializable
112 |             $dto = $revision->getApiResponseObject()->jsonSerialize();
113 |             $title = (isset($dto['parse']['displaytitle'])) ? $dto['parse']['displaytitle'] : $revision->getTitle();
114 | 
115 |             $html = $revision->getContent();
116 |             $matter_local = $revision->getFrontMatterData();
117 | 
118 |             $matter_local['uri'] = $title;
119 | 
120 |             if (isset($matter_local['broken_links']) && count($matter_local['broken_links']) >= 1) {
121 |                 $links = $matter_local['broken_links'];
122 |                 $matter_local['todo_broken_links']['note'] = 'During import MediaWiki could not find the following links,';
123 |                 $matter_local['todo_broken_links']['note'] .= ' please fix and adjust this list.';
124 |                 $matter_local['todo_broken_links']['links'] = $links;
125 |             }
126 |             unset($matter_local['broken_links']);
127 | 
128 |             if (isset($matter_local['tags']) && count($matter_local['tags']) < 1) {
129 |                 unset($matter_local['tags']);
130 |             }
131 | 
132 |             if (isset($matter_local['readiness'])) {
133 |                 $matter_local['readiness'] = str_replace('_', ' ', $matter_local['readiness']);
134 |             }
135 | 
136 |             if ($revision->isMarkdownConvertible() === true) {
137 |                 $content = $this->markdownify($html);
138 |                 $content = preg_replace_callback("/```\s?\{\.(.*)\}/muS", $this->languageCodeCallback, $content);
139 |             } else {
140 |                 $content = $html;
141 |             }
142 | 
143 |             if (isset($matter_local['tables']) && is_array($matter_local['tables'])) {
144 |                 $newTables = [];
145 |                 foreach ($matter_local['tables'] as $tableKey => $tableData) {
146 |                     $newTableData = [];
147 |                     foreach ($tableData as $subTableKey => $subtableValue) {
148 |                         $rowKeyCopy = $this->markdownify($subTableKey);
149 |                         $rowDataCopy = $this->markdownify($subtableValue);
150 |                         $newTableData[$rowKeyCopy] = $rowDataCopy;
151 |                     }
152 |                     $newTables[$tableKey] = $newTableData;
153 |                 }
154 |                 unset($matter_local['tables']);
155 |                 $matter_local = array_merge($matter_local, $newTables);
156 |             }
157 | 
158 |             if (isset($matter_local['attributions'])) {
159 |                 $newAttributions = [];
160 |                 foreach ($matter_local['attributions'] as $attributionRow) {
161 |                     $rowData = $this->markdownify($attributionRow);
162 |                     if (!empty($rowData)) {
163 |                         $newAttributions[] = $rowData;
164 |                     }
165 |                 }
166 |                 if (count($newAttributions) >= 1) {
167 |                     $matter_local['attributions'] = $newAttributions;
168 |                 } else {
169 |                     unset($matter_local['attributions']);
170 |                 }
171 |             }
172 | 
173 |             if (empty($content) && $wasEmpty === false) {
174 |                 $matter_local['notes'][] = 'Require manual conversion! See https://github.com/webplatform/mediawiki-conversion/issues/24';
175 |                 $content = $revision->getTextContent();
176 |             }
177 | 
178 |             $newRev = new MarkdownRevision($content, $matter_local);
179 |             $newRev->setAuthor($revision->getAuthor());
180 | 
181 |             return $newRev;
182 |         }
183 | 
184 |         return $revision;
185 |     }
186 | }
187 | 


--------------------------------------------------------------------------------
/src/WebPlatform/Importer/Converter/MediaWikiToHtml.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | /**
  4 |  * WebPlatform MediaWiki Conversion workbench.
  5 |  */
  6 | namespace WebPlatform\Importer\Converter;
  7 | 
  8 | use WebPlatform\ContentConverter\Converter\MediaWikiToHtml as BaseConverter;
  9 | use WebPlatform\ContentConverter\Converter\ConverterInterface;
 10 | use WebPlatform\ContentConverter\Model\AbstractRevision;
 11 | use WebPlatform\ContentConverter\Model\MediaWikiRevision;
 12 | use WebPlatform\Importer\Model\HtmlRevision;
 13 | use GlHtml\GlHtml;
 14 | use Exception;
 15 | 
 16 | /**
 17 |  * Wikitext to HTML converter using MediaWiki API.
 18 |  *
 19 |  * This class creates an HTTP request to a MediaWiki API endpoint
 20 |  * and uses its own Parser to give us HTML.
 21 |  *
 22 |  * Every wiki has its own subtelties, the purpose of this class
 23 |  * is to extend the original so we can handle specifics for WebPlatform
 24 |  * Docs MediaWiki and how we want to export its contents.
 25 |  *
 26 |  * @author Renoir Boulanger <hello@renoirboulanger.com>
 27 |  */
 28 | class MediaWikiToHtml extends BaseConverter implements ConverterInterface
 29 | {
 30 |     /**
 31 |      * Apply Wikitext rewrites.
 32 |      *
 33 |      * Notice we purposefully NOT extend parent::apply(), nor enforce
 34 |      * $revision instanceof MediaWikiRevision because we’ll send back
 35 |      * the HtmlRevision object self::factory gives us
 36 |      *
 37 |      * @param AbstractRevision $revision Input we want to transfer into Markdown
 38 |      *
 39 |      * @return AbstractRevision
 40 |      */
 41 |     public function apply(AbstractRevision $revision)
 42 |     {
 43 |         if ($revision instanceof MediaWikiRevision) {
 44 |             try {
 45 |                 $mwparse = $this->getPageFromApi($revision->getTitle());
 46 |             } catch (Exception $e) {
 47 |                 $title = $revision->getTitle();
 48 |                 $url = $this->apiUrl.urlencode($title);
 49 |                 $message = sprintf('Could not get data from API for %s with the following URI %s', $title, $url);
 50 |                 throw new Exception($message, 0, $e);
 51 |             }
 52 | 
 53 |             if (!isset($mwparse['text']) || !isset($mwparse['text']['*'])) {
 54 |                 throw new Exception('MediaWiki API did not return HTML string from parser');
 55 |             }
 56 | 
 57 |             $content = $mwparse['text']['*'];
 58 |             $matter_local = [];
 59 | 
 60 |             $matter_local['displaytitle'] = $mwparse['displaytitle'];
 61 | 
 62 |             if (isset($mwparse['categories']) && is_array($mwparse['categories'])) {
 63 |                 foreach ($mwparse['categories'] as $catObj) {
 64 |                     $matter_local['categories'][] = $catObj['*'];
 65 |                 }
 66 |             }
 67 | 
 68 |             if (isset($mwparse['links']) && is_array($mwparse['links'])) {
 69 |                 foreach ($mwparse['links'] as $linkObj) {
 70 |                     if (!isset($linkObj['exists'])) {
 71 |                         $broken_links[] = $linkObj['*'];
 72 |                     }
 73 |                 }
 74 |                 if (isset($broken_links) && count($broken_links) >= 1) {
 75 |                     $matter_local['todo_broken_links']['note'] = 'During import MediaWiki could not find the following links,';
 76 |                     $matter_local['todo_broken_links']['note'] .= ' please fix and adjust this list.';
 77 |                     $matter_local['todo_broken_links']['links'] = $broken_links;
 78 |                 }
 79 |             }
 80 | 
 81 |             $pageDom = new GlHtml($content);
 82 | 
 83 |             $readinessMatches = $pageDom->get('.readiness-state');
 84 |             if (isset($readinessMatches[0])) {
 85 |                 $matter_local['readiness'] = str_replace('readiness-state ', '', $readinessMatches[0]->getAttribute('class'));
 86 |                 $readinessMatches[0]->delete();
 87 |             }
 88 | 
 89 |             $standardizationStatus = $pageDom->get('.standardization_status');
 90 |             if (isset($standardizationStatus[0])) {
 91 |                 $matter_local['standardisation_status'] = $standardizationStatus[0]->getText();
 92 |                 $standardizationStatus[0]->delete();
 93 |             }
 94 | 
 95 |             $contentRevisionNote = $pageDom->get('.is-revision-notes');
 96 |             if (count($contentRevisionNote) >= 1) {
 97 |                 if (isset($contentRevisionNote[0])) {
 98 |                     foreach ($contentRevisionNote as $note) {
 99 |                         $contentRevisionNoteText = $note->getText();
100 |                         $note->delete();
101 |                         if (!empty($contentRevisionNoteText) && strcmp('{{{', substr($contentRevisionNoteText, 0, 3)) !== 0) {
102 |                             $matter_local['notes'][] = $contentRevisionNoteText;
103 |                         }
104 |                     }
105 |                 }
106 |             }
107 | 
108 |             $dataMetasOut = [];
109 |             // Use data-type instead, and if data-meta exists, we know the key,
110 |             // the other one must be the value.
111 |             $tags = $pageDom->get('[data-meta]');
112 |             if (count($tags) >= 1) {
113 |                 foreach ($tags as $tag) {
114 |                     //$dataMetasKey = $tag->getDOMNode()->parentNode->getAttribute('data-meta');
115 |                     //$dataNodeObj = $tag->getDOMNode()->firstChild;
116 |                     //$dataMetasBody = '';
117 | 
118 |                     $metaName = $tag->getDOMNode()->parentNode->getAttribute('data-meta');
119 |                     $obj = ['content' => $tag->getHtml(), 'name' => $metaName];
120 |                     var_dump($obj);
121 | 
122 |                     /*
123 |                     if (isset($dataNodeObj->tagName) && $dataNodeObj->tagName !== 'span') {
124 |                         echo 'Is NOT a Span. Dig deeper.'.PHP_EOL;
125 |                         //$dataMetasBody = $dataNodeObj->nextSibling->textContent;
126 |                         var_dump($dataNodeObj->nextSibling->textContent);
127 |                     } else {
128 |                         echo 'Is a Span'.PHP_EOL;
129 |                         var_dump($dataNodeObj->textContent);
130 |                     }
131 | 
132 |                     if (isset($dataNodeObj->wholeText)) {
133 |                         echo 'Has wholeText';
134 |                         var_dump($dataNodeObj->wholeText);
135 |                     }
136 |                     */
137 | 
138 |                     //if (is_string($dataNodeObj->nextSibling) && $dataNodeObj->childNodes === null) {
139 |                     //    echo 'case 1'.PHP_EOL;
140 |                         /*
141 |                          * When we have text directly in the node
142 |                          *
143 |                          *
144 |                          * Returns
145 |                          *
146 |                          * <span data-meta="return" data-type="key">Returns an object of type <span data-type="value">Object</span></span>
147 |                          *
148 |                          * e.g.:
149 |                          *
150 |                          * object(DOMText)#176272 (19) {
151 |                          *     ["wholeText"]=> string(26) "Returns an object of type ",
152 |                          *     ["data"]=> string(26) "Returns an object of type ",
153 |                          *     ["length"]=> int(26),
154 |                          *     ["nodeName"]=> string(5) "#text",
155 |                          *     ["nodeValue"]=> string(26) "Returns an object of type ",
156 |                          *     ["nodeType"]=> int(3),
157 |                          *     ["parentNode"]=> string(22) "(object value omitted)",
158 |                          *     ["childNodes"]=> NULL,
159 |                          *     ["firstChild"]=> NULL,
160 |                          *     ["lastChild"]=> NULL,
161 |                          *     ["previousSibling"]=> NULL,
162 |                          *     ["nextSibling"]=> string(22) "(object value omitted)",
163 |                          *     ["attributes"]=> NULL,
164 |                          *     ["ownerDocument"]=> string(22) "(object value omitted)",
165 |                          *     ["namespaceURI"]=> NULL,
166 |                          *     ["prefix"]=> string(0) "",
167 |                          *     ["localName"]=> NULL,
168 |                          *     ["baseURI"]=> NULL,
169 |                          *     ["textContent"]=> string(26) "Returns an object of type "
170 |                          * }
171 |                          */
172 |                     //    $dataMetasBody = $dataNodeObj->nextSibling->textContent;
173 |                     //} elseif ($dataNodeObj->childNodes !== null && count($dataNodeObj->childNodes) > 1) {
174 |                     //    echo 'case 2'.PHP_EOL;
175 | 
176 |                         /*
177 |                          * When we have nested italic.
178 |                          *
179 |                          * We want internal value "apis/web-storage/Storage";
180 |                          *
181 |                          * e.g.
182 |                          *
183 |                          *     {{API_Object_Property
184 |                          *     |Property_applies_to=apis/web-storage/Storage
185 |                          *     }}
186 |                          *
187 |                          * If we dig at API_Object_Property has, we have...
188 |                          *
189 |                          *     {{#if:{{{Property_applies_to|}}}|<span data-meta="applies_to" data-type="key">''Property of <span data-type="value">[[{{{Property_applies_to|}}}]]''</span></span>|}}
190 |                          *
191 |                          * Notice the ''property...'' between doubled single quotes.
192 |                          *
193 |                          * Generates the following HTML
194 |                          *
195 |                          *     <span data-meta="applies_to" data-type="key">
196 |                          *       <i>Property of
197 |                          *         <span data-type="value">
198 |                          *           <a href="/wiki/apis/web-storage/Storage" title="apis/web-storage/Storage">apis/web-storage/Storage</a>
199 |                          *         </span>
200 |                          *       </i>
201 |                          *     </span>
202 |                          *
203 |                          * object(DOMElement)#176272 (17) {
204 |                          *   ["tagName"]=> string(1) "i",
205 |                          *   ["schemaTypeInfo"]=> NULL,
206 |                          *   ["nodeName"]=> string(1) "i",
207 |                          *   ["nodeValue"]=> string(36) "Property of apis/web-storage/Storage",
208 |                          *   ["nodeType"]=> int(1),
209 |                          *   ["parentNode"]=> string(22) "(object value omitted)",
210 |                          *   ["childNodes"]=> string(22) "(object value omitted)",
211 |                          *   ["firstChild"]=> string(22) "(object value omitted)",
212 |                          *   ["lastChild"]=> string(22) "(object value omitted)",
213 |                          *   ["previousSibling"]=> NULL,
214 |                          *   ["attributes"]=> string(22) "(object value omitted)",
215 |                          *   ["ownerDocument"]=> string(22) "(object value omitted)",
216 |                          *   ["namespaceURI"]=> NULL,
217 |                          *   ["prefix"]=> string(0) "",
218 |                          *   ["localName"]=> string(1) "i",
219 |                          *   ["baseURI"]=> NULL,
220 |                          *   ["textContent"]=> string(36) "Property of apis/web-storage/Storage"
221 |                          * }
222 |                          */
223 |                     //    $dataMetasBody = $dataNodeObj->childNodes[1]->textContent;
224 |                     //} else {
225 |                     //    echo 'case else'.PHP_EOL;
226 |                     //}
227 | 
228 |                     //var_dump($dataNodeObj);
229 | 
230 |                     //if (!empty($dataMetasBody)) {
231 |                     //    $dataMetasOut[$dataMetasKey] = $dataMetasBody;
232 |                     //}
233 |                 }
234 |                 //$matter_local['foo'] = $dataMetasOut;
235 |             }
236 | 
237 |             $titles = $pageDom->get('h1,h2,h3,h4');
238 |             foreach ($titles as $title) {
239 |                 $title->replaceInner($title->getText());
240 |             }
241 | 
242 |             // Replacing HTML with purified version
243 |             //$configObject = [ 'safe' => 1, 'deny_attribute' => '*', 'keep_bad' => 2, 'make_tag_strict' => 1, 'balance' => 2];
244 |             //$configObject['elements'] => 'a,h1,h2,h3,h4,pre,code'
245 |             $content = $pageDom->get('body')[0]->getHtml();
246 | 
247 |             $matter_rev = $revision->getFrontMatterData();
248 | 
249 |             $newRev = new HtmlRevision($content, array_merge($matter_rev, $matter_local));
250 | 
251 |             return $newRev->setTitle($revision->getTitle());
252 |         }
253 | 
254 |         return $revision;
255 |     }
256 | }
257 | 


--------------------------------------------------------------------------------
/src/WebPlatform/Importer/Filter/TitleFilter.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | /**
  4 |  * WebPlatform MediaWiki Conversion workbench.
  5 |  */
  6 | namespace WebPlatform\Importer\Filter;
  7 | 
  8 | use WebPlatform\ContentConverter\Filter\AbstractFilter;
  9 | 
 10 | /**
 11 |  * Title Filter.
 12 |  *
 13 |  * Ensure document titles from WebPlatform MediaWiki 2015-07 won’t create conflicting file names when
 14 |  * we use its title as a filename.
 15 |  *
 16 |  * Conflicts can be caused to folders being created with different casing.
 17 |  *
 18 |  * For example, consider the following and notice how casing can vary:
 19 |  *
 20 |  *    concepts/Internet and Web/The History of the Web
 21 |  *    concepts/Internet and Web/the history of the web/es
 22 |  *    concepts/Internet and Web/the history of the web/ja
 23 |  *    tutorials/canvas/canvas tutorial
 24 |  *    tutorials/canvas/Canvas tutorial/Applying styles and colors
 25 |  *    tutorials/canvas/Canvas tutorial/Basic animations
 26 |  *
 27 |  * Would create the following files and folders;
 28 |  *
 29 |  *    concepts/
 30 |  *      - Internet_and_Web/
 31 |  *        - The_History_of_the_Web/
 32 |  *          - index.html
 33 |  *        - the_history_of_the_web/
 34 |  *          - es.html
 35 |  *          - ja.html
 36 |  *    tutorials/
 37 |  *      - canvas/
 38 |  *        - canvas_tutorial/
 39 |  *          - index.html
 40 |  *        - Canvas_tutorial/
 41 |  *          - Applying_styles_and_colors/
 42 |  *            - index.html
 43 |  *
 44 |  * Notice that we would have at the same directory level with two folders
 45 |  * with almost the same name but with different casing patterns.
 46 |  *
 47 |  * On a case-sensitive filesystem, this will create conflicts we are attempting to solve here.
 48 |  *
 49 |  * @author Renoir Boulanger <hello@renoirboulanger.com>
 50 |  */
 51 | class TitleFilter extends AbstractFilter
 52 | {
 53 |     /**
 54 |      * Rewrite only ones that would end up creating two folders with different Casing !== casing and create
 55 |      * an issue when we write files on a filesystem due to case sensitivity.
 56 |      *
 57 |      * List of replacements from mediawiki-conversion/data/url_parts_variants.txt, and notes on why some are commented
 58 |      * and other are. All should have been compared with their actual use from 2015-07-24 snapshot of our content and
 59 |      * all the urls in use from mediawiki-conversion/data/url_all.txt
 60 |      *
 61 |      * Don’t rewrite unless necessary. Otherwise we might lose links within the content.
 62 |      **/
 63 |     public function __construct()
 64 |     {
 65 | 
 66 |         // ones we shouldn’t impact               // Keep commented // Why we commented
 67 |         // -------------------------------------- // -------------- // --------------------
 68 |         //$words[] = 'Accept';                    // X              // http/headers/Accept, html/attributes/accept, html/attributes/acceptCharset
 69 |         //$words[] = 'ReadOnly';                  // X              // html/attributes/readonly, .../MediaStreamTrack/readonly
 70 |         //$words[] = 'Accessibility_basics';      // X              // Accessibility_basics
 71 |         //$words[] = 'Accessibility_testing';     // X              // Accessibility_testing
 72 |         $words[] = 'Accessibility_article_ideas';
 73 |         //$words[] = 'Animatable';                // X
 74 |         //$words[] = 'Animation';                 // X              // css/properties/animation, css/properties/animations,
 75 |         $words[] = 'Canvas_tutorial';
 76 |         //$words[] = 'Connection';                // X
 77 |         //$words[] = 'Cookie';                    // X              // http/headers/Cookie, dom/Document/cookie
 78 |         //$words[] = 'css';                       // X
 79 |         //$words[] = 'DataTransfer';              // X              // dom/DragEvent/dataTransfer, dom/DataTransfer, dom/DataTransfer/clearData
 80 |         //$words[] = 'Date';                      // X
 81 |         //$words[] = 'DOCTYPE';                   // X              // html/elements/DOCTYPE, dom/Document/doctype
 82 |         //$words[] = 'Document';                  // X
 83 |         //$words[] = 'element';                   // X
 84 |         //$words[] = 'Error';                     // X
 85 |         //$words[] = 'Event';                     // X
 86 |         //$words[] = 'File';                      // X
 87 |         //$words[] = 'FileSystem';                // X
 88 |         //$words[] = 'Floats_and_clearing';       // X               // tutorials/floats_and_clearing, Floats_and_clearing
 89 |         //$words[] = 'formTarget';                // X               // html/attributes/formtarget, dom/HTMLInputElement/formTarget, html/attributes/formtarget
 90 |         //$words[] = 'Function';                  // X               // concepts/programming/javascript/functions, css/functions, javascript/Function, javascript/Function/bind
 91 |         //$words[] = 'GamePad';                   // X               // tutorials/gamepad, apis/gamepad/Gamepad, apis/gamepad/GamepadEvent/gamepad
 92 |         //$words[] = 'GeoLocation';               // X               // apis/geolocation, apis/geolocation/Coordinates/accuracy, apis/geolocation/Geolocation/clearWatch
 93 |         $words[] = 'Getting_Your_Content_Online';
 94 |         //$words[] = 'Global';                    // X
 95 |         $words[] = 'History';
 96 |         $words[] = 'How_does_the_Internet_Work';
 97 |         $words[] = 'Internet_and_Web';
 98 |         //$words[] = 'ID';                        // X
 99 |         //$words[] = 'Image';                     // X
100 |         //$words[] = 'Implementation';            // X
101 |         //$words[] = 'indexeddb';                 // X
102 |         //$words[] = 'ISO';                       // X
103 |         $words[] = 'JavaScript_for_mobile';
104 |         //$words[] = 'Link';                      // X
105 |         //$words[] = 'Location';                  // X               // apis/location/assign, apis/workers/WorkerGlobalScope/location, dom/KeyboardEvent/location, dom/Location/hash
106 |         //$words[] = 'Math';                      // X
107 |         //$words[] = 'MoveEnd';                   // X
108 |         //$words[] = 'MoveStart';                 // X
109 |         //$words[] = 'Navigator';                 // X
110 |         //$words[] = 'Node';                      // X
111 |         //$words[] = 'Number';                    // X
112 |         //$words[] = 'oauth';                     // X
113 |         //$words[] = 'Object';                    // X
114 |         //$words[] = 'onLine';                    // X
115 |         //$words[] = 'Option';                    // X
116 |         //$words[] = 'Performance';               // X
117 |         //$words[] = 'PhotoSettingsOptions';      // X
118 |         //$words[] = 'PointerEvents';             // X
119 |         //$words[] = 'Position';                  // X
120 |         //$words[] = 'Q';                         // X
121 |         //$words[] = 'Range';                     // X
122 |         //$words[] = 'Region';                    // X
123 |         $words[] = 'removeStream';
124 |         //$words[] = 'selection';                 // X
125 |         //$words[] = 'selectors';                 // X
126 |         //$words[] = 'storage';                   // X
127 |         //$words[] = 'string';                    // X
128 |         //$words[] = 'StyleMedia';                // X
129 |         //$words[] = 'styleSheet';                // X
130 |         //$words[] = 'Styling_lists_and_links';   // X               // guides/Styling lists and links, tutorials/styling lists and links
131 |         //$words[] = 'Styling_tables';            // X               // guides/styling tables, Styling tables
132 |         //$words[] = 'text';                      // X
133 |         //$words[] = 'tfoot';                     // X
134 |         //$words[] = 'the_basics_of_html';        // X               // guides/the basics of html/ko, guides/the basics of html, tutorials/The basics of HTML
135 |         $words[] = 'The_History_of_the_Web';
136 |         //$words[] = 'thead';                     // X
137 |         //$words[] = 'timeStamp';                 // X
138 |         //$words[] = 'tutorials';                 // X
139 |         //$words[] = 'Unicode';                   // X
140 |         //$words[] = 'url';                       // X
141 |         //$words[] = 'websocket';                 // X
142 |         $words[] = 'What_does_a_good_web_page_need';
143 |         $words[] = 'Translations';
144 | 
145 |         // Ones that are common in an URL but yet, in this
146 |         // precise context, had casing discrepancies.
147 |         $words[] = 'css\/cssom\/styleSheet';
148 |         $words[] = 'css\/selectors';
149 |         $words[] = 'dom\/DOMTokenList';
150 |         $words[] = 'tutorials\/HTML_forms';
151 | 
152 |         $matchers = [];
153 |         foreach ($words as $k => $word) {
154 |             $matchers[] = sprintf('/%s/iuS', $word);
155 |             // We need remove RegEx escaping for
156 |             // replacement at addPass below.
157 |             $words[$k] = stripslashes($word);
158 |         }
159 | 
160 |         $this->addPass($matchers, $words);
161 | 
162 |         return $this;
163 |     }
164 | }
165 | 


--------------------------------------------------------------------------------
/src/WebPlatform/Importer/GitPhp/CommitCommandBuilder.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | /**
 4 |  * WebPlatform MediaWiki Conversion workbench.
 5 |  */
 6 | namespace WebPlatform\Importer\GitPhp;
 7 | 
 8 | use Bit3\GitPhp\Command\CommitCommandBuilder as BaseCommitCommandBuilder;
 9 | 
10 | /**
11 |  * Commit command builder.
12 |  *
13 |  * Let’s override GIT_COMMITTER_* shell environment variables
14 |  * with the ones of the original authors.
15 |  */
16 | class CommitCommandBuilder extends BaseCommitCommandBuilder
17 | {
18 |     public function author($author)
19 |     {
20 |         preg_match('/^(.*)\ <(.*)>/', $author, $matches);
21 | 
22 |         if (isset($matches[1])) {
23 |             $this->processBuilder->setEnv('GIT_COMMITTER_NAME', $matches[1]);
24 |         }
25 |         if (isset($matches[2])) {
26 |             $this->processBuilder->setEnv('GIT_COMMITTER_EMAIL', $matches[2]);
27 |         }
28 | 
29 |         return parent::author($author);
30 |     }
31 | 
32 |     public function date($date)
33 |     {
34 |         $this->processBuilder->setEnv('GIT_COMMITTER_DATE', $date);
35 | 
36 |         return parent::date($date);
37 |     }
38 | }
39 | 


--------------------------------------------------------------------------------
/src/WebPlatform/Importer/GitPhp/GitRepository.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | /**
 4 |  * WebPlatform MediaWiki Conversion workbench.
 5 |  */
 6 | namespace WebPlatform\Importer\GitPhp;
 7 | 
 8 | use Bit3\GitPhp\GitRepository as BaseGitRepository;
 9 | 
10 | /**
11 |  * Extends Git Repository so we can inject our own shell environment to Process.
12 |  *
13 |  * @author Renoir Boulanger <hello@renoirboulanger.com>
14 |  */
15 | class GitRepository extends BaseGitRepository
16 | {
17 |     /**
18 |      * Create commit command.
19 |      *
20 |      * @return CommitCommandBuilder
21 |      */
22 |     public function commit()
23 |     {
24 |         return new CommitCommandBuilder($this);
25 |     }
26 | }
27 | 


--------------------------------------------------------------------------------
/src/WebPlatform/Importer/Helpers/MediaWikiHelper.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | /**
 4 |  * WebPlatform MediaWiki Conversion workbench.
 5 |  */
 6 | namespace WebPlatform\Importer\Helpers;
 7 | 
 8 | use WebPlatform\ContentConverter\Helpers\MediaWikiHelper as BaseMediaWikiHelper;
 9 | 
10 | /**
11 |  * MediaWiki subtelty helper.
12 |  *
13 |  * @author Renoir Boulanger <hello@renoirboulanger.com>
14 |  */
15 | class MediaWikiHelper extends BaseMediaWikiHelper
16 | {
17 | }
18 | 


--------------------------------------------------------------------------------
/src/WebPlatform/Importer/Model/MarkdownRevision.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | /**
 4 |  * WebPlatform Content Converter.
 5 |  */
 6 | namespace WebPlatform\Importer\Model;
 7 | 
 8 | use WebPlatform\ContentConverter\Model\MarkdownRevision as BaseMarkdownRevision;
 9 | use Symfony\Component\Yaml\Dumper;
10 | 
11 | /**
12 |  * Markdown Revision, with some project specific adjustments.
13 |  *
14 |  * @author Renoir Boulanger <hello@renoirboulanger.com>
15 |  */
16 | class MarkdownRevision extends BaseMarkdownRevision
17 | {
18 |     public function getFrontMatter()
19 |     {
20 |         $yaml = new Dumper();
21 |         $yaml->setIndentation(2);
22 | 
23 |         if (!empty($this->getTitle()) && !isset($this->front_matter['title'])) {
24 |             $this->front_matter['title'] = $this->getTitle();
25 |         }
26 | 
27 |         ksort($this->front_matter);
28 | 
29 |         $out[] = '---';
30 |         $titleCopy = str_replace("'", "\'", $this->front_matter['title']);
31 |         unset($this->front_matter['title']);
32 |         $out[] .= sprintf("title: '%s'", $titleCopy);
33 | 
34 |         if (!empty($this->front_matter)) {
35 |             $out[] = $yaml->dump($this->front_matter, 3, 0, false, false);
36 |         }
37 |         $out[] = '---';
38 | 
39 |         return implode($out, PHP_EOL);
40 |     }
41 | }
42 | 


--------------------------------------------------------------------------------
/src/WebPlatform/Importer/Model/MediaWikiDocument.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | /**
  4 |  * WebPlatform MediaWiki Conversion workbench.
  5 |  */
  6 | namespace WebPlatform\Importer\Model;
  7 | 
  8 | use WebPlatform\ContentConverter\Model\MediaWikiDocument as BaseMediaWikiDocument;
  9 | 
 10 | /**
 11 |  * Adjust MediaWiki Document behavior specific to WebPlatform content.
 12 |  *
 13 |  * @author Renoir Boulanger <hello@renoirboulanger.com>
 14 |  */
 15 | class MediaWikiDocument extends BaseMediaWikiDocument
 16 | {
 17 |     // List namespaces
 18 |     public static $NAMESPACE_PREFIXES = array('10' => 'Template:','102' => 'Property:','15' => 'Category:','3000' => 'WPD:','3020' => 'Meta:');
 19 | 
 20 |     /** @var string page Title, but in MW it ends up being an URL too */
 21 |     protected $title = null;
 22 | 
 23 |     /** @var mixed string representation of the possible path or false if no redirect was specified */
 24 |     protected $redirect = false;
 25 | 
 26 |     const LANG_ENGLISH = 0;
 27 | 
 28 |     const LANG_JAPANESE = 'ja';
 29 | 
 30 |     const LANG_GERMAN = 'de';
 31 | 
 32 |     const LANG_TURKISH = 'tr';
 33 | 
 34 |     const LANG_KOREAN = 'ko';
 35 | 
 36 |     const LANG_SPANISH = 'es';
 37 | 
 38 |     const LANG_PORTUGUESE_BRAZIL = 'pt-br';
 39 | 
 40 |     const LANG_PORTUGUESE = 'pt';
 41 | 
 42 |     const LANG_CHINESE = 'zh';
 43 | 
 44 |     const LANG_CHINESE_HANT = 'zh-hant';
 45 | 
 46 |     const LANG_CHINESE_HANS = 'zh-hans';
 47 | 
 48 |     const LANG_FRENCH = 'fr';
 49 | 
 50 |     const LANG_SWEDISH = 'sv';
 51 | 
 52 |     const LANG_DUTCH = 'nl';
 53 | 
 54 |     /**
 55 |      * String RegEx to find if the page is a page translation.
 56 |      *
 57 |      * From https://docs.webplatform.org/wiki/Template:Languages?action=raw
 58 |      *
 59 |      * Removed:
 60 |      *
 61 |      *   - id (no translations made in this language)
 62 |      *   - th (^)
 63 |      *
 64 |      * Added:
 65 |      *
 66 |      *   - zh-hant
 67 |      *   - zh-hans
 68 |      *
 69 |      * Should reflect the list of defined translation in [[Template:Languages]] source.
 70 |      */
 71 |     const REGEX_LANGUAGES = '/\/(ar|ast|az|bcc|bg|ca|cs|da|de|diq|el|eo|es|fa|fi|fr|gl|gu|he|hu|hy|it|ja|ka|kk|km|ko|ksh|kw|mk|ml|mr|ms|nl|no|oc|pl|pt|pt\-br|ro|ru|si|sk|sl|sq|sr|sv|ta|tr|uk|vi|yue|zh|zh\-hant|zh\-hans)"$/';
 72 | 
 73 |     /**
 74 |      * Commonly used translation codes used in WebPlatform Docs.
 75 |      *
 76 |      * Each key represent a language code generally put at the end of a page URL (e.g. Main_Page/es).
 77 |      *
 78 |      * Value is an array of two;
 79 |      * 1. CAPITALIZED english name of the language (e.g. self::$translationCodes['zh'][0] would be 'CHINESE'), so we could map back to self::CHINESE,
 80 |      * 2. Language name in its native form (e.g. self::$translationCodes['zh'][1] would be '中文')
 81 |      *
 82 |      * See also:
 83 |      *   - https://docs.webplatform.org/w/index.php?title=Special%3AWhatLinksHere&target=Template%3ALanguages&namespace=0
 84 |      *   - https://docs.webplatform.org/wiki/WPD:Translations
 85 |      *   - https://docs.webplatform.org/wiki/WPD:Multilanguage_Support
 86 |      *   - https://docs.webplatform.org/wiki/WPD:Implementation_Patterns
 87 |      *   - http://www.w3.org/International/articles/language-tags/
 88 |      *
 89 |      * Ideally, we should use self::REGEX_LANGUAGES, but in the end after looking up dumpBackup XML file, only those had contents;
 90 |      *
 91 |      * [de,es,fr,ja,ko,nl,pt-br,sv,tr,zh,zh-hant,zh-hans]
 92 |      *
 93 |      * @var array
 94 |      */
 95 |     public static $translationCodes = array(
 96 |                     'en' => ['ENGLISH', 'English'],
 97 |                     'ja' => ['JAPANESE', '日本語'],
 98 |                     'de' => ['GERMAN', 'Deutsch'],
 99 |                     'tr' => ['TURKISH', 'Türkçe'],
100 |                     'ko' => ['KOREAN', '한국어'],
101 |                     'es' => ['SPANISH', 'Español'],
102 |                     'pt-br' => ['PORTUGUESE_BRAZIL', 'Português do Brasil'],
103 |                     'pt' => ['PORTUGUESE', 'Português'],
104 |                     'zh' => ['CHINESE', '中文'],
105 |                     'zh-hant' => ['CHINESE_HANT', '中文（繁體）'],
106 |                     'zh-hans' => ['CHINESE_HANS', '中文（简体）'],
107 |                     'fr' => ['FRENCH', 'Français'],
108 |                     'sv' => ['SWEDISH', 'Svenska'],
109 |                     'nl' => ['DUTCH', 'Nederlands'],
110 |                 );
111 | 
112 |     /**
113 |      * We expect this is *only* OK the entry *just before*
114 |      * the last *IS* either "elements" or "attributes" because
115 |      * the current implementation used language codes that was
116 |      * conflated with valid HTML/SVG/SGML elements and attributes.
117 |      *
118 |      * e.g. [tr, id, ...]
119 |      *
120 |      *   - html/elements/tr
121 |      *   - html/attributes/id
122 |      *   - svg/attributes/marker/tr
123 |      *   - mathml/elements/menclose
124 |      *
125 |      * @return bool
126 |      */
127 |     public function isChildOfKnownPageListing()
128 |     {
129 |         $knownPageListings = ['elements','attributes'];
130 | 
131 |         $needles = explode('/', $this->getName());
132 |         $size = (int) count($needles);
133 | 
134 |         if ($size < 2) {
135 |             return false;
136 |         }
137 | 
138 |         return in_array($needles[ $size - 2 ], $knownPageListings);
139 |     }
140 | 
141 |     public function isTranslation()
142 |     {
143 |         // An edge case. Contents in html/elements/tr,
144 |         if ($this->isChildOfKnownPageListing()) {
145 |             return false;
146 |         }
147 | 
148 |         return in_array($this->getLastTitleFragment(), array_keys(self::$translationCodes)) === true;
149 |     }
150 | 
151 |     public function getDocumentTitle()
152 |     {
153 |         $title = $this->title;
154 |         if ($this->isTranslation()) {
155 |             $parts = explode('/', $title);
156 |             $select = count($parts) - 2;
157 | 
158 |             if (isset($parts[$select])) {
159 |                 return $parts[$select];
160 |             }
161 |         }
162 | 
163 |         return $this->getLastTitleFragment();
164 |     }
165 | 
166 |     public function getLastTitleFragment()
167 |     {
168 |         $title = $this->getTitle();
169 | 
170 |         return (strrpos($title, '/') === false)?$title:substr($title, (int) strrpos($title, '/') + 1);
171 |     }
172 | }
173 | 


--------------------------------------------------------------------------------