├── README.md └── grapher.php /README.md: -------------------------------------------------------------------------------- 1 | # DokuWiki Grapher 2 | 3 | This is a simple script to generate a directed graph description from DokuWiki link structures. Read the [introductory blog post](https://www.splitbrain.org/blog/2010-08/02-graphing_dokuwiki_help_needed) for some more info. 4 | 5 | ## Installing 6 | 7 | Download the [grapher.php](https://raw.githubusercontent.com/splitbrain/dokuwiki-grapher/master/grapher.php) file into your DokuWiki ``bin`` directory. Then run it from the command line. 8 | 9 | ## Usage 10 | 11 | See ``bin/grapher.php --help``: 12 | 13 | ``` 14 | USAGE: grapher.php <OPTIONS> [<namespaces>...] 15 | 16 | Creates a graph representation of pages and media files and how they 17 | are interlinked. 18 | 19 | 20 | OPTIONS 21 | 22 | -d <depth>, --depth <depth> Recursion depth, eg. how deep to look into the 23 | given namespaces. Use 0 for all. Default: 1 24 | 25 | -m <ns|all|none>, How to handle media files. 'ns' includes only media 26 | --media <ns|all|none> that is located in the given namespaces, 'all' 27 | includes all media files and 'none' ignores the 28 | media files completely. Default: ns 29 | 30 | -f <dot|gexf>, The wanted output format. 'dot' is a very simple 31 | --format <dot|gexf> format which can be used to visualize the resulting 32 | graph with graphviz. The 'gexf' format is a more 33 | complex XML-based format which contains more info 34 | about the found nodes and can be loaded in Gephi. 35 | Default: dot 36 | 37 | -o <file>, --output <file> Where to store the output eg. a filename. If not 38 | given the output is written to STDOUT. 39 | 40 | --no-colors Do not use any colors in output. Useful when piping 41 | output to other tools or files. 42 | 43 | -h, --help Display this help screen and exit immediately. 44 | 45 | 46 | <namespaces> Give all wiki namespaces you want to have graphed. 47 | If no namespace is given, the root namespace is 48 | assumed. 
49 | ``` 50 | 51 | ## Visualize 52 | 53 | Run the created file through [GraphViz](http://www.graphviz.org/) or [Gephi](https://gephi.org/). -------------------------------------------------------------------------------- /grapher.php: -------------------------------------------------------------------------------- 1 | #!/usr/bin/php 2 | setHelp('Creates a graph representation of pages and media files and how they are interlinked.'); 16 | $options->registerOption( 17 | 'depth', 18 | 'Recursion depth, eg. how deep to look into the given namespaces. Use 0 for all. Default: 1', 19 | 'd', 'depth'); 20 | $options->registerOption( 21 | 'media', 22 | "How to handle media files. 'ns' includes only media that is located in the given namespaces, ". 23 | "'all' includes all media files and 'none' ignores the media files completely. ". 24 | "Default: ns", 25 | 'm', 'ns|all|none'); 26 | $options->registerOption( 27 | 'format', 28 | "The wanted output format. 'dot' is a very simple format which can be used to visualize the resulting ". 29 | "graph with graphviz. The 'gexf' format is a more complex XML-based format which contains more info ". 30 | "about the found nodes and can be loaded in Gephi. Default: dot", 31 | 'f', 'dot|gexf'); 32 | $options->registerOption( 33 | 'output', 34 | "Where to store the output eg. a filename. If not given the output is written to STDOUT.", 35 | 'o', 'file'); 36 | $options->registerArgument( 37 | 'namespaces', 38 | "Give all wiki namespaces you want to have graphed. If no namespace is given, the root ". 
39 | "namespace is assumed.", 40 | false 41 | ); 42 | } 43 | 44 | /** 45 | * Your main program 46 | * 47 | * Arguments and options have been parsed when this is run 48 | * 49 | * @param DokuCLI_Options $options 50 | * @return void 51 | */ 52 | protected function main(DokuCLI_Options $options) { 53 | $depth = $options->getOpt('depth', 1); 54 | $media = $options->getOpt('media', 'ns'); 55 | if(!in_array($media, array('ns', 'all', 'none'))) { 56 | $this->fatal('Bad media option: ' . $media); 57 | } 58 | $format = $options->getOpt('format', 'dot'); 59 | if(!in_array($format, array('dot', 'gexf'))) { 60 | $this->fatal('Bad format option: ' . $format); 61 | } 62 | $output = $options->getOpt('output', '-'); 63 | if($output == '-') $output = 'php://stdout'; 64 | 65 | $namespaces = array_map('cleanID', $options->args); 66 | if(!count($namespaces)) $namespaces = array(''); //import from top 67 | 68 | $fh = @fopen($output, 'w'); 69 | if(!$fh) $this->fatal("Failed to open $output"); 70 | 71 | $data = $this->gather_data($namespaces, $depth, $media); 72 | if($format == 'dot') { 73 | $this->create_dot($data, $fh); 74 | } elseif($format == 'gexf') { 75 | $this->create_gexf($data, $fh); 76 | } 77 | 78 | fclose($fh); 79 | } 80 | 81 | /** 82 | * Find all the node and edge data for the given namespaces 83 | * @param $namespaces 84 | * @param int $depth 85 | * @param string $incmedia 86 | * @return array 87 | */ 88 | protected function gather_data($namespaces, $depth = 0, $incmedia = 'ns') { 89 | global $conf; 90 | /** @var helper_plugin_translation $transplugin */ 91 | $transplugin = plugin_load('helper', 'translation'); 92 | 93 | $pages = array(); 94 | $media = array(); 95 | foreach($namespaces as $ns) { 96 | // find media 97 | if($incmedia == 'ns') { 98 | $data = array(); 99 | search( 100 | $data, 101 | $conf['mediadir'], 102 | 'search_universal', 103 | array( 104 | 'depth' => $depth, 105 | 'listfiles' => true, 106 | 'listdirs' => false, 107 | 'pagesonly' => false, 108 | 'skipacl' 
=> true, 109 | 'keeptxt' => true, 110 | 'meta' => true, 111 | ), 112 | str_replace(':', '/', $ns) 113 | ); 114 | 115 | // go through all those media files 116 | while($item = array_shift($data)) { 117 | $media[$item['id']] = array( 118 | 'title' => noNS($item['id']), 119 | 'size' => $item['size'], 120 | 'ns' => getNS($item['id']), 121 | 'time' => $item['mtime'], 122 | ); 123 | } 124 | } 125 | 126 | // find pages 127 | $data = array(); 128 | search( 129 | $data, 130 | $conf['datadir'], 131 | 'search_universal', 132 | array( 133 | 'depth' => $depth, 134 | 'listfiles' => true, 135 | 'listdirs' => false, 136 | 'pagesonly' => true, 137 | 'skipacl' => true, 138 | 'firsthead' => true, 139 | 'meta' => true, 140 | ), 141 | str_replace(':', '/', $ns) 142 | ); 143 | 144 | // ns start page 145 | if($ns && page_exists($ns)) { 146 | $data[] = array( 147 | 'id' => $ns, 148 | 'ns' => getNS($ns), 149 | 'title' => p_get_first_heading($ns, false), 150 | 'size' => filesize(wikiFN($ns)), 151 | 'mtime' => filemtime(wikiFN($ns)), 152 | 'perm' => 16, 153 | 'type' => 'f', 154 | 'level' => 0, 155 | 'open' => 1, 156 | ); 157 | } 158 | 159 | // go through all those pages 160 | while($item = array_shift($data)) { 161 | $time = (int) p_get_metadata($item['id'], 'date created', false); 162 | if(!$time) $time = $item['mtime']; 163 | $lang = ($transplugin) ? $transplugin->getLangPart($item['id']) : ''; 164 | 165 | if($lang) $item['ns'] = preg_replace('/^' . $lang . 
'(:|$)/', '', $item['ns']); 166 | 167 | $pages[$item['id']] = array( 168 | 'title' => $item['title'], 169 | 'ns' => $item['ns'], 170 | 'size' => $item['size'], 171 | 'time' => $time, 172 | 'links' => array(), 173 | 'media' => array(), 174 | 'lang' => $lang 175 | ); 176 | } 177 | } 178 | 179 | // now get links and media 180 | foreach($pages as $pid => $item) { 181 | // get instructions 182 | $ins = p_cached_instructions(wikiFN($pid), false, $pid); 183 | // find links and media usage 184 | foreach($ins as $i) { 185 | $mid = null; 186 | 187 | if($i[0] == 'internallink') { 188 | $id = $i[1][0]; 189 | $exists = true; 190 | resolve_pageid($item['ns'], $id, $exists); 191 | list($id) = explode('#', $id, 2); 192 | if($id == $pid) continue; // skip self references 193 | if($exists && isset($pages[$id])) { 194 | $pages[$pid]['links'][] = $id; 195 | } 196 | if(is_array($i[1][1]) && $i[1][1]['type'] == 'internalmedia') { 197 | $mid = $i[1][1]['src']; // image link 198 | } else { 199 | continue; // we're done here 200 | } 201 | } 202 | 203 | if($i[0] == 'internalmedia') { 204 | $mid = $i[1][0]; 205 | } 206 | 207 | if(is_null($mid)) continue; 208 | if($incmedia == 'none') continue; // no media wanted 209 | 210 | $exists = true; 211 | resolve_mediaid($item['ns'], $mid, $exists); 212 | list($mid) = explode('#', $mid, 2); 213 | $mid = cleanID($mid); 214 | 215 | if($exists) { 216 | if($incmedia == 'all') { 217 | if(!isset($media[$mid])) { //add node 218 | $media[$mid] = array( 219 | 'size' => filesize(mediaFN($mid)), 220 | 'time' => filemtime(mediaFN($mid)), 221 | 'ns' => getNS($mid), 222 | 'title' => noNS($mid), 223 | ); 224 | } 225 | $pages[$pid]['media'][] = $mid; 226 | } elseif(isset($media[$mid])) { 227 | $pages[$pid]['media'][] = $mid; 228 | } 229 | } 230 | } 231 | 232 | // clean up duplicates 233 | $pages[$pid]['links'] = array_unique($pages[$pid]['links']); 234 | $pages[$pid]['media'] = array_unique($pages[$pid]['media']); 235 | } 236 | 237 | return array('pages' => $pages, 
'media' => $media); 238 | } 239 | 240 | /** 241 | * Create a Graphviz dot representation 242 | * 243 | * @param array $data 244 | * @param resource $fh 245 | */ 246 | protected function create_dot(&$data, $fh) { 247 | $pages =& $data['pages']; 248 | $media =& $data['media']; 249 | 250 | fwrite($fh, "digraph G {\n"); 251 | // create all nodes first 252 | foreach($pages as $id => $page) { 253 | fwrite($fh, " \"page-$id\" [shape=note, label=\"$id\\n{$page['title']}\", color=lightblue, fontname=Helvetica];\n"); 254 | } 255 | foreach($media as $id => $item) { 256 | fwrite($fh, " \"media-$id\" [shape=box, label=\"$id\", color=sandybrown, fontname=Helvetica];\n"); 257 | } 258 | // now create all the links 259 | foreach($pages as $id => $page) { 260 | foreach($page['links'] as $link) { 261 | fwrite($fh, " \"page-$id\" -> \"page-$link\" [color=navy];\n"); 262 | } 263 | foreach($page['media'] as $link) { 264 | fwrite($fh, " \"page-$id\" -> \"media-$link\" [color=firebrick];\n"); 265 | } 266 | } 267 | fwrite($fh, "}\n"); 268 | } 269 | 270 | /** 271 | * Create a GEXF representation 272 | * 273 | * @param array $data 274 | * @param resource $fh 275 | */ 276 | protected function create_gexf(&$data, $fh) { 277 | $pages =& $data['pages']; 278 | $media =& $data['media']; 279 | 280 | fwrite($fh, "\n"); 281 | fwrite( 282 | $fh, "\n" 284 | ); 285 | fwrite($fh, " \n"); 286 | fwrite($fh, " DokuWiki\n"); 287 | fwrite($fh, " \n"); 288 | fwrite($fh, " \n"); 289 | 290 | // define attributes 291 | fwrite($fh, " \n"); 292 | fwrite($fh, " \n"); 293 | fwrite($fh, " \n"); 294 | fwrite($fh, " \n"); 295 | fwrite($fh, " \n"); 296 | fwrite($fh, " page|media\n"); 297 | fwrite($fh, " \n"); 298 | fwrite($fh, " \n"); 299 | fwrite($fh, " \n"); 300 | fwrite($fh, " \n"); 301 | 302 | // create all nodes first 303 | fwrite($fh, " \n"); 304 | foreach($pages as $id => $item) { 305 | $title = htmlspecialchars($item['title']); 306 | $lang = htmlspecialchars($item['lang']); 307 | fwrite($fh, " \n"); 308 | 
fwrite($fh, " \n"); 309 | fwrite($fh, " \n"); 310 | fwrite($fh, " \n"); 311 | fwrite($fh, " \n"); 312 | fwrite($fh, " \n"); 313 | fwrite($fh, " \n"); 314 | fwrite($fh, " \n"); 315 | fwrite($fh, " \n"); 316 | fwrite($fh, " \n"); 317 | fwrite($fh, " \n"); 318 | fwrite($fh, " \n"); 319 | } 320 | foreach($media as $id => $item) { 321 | $title = htmlspecialchars($item['title']); 322 | $lang = htmlspecialchars($item['lang']); 323 | fwrite($fh, " \n"); 324 | fwrite($fh, " \n"); 325 | fwrite($fh, " \n"); 326 | fwrite($fh, " \n"); 327 | fwrite($fh, " \n"); 328 | fwrite($fh, " \n"); 329 | fwrite($fh, " \n"); 330 | fwrite($fh, " \n"); 331 | fwrite($fh, " \n"); 332 | fwrite($fh, " \n"); 333 | fwrite($fh, " \n"); 334 | fwrite($fh, " \n"); 335 | } 336 | fwrite($fh, " \n"); 337 | 338 | // now create all the edges 339 | fwrite($fh, " \n"); 340 | $cnt = 0; 341 | foreach($pages as $id => $page) { 342 | foreach($page['links'] as $link) { 343 | $cnt++; 344 | fwrite($fh, " \n"); 345 | } 346 | foreach($page['media'] as $link) { 347 | $cnt++; 348 | fwrite($fh, " \n"); 349 | } 350 | } 351 | fwrite($fh, " \n"); 352 | 353 | fwrite($fh, " \n"); 354 | fwrite($fh, "\n"); 355 | } 356 | 357 | } 358 | 359 | $grapher = new Grapher(); 360 | $grapher->run(); 361 | --------------------------------------------------------------------------------