├── boot.php
├── lang
│   └── de_de.lang
├── pages
│   ├── index.php
│   ├── info.php
│   ├── crawler.php
│   └── settings.php
├── fragments
│   └── loglink.php
├── package.yml
├── assets
│   └── css
│       └── linkchecker_css.css
├── README.md
├── LICENSE
└── lib
    └── linkchecker.php
/boot.php:
--------------------------------------------------------------------------------
1 | getAssetsUrl('css/linkchecker_css.css'));
4 | }
5 |
6 |
7 |
--------------------------------------------------------------------------------
/lang/de_de.lang:
--------------------------------------------------------------------------------
1 | linkchecker_title = Linkchecker
2 | linkchecker_einstellungen = Einstellungen
3 | linkchecker_crawler = Links checken
4 |
5 |
6 | linkchecker_info = Info
--------------------------------------------------------------------------------
/pages/index.php:
--------------------------------------------------------------------------------
1 | i18n('linkchecker_title'));
3 |
4 | //include rex_be_controller::getCurrentPageObject()->getSubPath();
5 | rex_be_controller::includeCurrentPageSubPath();
6 |
--------------------------------------------------------------------------------
/fragments/loglink.php:
--------------------------------------------------------------------------------
1 |
2 |
3 |
= $this->status_code ?>
4 |
--------------------------------------------------------------------------------
/pages/info.php:
--------------------------------------------------------------------------------
1 | '.$Parsedown->text($file).'';
7 |
8 | $fragment = new rex_fragment();
9 | $fragment->setVar('title', $this->i18n('linkchecker_info'));
10 | $fragment->setVar('body', $content, false);
11 | echo $fragment->parse('core/page/section.php');
12 |
13 |
14 |
--------------------------------------------------------------------------------
/package.yml:
--------------------------------------------------------------------------------
1 | package: linkchecker
2 | version: '0.0.beta'
3 | author: Friends Of REDAXO
4 | supportpage: https://github.com/FriendsOfREDAXO/linkchecker
5 |
6 | page:
7 | title: translate:linkchecker_title
8 | perm: linkchecker[params]
9 | icon: rex-icon fa-chain-broken
10 | pjax: false
11 | subpages:
12 | settings: { title: translate:linkchecker_einstellungen, perm: admin }
13 | crawler: { title: translate:linkchecker_crawler }
14 | info: { title: translate:linkchecker_info }
15 |
16 | requires:
17 | redaxo: '^5.5'
18 |
--------------------------------------------------------------------------------
/assets/css/linkchecker_css.css:
--------------------------------------------------------------------------------
/* One flex row per logged link; rows never wrap and span the full width. */
.lc_log {
    display: flex;
    flex-wrap: nowrap;
    width: 100%;
}

/* Separator line between two consecutive log rows. */
.lc_log + .lc_log {
    border-top: 1px solid #bbbbbb;
}

/* Every cell inside a row gets the same padding. */
.lc_log div {
    padding: 4px;
}

/* Status colouring via code<status> classes: 200 is shown in green. */
.lc_log .code200 {
    background: green;
    color: #fff;
}

/* Redirects (301/302), client errors (400/404) and the generic "err" marker are shown in red. */
.lc_log .code301,
.lc_log .code302,
.lc_log .code404,
.lc_log .code400,
.lc_log .codeerr {
    background: red;
    color: #fff;
}

/* mailto: and tel: links are only highlighted in yellow. */
.lc_log .codemailto,
.lc_log .codetel {
    background: yellow;
}

/* Column sizing: the link column is given the full width, the status column a fixed 50px. */
.lc_log .clink {
    width: 100%;
}

.lc_log .statuscode {
    width: 50px;
    text-align: center;
}
14 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ### Linkchecker ###
2 |
3 |
4 | Hier eine kurze Anleitung schreiben.
5 | Aber erst wenn alle Grundfunktionen fertig sind :-)
6 |
7 | Infos zu Statuscodes: https://de.wikipedia.org/wiki/HTTP-Statuscode
8 |
9 | ---
10 |
11 | ### ToDo ###
12 |
13 | Während der Ausführung wird die Ausgabe noch nicht korrekt aktualisiert.
14 |
15 | ---
16 |
17 | ### Autor
18 |
19 | **Friends Of REDAXO**
20 |
21 | * http://www.redaxo.org
22 | * https://github.com/FriendsOfREDAXO
23 |
24 | **Projekt-Lead**
25 |
26 | * [Oliver Kreischer](https://github.com/olien)
27 |
28 | ___
29 |
30 | * Idee und Realisierung der ersten Version: [Wolfgang Bund](https://github.com/dtpop) // [Oliver Kreischer](https://github.com/olien) // [Tim Filler](https://github.com/elricco)
31 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Friends Of REDAXO
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6 |
7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
8 |
9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
/pages/crawler.php:
--------------------------------------------------------------------------------
1 |
11 |
12 |
13 |
';
14 | $linkchecker = new linkchecker();
15 | $linkchecker->run();
16 | echo '
17 |
18 | ';
19 |
20 |
21 | } else {
22 | $content .= '
23 |
40 | ';
41 | }
42 |
43 | $fragment = new rex_fragment();
44 | if ($config['submit']) {
45 | $fragment->setVar('title', $this->i18n('linkchecker_crawler'));
46 | }
47 | $fragment->setVar('body', $content, false);
48 | echo $fragment->parse('core/page/section.php');
49 |
--------------------------------------------------------------------------------
/pages/settings.php:
--------------------------------------------------------------------------------
1 | setConfig('baselink',$config['baselink']);
22 | $this->setConfig('depth',$config['depth']);
23 | $this->setConfig('maxlinks',$config['maxlinks']);
24 | $this->setConfig('no200',$config['no200']);
25 | $form .= rex_view::info('Werte gespeichert');
26 | }
27 |
28 | // open form
29 | $form .= '
30 |
104 | ';
105 |
106 | $fragment = new rex_fragment();
107 | $fragment->setVar('class', 'edit');
108 | $fragment->setVar('title', 'Einstellung');
109 | $fragment->setVar('body', $form, false);
110 | echo $fragment->parse('core/page/section.php');
111 |
--------------------------------------------------------------------------------
/lib/linkchecker.php:
--------------------------------------------------------------------------------
1 | baselink = rex_config::get('linkchecker','baselink');
19 | $this->maxlinks = rex_config::get('linkchecker','maxlinks');
20 | $this->maxdepth = rex_config::get('linkchecker','depth');
21 | $this->no200 = rex_config::get('linkchecker','no200');
22 |
23 | $parsed_root = parse_url($this->baselink);
24 | $this->real_root = $parsed_root['scheme'] . '://' . $parsed_root['host'] . '/';
25 |
26 | }
27 |
/**
 * Entry point of the link check.
 *
 * Crawls the page stored in $this->baselink (including everything
 * crawl_page() follows from there) and afterwards echoes an info box
 * telling the user that the run has finished.
 *
 * @return void
 */
public function run () {
    $start_url = $this->baselink;
    $this->crawl_page($start_url);

    $done_notice = rex_view::info('Fertig');
    echo $done_notice;
}
34 |
35 |
/**
 * Crawls one page: loads its HTML, checks every <a href> found in it and
 * recursively crawls the links that belong to the checked site.
 *
 * The page heading and one row per link are echoed via log_page() and
 * log_link(). The recursion is limited by $this->maxdepth and the total
 * number of checked links by $this->maxlinks.
 *
 * @param string $plink absolute URL of the page to crawl (pagelink)
 * @return void
 */
private function crawl_page ($plink) {
    $pages_to_crawl = []; // links found on this page that still have to be crawled
    $this->pages_checked[$plink]['link'] = $plink;

    if ($this->depth >= $this->maxdepth) {
        $this->log_page($plink, 'Seitentiefe überschritten!');
        return;
    }

    $this->depth++;

    $this->log_page($plink);

    // A failed request returns false and loadHTML() cannot handle an empty
    // document, so stop here instead of passing either value on.
    $html = file_get_contents($plink);
    if ($html === false || $html === '') {
        $this->log_page($plink, 'Seite konnte nicht geladen werden');
        $this->depth--;
        return;
    }

    // Collect libxml parse errors internally instead of emitting a warning
    // for every piece of invalid markup; afterwards drop the collected errors
    // and restore whatever error mode was active before.
    $doc = new DOMDocument();
    $previous_error_mode = libxml_use_internal_errors(true);
    $doc->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous_error_mode);

    // Relative links are resolved against <base href> if the page has one,
    // otherwise against the root of the start URL.
    $base_href = '';
    $base_element = $doc->getElementsByTagName('base')->item(0);
    if ($base_element !== null) {
        $base_href = $base_element->getAttribute('href');
    }
    if ($base_href) {
        $this->base = trim($base_href, '/') . '/';
    } else {
        $this->base = $this->real_root;
    }

    foreach ($doc->getElementsByTagName('a') as $a) {
        if (count($this->links_checked) > $this->maxlinks) {
            $this->log_page($plink, 'Maximale Linkanzahl überschritten');
            // Leave this level properly so the depth shown for the calling pages stays correct.
            $this->depth--;
            return;
        }

        $alink = $a->getAttribute('href');
        $clink = $this->get_checklink($alink);

        if (!$clink) {
            $this->log_link($clink, $alink);
            continue;
        }

        // parse_url() may fail or return no scheme at all (e.g. when <base href> is relative).
        $parsed_link = parse_url($clink);
        $scheme = '';
        if (is_array($parsed_link) && isset($parsed_link['scheme'])) {
            $scheme = strtolower($parsed_link['scheme']);
        }

        // tel: and mailto: links cannot be requested; the scheme is logged as their status.
        if (in_array($scheme, ['tel', 'mailto'], true)) {
            $this->log_link($clink, $alink, $scheme);
            continue;
        }

        // Do not check a link twice, but log it every time it occurs.
        if (isset($this->links_checked[$alink])) {
            $known = $this->links_checked[$alink]['status'];
            $this->log_link($known['clink'], $known['alink'], $known['status_code']);
            continue;
        }

        $status_code = false;

        if (strpos($scheme, 'http') === 0) {
            $status_code = $this->get_status_code($clink);
        }

        // Only crawl further if the link answered without an error status,
        // belongs to the checked site and has not been crawled yet.
        // Keyed by URL so every target is queued once per page.
        if ($status_code && $status_code < 400 && $this->check_page_link($clink)) {
            $pages_to_crawl[$clink] = $clink;
        }
        $this->log_link($clink, $alink, $status_code);
    }

    foreach ($pages_to_crawl as $l) {
        // A page crawled earlier in this loop may already have visited this
        // link through its own recursion; do not crawl it a second time.
        if (isset($this->pages_checked[$l])) {
            continue;
        }
        $this->crawl_page($l);
    }
    $this->depth--;
}
116 |
/**
 * Remembers the result for a link and echoes one log row for it.
 *
 * The result is stored in $this->links_checked under the original href and
 * then rendered through the 'loglink.php' fragment. The rendered row is
 * only echoed; no output buffer is flushed here.
 *
 * @param string|false $clink       link as it was checked; false if get_checklink() rejected it
 * @param string       $alink       original href value from the page
 * @param mixed        $status_code status to display, 'err' if none is given
 * @return void
 */
private function log_link ($clink,$alink,$status_code = 'err') {
    $result = [
        'clink' => $clink,
        'alink' => $alink,
        'status_code' => $status_code,
    ];
    $this->links_checked[$alink]['status'] = $result;

    // The fragment receives the same three values under the same names.
    $row = new rex_fragment();
    foreach ($result as $name => $value) {
        $row->setVar($name, $value);
    }
    echo $row->parse('loglink.php');
}
145 |
/**
 * Echoes the log header for a page: an optional message line followed by
 * the current crawl depth.
 *
 * NOTE(review): $link is not used anywhere in this body, and the string
 * literals look as if markup has been lost from them - confirm against the
 * original file.
 *
 * @param string $link    URL of the page the header belongs to
 * @param string $message optional notice; left out when it is empty
 * @return void
 */
private function log_page ($link,$message = '') {
    $output = '';
    if ($message) {
        $output .= ''.$message.'
';
    }
    $output .= 'Tiefe: '.$this->depth.'
';
    echo $output;
}
157 |
158 |
/**
 * Requests the headers of a URL and extracts the HTTP status code from the
 * first header line that get_headers() returns.
 *
 * @param string $link URL to request
 * @return string|false the three-digit status code as a string, or false if
 *                      the request failed or the first line is not a valid
 *                      HTTP status line
 */
private function get_status_code ($link) {
    $header = get_headers($link);
    if (!$header || !isset($header[0])) {
        return false;
    }

    // Expected form: "HTTP/1.1 200 OK". Anything else is treated as a
    // failed check instead of reading a missing array offset.
    if (!preg_match('~^HTTP/\S+\s+(\d{3})~i', $header[0], $match)) {
        return false;
    }
    return $match[1];
}
168 |
/**
 * Validates a href value and turns it into the URL that is actually checked.
 *
 * - Links that already carry a scheme (http:, mailto:, tel:, ...) are
 *   returned as they are (without surrounding whitespace).
 * - Protocol-relative links ("//host/path") get the scheme of the current
 *   base URL, falling back to "http".
 * - Everything else is appended to $this->base, i.e. the <base href> of the
 *   current page or the root of the start URL.
 *
 * @param string $alink href value as found in the page
 * @return string|false the URL to check, or false if the href cannot be parsed
 */
private function get_checklink ($alink) {
    // Whitespace around the URL would otherwise end up inside the
    // resolved link or hide the scheme from parse_url().
    $alink = trim($alink);

    $p_link = parse_url($alink);
    if (!$p_link) {
        return false;
    }
    if (isset($p_link['scheme'])) {
        return $alink;
    }
    if (isset($p_link['host'])) {
        // "//host/path" names another host and must not be glued onto the base URL.
        $scheme = parse_url((string) $this->base, PHP_URL_SCHEME);
        return ($scheme ? $scheme : 'http') . ':' . $alink;
    }
    return $this->base . ltrim($alink, '/');
}
184 |
/**
 * Decides whether a link should be crawled as a page of its own.
 *
 * That is the case when the link starts with the root of the start URL
 * ($this->real_root) and has no entry in $this->pages_checked yet.
 *
 * @param string $link absolute URL to test
 * @return bool true if the link belongs to the checked site and is still uncrawled
 */
private function check_page_link ($link) {
    $is_internal = strpos($link, $this->real_root) === 0;

    return $is_internal && !isset($this->pages_checked[$link]);
}
198 |
199 |
200 | }
--------------------------------------------------------------------------------