├── .coveralls.yml
├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── composer.json
├── phpunit.xml.dist
├── src
├── GlLinkChecker.php
├── GlLinkCheckerError.php
└── GlLinkCheckerReport.php
└── tests
├── GlLinkCheckerErrorTest.php
├── GlLinkCheckerTest.php
├── bootstrap.php
├── expectedReport.html
├── json
└── blog.json
├── md
└── example.md
└── site1
├── download
└── index.html
├── img
└── index.html
├── index.html
├── robots.txt
├── section
└── probleme-solution
│ └── compresser-css-html-js.html
└── sitemap.xml
/.coveralls.yml:
--------------------------------------------------------------------------------
1 | coverage_clover: tests/logs/clover.xml
2 | json_path: tests/logs/coveralls-upload.json
3 | service_name: travis-ci
4 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | composer.phar
2 | composer.lock
3 | composer-test.lock
4 | vendor/
5 |
6 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: php
2 | php:
3 | - '5.5'
4 | - '5.6'
5 | - '7.0'
6 | - '7.1'
7 | - '7.2'
8 | install:
9 | - composer install
10 | script:
11 | - ./vendor/bin/phpunit --coverage-clover ./tests/logs/clover.xml
12 | after_script:
13 | - php vendor/bin/coveralls -v
14 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2015 Glicer - Emmanuel ROECKER & Rym BOUCHAGOUR, https://github.com/emmanuelroecker
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # php-linkchecker
2 |
3 | [Scrutinizer Code Quality](https://scrutinizer-ci.com/g/emmanuelroecker/php-linkchecker/?branch=master)
4 | [Build Status](https://travis-ci.org/emmanuelroecker/php-linkchecker)
5 | [Coverage Status](https://coveralls.io/github/emmanuelroecker/php-linkchecker?branch=master)
6 | [SensioLabs Insight](https://insight.sensiolabs.com/projects/4f63b147-1922-4527-9d0d-e369397a1c13)
7 |
8 | Check broken links in html / json files, sitemap.xml, markdown and robots.txt.
9 |
10 | It works with:
11 |
12 | * [Guzzle](http://docs.guzzlephp.org)
13 | * [Symfony Finder Component](http://symfony.com/doc/2.3/components/finder.html)
14 | * [Glicer Simply-html Component](https://github.com/emmanuelroecker/php-simply-html)
15 |
16 | ## Installation
17 |
18 | This library can be found on [Packagist](https://packagist.org/packages/glicer/link-checker).
19 |
20 | The recommended way to install is through [composer](http://getcomposer.org).
21 |
22 | Edit your `composer.json` and add :
23 |
24 | ```json
25 | {
26 | "require": {
27 | "glicer/link-checker": "dev-master"
28 | }
29 | }
30 | ```
31 |
32 | Install dependencies :
33 |
34 | ```bash
35 | php composer.phar install
36 | ```
37 |
38 | ## How to check links in html / json files ?
39 |
40 | ```php
41 | require 'vendor/autoload.php';
42 |
43 | use GlLinkChecker\GlLinkChecker;
44 | use GlLinkChecker\GlLinkCheckerReport;
45 | use Symfony\Component\Finder\Finder;
46 |
47 | //relative urls are checked against the host http://lyon.glicer.com
48 | $linkChecker = new GlLinkChecker('http://lyon.glicer.com');
49 |
50 | //construct list of local html and json files to check
51 | $finder = new Finder();
52 | $files = $finder->files()->in('./public')->name("*.html")->name("*.json");
53 |
54 | //launch links checking
55 | $result = $linkChecker->checkFiles(
56 | $files,
57 | function ($nbr) {
58 | // called at the beginning - $nbr is the number of urls to check
59 | },
60 | function ($url, $files) {
61 | // called for each $url - $files : list of the filenames containing the $url link
62 | },
63 | function () {
64 | // called at the end
65 | }
66 | );
67 |
68 | //convert the $result array into a temp html file
69 | $filereport = GlLinkCheckerReport::toTmpHtml('lyonCheck',$result);
70 |
71 | //$filereport contains the full path to the html file
72 | print_r($filereport);
73 | ```
74 |
75 | You can view $filereport in your browser.
76 |
77 | ## How to check links in robots.txt and sitemap files ?
78 |
79 | ```php
80 | require 'vendor/autoload.php';
81 |
82 | use GlLinkChecker\GlLinkChecker;
83 |
84 | $linkChecker = new GlLinkChecker('http://lyon.glicer.com');
85 | $result = $linkChecker->checkRobotsSitemap();
86 |
87 | print_r($result);
88 | ```
89 |
90 | GlLinkChecker::checkRobotsSitemap() returns an array like:
91 |
92 | ```php
93 | $result = [
94 | 'disallow' =>
95 | ['error' => ['/img/', '/download/']],
96 | 'sitemap' =>
97 | [
98 | 'ok' => [
99 | '/sitemap.xml' =>
100 | [
101 | 'ok' =>
102 | [
103 | '/index.html',
104 | '/section/probleme-solution/compresser-css-html-js.html'
105 | ]
106 | ]
107 | ]
108 | ]
109 | ];
110 | ```
111 |
112 | ## Running Tests
113 |
114 | Launch from command line :
115 |
116 | ```console
117 | vendor\bin\phpunit
118 | ```
119 |
120 | ## License MIT
121 |
122 | ## Contact
123 |
124 | Authors : Emmanuel ROECKER & Rym BOUCHAGOUR
125 |
126 | [Web Development Blog - http://dev.glicer.com](http://dev.glicer.com)
127 |
--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "glicer/link-checker",
3 | "type": "library",
4 | "description": "Check broken links in html files, sitemap.xml and robots.txt",
5 | "keywords": ["html", "broken", "links"],
6 | "homepage": "https://github.com/emmanuelroecker/php-linkchecker",
7 | "license": "MIT",
8 | "authors": [
9 | {
10 | "name": "Emmanuel ROECKER",
11 | "homepage": "http://dev.glicer.com"
12 | },
13 | {
14 | "name": "Rym BOUCHAGOUR",
15 | "homepage": "http://dev.glicer.com"
16 | }
17 | ],
18 | "require": {
19 | "php": ">=5.5",
20 | "guzzlehttp/guzzle": "^6.2",
21 | "glicer/simply-html": "^1.0",
22 | "symfony/finder": "^2.3 || ^3.0",
23 | "symfony/console": "^2.3 || ^3.0"
24 | },
25 | "require-dev": {
26 | "phpunit/phpunit": "^4.8 || ^5.7 || ^6.5",
27 | "symfony/process": "^2.3 || ^3.0",
28 | "php-coveralls/php-coveralls": "^2.0"
29 | },
30 | "autoload": {
31 | "psr-4": {
32 | "GlLinkChecker\\": "src/"
33 | }
34 | },
35 | "autoload-dev": {
36 | "psr-4": {
37 | "GlLinkChecker\\Tests\\": "tests/"
38 | }
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/phpunit.xml.dist:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | tests
6 |
7 |
8 |
9 |
10 | src
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/src/GlLinkChecker.php:
--------------------------------------------------------------------------------
1 | client = new Client([
48 | 'base_uri' => $rooturl,
49 | 'verify' => false,
50 | 'defaults' => [
51 | 'headers' => [
52 | 'User-Agent' => 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:36.0) Gecko/20100101 Firefox/36.0',
53 | 'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
54 | 'Accept-Language' => 'fr,fr-FR;q=0.8,en-US;q=0.5,en;q=0.3',
55 | 'Accept-Encoding' => 'gzip, deflate'
56 | ]
57 | ]
58 | ]);
59 | $this->internalurls = $internalurls;
60 | }
61 |
62 | /**
63 |  * recursively collect every string that looks like a valid http(s) / ftp(s) url
64 |  *
65 |  * @param mixed $obj   structure to walk (getJsonLinks passes the decoded json)
66 |  * @param array $links accumulator filled in place, indexed by the url itself
67 |  */
68 | private function searchInArray($obj, array &$links)
69 | {
70 |     foreach ($obj as $value) {
71 |         if (is_array($value)) {
72 |             // nested level : walk it with the same accumulator
73 |             $this->searchInArray($value, $links);
74 |             continue;
75 |         }
76 |         $isUrl = is_string($value)
77 |             && preg_match("/^(http|https|ftp|ftps).*$/", $value)
78 |             && filter_var($value, FILTER_VALIDATE_URL);
79 |         if ($isUrl) {
80 |             $links[$value] = $value;
81 |         }
82 |     }
83 | }
84 |
85 | /**
86 |  * get all links in a json document
87 |  *
88 |  * @param string $json raw json text
89 |  * @return array url => url, empty when $json does not decode to an array
90 |  */
91 | private function getJsonLinks($json)
92 | {
93 |     $links = [];
94 |     $obj = json_decode($json, true);
95 |     if (is_array($obj)) { // null (invalid json) or a bare scalar : nothing to walk
96 |         $this->searchInArray($obj, $links);
97 |     }
98 |     return $links;
99 | }
100 |
101 |
102 | /**
103 |  * check links in a sitemap
104 |  *
105 |  * the text of every loc element is requested through the http client
106 |  *
107 |  * @param string $sitemap sitemap content
108 |  * @return array urls answering 200 under 'ok', all the others under 'error'
109 |  * @throws \Exception
110 |  */
111 | private function checkSitemap($sitemap)
112 | {
113 |     $report = [];
114 |     $document = new GlHtml($sitemap);
115 |
116 |     foreach ($document->get("loc") as $node) {
117 |         $response = $this->client->get($node->getText(), ['exceptions' => false]);
118 |         $bucket = ($response->getStatusCode() != 200) ? 'error' : 'ok';
119 |
120 |         // a bucket only exists once a url has been stored in it
121 |         $report[$bucket][] = $node->getText();
122 |     }
123 |
124 |     return $report;
125 | }
126 |
127 | /**
128 |  * request each url and record whether it answers with the expected status code
129 |  *
130 |  * @param array $result     filled in place : $result[$statuscode]['ok' or 'error'][]
131 |  * @param array $urls       urls to request
132 |  * @param int   $statuscode status code each url is expected to return
133 |  */
134 | private function checkStatus(array &$result, array $urls, $statuscode)
135 | {
136 |     foreach ($urls as $url) {
137 |         $actual = $this->client->get($url, ['exceptions' => false])->getStatusCode();
138 |         $outcome = ($actual != $statuscode) ? "error" : "ok";
139 |
140 |         // urls are grouped first by expected code, then by outcome
141 |         $result[$statuscode][$outcome][] = $url;
142 |     }
143 | }
144 |
145 | /**
146 | * check that urls answer with the expected 404 and 403 status codes
147 | *
148 | * @param array $urlerrors urls expected to answer 404
149 | * @param array $urlforbiddens urls expected to answer 403
150 | *
151 | * @return array indexed by status code (404, 403), then by 'ok' / 'error'
152 | */
153 | public function checkErrors(array $urlerrors, array $urlforbiddens)
154 | {
155 | $result = [];
156 |
157 | $this->checkStatus($result,$urlerrors,404);
158 | $this->checkStatus($result,$urlforbiddens, 403);
159 |
160 | return $result;
161 | }
162 |
163 | /**
164 |  * check the Sitemap and Disallow entries of /robots.txt
165 |  *
166 |  * @return array 'sitemap' and 'disallow' entries, each split into 'ok' / 'error'
167 |  * @throws \Exception when /robots.txt does not answer with status 200
168 |  */
169 | public function checkRobotsSitemap()
170 | {
171 |     // 'exceptions' => false : otherwise a 4xx/5xx answer raises a guzzle exception first
172 |     $response = $this->client->get("/robots.txt", ['exceptions' => false]);
173 |     if ($response->getStatusCode() != 200) {
174 |         throw new \Exception("Cannot find robots.txt");
175 |     }
176 |
177 |     $robotstxt = explode("\n", $response->getBody()->getContents());
178 |     $result = [];
179 |     foreach ($robotstxt as $line) {
180 |         if (preg_match('/^\s*Sitemap:(.*)/i', $line, $match)) {
181 |             $urlsitemap = trim($match[1]);
182 |             $response = $this->client->get($urlsitemap, ['exceptions' => false]);
183 |             if ($response->getStatusCode() != 200) {
184 |                 $result['sitemap']['error'][] = $urlsitemap;
185 |             } else {
186 |                 $result['sitemap']['ok'][$urlsitemap] = $this->checkSitemap($response->getBody()->getContents());
187 |             }
188 |         }
189 |
190 |         if (preg_match('/^\s*Disallow:(.*)/i', $line, $match)) {
191 |             $urldisallow = trim($match[1]);
192 |             $response = $this->client->get($urldisallow, ['exceptions' => false]);
193 |             if (($response->getStatusCode() != 200) && ($response->getStatusCode() != 403)) {
194 |                 $result['disallow']['error'][] = $urldisallow;
195 |             } else {
196 |                 $result['disallow']['ok'][] = $urldisallow;
197 |             }
198 |         }
199 |     }
200 |
201 |     return $result;
202 | }
203 |
204 | /**
205 |  * extract the http(s) targets of inline markdown links and images
206 |  *
207 |  * @param string $markdownContent
208 |  * @return array list of urls, in order of appearance (duplicates kept)
209 |  */
210 | public function getLinksFromMarkdown($markdownContent)
211 | {
212 |     // anchored on "](" so several links on one line are all found; the url may hold
213 |     // one level of balanced parentheses but never swallows the closing ")"
214 |     $pattern = '/\]\((https?:\/\/(?:[^\s()]|\([^\s()]*\))+)\)/';
215 |     return preg_match_all($pattern, $markdownContent, $out) ? $out[1] : [];
216 | }
217 |
218 |
219 | /**
220 |  * check links in html, json and markdown files
221 |  *
222 |  * @param Finder   $files      files to scan (extension html, json or md)
223 |  * @param callable $checkstart called once with the number of distinct links
224 |  * @param callable $checking   called before each link with ($link, $filenames)
225 |  * @param callable $checkend   called once when every link has been checked
226 |  * @param array    $criterias  checks applied to internal links only
227 |  *
228 |  * @throws \Exception when a file has an unsupported extension
229 |  * @return GlLinkCheckerError[]
230 |  */
231 | public function checkFiles(Finder $files, callable $checkstart, callable $checking, callable $checkend, array $criterias = ['lowercase', 'endslash', 'absolute'])
232 | {
233 |     $linksByFile = [];
234 |     /**
235 |      * @var SplFileInfo $file
236 |      */
237 |     foreach ($files as $file) {
238 |         $inner = file_get_contents($file->getRealPath());
239 |         $keyname = $file->getRelativePathname();
240 |         $extension = $file->getExtension();
241 |         switch ($extension) {
242 |             case "html":
243 |                 $html = new GlHtml($inner);
244 |                 $linksByFile[$keyname] = $html->getLinks();
245 |                 break;
246 |             case "json":
247 |                 $linksByFile[$keyname] = $this->getJsonLinks($inner);
248 |                 break;
249 |             case "md":
250 |                 $linksByFile[$keyname] = $this->getLinksFromMarkdown($inner);
251 |                 break;
252 |             default:
253 |                 throw new \Exception("Extension unknown : " . $keyname);
254 |         }
255 |     }
256 |
257 |     //reverse $linksByFile : link => names of the files containing it
258 |     $links = [];
259 |     foreach ($linksByFile as $filename => $filelinks) {
260 |         foreach ($filelinks as $filelink) {
261 |             $links[$filelink][] = $filename;
262 |         }
263 |     }
264 |
265 |     $checkstart(count($links));
266 |     $result = [];
267 |     foreach ($links as $link => $filenames) {
268 |         $checking($link, $filenames);
269 |
270 |         $gllink = new GlLinkCheckerError($this->client, $link, $filenames);
271 |
272 |         if ($gllink->isInternal($this->internalurls)) {
273 |             $gllink->check($criterias);
274 |         }
275 |
276 |         $gllink->check(['exist']);
277 |         $result[] = $gllink;
278 |     }
279 |     $checkend();
280 |
281 |     return $result;
282 | }
284 |
--------------------------------------------------------------------------------
/src/GlLinkCheckerError.php:
--------------------------------------------------------------------------------
1 | client = $client;
90 | $this->url = $url;
91 | $this->link = $link;
92 | $this->files = $files;
93 | }
94 |
95 | /**
96 |  * probe the link with a HEAD request
97 |  *
98 |  * @return bool true when the answer is 200 or 204
99 |  */
100 | private function checkexisthead()
101 | {
102 |     try {
103 |         $status = $this->client->head($this->link)->getStatusCode();
104 |         $this->statuscode = $status;
105 |         $found = (($status == 200) || ($status == 204));
106 |     } catch (ClientException $e) {
107 |         $this->statuscode = $e->getCode();
108 |         $found = false;
109 |     } catch (RequestException $e) {
110 |         // any other request failure : the status code is left untouched
111 |         $found = false;
112 |     }
113 |     $this->isExist = $found;
114 |     return $found;
115 | }
116 |
117 | /**
118 |  * probe the link with a GET request
119 |  * @return bool true when the answer is 200 or 204
120 |  */
121 | private function checkexistget()
122 | {
123 |     try {
124 |         $status = $this->client->get($this->link)->getStatusCode();
125 |         $this->statuscode = $status;
126 |         $found = (($status == 200) || ($status == 204));
127 |     } catch (ClientException $e) {
128 |         $this->statuscode = $e->getCode();
129 |         $found = false;
130 |     } catch (RequestException $e) {
131 |         // any other request failure : the status code is left untouched
132 |         $found = false;
133 |     }
134 |     $this->isExist = $found;
135 |     return $found;
136 | }
137 |
138 | /**
139 |  * check that the link answers
140 |  *
141 |  * a HEAD request is tried first; GET is only sent when HEAD did not succeed
142 |  *
143 |  * @return bool
144 |  */
145 | private function checkexist()
146 | {
147 |     // || short-circuits : checkexistget() is skipped once checkexisthead() is true
148 |     $reachable = $this->checkexisthead() || $this->checkexistget();
149 |
150 |     return $reachable;
151 | }
152 |
153 | /**
154 |  * check that the link ends with "/" or that its path has a file extension
155 |  *
156 |  * accepted : http://www.example.com/
157 |  * accepted : a path ending with a file name such as index.html
158 |  * (presumably $this->url holds the parse_url() parts of the link - to confirm)
159 |  * the outcome is stored in isNotEndSlash, where true means accepted
160 |  *
161 |  * @return bool
162 |  */
163 | private function checkendslash()
164 | {
165 |     $accepted = (substr($this->link, -1) == '/');
166 |
167 |     if (!$accepted && isset($this->url['path']) && (strlen($this->url['path']) > 0)) {
168 |         // pathinfo() gives an empty string when the path has no extension
169 |         $extension = pathinfo($this->url['path'], PATHINFO_EXTENSION);
170 |         $accepted = (strlen($extension) > 0);
171 |     }
172 |
173 |     $this->isNotEndSlash = $accepted;
174 |
175 |     return $accepted;
176 | }
177 |
178 | /**
179 |  * check that the link is absolute
180 |  *
181 |  * a link is absolute when it has a host (http://www.example.com/index.html)
182 |  * or when its path starts with "/" (/article/index.html)
183 |  *
184 |  * the outcome is stored in isAbsolute
185 |  *
186 |  * @return bool
187 |  */
188 | private function checkabsolute()
189 | {
190 |     $hasHost = isset($this->url['host']) && (strlen($this->url['host']) > 0);
191 |     $rootedPath = isset($this->url['path']) && (strpos($this->url['path'], "/") === 0);
192 |
193 |     $this->isAbsolute = ($hasHost || $rootedPath);
194 |
195 |     return $this->isAbsolute;
196 | }
197 |
198 | /**
199 |  * @return bool true when strtolower() leaves the link unchanged (kept in isLowerCase)
200 |  */
201 | private function checklowercase()
202 | {
203 |     $lowered = strtolower($this->link);
204 |     $this->isLowerCase = ($lowered === $this->link);
205 |     return $this->isLowerCase;
206 | }
207 |
208 |
209 | /**
210 |  * tell whether the link is considered internal
211 |  *
212 |  * @param array|null $internalurls url prefixes treated as internal, null = no filter
213 |  *
214 |  * @return bool
215 |  */
216 | public function isInternal($internalurls)
217 | {
218 |     $hostless = !isset($this->url['host']) || (strlen($this->url['host']) <= 0);
219 |     if (!isset($internalurls) || $hostless) {
220 |         // no filter given, or a link without host : always internal
221 |         return true;
222 |     }
223 |
224 |     foreach ($internalurls as $prefix) {
225 |         if (strpos($this->link, $prefix) === 0) {
226 |             return true;
227 |         }
228 |     }
229 |
230 |     return false;
231 | }
232 |
233 | /**
234 |  * run the checks named in $list ("exist", "endslash", "absolute", "lowercase")
235 |  * @param array $list check names; each one is lowercased, trimmed and prefixed by "check"
236 |  * @return bool true when every check passed
237 |  */
238 | public function check(array $list)
239 | {
240 |     $result = true;
241 |     foreach ($list as $element) {
242 |         $method = "check" . trim(strtolower($element));
243 |         // method called first : every check must run and store its flag, even after
244 |         // a failure; && keeps a real bool where &= produced the integers 0 / 1
245 |         $result = $this->$method() && $result;
246 |     }
247 |     return $result;
248 | }
248 |
249 | /**
250 | * @return string the link as it was given to the constructor
251 | */
252 | public function getLink()
253 | {
254 | return $this->link;
255 | }
256 |
257 | /**
258 | * @return array the files given to the constructor (GlLinkChecker::checkFiles passes the names of the files containing the link)
259 | */
260 | public function getFiles()
261 | {
262 | return $this->files;
263 | }
264 |
265 | /**
266 | * @return int status code stored by the last head / get probe (not updated when the request fails without a client error)
267 | */
268 | public function getStatusCode()
269 | {
270 | return $this->statuscode;
271 | }
272 |
273 | /**
274 |  * build one message per check flag that is currently false
275 |  * (a false isNotEndSlash means the end slash check failed)
276 |  *
277 |  * @return array messages, in the order absolute, lowercase, exist, end slash
278 |  */
279 | public function getErrorMessages()
280 | {
281 |     // each entry : [flag, message used when the flag is false]
282 |     $rules = [
283 |         [$this->isAbsolute, "Must be absolute (Sample : /article/index.html)"],
284 |         [$this->isLowerCase, "Must be in lowercase (Sample : http://www.example.com/index.html)"],
285 |         [$this->isExist, "Must exist (Http get error)"],
286 |         [$this->isNotEndSlash, "Must have a slash at the end (Sample : http://www.example.com/)"],
287 |     ];
288 |
289 |     $message = [];
290 |     foreach ($rules as $rule) {
291 |         if (!$rule[0]) {
292 |             $message[] = $rule[1];
293 |         }
294 |     }
295 |
296 |     return $message;
297 | }
298 |
299 | /**
300 |  * export the four check flags as an associative array
301 |  * @return array keys : absolute, lowercase, exist, notendslash
302 |  */
303 | public function getErrorArray()
304 | {
305 |     return [
306 |         'absolute'    => $this->isAbsolute,
307 |         'lowercase'   => $this->isLowerCase,
308 |         'exist'       => $this->isExist,
309 |         'notendslash' => $this->isNotEndSlash,
310 |     ];
311 | }
312 | }
313 |
--------------------------------------------------------------------------------
/src/GlLinkCheckerReport.php:
--------------------------------------------------------------------------------
1 | write("\xEF\xBB\xBF"); //add ut8 bom to txt file
40 | $resultoutput->write(print_r($result, true));
41 |
42 | return $resultfile;
43 | }
44 |
45 | /**
46 |  * render the links test result and store it in a new html file of the temp directory
47 |  *
48 |  * @param string $name prefix of the generated file name
49 |  * @param GlLinkCheckerError[] $result
50 |  *
51 |  * @return string path of the written file
52 |  */
53 | public static function toTmpHtml($name, $result)
54 | {
55 |     $path = sys_get_temp_dir() . "/" . uniqid($name) . ".html";
56 |     // $name is also handed to toHtml() as the report title
57 |     file_put_contents($path, self::toHtml($name, $result));
58 |
59 |     return $path;
60 | }
61 |
62 | /**
63 | * render report in html format
64 | *
65 | * @param string $title
66 | * @param GlLinkCheckerError[] $links
67 | *
68 | * @return string
69 | */
70 | private static function toHtml($title,array $links)
71 | {
72 | $html = '';
73 | $html .= '
';
74 | $html .= '' . $title . ' ';
75 | $html .= '';
102 | $html .= '';
103 |
104 | /**
105 | * @var GlLinkCheckerError $link
106 | */
107 | foreach ($links as $link) {
108 | $html .= '';
109 | $url = $link->getLink();
110 | $files = " -> " . implode(" ", $link->getFiles());
111 | $errors = $link->getErrorMessages();
112 |
113 | if (count($errors) <= 0) {
114 | $html .= '
' . $url . ' ' . $files;
115 | $html .= '
';
116 | continue;
117 | }
118 |
119 | $tooltip = implode(' ', $errors);
120 | $html .= '' . $url . ' ' . $link->getStatusCode(
121 | ) . $files;
122 | $html .= '';
123 | }
124 | $html .= ' ';
125 |
126 | return $html;
127 | }
128 | }
129 |
--------------------------------------------------------------------------------
/tests/GlLinkCheckerErrorTest.php:
--------------------------------------------------------------------------------
1 | assertEquals('http://dev.glicer.com', $linkerror->getLink());
37 | $this->assertEquals(['file1','file2'], $linkerror->getFiles());
38 | }
39 |
40 | public function testCheck()
41 | {
42 |     // no assertion here : the test only runs the four checks on a link
43 |     $linkerror = new GlLinkCheckerError(new Client(), 'http://dev.glicer.com', ['index.html']);
44 |
45 |     $linkerror->check(['exist','endslash','absolute','lowercase']);
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/tests/GlLinkCheckerTest.php:
--------------------------------------------------------------------------------
1 | checkRobotsSitemap();
39 | $expected = [
40 | 'disallow' =>
41 | ['ok' => ['/img/', '/download/']],
42 | 'sitemap' =>
43 | [
44 | 'ok' => [
45 | '/sitemap.xml' =>
46 | [
47 | 'ok' =>
48 | [
49 | '/index.html',
50 | '/section/probleme-solution/compresser-css-html-js.html'
51 | ]
52 | ]
53 | ]
54 | ]
55 | ];
56 | $this->assertEquals($expected, $result);
57 | }
58 |
59 | public function testErrors()
60 | {
61 |     // /nothing.html must be reported 'ok' for 404, /test.html 'error' for 403
62 |     $expected = [
63 |         '404' => ['ok' => ['/nothing.html']],
64 |         '403' => ['error' => ['/test.html']],
65 |     ];
66 |
67 |     $baseurl = 'http://' . WEB_SERVER_HOST . ':' . WEB_SERVER_PORT;
68 |     $linkChecker = new GlLinkChecker($baseurl);
69 |     $result = $linkChecker->checkErrors(['/nothing.html'], ['/test.html']);
70 |     $this->assertEquals($expected, $result);
71 | }
72 |
73 | // asserts that $link is among $links and that its check flags equal $errorarray
74 | private function validatelink($link, $links, $result, array $errorarray)
75 | {
76 |     $position = array_search($link, $links);
77 |     if ($position === false) {
78 |         // link not found : the failure message lists the links that were given
79 |         $this->fail($link . " - " . var_export($links, TRUE));
80 |     }
81 |
82 |     $this->assertEquals($errorarray, $result[$position]->getErrorArray(), $link);
83 | }
84 |
85 | public function testJson()
86 | {
87 |     $noop = function () {
88 |     };
89 |     $finder = new Finder();
90 |     $files = $finder->files()->in('./tests/json')->name("*.json");
91 |
92 |     $linkChecker = new GlLinkChecker();
93 |     $result = $linkChecker->checkFiles($files, $noop, $noop, $noop);
94 |     $this->assertCount(3, $result);
95 |
96 |     $links = array_map(
97 |         function ($checked) {
98 |             return $checked->getLink();
99 |         },
100 |         $result
101 |     );
102 |
103 |     // flags expected for a link that answers, then for one that does not
104 |     $alive = ['absolute' => true, 'lowercase' => true, 'exist' => true, 'notendslash' => true];
105 |     $dead = array_merge($alive, ['exist' => false]);
106 |
107 |     $this->validatelink("http://dev.glicer.com/", $links, $result, $alive);
108 |     $this->validatelink("http://lyon.glicer.com/", $links, $result, $dead);
109 |     $this->validatelink("http://dev.glicer.com/section/probleme-solution/prefixer-automatiquement-css.html", $links, $result, $alive);
110 | }
111 |
112 | public function testMarkdown()
113 | {
114 |     $noop = function () {
115 |     };
116 |     $finder = new Finder();
117 |     $files = $finder->files()->in('./tests/md')->name("*.md");
118 |
119 |     $linkChecker = new GlLinkChecker();
120 |     $result = $linkChecker->checkFiles($files, $noop, $noop, $noop);
121 |     $this->assertCount(3, $result);
122 |
123 |     $links = array_map(
124 |         function ($checked) {
125 |             return $checked->getLink();
126 |         },
127 |         $result
128 |     );
129 |
130 |     // flags expected for a link that answers, then for one that does not
131 |     $alive = ['absolute' => true, 'lowercase' => true, 'exist' => true, 'notendslash' => true];
132 |     $dead = array_merge($alive, ['exist' => false]);
133 |
134 |     $this->validatelink("https://ucarecdn.com/aa1a5994-8de9-4d24-99ce-3a0d686c30bd/-/resize/700x/", $links, $result, $alive);
135 |     $this->validatelink("https://projects.breatheco.de/d/landing-page-with-react#readme", $links, $result, array_merge($alive, ['notendslash' => false]));
136 |     $this->validatelink("https://ucarecdn.com/8729c2f0-e4a6-4721-9ee9-3f29e6e852b5/", $links, $result, $dead);
137 | }
138 |
139 | public function testLinks()
140 | {
141 |     $noop = function () {
142 |     };
143 |
144 |     $finder = new Finder();
145 |     $files = $finder->files()->in('./tests/site1')->name("*.html");
146 |
147 |     $linkChecker = new GlLinkChecker('http://' . WEB_SERVER_HOST . ':' . WEB_SERVER_PORT);
148 |     $result = $linkChecker->checkFiles($files, $noop, $noop, $noop);
149 |     $this->assertCount(6, $result);
150 |
151 |     $links = array_map(
152 |         function ($checked) {
153 |             return $checked->getLink();
154 |         },
155 |         $result
156 |     );
157 |
158 |     // flags expected for a link that answers, then for one that does not
159 |     $alive = ['absolute' => true, 'lowercase' => true, 'exist' => true, 'notendslash' => true];
160 |     $dead = array_merge($alive, ['exist' => false]);
161 |
162 |     $this->validatelink("/section/probleme-solution/compresser-css-html-js.html", $links, $result, $alive);
163 |     $this->validatelink("http://dev.glicer.com/", $links, $result, $alive);
164 |     $this->validatelink("http://stop.glicer.com/no-exist.html", $links, $result, $dead);
165 |     $this->validatelink("/index.html", $links, $result, $alive);
166 |     $this->validatelink("http://lyon.glicer.com/", $links, $result, $dead);
167 |     $this->validatelink("http://dev.glicer.com/section/probleme-solution/prefixer-automatiquement-css.html", $links, $result, $alive);
168 | }
169 |
170 | /**
171 |  * the html report written for tests/site1 must equal tests/expectedReport.html
172 |  */
173 | public function testReport()
174 | {
175 |     $noop = function () {
176 |     };
177 |     $finder = new Finder();
178 |     $files = $finder->files()->in('./tests/site1')->name("*.html");
179 |     $linkChecker = new GlLinkChecker('http://' . WEB_SERVER_HOST . ':' . WEB_SERVER_PORT);
180 |     $result = $linkChecker->checkFiles($files, $noop, $noop, $noop);
181 |
182 |     //sort link by name
183 |     usort($result, function (GlLinkCheckerError $linkA, GlLinkCheckerError $linkB) {
184 |         return strcmp($linkA->getLink(), $linkB->getLink());
185 |     });
186 |
187 |     $filereport = GlLinkCheckerReport::toTmpHtml('testReport', $result);
188 |     $report = file_get_contents($filereport);
189 |     // the temp file is only needed for the comparison : do not leave it behind
190 |     unlink($filereport);
191 |
192 |     $reportexpected = file_get_contents(__DIR__ . '/expectedReport.html');
193 |
194 |     $this->assertEquals($reportexpected, $report);
195 |
196 | }
197 |
198 | /**
199 |  * checkFiles is expected to raise when it is given *.yml files
200 |  *
201 |  * @expectedException \Exception
202 |  */
203 | public function testUnknownExtension()
204 | {
205 |     $noop = function () {
206 |     };
207 |
208 |     $finder = new Finder();
209 |     $files = $finder->files()->in( __DIR__. '/../')->name("*.yml");
210 |
211 |     $linkChecker = new GlLinkChecker('http://' . WEB_SERVER_HOST . ':' . WEB_SERVER_PORT);
212 |
213 |     // yml is none of the extensions handled by checkFiles
214 |     $linkChecker->checkFiles($files, $noop, $noop, $noop);
215 | }
216 |
217 | public function getLinksFromMarkdownProvider()
218 | {
219 |     return [
220 |         'empty content' => ['', []],
221 |         'single inline link' => ['[a link](http://link.com)', ['http://link.com']],
222 |     ];
223 | }
224 |
225 | /**
226 |  * @dataProvider getLinksFromMarkdownProvider
227 |  */
228 | public function testGetLinksFromMarkdown($linkString, $expected)
229 | {
230 |     $baseurl = 'http://' . WEB_SERVER_HOST . ':' . WEB_SERVER_PORT;
231 |     $linkChecker = new GlLinkChecker($baseurl);
232 |
233 |     $this->assertSame($expected, $linkChecker->getLinksFromMarkdown($linkString));
234 | }
235 | }
236 |
--------------------------------------------------------------------------------
/tests/bootstrap.php:
--------------------------------------------------------------------------------
1 | start();
16 |
17 | echo sprintf(
18 | '%s - Web server started on %s:%d',
19 | date('r'),
20 | WEB_SERVER_HOST,
21 | WEB_SERVER_PORT
22 | ) . PHP_EOL;
23 |
24 | //wait server start
25 | sleep(1);
26 |
27 | // Kill the web server when the process ends
28 | register_shutdown_function(
29 | function () use ($process) {
30 | echo 'Web server shutdown' . PHP_EOL;
31 | $process->stop();
32 | }
33 | );
34 |
35 | // More bootstrap code
--------------------------------------------------------------------------------
/tests/expectedReport.html:
--------------------------------------------------------------------------------
1 | testReport /index.html -> section/probleme-solution/compresser-css-html-js.html
--------------------------------------------------------------------------------
/tests/json/blog.json:
--------------------------------------------------------------------------------
1 | {"blog": [
2 | {
3 | "name": "url1",
4 | "title": "title url1",
5 | "link": "http://dev.glicer.com/",
6 | "date": 1470082706
7 | },
8 | {
9 | "name": "url2",
10 | "title": "title url2",
11 | "link": "http://lyon.glicer.com/",
12 | "date": 1470065791
13 | },
14 | {
15 | "name": "url3",
16 | "title": "title url3",
17 | "link": "http://dev.glicer.com/section/probleme-solution/prefixer-automatiquement-css.html",
18 | "date": 1470064766
19 | }
20 | ]}
--------------------------------------------------------------------------------
/tests/md/example.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Learn HTML"
3 | subtitle: "HTML is to websites what columns are for buildings.
4 |
5 | Learn the basics of HTML - the foundation of the web."
6 | time: "12 minutes"
7 | date: "2018-31-10"
8 | tags: ["fale"]
9 | ---
10 |
11 | [[info]]
12 | | :point_up: Since in the previous chapter we equated houses, stores and buildings to web pages, now we have to say that HTML are the blueprints.
13 |
14 |
15 | # **HTML is the Website Skeleton**
16 | ***
17 |
18 | All web pages have HTML – it’s the structure of EVERYTHING. Think of it as building columns at a construction site.
19 |
20 | HTML makes you divide the website information into parts – similar to the basic parts of a document: header, title, content, footnote, subtitle, etc. Then, with CSS, you can make your page beautiful, and, with JavaScript, make it interactive.
21 |
22 | Originally browsers only knew how to interpret HTML. Websites were simple and neither CSS or JavaScript was used. A website was a simple plain text document with the typical elements any Word Document has: Headings, Bullet lists, Paragraphs, etc.
23 |
24 | 
25 |
26 |
27 | All tags must open and close. To close a tag you must place the same word but using the `/` symbol.
28 |
29 | # **The Attributes**
30 | ***
31 | Once the `` is defined, we can describe in detail its behavior by assigning attributes to those ``. For example, if we want our HTML document/page to have a link to another page, we use the `` tag, and we assign to it an attribute called **href**, which allows us to specify the URL of the page with which we want to have a connection.
32 |
33 | ```html
34 | Click here and it will take you to Google.com
35 | ```
36 |
37 |
38 | In theory, you have to use [one of these tags](https://projects.breatheco.de/d/landing-page-with-react#readme) and don’t invent your own because the browser won’t know how to interpret them. You must learn what each tag means and does in order to put them to good use…but, please, don’t worry! There aren’t that many! 🙂
39 |
40 | For the main heading of the document, the tag that we use is `<h1>`. For example, if an online store has an "electronics" category, the title that applies would be "Electronics" and the `<h1>` tag would be written as follows:
41 |
42 | ```html
43 | Electronic items
44 | ```
45 |
46 |
47 | ##### **Nested Tags** :
48 | Finally, tags can contain one or more tags within them. For example, if we would like to give an italic (cursive) style to the word "electronic", we must wrap that word with the `<i>` tag:
49 |
50 | ```html
51 | Electronic Tags
52 | ```
53 |
54 | ## Blank Spaces and Line Jumps
55 | ***
56 | The browser ignores blank spaces and line endings. If we want to jump one line, we have to use the `<br>` tag. If we want more "spaces", we need to insert one `&nbsp;` for each blank space (yes, we know it’s weird, but it is what it is).
57 |
58 | **These three alternatives will look the same (spaces and line jumps will be ignored):**
59 | ```html
60 | Hello World
61 | ```
62 | ```html
63 | Hello
64 | World
65 | ```
66 |
67 | ```html
68 | Hello World
69 | ```
70 |
71 |
72 | # **Page Structure**
73 | ***
74 | All pages must begin with the `<!DOCTYPE html>` statement, then the `<head>` and the `<body>` should follow. These tags **must** contain other tags within them (nested tags) because they split the page into 2 main parts: the HEAD and the BODY:
75 |
76 |
77 | ```html
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 | ```
89 |
90 | Let’s simulate how a browser thinks: imagine a user who, in their browser (client side), types the URL: breatheco.de
91 |
92 | + The server will open the default HTML file on that server, which will probably be: index.html.
93 | + Then, it will read its content and interpret it as HTML (because the extension of the file is .html).
94 | + The user will not see the text content of the file; instead, they will see a visual interpretation of that text.
95 |
96 | As you can see, the page in question will include AT LEAST the following tags:
97 |
98 | 
99 |
100 | |**Name** |**Tags** |**Description** |
101 | |:----------|:----------|:-----------------|
102 | |HTML |`<!DOCTYPE html>` |We must begin by letting the browser know that this is an HTML document. We can also specify the HTML version that we are using. |
103 | |Head |`<head>` |Everything that is written inside of the HEAD won’t be seen by the user. It’s the part of the page where the developer specifies information about the website itself: the language being used, what the website is about, the necessary fonts, the icon that the tab will have on the browser (favicon), and many other important things. |
104 | |Body |`<body>` |Here you will place all the content that will be viewed by the end user. If this were MS Word, the body would mark the beginning of your page content (the first line of your document). |
105 |
106 | # **The \<head\> is like the Envelope of a Letter.**
107 | ***
108 | We read the envelope of a letter to find out information about the letter itself, but not about its content. There you can find out who wrote the letter, what language it is written in, where it is from, etc.
109 |
110 | In the case of HTML, the `<head>` can contain the following tags (among less important ones):
111 |
112 | |**Name** |**Tag** |**Description** |
113 | |:----------|:---------|:-----------------|
114 | |Title |`<title>` |The title appears in the browser’s window; it’s also used when you share the page through social media: Twitter, Instagram, Facebook, etc. All those networks use the title of the page as the excerpt when a user copies the URL of your page to share on their wall. |
115 | |Meta |`<meta>` |The meta tags describe a document. They are used to specify things like: the author, title, date, keywords, descriptions, etc. Search engines love these tags because they allow an easier comprehension of the content before it is read. |
116 | |Link |`<link>` |Used for linking the page with the CSS style sheets. In the CSS chapter we will learn how to create style sheets and we will be able to import them using this tag. |
117 | |Style |`