├── examples
├── example-simple.php
├── example-daterange.php
├── example-downloaded.php
├── example-tables.php
└── example-language.php
├── composer.json
├── README.md
└── src
└── gwtdata.php
/examples/example-simple.php:
--------------------------------------------------------------------------------
1 | LogIn($email, $passwd) === true)
12 | {
13 | $gdata->DownloadCSV($website);
14 | }
15 | } catch (Exception $e) {
16 | die($e->getMessage());
17 | }
18 | ?>
--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "eyecatchup/webmaster-tools-data",
3 | "description": "An easy way to automate downloading of data tables from Google Webmaster Tools",
4 | "license": "Apache-2.0",
5 | "authors": [
6 | {
7 | "name": "Stephan Schmitz",
8 | "email": "eyecatchup@gmail.com",
9 | "role": "Developer"
10 | }
11 | ],
12 | "autoload": {
13 | "classmap": ["src"]
14 | },
15 | "require": {
16 | "ext-curl": "*"
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/examples/example-daterange.php:
--------------------------------------------------------------------------------
1 | LogIn($email, $passwd) === true)
12 | {
13 | $sites = $gdata->GetSites();
14 | foreach($sites as $site)
15 | {
16 | $gdata->SetDaterange($daterange);
17 | $gdata->DownloadCSV($site);
18 | }
19 | }
20 | } catch (Exception $e) {
21 | die($e->getMessage());
22 | }
23 | ?>
--------------------------------------------------------------------------------
/examples/example-downloaded.php:
--------------------------------------------------------------------------------
1 | LogIn($email, $passwd) === true)
9 | {
10 | $sites = $gdata->GetSites();
11 | foreach($sites as $site)
12 | {
13 | $gdata->DownloadCSV($site, "./csv");
14 | }
15 |
16 | $files = $gdata->GetDownloadedFiles();
17 | foreach($files as $file)
18 | {
19 | print "Saved $file
";
20 | }
21 | }
22 | } catch (Exception $e) {
23 | die($e->getMessage());
24 | }
25 | ?>
--------------------------------------------------------------------------------
/examples/example-tables.php:
--------------------------------------------------------------------------------
1 | LogIn($email, $passwd) === true)
17 | {
18 | $gdata->SetTables($tables);
19 | $gdata->DownloadCSV($website);
20 | }
21 | } catch (Exception $e) {
22 | die($e->getMessage());
23 | }
24 | ?>
--------------------------------------------------------------------------------
/examples/example-language.php:
--------------------------------------------------------------------------------
1 | LogIn($email, $passwd) === true)
20 | {
21 | $gdata->SetLanguage($language);
22 | $gdata->SetDaterange($daterange);
23 | $gdata->SetTables($tables);
24 |
25 | $sites = $gdata->GetSites();
26 | foreach($sites as $site)
27 | {
28 | $gdata->DownloadCSV($site);
29 | }
30 | }
31 | } catch (Exception $e) {
32 | die($e->getMessage());
33 | }
34 | ?>
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # GWTdata: Download website data from Google Webmaster Tools as CSV.
2 |
3 | ## Introduction
4 |
5 | This project provides an easy way to automate downloading of data tables from Google Webmaster Tools and tries to provide a PHP alternative to the Python script available here http://code.google.com/p/webmaster-tools-downloads/, for downloading CSV files from Google Webmaster Tools.
6 |
7 | Unlike the Python script (or a perfect clone), this solution does NOT require an extra client library or Zend package to be installed in order to run.
8 | Also it has some advanced functionality.
9 |
10 | ### Features
11 |
12 | The official download list (used by the Python script) only returns download URLs for 1.) Top Search Queries and 2.) Top Pages, but many more downloads are available via the web interface. I therefore extended the GWTdata class, so you can now download website data for:
13 |
14 | - TOP_PAGES
15 | - TOP_QUERIES
16 | - CRAWL_ERRORS
17 | - CONTENT_ERRORS
18 | - CONTENT_KEYWORDS
19 | - LATEST_BACKLINKS
20 | - INTERNAL_LINKS
21 | - EXTERNAL_LINKS
22 | - SOCIAL_ACTIVITY
23 |
24 | ### Update notice
25 |
26 | In case you want to automate downloading crawl errors, please go here: https://github.com/eyecatchup/GWT_CrawlErrors-php
27 |
28 | ## Usage
29 |
30 | This document explains how to automate the file download process from Google Webmaster Tools by showing examples for using the php class GWTdata.
31 |
32 | ### Get started
33 |
34 | To get started, the steps are as follows:
35 |
36 | - Download the php file gwtdata.php.
37 | - Create a folder and add the gwtdata.php script to it.
38 |
39 | ### Example 1 - `DownloadCSV()`
40 |
41 | To download CSV data for a single domain name of choice, the steps are as follows:
42 |
43 | - In the same folder where you added the gwtdata.php, create and run the following PHP script.
_You'll need to replace the example values for "email" and "password" with valid login details for your Google Account and for "website" with a valid URL for a site registered in your GWT account._
44 |
45 | ```php
46 | LogIn($email, $password) === true)
57 | {
58 | $gdata->DownloadCSV($website);
59 | }
60 | } catch (Exception $e) {
61 | die($e->getMessage());
62 | }
63 | ```
64 |
65 | This will download and save 9 CSV files to your hard disk:
66 |
67 | - `./TOP_PAGES-www.domain.com-Ymd-His.csv`
68 | - `./TOP_QUERIES-www.domain.com-Ymd-His.csv`
69 | - `./CRAWL_ERRORS-www.domain.com-Ymd-His.csv`
70 | - `./CONTENT_ERRORS-www.domain.com-Ymd-His.csv`
71 | - `./CONTENT_KEYWORDS-www.domain.com-Ymd-His.csv`
72 | - `./LATEST_BACKLINKS-www.domain.com-Ymd-His.csv`
73 | - `./INTERNAL_LINKS-www.domain.com-Ymd-His.csv`
74 | - `./EXTERNAL_LINKS-www.domain.com-Ymd-His.csv`
75 | - `./SOCIAL_ACTIVITY-www.domain.com-Ymd-His.csv`
76 |
77 | For an example of how to limit the download to top search queries, or top pages etc. _only_, take a look at example 4.
78 |
79 | By default, the files will be saved to the same folder where you added the gwtdata.php (and run the script). However the `DownloadCSV()` method has a second optional parameter to adjust the savepath - see inline comments in gwtdata.php and/or 2nd example.
80 |
81 | ### Example 2 - `GetSites()`
82 |
83 | To download CSV data for all domains that are registered in your Google Webmaster Tools Account and to save the downloaded files to an extra folder, the steps are as follows:
84 |
85 | - In the same folder where you added the gwtdata.php, create a folder named `csv`.
86 | - In the same folder where you added the gwtdata.php, create and run the following PHP script.
_You'll need to replace the example values for "email" and "password" with valid login details for your Google Account._
87 |
88 | ```php
89 | LogIn($email, $password) === true)
97 | {
98 | $sites = $gdata->GetSites();
99 | foreach($sites as $site)
100 | {
101 | $gdata->DownloadCSV($site, "./csv");
102 | }
103 | }
104 | } catch (Exception $e) {
105 | die($e->getMessage());
106 | }
107 | ```
108 |
109 | This will download 9 CSV files (see example #1) for each domain that is registered in your Google Webmaster Tools Account and save them to the csv folder.
110 |
111 | ### Example 3 - `GetDownloadedFiles()`
112 |
113 | Same as example two, but using the `GetDownloadedFiles()` method to get feedback on which files have been saved to your hard disk (returned as absolute paths).
114 |
115 | ```php
116 | LogIn($email, $passwd) === true)
124 | {
125 | $sites = $gdata->GetSites();
126 | foreach($sites as $site)
127 | {
128 | $gdata->DownloadCSV($site, "./csv");
129 | }
130 |
131 | $files = $gdata->GetDownloadedFiles();
132 | foreach($files as $file)
133 | {
134 | print "Saved $file\n";
135 | }
136 | }
137 | } catch (Exception $e) {
138 | die($e->getMessage());
139 | }
140 | ```
141 |
142 | ### Example 4 - `SetTables()`
143 |
144 | To download CSV data for a single domain name of choice and top search query data _only_, the steps are as follows:
145 |
146 | - In the same folder where you added the gwtdata.php, create and run the following PHP script.
_You'll need to replace the example values for "email" and "password" with valid login details for your Google Account and for "website" with a valid URL for a site registered in your GWT account._
147 |
148 | ```php
149 | LogIn($email, $password) === true)
165 | {
166 | $gdata->SetTables($tables);
167 | $gdata->DownloadCSV($website);
168 | }
169 | } catch (Exception $e) {
170 | die($e->getMessage());
171 | }
172 | ```
173 |
174 | This will download and save one file only: `./TOP_QUERIES-www.domain.com-Ymd-His.csv`
175 |
176 | ### Example 5 - `SetDaterange()`
177 |
178 | To download CSV data for all domains that are registered in your Google Webmaster Tools Account and for a specific date range _only_, the steps are as follows:
179 |
180 | - In the same folder where you added the gwtdata.php, create and run the following PHP script.
_You'll need to replace the example values for "email" and "password" with valid login details for your Google Account._
181 |
182 | ```php
183 | LogIn($email, $password) === true)
194 | {
195 | $gdata->SetDaterange($daterange);
196 |
197 | $sites = $gdata->GetSites();
198 | foreach($sites as $site)
199 | {
200 | $gdata->DownloadCSV($site);
201 | }
202 | }
203 | } catch (Exception $e) {
204 | die($e->getMessage());
205 | }
206 | ```
207 |
208 | This will download 9 CSV files (see example #1) for each domain that is registered in your Google Webmaster Tools Account containing data for the specified date range.
209 |
210 | ### Example 6 - `SetLanguage()`
211 |
212 | To download data for all domains that are registered in your Google Webmaster Tools Account and top search query data _only_ and for a specific date range _only_ and you want to use a custom language for the CSV headline, the steps are as follows:
213 |
214 | - In the same folder where you added the gwtdata.php, create and run the following PHP script.
_You'll need to replace the example values for "email" and "password" with valid login details for your Google Account._
215 |
216 | ```php
217 | LogIn($email, $passwd) === true)
236 | {
237 | $gdata->SetLanguage($language);
238 | $gdata->SetDaterange($daterange);
239 | $gdata->SetTables($tables);
240 |
241 | $sites = $gdata->GetSites();
242 | foreach($sites as $site)
243 | {
244 | $gdata->DownloadCSV($site);
245 | }
246 | }
247 | } catch (Exception $e) {
248 | die($e->getMessage());
249 | }
250 | ```
251 |
252 | This will download one CSV file for each domain that is registered in your Google Webmaster Tools Account, containing top queries data for the specified date range and with a German headline.
253 |
254 | That's it.
255 |
--------------------------------------------------------------------------------
/src/gwtdata.php:
--------------------------------------------------------------------------------
1 |
23 | * @link: https://code.google.com/p/php-webmaster-tools-downloads/
24 | * @link: https://github.com/eyecatchup/php-webmaster-tools-downloads/
25 | */
26 |
/**
 * Downloads website data tables from Google Webmaster Tools as CSV files.
 *
 * Typical use: LogIn(), optionally SetLanguage() / SetTables() /
 * SetDaterange(), then DownloadCSV() for each site of interest.
 * GetDownloadedFiles() and GetSkippedFiles() report the outcome.
 */
class GWTdata
{
    const HOST = "https://www.google.com";
    const SERVICEURI = "/webmasters/tools/";

    public $_language, $_tables, $_daterange, $_downloaded, $_skipped;
    // Declared explicitly (they used to be created dynamically in the
    // constructor); kept public so existing external access keeps working.
    public $_errTablesSort, $_errTablesType;
    private $_auth, $_logged_in;

    /**
     * Every table name accepted by SetTables(); also the default selection.
     */
    private static $_validTables = array("TOP_PAGES", "TOP_QUERIES",
        "CRAWL_ERRORS", "CONTENT_ERRORS", "CONTENT_KEYWORDS",
        "INTERNAL_LINKS", "EXTERNAL_LINKS", "SOCIAL_ACTIVITY",
        "LATEST_BACKLINKS"
    );

    /**
     * Tables fetched through DownloadCSV_XTRA().
     * Each entry is: token page URI, token regex delimiter, download URI.
     */
    private static $_extraTables = array(
        "CONTENT_ERRORS"   => array("html-suggestions", "\)", "content-problems-dl"),
        "CONTENT_KEYWORDS" => array("keywords", "\)", "content-words-dl"),
        "INTERNAL_LINKS"   => array("internal-links", "\)", "internal-links-dl"),
        "EXTERNAL_LINKS"   => array("external-links-domain", "\)", "external-links-domain-dl"),
        "SOCIAL_ACTIVITY"  => array("social-activity", "x26", "social-activity-dl"),
        "LATEST_BACKLINKS" => array("external-links-domain", "\)", "backlinks-latest-dl")
    );

    /**
     * Initialises defaults: not logged in, language "en", empty date range,
     * all tables selected, and empty downloaded/skipped lists.
     */
    public function __construct()
    {
        $this->_auth = false;
        $this->_logged_in = false;
        $this->_language = "en";
        $this->_daterange = array("", "");
        $this->_tables = self::$_validTables;
        $this->_errTablesSort = array(0 => "http",
            1 => "not-found", 2 => "restricted-by-robotsTxt",
            3 => "unreachable", 4 => "timeout", 5 => "not-followed",
            "kAppErrorSoft-404s" => "soft404", "sitemap" => "in-sitemaps"
        );
        $this->_errTablesType = array(0 => "web-crawl-errors",
            1 => "mobile-wml-xhtml-errors", 2 => "mobile-chtml-errors",
            3 => "mobile-operator-errors", 4 => "news-crawl-errors"
        );
        $this->_downloaded = array();
        $this->_skipped = array();
    }

    /**
     * Sets content language.
     *
     * @param $str     String   Valid ISO 639-1 language code, supported by Google.
     */
    public function SetLanguage($str)
    {
        $this->_language = $str;
    }

    /**
     * Sets features that should be downloaded.
     *
     * The current selection is only replaced when every entry is valid.
     *
     * @param $arr     Array   Non-empty array; valid values are:
     *                         "TOP_PAGES", "TOP_QUERIES", "CRAWL_ERRORS",
     *                         "CONTENT_ERRORS", "CONTENT_KEYWORDS",
     *                         "INTERNAL_LINKS", "EXTERNAL_LINKS",
     *                         "SOCIAL_ACTIVITY", "LATEST_BACKLINKS".
     * @throws Exception       If $arr is not an array, is empty, has more
     *                         entries than there are valid tables, or
     *                         contains an unknown table name.
     */
    public function SetTables($arr)
    {
        if (!is_array($arr) || empty($arr)
            || count($arr) > count(self::$_validTables)) {
            throw new Exception("Invalid argument given.");
        }

        // Validate into a scratch list first so a bad entry cannot leave
        // $this->_tables half-populated.
        $tables = array();
        foreach ($arr as $table) {
            if (!in_array($table, self::$_validTables, true)) {
                throw new Exception("Invalid argument given.");
            }
            $tables[] = $table;
        }
        $this->_tables = $tables;
    }

    /**
     * Sets daterange for download data.
     *
     * @param $arr     Array   Array containing two ISO 8601 formatted date
     *                         strings (start, end), eg. "2012-01-01".
     * @return Boolean         True once the range has been stored.
     * @throws Exception       If $arr is not a two-element array of valid dates.
     */
    public function SetDaterange($arr)
    {
        if (!is_array($arr) || count($arr) != 2 || !isset($arr[0], $arr[1])) {
            throw new Exception("Invalid argument given.");
        }
        if ($this->IsISO8601($arr[0]) !== true || $this->IsISO8601($arr[1]) !== true) {
            throw new Exception("Invalid argument given.");
        }

        // The download URLs take the dates without separators (YYYYMMDD).
        $this->_daterange = array(str_replace("-", "", $arr[0]),
            str_replace("-", "", $arr[1]));
        return true;
    }

    /**
     * Returns array of downloaded filenames.
     *
     * @return Array   Array of filenames that have been written to disk.
     */
    public function GetDownloadedFiles()
    {
        return $this->_downloaded;
    }

    /**
     * Returns array of skipped filenames.
     *
     * @return Array   Array of target filenames whose download or write failed.
     */
    public function GetSkippedFiles()
    {
        return $this->_skipped;
    }

    /**
     * Checks if client has logged into their Google account yet.
     *
     * @return Boolean  Returns true if logged in, or false if not.
     */
    private function IsLoggedIn()
    {
        return $this->_logged_in;
    }

    /**
     * Attempts to log into the specified Google account.
     *
     * @param $email   String   User's Google email address.
     * @param $pwd     String   Password for Google account.
     * @return Boolean  Returns true when Authentication was successful,
     *                  else false.
     */
    public function LogIn($email, $pwd)
    {
        $url = self::HOST . "/accounts/ClientLogin";
        $postRequest = array(
            'accountType' => 'HOSTED_OR_GOOGLE',
            'Email' => $email,
            'Passwd' => $pwd,
            'service' => "sitemaps",
            'source' => "Google-WMTdownloadscript-0.1"
        );

        // Before PHP version 5.2.0 and when the first char of the password is
        // an @ symbol, send data in CURLOPT_POSTFIELDS as urlencoded string.
        // strncmp() is used instead of $pwd[0] so an empty password does not
        // trigger an "uninitialized string offset" error.
        if (strncmp((string) $pwd, '@', 1) === 0
            || version_compare(PHP_VERSION, '5.2.0') < 0) {
            $postRequest = http_build_query($postRequest);
        }

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 30);
        // NOTE(review): peer verification is disabled while credentials are
        // being sent; kept as-is for compatibility with hosts lacking a CA
        // bundle, but this should be enabled wherever possible.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $postRequest);
        $output = curl_exec($ch);
        $info = curl_getinfo($ch);
        curl_close($ch);

        if ($info['http_code'] != 200 || !is_string($output)) {
            return false;
        }
        if (!preg_match('/Auth=(.*)/', $output, $match) || !isset($match[1])) {
            return false;
        }

        $this->_auth = $match[1];
        $this->_logged_in = true;
        return true;
    }

    /**
     * Attempts authenticated GET Request.
     *
     * @param $url    String   URL for the GET request (path relative to HOST).
     * @return Mixed  Curl result as String,
     *                or false (Boolean) when not logged in or when the
     *                response status is not 200.
     */
    public function GetData($url)
    {
        if ($this->IsLoggedIn() !== true) {
            return false;
        }

        $url = self::HOST . $url;
        $head = array("Authorization: GoogleLogin auth=" . $this->_auth,
            "GData-Version: 2");
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 30);
        // NOTE(review): peer verification is disabled; see LogIn().
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        // An empty string asks cURL to accept every encoding it supports;
        // the former boolean true was sent as the literal encoding name "1".
        curl_setopt($ch, CURLOPT_ENCODING, "");
        curl_setopt($ch, CURLOPT_HTTPHEADER, $head);
        $result = curl_exec($ch);
        $info = curl_getinfo($ch);
        curl_close($ch);

        return ($info['http_code'] != 200) ? false : $result;
    }

    /**
     * Gets all available sites from Google Webmaster Tools account.
     *
     * @return Mixed  Array with all site URLs registered in GWT account,
     *                or false (Boolean) if request failed.
     */
    public function GetSites()
    {
        if ($this->IsLoggedIn() !== true) {
            return false;
        }
        $feed = $this->GetData(self::SERVICEURI . "feeds/sites/");
        if ($feed === false) {
            return false;
        }

        $sites = array();
        $doc = new DOMDocument();
        $doc->loadXML($feed);
        foreach ($doc->getElementsByTagName('entry') as $entry) {
            $title = $entry->getElementsByTagName('title')->item(0);
            // Entries without a <title> are skipped instead of causing a
            // fatal error on a null node.
            if ($title !== null) {
                $sites[] = $title->nodeValue;
            }
        }
        return $sites;
    }

    /**
     * Gets the download links for an available site
     * from the Google Webmaster Tools account.
     *
     * @param $url    String   Site URL registered in GWT.
     * @return Mixed  Decoded JSON download list (documented upstream as an
     *                array with keys TOP_PAGES and TOP_QUERIES),
     *                or false (Boolean) when not logged in or when the
     *                list could not be fetched.
     */
    public function GetDownloadUrls($url)
    {
        if ($this->IsLoggedIn() !== true) {
            return false;
        }
        $_url = sprintf(self::SERVICEURI . "downloads-list?hl=%s&siteUrl=%s",
            $this->_language,
            urlencode($url));
        $downloadList = $this->GetData($_url);
        if ($downloadList === false) {
            return false;
        }
        return json_decode($downloadList, true);
    }

    /**
     * Downloads every selected table for the given site.
     *
     * Results are recorded in the downloaded/skipped lists.
     *
     * @param $site    String   Site URL available in GWT Account.
     * @param $savepath  String   Optional path to save CSV to (no trailing slash!).
     * @return Mixed   False (Boolean) when not logged in, otherwise nothing.
     */
    public function DownloadCSV($site, $savepath=".")
    {
        if ($this->IsLoggedIn() !== true) {
            return false;
        }

        $downloadUrls = $this->GetDownloadUrls($site);
        $filename = parse_url($site, PHP_URL_HOST) . "-" . date("Ymd-His");

        foreach ($this->_tables as $table) {
            if ($table == "CRAWL_ERRORS") {
                $this->DownloadCSV_CrawlErrors($site, $savepath);
            } elseif (isset(self::$_extraTables[$table])) {
                list($tokenUri, $tokenDelimiter, $dlUri) = self::$_extraTables[$table];
                $this->DownloadCSV_XTRA($site, $savepath,
                    $tokenUri, $tokenDelimiter, $table, $dlUri);
            } else {
                $finalName = "$savepath/$table-$filename.csv";
                // No URL for this table (list fetch failed or key missing):
                // record it as skipped rather than requesting a bogus URL.
                if (!is_array($downloadUrls) || !isset($downloadUrls[$table])) {
                    $this->_skipped[] = $finalName;
                    continue;
                }
                // Plain concatenation: the server-supplied URL must not be
                // used as a sprintf() format string, since any "%" in it
                // would be read as a conversion specifier.
                $finalUrl = $downloadUrls[$table]
                    . "&prop=ALL&db=" . $this->_daterange[0]
                    . "&de=" . $this->_daterange[1]
                    . "&more=true";
                $this->SaveData($finalUrl, $finalName);
            }
        }
    }

    /**
     * Downloads "unofficial" downloads based on the given URL.
     *
     * @param $site    String   Site URL available in GWT Account.
     * @param $savepath  String   Path to save CSV to (no trailing slash!).
     * @param $tokenUri  String   Service URI of the page holding the security token.
     * @param $tokenDelimiter  String   Regex fragment that ends the token match.
     * @param $filenamePrefix  String   Prefix for the saved file name.
     * @param $dlUri   String   Service URI of the CSV download.
     * @return Mixed   False (Boolean) when not logged in, otherwise nothing.
     */
    public function DownloadCSV_XTRA($site, $savepath, $tokenUri, $tokenDelimiter, $filenamePrefix, $dlUri)
    {
        if ($this->IsLoggedIn() !== true) {
            return false;
        }

        // Encode the site URL for use as a query value, as GetDownloadUrls() does.
        $siteParam = urlencode($site);
        $tokenPage = sprintf(self::SERVICEURI . $tokenUri . "?hl=%s&siteUrl=%s",
            $this->_language, $siteParam);
        $token = $this->GetToken($tokenPage, $tokenDelimiter, $dlUri);

        $filename = parse_url($site, PHP_URL_HOST) . "-" . date("Ymd-His");
        $finalName = "$savepath/$filenamePrefix-$filename.csv";

        $url = self::SERVICEURI . $dlUri
            . "?hl=%s&siteUrl=%s&security_token=%s&prop=ALL&db=%s&de=%s&more=true";
        $_url = sprintf($url, $this->_language, $siteParam, $token,
            $this->_daterange[0], $this->_daterange[1]);
        $this->SaveData($_url, $finalName);
    }

    /**
     * Downloads the Crawl Errors file based on the given URL.
     *
     * @param $site    String   Site URL available in GWT Account.
     * @param $savepath  String   Optional: Path to save CSV to (no trailing slash!).
     * @param $separated Boolean  Optional: If true, the method saves separated CSV files
     *                            for each error type. Default: Merge errors in one file.
     * @return Mixed   False (Boolean) when not logged in, otherwise nothing.
     */
    public function DownloadCSV_CrawlErrors($site, $savepath=".", $separated=false)
    {
        if ($this->IsLoggedIn() !== true) {
            return false;
        }

        // Encode the site URL for use as a query value, as GetDownloadUrls() does.
        $siteParam = urlencode($site);
        $filename = parse_url($site, PHP_URL_HOST) . "-" . date("Ymd-His");

        if (!$separated) {
            $uri = self::SERVICEURI . "crawl-errors?hl=en&siteUrl=$siteParam&tid=we";
            $token = $this->GetToken($uri, "x26");
            $finalName = "$savepath/CRAWL_ERRORS-$filename.csv";
            $url = self::SERVICEURI . "crawl-errors-dl?hl=%s&siteUrl=%s&security_token=%s&type=0";
            $_url = sprintf($url, $this->_language, $siteParam, $token);
            $this->SaveData($_url, $finalName);
            return;
        }

        // "tid" value per error-type id; every other type id uses "we".
        $typeParams = array(1 => "mx", 2 => "mc");
        foreach ($this->_errTablesSort as $sortid => $sortname) {
            foreach ($this->_errTablesType as $typeid => $typename) {
                $type_param = isset($typeParams[$typeid]) ? $typeParams[$typeid] : "we";
                $uri = self::SERVICEURI . "crawl-errors?hl=en&siteUrl=$siteParam&tid=$type_param";
                $token = $this->GetToken($uri, "x26");
                $finalName = "$savepath/CRAWL_ERRORS-$typename-$sortname-$filename.csv";
                $url = self::SERVICEURI . "crawl-errors-dl?hl=%s&siteUrl=%s&security_token=%s&type=%s&sort=%s";
                $_url = sprintf($url, $this->_language, $siteParam, $token, $typeid, $sortid);
                $this->SaveData($_url, $finalName);
            }
        }
    }

    /**
     * Saves data to a CSV file based on the given URL.
     *
     * On success the file's real path is added to the downloaded list;
     * otherwise the target name is added to the skipped list.
     *
     * @param $finalUrl   String   CSV Download URI.
     * @param $finalName  String   Path of the file to write.
     * @return Boolean    True if the file was written, else false.
     */
    private function SaveData($finalUrl, $finalName)
    {
        $data = $this->GetData($finalUrl);
        // NOTE(review): utf8_decode() is deprecated as of PHP 8.2; it is kept
        // so the written files retain their current encoding.
        if (is_string($data) && strlen($data) > 1
            && file_put_contents($finalName, utf8_decode($data))) {
            $this->_downloaded[] = realpath($finalName);
            return true;
        }

        $this->_skipped[] = $finalName;
        return false;
    }

    /**
     * Regular Expression to find the Security Token for a download file.
     *
     * @param $uri        String   A Webmaster Tools Desktop Service URI.
     * @param $delimiter  String   Trailing delimiter for the regex.
     * @param $dlUri      String   Optional download URI the token must follow.
     * @return String     Returns a security token, or an empty string when
     *                    the page could not be fetched or no token matched.
     */
    private function GetToken($uri, $delimiter, $dlUri='')
    {
        $page = $this->GetData($uri);
        if (!is_string($page)) {
            return '';
        }

        $matches = array();
        preg_match_all("#$dlUri.*?46security_token(.*?)$delimiter#si", $page, $matches);
        if (!isset($matches[1][0])) {
            return '';
        }
        // Drops the first three characters and the last one of the capture;
        // presumably an escaped "=" prefix and a trailing quote/backslash --
        // TODO confirm against the live page markup.
        return (string) substr($matches[1][0], 3, -1);
    }

    /**
     * Validates ISO 8601 calendar date format.
     *
     * Accepts the extended (YYYY-MM-DD) and basic (YYYYMMDD) forms and
     * rejects dates that do not exist, such as 2012-02-31.
     *
     * @param $str      String   ISO 8601 date string (eg. 2012-01-01).
     * @return Boolean  Returns true if string has valid format, else false.
     */
    private function IsISO8601($str)
    {
        // \2 forces both separators to be the same: either "-" or none.
        if (!is_string($str)
            || !preg_match('/^(\d{4})(-?)(\d{2})\2(\d{2})$/D', $str, $parts)) {
            return false;
        }
        return checkdate((int) $parts[3], (int) $parts[4], (int) $parts[1]);
    }
}
414 | ?>
415 |
--------------------------------------------------------------------------------