├── README.md ├── simple-example.php ├── advanced-example.php └── SitemapGenerator.php /README.md: -------------------------------------------------------------------------------- 1 | PHP Sitemap Generator 2 | ===================== 3 | 4 | This class can be used to generate sitemaps and notify updates to search engines. 5 | 6 | It can build a sitemap file from a list of URLs. The URLs may have attached the last modification date, a change frequency and a priority. The sitemap file may be saved in the compressed format. 7 | 8 | The class may also update the site robots.txt file with the sitemap address. 9 | 10 | When the sitemap is updated, the class can also notify search engines like Google, Bing, Yahoo and Ask. 11 | 12 | 13 | -------------------------------------------------------------------------------- /simple-example.php: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | addUrl($url, date('c'), 'daily', '1'); 20 | $sitemap->addUrl($url."page1", date('c'), 'daily', '0.5'); 21 | $sitemap->addUrl($url."page2", date('c'), 'daily'); 22 | $sitemap->addUrl($url."page3", date('c')); 23 | $sitemap->addUrl($url."page4"); 24 | $sitemap->addUrl($url."page/subpage1", date('c'), 'daily', '0.4'); 25 | $sitemap->addUrl($url."page/subpage2", date('c'), 'daily'); 26 | $sitemap->addUrl($url."page/subpage3", date('c')); 27 | $sitemap->addUrl($url."page/subpage4"); 28 | 29 | // create sitemap 30 | $sitemap->createSitemap(); 31 | 32 | // write sitemap as file 33 | $sitemap->writeSitemap(); 34 | 35 | // update robots.txt file 36 | $sitemap->updateRobots(); 37 | 38 | // submit sitemaps to search engines 39 | $sitemap->submitSitemap(); 40 | ?> 41 | 42 | 43 | -------------------------------------------------------------------------------- /advanced-example.php: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | createGZipFile = true; 19 | 20 | // determine how 
many urls should be put into one file 21 | $sitemap->maxURLsPerSitemap = 10000; 22 | 23 | // sitemap file name 24 | $sitemap->sitemapFileName = "sitemap.xml"; 25 | 26 | // sitemap index file name 27 | $sitemap->sitemapIndexFileName = "sitemap-index.xml"; 28 | 29 | // robots file name 30 | $sitemap->robotsFileName = "robots.txt"; 31 | 32 | $urls = array( 33 | array("http://your.app.com", date('c'), 'daily', '1'), 34 | array("http://your.app.com/mainpage1", date('c'), 'daily', '0.5'), 35 | array("http://your.app.com/mainpage2", date('c'), 'daily'), 36 | array("http://your.app.com/mainpage3", date('c')), 37 | array("http://your.app.com/mainpage4")); 38 | 39 | // add many URLs at one time 40 | $sitemap->addUrls($urls); 41 | 42 | // add urls one by one 43 | $sitemap->addUrl("http://your.app.com/page1", date('c'), 'daily', '0.5'); 44 | $sitemap->addUrl("http://your.app.com/page2", date('c'), 'daily'); 45 | $sitemap->addUrl("http://your.app.com/page3", date('c')); 46 | $sitemap->addUrl("http://your.app.com/page4"); 47 | $sitemap->addUrl("http://your.app.com/page/subpage1", date('c'), 'daily', '0.4'); 48 | $sitemap->addUrl("http://your.app.com/page/subpage2", date('c'), 'daily'); 49 | $sitemap->addUrl("http://your.app.com/page/subpage3", date('c')); 50 | $sitemap->addUrl("http://your.app.com/page/subpage4"); 51 | 52 | try { 53 | // create sitemap 54 | $sitemap->createSitemap(); 55 | 56 | // write sitemap as file 57 | $sitemap->writeSitemap(); 58 | 59 | // update robots.txt file 60 | $sitemap->updateRobots(); 61 | 62 | // submit sitemaps to search engines 63 | $result = $sitemap->submitSitemap("yahooAppId"); 64 | // shows each search engine submitting status 65 | echo "
";
66 |             print_r($result);
67 |             echo "
"; 68 | 69 | } 70 | catch (Exception $exc) { 71 | echo $exc->getTraceAsString(); 72 | } 73 | 74 | echo "Memory peak usage: ".number_format(memory_get_peak_usage()/(1024*1024),2)."MB"; 75 | $time2 = explode(" ",microtime()); 76 | $time2 = $time2[1]; 77 | echo "
/**
 * Sitemap generator class.
 *
 * Collects URLs, renders them as sitemaps.org XML documents (splitting into
 * several gzipped files plus a sitemap index when more than
 * $maxURLsPerSitemap URLs were added), writes the result to disk, keeps the
 * "Sitemap:" lines of robots.txt up to date and pings search engines.
 *
 * Typical call order: addUrl()/addUrls() -> createSitemap() ->
 * writeSitemap() / updateRobots() / submitSitemap().
 *
 * @copyright 2009 Paweł Antczak
 * @license http://www.gnu.org/licenses/gpl.html GPL V 2.0
 * @version 1.2.0
 * @see http://www.sitemaps.org/protocol.php
 * @see http://en.wikipedia.org/wiki/Sitemaps
 * @see http://en.wikipedia.org/wiki/Sitemap_index
 */
class SitemapGenerator {
    /**
     * Name of sitemap file
     * @var string
     * @access public
     */
    public $sitemapFileName = "sitemap.xml";
    /**
     * Name of sitemap index file
     * @var string
     * @access public
     */
    public $sitemapIndexFileName = "sitemap-index.xml";
    /**
     * Robots file name
     * @var string
     * @access public
     */
    public $robotsFileName = "robots.txt";
    /**
     * Quantity of URLs per single sitemap file.
     * According to specification max value is 50.000.
     * If your links are very long, a sitemap file can grow beyond 10MB,
     * in this case use a smaller value.
     * @var int
     * @access public
     */
    public $maxURLsPerSitemap = 50000;
    /**
     * If true, two sitemap files (.xml and .xml.gz) will be created and added to robots.txt.
     * If true, the .gz file will be submitted to search engines.
     * If more URLs than $maxURLsPerSitemap were added, this option is ignored:
     * all sitemap files except the sitemap index are compressed.
     * @var bool
     * @access public
     */
    public $createGZipFile = false;
    /**
     * URL of the site, with / at the end.
     * Used to build the public URLs of the generated files.
     * @var string
     * @access private
     */
    private $baseURL;
    /**
     * Base path, prepended verbatim to every file name that is written
     * (so it needs its own trailing directory separator).
     * @var string
     * @access private
     */
    private $basePath;
    /**
     * Version of this class
     * @var string
     * @access private
     */
    private $classVersion = "1.2.0";
    /**
     * Search engines ping URLs. Entry 0 is a pair for Yahoo:
     * [0] is used when an app id is supplied, [1] otherwise.
     * NOTE(review): several of these ping endpoints are believed to have
     * been retired by their operators - verify before relying on them.
     * @var array
     * @access private
     */
    private $searchEngines = array(
        array("http://search.yahooapis.com/SiteExplorerService/V1/updateNotification?appid=USERID&url=",
              "http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap="),
        "http://www.google.com/webmasters/tools/ping?sitemap=",
        "http://submissions.ask.com/ping?sitemap=",
        "http://www.bing.com/webmaster/ping.aspx?siteMap="
    );
    /**
     * Collected URL entries; each is an array with key 'loc' and optional
     * keys 'lastmod', 'changefreq', 'priority'.
     * @var array
     * @access private
     */
    private $urls = array();
    /**
     * Generated sitemaps as array(fileName, xmlContent) pairs.
     * Stays null until createSitemap() has run.
     * @var array|null
     * @access private
     */
    private $sitemaps;
    /**
     * Generated sitemap index as array(fileName, xmlContent),
     * or null when a single sitemap is enough.
     * @var array|null
     * @access private
     */
    private $sitemapIndex;
    /**
     * Full URL of the file that is submitted to search engines
     * (the index, the .gz sitemap or the plain sitemap).
     * @var string
     * @access private
     */
    private $sitemapFullURL;

    /**
     * Constructor.
     * @param string $baseURL Your site URL, with / at the end.
     * @param string $basePath Relative path where sitemap and robots should be stored.
     */
    public function __construct($baseURL, $basePath = "") {
        $this->baseURL = $baseURL;
        $this->basePath = $basePath;
    }

    /**
     * Use this to add many URLs at one time.
     * Each entry is either an array with 1 to 4 fields
     * (url, lastModified, changeFrequency, priority) or a plain URL string.
     * @param array $urlsArray
     * @throws InvalidArgumentException when the argument or one of its entries has the wrong type
     */
    public function addUrls($urlsArray) {
        if (!is_array($urlsArray))
            throw new InvalidArgumentException("Array as argument should be given.");
        foreach ($urlsArray as $url) {
            if (is_string($url)) {
                // Indexing a string with [0] would yield its first character,
                // so a bare URL string is handled explicitly.
                $this->addUrl($url);
                continue;
            }
            if (!is_array($url))
                throw new InvalidArgumentException("Array as argument should be given.");
            $this->addUrl(isset($url[0]) ? $url[0] : null,
                          isset($url[1]) ? $url[1] : null,
                          isset($url[2]) ? $url[2] : null,
                          isset($url[3]) ? $url[3] : null);
        }
    }

    /**
     * Use this to add single URL to sitemap.
     * @param string $url URL
     * @param string $lastModified When it was modified, use ISO 8601
     * @param string $changeFrequency How often search engines should revisit this URL
     * @param string $priority Priority of URL on your site
     * @throws InvalidArgumentException when the URL is missing or longer than 2048 characters
     * @see http://en.wikipedia.org/wiki/ISO_8601
     * @see http://php.net/manual/en/function.date.php
     */
    public function addUrl($url, $lastModified = null, $changeFrequency = null, $priority = null) {
        // Loose comparison on purpose: rejects null as well as "" (and 0/false).
        if ($url == null)
            throw new InvalidArgumentException("URL is mandatory. At least one argument should be given.");
        // Without mbstring the check counts bytes, which over-counts multi-byte URLs.
        $urlLength = extension_loaded('mbstring') ? mb_strlen($url) : strlen($url);
        if ($urlLength > 2048)
            throw new InvalidArgumentException("URL lenght can't be bigger than 2048 characters. "
                ."Note, that precise url length check is guaranteed only using mb_string extension. "
                ."Make sure Your server allow to use mbstring extension.");
        $entry = array('loc' => $url);
        if (isset($lastModified)) $entry['lastmod'] = $lastModified;
        if (isset($changeFrequency)) $entry['changefreq'] = $changeFrequency;
        if (isset($priority)) $entry['priority'] = $priority;
        $this->urls[] = $entry;
    }

    /**
     * Create sitemap in memory.
     * May be called repeatedly; every call rebuilds the documents from the
     * URLs added so far.
     * @throws BadMethodCallException when no URL was added
     * @throws InvalidArgumentException when $maxURLsPerSitemap is outside 1..50000
     * @throws LengthException when a sitemap exceeds 10MB or more than 1000 sitemaps are needed
     */
    public function createSitemap() {
        if (empty($this->urls))
            throw new BadMethodCallException("To create sitemap, call addUrl or addUrls function first.");
        if ($this->maxURLsPerSitemap > 50000)
            throw new InvalidArgumentException("More than 50,000 URLs per single sitemap is not allowed.");
        if ($this->maxURLsPerSitemap < 1)
            throw new InvalidArgumentException("maxURLsPerSitemap must be at least 1.");

        // Informational XML comments placed between the XML declaration and the root element.
        $generatorInfo = '
<!-- generator="SimpleSitemapGenerator/'.$this->classVersion.'" -->
<!-- generated-on="'.date('c').'" -->
';
        $sitemapHeader = '<?xml version="1.0" encoding="UTF-8"?>'.$generatorInfo.'
<urlset
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
    xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
</urlset>';
        $sitemapIndexHeader = '<?xml version="1.0" encoding="UTF-8"?>'.$generatorInfo.'
<sitemapindex
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd"
    xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
</sitemapindex>';

        // Build into locals first so a thrown exception leaves previous state untouched.
        $documents = array();
        foreach (array_chunk($this->urls, $this->maxURLsPerSitemap) as $chunk) {
            $xml = new SimpleXMLElement($sitemapHeader);
            foreach ($chunk as $url) {
                $row = $xml->addChild('url');
                foreach (array('loc', 'lastmod', 'changefreq', 'priority') as $field) {
                    if (isset($url[$field]))
                        $row->addChild($field, $this->_escapeXml($url[$field]));
                }
            }
            $content = $xml->asXML();
            if (strlen($content) > 10485760)
                throw new LengthException("Sitemap size is more than 10MB (10,485,760), "
                    ."please decrease maxURLsPerSitemap variable.");
            $documents[] = $content;
        }
        if (count($documents) > 1000)
            throw new LengthException("Sitemap index can contains 1000 single sitemaps. "
                ."Perhaps You trying to submit too many URLs.");

        if (count($documents) > 1) {
            // Several files: each one is stored gzipped and referenced from an index.
            $named = array();
            foreach ($documents as $position => $content) {
                $named[] = array($this->_numberedSitemapName($position + 1), $content);
            }
            $xml = new SimpleXMLElement($sitemapIndexHeader);
            foreach ($named as $sitemap) {
                $row = $xml->addChild('sitemap');
                $row->addChild('loc', $this->_escapeXml($this->baseURL.$sitemap[0]));
                $row->addChild('lastmod', date('c'));
            }
            $this->sitemapFullURL = $this->baseURL.$this->sitemapIndexFileName;
            $this->sitemapIndex = array($this->sitemapIndexFileName, $xml->asXML());
            $this->sitemaps = $named;
        }
        else {
            $this->sitemapFullURL = $this->baseURL.$this->sitemapFileName;
            if ($this->createGZipFile)
                $this->sitemapFullURL .= ".gz";
            $this->sitemapIndex = null;
            $this->sitemaps = array(array($this->sitemapFileName, $documents[0]));
        }
    }

    /**
     * Returns created sitemaps as array(fileName, xmlContent) pairs; the
     * sitemap index, if any, comes first.
     * Use it if you want to work with the sitemap without saving it as files.
     * @return array|null null when createSitemap() was not called yet
     * @access public
     */
    public function toArray() {
        if (isset($this->sitemapIndex))
            return array_merge(array($this->sitemapIndex), $this->sitemaps);
        return $this->sitemaps;
    }

    /**
     * Will write sitemaps as files.
     * @throws BadMethodCallException when createSitemap() was not called first
     * @throws RuntimeException when a file cannot be written
     * @access public
     */
    public function writeSitemap() {
        if (!isset($this->sitemaps))
            throw new BadMethodCallException("To write sitemap, call createSitemap function first.");
        if (isset($this->sitemapIndex)) {
            $this->_writeFile($this->sitemapIndex[1], $this->basePath, $this->sitemapIndex[0]);
            foreach ($this->sitemaps as $sitemap) {
                $this->_writeGZipFile($sitemap[1], $this->basePath, $sitemap[0]);
            }
        }
        else {
            $this->_writeFile($this->sitemaps[0][1], $this->basePath, $this->sitemaps[0][0]);
            if ($this->createGZipFile)
                $this->_writeGZipFile($this->sitemaps[0][1], $this->basePath, $this->sitemaps[0][0].".gz");
        }
    }

    /**
     * If robots.txt exists, replaces its "Sitemap:" lines with the newly created sitemaps.
     * If there is no robots.txt, creates one that allows everything and lists the sitemaps.
     * @throws BadMethodCallException when createSitemap() was not called first
     * @throws RuntimeException when robots.txt cannot be read or written
     * @access public
     */
    public function updateRobots() {
        if (!isset($this->sitemaps))
            throw new BadMethodCallException("To update robots.txt, call createSitemap function first.");
        $robotsPath = $this->basePath.$this->robotsFileName;

        if (isset($this->sitemapIndex)) {
            $sitemapURLs = array($this->sitemapFullURL);
        }
        else {
            // sitemapFullURL may already carry ".gz", so derive both URLs
            // from the plain file name instead of appending to it.
            $plainURL = $this->baseURL.$this->sitemapFileName;
            $sitemapURLs = array($plainURL);
            if ($this->createGZipFile)
                $sitemapURLs[] = $plainURL.".gz";
        }

        if (file_exists($robotsPath)) {
            $existing = file_get_contents($robotsPath);
            if ($existing === false)
                throw new RuntimeException("Unable to read ".$robotsPath.".");
            $kept = array();
            foreach (explode("\n", $existing) as $line) {
                // Drop every previous Sitemap directive, whatever its letter case.
                if (stripos(ltrim($line), 'sitemap:') !== 0)
                    $kept[] = $line;
            }
            // Trim trailing blank lines so repeated runs do not keep growing the file.
            $robotsFileContent = rtrim(implode("\n", $kept));
            if ($robotsFileContent !== '')
                $robotsFileContent .= "\n\n";
        }
        else {
            $robotsFileContent = "User-agent: *\nAllow: /\n\n";
        }
        $robotsFileContent .= "Sitemap: ".implode("\nSitemap: ", $sitemapURLs);

        if (file_put_contents($robotsPath, $robotsFileContent) === false)
            throw new RuntimeException("Unable to write ".$robotsPath.".");
    }

    /**
     * Will inform search engines about newly created sitemaps.
     * One request is sent to every entry of $searchEngines.
     * If no $yahooAppId is passed, the anonymous Yahoo ping URL is used instead.
     * @param string $yahooAppId Your site Yahoo appid.
     * @return array one array per search engine with keys site, fullsite, http_code, message
     * @throws BadMethodCallException when createSitemap() was not called or cURL is missing
     * @access public
     */
    public function submitSitemap($yahooAppId = null) {
        if (!isset($this->sitemaps))
            throw new BadMethodCallException("To submit sitemap, call createSitemap function first.");
        if (!extension_loaded('curl'))
            throw new BadMethodCallException("cURL library is needed to do submission.");
        $searchEngines = $this->searchEngines;
        $searchEngines[0] = isset($yahooAppId)
            ? str_replace("USERID", rawurlencode($yahooAppId), $searchEngines[0][0])
            : $searchEngines[0][1];
        // The sitemap URL travels as a query-string value, so it is percent-encoded.
        $encodedSitemapURL = rawurlencode($this->sitemapFullURL);
        $result = array();
        foreach ($searchEngines as $engine) {
            $pingURL = $engine.$encodedSitemapURL;
            $submitSite = curl_init($pingURL);
            curl_setopt($submitSite, CURLOPT_RETURNTRANSFER, true);
            $responseContent = curl_exec($submitSite);
            $response = curl_getinfo($submitSite);
            if ($responseContent === false) {
                // Transport failure: report the cURL error text instead of a body.
                $responseContent = curl_error($submitSite);
            }
            // Reduce the host to its last two labels, e.g. "www.google.com" -> "google.com".
            $hostParts = array_reverse(explode(".", parse_url($engine, PHP_URL_HOST)));
            $result[] = array("site" => $hostParts[1].".".$hostParts[0],
                              "fullsite" => $pingURL,
                              "http_code" => $response['http_code'],
                              "message" => str_replace("\n", " ", strip_tags($responseContent)));
        }
        return $result;
    }

    /**
     * Escapes a value for use as XML text passed to SimpleXMLElement::addChild().
     * @param mixed $value
     * @return string
     * @access private
     */
    private function _escapeXml($value) {
        return htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8');
    }

    /**
     * Builds the gzipped file name of the n-th sitemap when an index is used,
     * e.g. "sitemap.xml" -> "sitemap1.xml.gz".
     * @param int $number 1-based position of the sitemap
     * @return string
     * @access private
     */
    private function _numberedSitemapName($number) {
        $name = $this->sitemapFileName;
        if (strtolower(substr($name, -4)) === '.xml')
            return substr($name, 0, -4).$number.substr($name, -4).".gz";
        // No .xml suffix: still make the name unique per sitemap.
        return $name.$number.".gz";
    }

    /**
     * Save file.
     * @param string $content
     * @param string $filePath
     * @param string $fileName
     * @return bool result of fclose()
     * @throws RuntimeException when the file cannot be opened or written
     * @access private
     */
    private function _writeFile($content, $filePath, $fileName) {
        $file = fopen($filePath.$fileName, 'w');
        if ($file === false)
            throw new RuntimeException("Unable to open ".$filePath.$fileName." for writing.");
        $written = fwrite($file, $content);
        $closed = fclose($file);
        if ($written === false)
            throw new RuntimeException("Unable to write ".$filePath.$fileName.".");
        return $closed;
    }

    /**
     * Save GZipped file.
     * @param string $content
     * @param string $filePath
     * @param string $fileName
     * @return bool result of gzclose()
     * @throws RuntimeException when zlib is missing or the file cannot be opened or written
     * @access private
     */
    private function _writeGZipFile($content, $filePath, $fileName) {
        if (!function_exists('gzopen'))
            throw new RuntimeException("zlib extension is needed to write compressed sitemaps.");
        $file = gzopen($filePath.$fileName, 'w');
        if ($file === false)
            throw new RuntimeException("Unable to open ".$filePath.$fileName." for writing.");
        $written = gzwrite($file, $content);
        $closed = gzclose($file);
        if ($written === false)
            throw new RuntimeException("Unable to write ".$filePath.$fileName.".");
        return $closed;
    }
}