├── README.md
├── simple-example.php
├── advanced-example.php
└── SitemapGenerator.php
/README.md:
--------------------------------------------------------------------------------
1 | PHP Sitemap Generator
2 | =====================
3 |
4 | This class can be used to generate sitemaps and to notify search engines when they are updated.
5 |
6 | It can build a sitemap file from a list of URLs. Each URL may have a last modification date, a change frequency and a priority attached. The sitemap file may also be saved in a compressed (gzip) format.
7 |
8 | The class may also update the site's robots.txt file with the sitemap address.
9 |
10 | When the sitemap is updated, the class can also notify search engines like Google, Bing, Yahoo and Ask.
11 |
12 |
13 |
--------------------------------------------------------------------------------
/simple-example.php:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
<?php
// Minimal usage: register a few URLs, build the sitemap, write it to disk,
// list it in robots.txt and ping the search engines.
require_once 'SitemapGenerator.php';

// your website url; keep the trailing slash, the page paths below are appended to it
$url = "http://your.app.com/";

// create object
$sitemap = new SitemapGenerator($url);

// add urls (only the URL itself is mandatory; last modification date,
// change frequency and priority are optional)
$sitemap->addUrl($url, date('c'), 'daily', '1');
$sitemap->addUrl($url."page1", date('c'), 'daily', '0.5');
$sitemap->addUrl($url."page2", date('c'), 'daily');
$sitemap->addUrl($url."page3", date('c'));
$sitemap->addUrl($url."page4");
$sitemap->addUrl($url."page/subpage1", date('c'), 'daily', '0.4');
$sitemap->addUrl($url."page/subpage2", date('c'), 'daily');
$sitemap->addUrl($url."page/subpage3", date('c'));
$sitemap->addUrl($url."page/subpage4");

// create sitemap
$sitemap->createSitemap();

// write sitemap as file
$sitemap->writeSitemap();

// update robots.txt file
$sitemap->updateRobots();

// submit sitemaps to search engines
$sitemap->submitSitemap();
40 | ?>
41 |
42 |
43 |
--------------------------------------------------------------------------------
/advanced-example.php:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
<?php
// Advanced usage: tune the generator, add URLs in bulk and one by one, then
// build, write, publish and submit the sitemap while reporting memory and time.
require_once 'SitemapGenerator.php';

// remember when the script started so the run time can be reported at the end
$time = microtime(true);

// create object
$sitemap = new SitemapGenerator("http://your.app.com/");

// will create also compressed (gzipped) sitemap
$sitemap->createGZipFile = true;

// determine how many urls should be put into one file
$sitemap->maxURLsPerSitemap = 10000;

// sitemap file name
$sitemap->sitemapFileName = "sitemap.xml";

// sitemap index file name
$sitemap->sitemapIndexFileName = "sitemap-index.xml";

// robots file name
$sitemap->robotsFileName = "robots.txt";

$urls = array(
    array("http://your.app.com", date('c'), 'daily', '1'),
    array("http://your.app.com/mainpage1", date('c'), 'daily', '0.5'),
    array("http://your.app.com/mainpage2", date('c'), 'daily'),
    array("http://your.app.com/mainpage3", date('c')),
    array("http://your.app.com/mainpage4"));

// add many URLs at one time
$sitemap->addUrls($urls);

// add urls one by one
$sitemap->addUrl("http://your.app.com/page1", date('c'), 'daily', '0.5');
$sitemap->addUrl("http://your.app.com/page2", date('c'), 'daily');
$sitemap->addUrl("http://your.app.com/page3", date('c'));
$sitemap->addUrl("http://your.app.com/page4");
$sitemap->addUrl("http://your.app.com/page/subpage1", date('c'), 'daily', '0.4');
$sitemap->addUrl("http://your.app.com/page/subpage2", date('c'), 'daily');
$sitemap->addUrl("http://your.app.com/page/subpage3", date('c'));
$sitemap->addUrl("http://your.app.com/page/subpage4");

try {
    // create sitemap
    $sitemap->createSitemap();

    // write sitemap as file
    $sitemap->writeSitemap();

    // update robots.txt file
    $sitemap->updateRobots();

    // submit sitemaps to search engines
    $result = $sitemap->submitSitemap("yahooAppId");
    // shows each search engine submitting status
    echo "<pre>";
    print_r($result);
    echo "</pre>";

}
catch (Exception $exc) {
    // the message says what went wrong, the trace says where
    echo $exc->getMessage()."\n";
    echo $exc->getTraceAsString();
}

echo "Memory peak usage: ".number_format(memory_get_peak_usage()/(1024*1024),2)."MB";
echo "<br>Execution time: ".number_format(microtime(true)-$time,2)."s";
78 |
79 |
80 | ?>
81 |
82 |
83 |
--------------------------------------------------------------------------------
/SitemapGenerator.php:
--------------------------------------------------------------------------------
1 |
/**
 * Sitemap generator.
 *
 * Collects URLs, renders them as sitemap XML (several files plus a sitemap
 * index when the URLs do not fit into one file), writes the files to disk,
 * lists them in robots.txt and notifies search engines.
 *
 * Typical call order: addUrl()/addUrls(), createSitemap(), then any of
 * toArray(), writeSitemap(), updateRobots(), submitSitemap().
 *
 * @copyright 2009 Paweł Antczak
 * @license http://www.gnu.org/licenses/gpl.html GPL V 2.0
 * @version 1.2.0
 * @see http://www.sitemaps.org/protocol.php
 * @see http://en.wikipedia.org/wiki/Sitemaps
 * @see http://en.wikipedia.org/wiki/Sitemap_index
 */
class SitemapGenerator {
    /** Largest number of URLs allowed in a single sitemap file. */
    const MAX_URLS_PER_SITEMAP = 50000;
    /** Largest allowed size of a single sitemap file, in bytes (10MB). */
    const MAX_SITEMAP_BYTES = 10485760;
    /** Largest number of sitemap files this class puts into one sitemap index. */
    const MAX_SITEMAPS_PER_INDEX = 1000;
    /** Largest allowed URL length, in characters. */
    const MAX_URL_LENGTH = 2048;

    /**
     * Name of sitemap file
     * @var string
     * @access public
     */
    public $sitemapFileName = "sitemap.xml";
    /**
     * Name of sitemap index file
     * @var string
     * @access public
     */
    public $sitemapIndexFileName = "sitemap-index.xml";
    /**
     * Robots file name
     * @var string
     * @access public
     */
    public $robotsFileName = "robots.txt";
    /**
     * Quantity of URLs per single sitemap file.
     * According to the specification the maximum value is 50,000.
     * If your links are very long, a sitemap file can become bigger than 10MB;
     * in this case use a smaller value.
     * @var int
     * @access public
     */
    public $maxURLsPerSitemap = 50000;
    /**
     * If true, two sitemap files (.xml and .xml.gz) will be created and added to robots.txt,
     * and the .gz file will be submitted to search engines.
     * If the URLs do not fit into a single sitemap file the option is ignored:
     * all sitemap files except the sitemap index are compressed.
     * @var bool
     * @access public
     */
    public $createGZipFile = false;
    /**
     * URL of the site, with / at the end.
     * It is used to build the public addresses of the sitemap files.
     * @var string
     * @access private
     */
    private $baseURL;
    /**
     * Base path, relative to the script location.
     * Use this if the sitemap and robots files should be stored in another
     * directory than the script.
     * @var string
     * @access private
     */
    private $basePath;
    /**
     * Version of this class
     * @var string
     * @access private
     */
    private $classVersion = "1.2.0";
    /**
     * Search engine ping URLs. The first entry holds the two Yahoo variants
     * (with and without an application id); submitSitemap() picks one of them.
     * NOTE(review): some of these endpoints may no longer be operated - verify before relying on them.
     * @var array of strings
     * @access private
     */
    private $searchEngines = array(
        array("http://search.yahooapis.com/SiteExplorerService/V1/updateNotification?appid=USERID&url=",
            "http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap="),
        "http://www.google.com/webmasters/tools/ping?sitemap=",
        "http://submissions.ask.com/ping?sitemap=",
        "http://www.bing.com/webmaster/ping.aspx?siteMap="
    );
    /**
     * Registered URLs; each entry has a 'loc' key and optionally
     * 'lastmod', 'changefreq' and 'priority'.
     * @var array of arrays of strings
     * @access private
     */
    private $urls = array();
    /**
     * Created sitemaps as array(file name, XML) pairs; null until createSitemap() ran.
     * @var array|null
     * @access private
     */
    private $sitemaps;
    /**
     * Sitemap index as array(file name, XML); null when a single sitemap is enough.
     * @var array|null
     * @access private
     */
    private $sitemapIndex;
    /**
     * Full URL of the file that is announced to search engines
     * (the sitemap index, or the single sitemap file).
     * @var string
     * @access private
     */
    private $sitemapFullURL;

    /**
     * Constructor.
     * @param string $baseURL Your site URL, with / at the end.
     * @param string|null $basePath Relative path where sitemap and robots should be stored.
     */
    public function __construct($baseURL, $basePath = "") {
        $this->baseURL = $baseURL;
        $this->basePath = $basePath;
    }
    /**
     * Use this to add many URLs at one time.
     * Each inner array can have 1 to 4 fields, in the order of the addUrl() arguments.
     * @param array of arrays of strings $urlsArray
     * @throws InvalidArgumentException when the argument or one of its entries is not an array
     */
    public function addUrls($urlsArray) {
        if (!is_array($urlsArray))
            throw new InvalidArgumentException("Array as argument should be given.");
        foreach ($urlsArray as $url) {
            // isset($string[0]) would silently pick the first character of a string entry
            if (!is_array($url))
                throw new InvalidArgumentException("Each URL entry should be an array with 1 to 4 fields.");
            $this->addUrl(isset($url[0]) ? $url[0] : null,
                    isset($url[1]) ? $url[1] : null,
                    isset($url[2]) ? $url[2] : null,
                    isset($url[3]) ? $url[3] : null);
        }
    }
    /**
     * Use this to add a single URL to the sitemap.
     * @param string $url URL
     * @param string $lastModified When it was modified, use ISO 8601
     * @param string $changeFrequency How often search engines should revisit this URL
     * @param string $priority Priority of URL on your site
     * @throws InvalidArgumentException when the URL is missing or longer than 2048 characters
     * @see http://en.wikipedia.org/wiki/ISO_8601
     * @see http://php.net/manual/en/function.date.php
     */
    public function addUrl($url, $lastModified = null, $changeFrequency = null, $priority = null) {
        if ($url == null)
            throw new InvalidArgumentException("URL is mandatory. At least one argument should be given.");
        // strlen() counts bytes, so without mbstring multi-byte URLs are measured too long
        $urlLength = function_exists('mb_strlen') ? mb_strlen($url, 'UTF-8') : strlen($url);
        if ($urlLength > self::MAX_URL_LENGTH)
            throw new InvalidArgumentException("URL length can't be bigger than 2048 characters. "
                ."Note, that precise url length check is guaranteed only using mbstring extension. "
                ."Make sure Your server allow to use mbstring extension.");
        $entry = array('loc' => $url);
        if (isset($lastModified)) $entry['lastmod'] = $lastModified;
        if (isset($changeFrequency)) $entry['changefreq'] = $changeFrequency;
        if (isset($priority)) $entry['priority'] = $priority;
        $this->urls[] = $entry;
    }
    /**
     * Create sitemap in memory.
     * May be called again after adding more URLs; the previous result is replaced.
     * @throws BadMethodCallException when no URL was added
     * @throws InvalidArgumentException when maxURLsPerSitemap is outside 1..50000
     * @throws LengthException when a sitemap file or the sitemap index would be too big
     */
    public function createSitemap() {
        if (empty($this->urls))
            throw new BadMethodCallException("To create sitemap, call addUrl or addUrls function first.");
        if ($this->maxURLsPerSitemap > self::MAX_URLS_PER_SITEMAP)
            throw new InvalidArgumentException("More than 50,000 URLs per single sitemap is not allowed.");
        if ($this->maxURLsPerSitemap < 1)
            throw new InvalidArgumentException("At least one URL per single sitemap is required.");

        // build into locals and publish at the end, so a failure keeps the previous result intact
        $documents = array();
        foreach (array_chunk($this->urls, (int) $this->maxURLsPerSitemap) as $chunk) {
            $xml = new SimpleXMLElement($this->_emptyDocument('urlset', 'sitemap.xsd'));
            foreach ($chunk as $url) {
                $row = $xml->addChild('url');
                // addChild() does not escape ampersands, values have to be escaped beforehand
                $row->addChild('loc', $this->_escapeXml($url['loc']));
                if (isset($url['lastmod'])) $row->addChild('lastmod', $this->_escapeXml($url['lastmod']));
                if (isset($url['changefreq'])) $row->addChild('changefreq', $this->_escapeXml($url['changefreq']));
                if (isset($url['priority'])) $row->addChild('priority', $this->_escapeXml($url['priority']));
            }
            $document = $xml->asXML();
            if (strlen($document) > self::MAX_SITEMAP_BYTES)
                throw new LengthException("Sitemap size is more than 10MB (10,485,760), "
                    ."please decrease maxURLsPerSitemap variable.");
            $documents[] = $document;
        }
        if (count($documents) > self::MAX_SITEMAPS_PER_INDEX)
            throw new LengthException("Sitemap index can contains 1000 single sitemaps. "
                ."Perhaps You trying to submit too many URLs.");

        $sitemaps = array();
        $sitemapIndex = null;
        if (count($documents) > 1) {
            // several files: number them, always gzip them, and describe them in an index
            $index = new SimpleXMLElement($this->_emptyDocument('sitemapindex', 'siteindex.xsd'));
            foreach ($documents as $i => $document) {
                $fileName = $this->_numberedFileName($i + 1);
                $sitemaps[] = array($fileName, $document);
                $row = $index->addChild('sitemap');
                $row->addChild('loc', $this->_escapeXml($this->baseURL.$fileName));
                $row->addChild('lastmod', date('c'));
            }
            $sitemapIndex = array($this->sitemapIndexFileName, $index->asXML());
            $fullURL = $this->baseURL.$this->sitemapIndexFileName;
        }
        else {
            $sitemaps[] = array($this->sitemapFileName, $documents[0]);
            $fullURL = $this->baseURL.$this->sitemapFileName;
            if ($this->createGZipFile)
                $fullURL .= ".gz";
        }
        $this->sitemaps = $sitemaps;
        $this->sitemapIndex = $sitemapIndex;
        $this->sitemapFullURL = $fullURL;
    }
    /**
     * Returns created sitemaps as array of array(file name, XML) pairs;
     * the sitemap index, when there is one, comes first.
     * Use it if you want to work with the sitemap without saving it as files.
     * @return array|null null when createSitemap() was not called yet
     * @access public
     */
    public function toArray() {
        if (isset($this->sitemapIndex))
            return array_merge(array($this->sitemapIndex), $this->sitemaps);
        return $this->sitemaps;
    }
    /**
     * Will write sitemaps as files into the base path.
     * @throws BadMethodCallException when createSitemap() was not called yet
     * @throws RuntimeException when a file can not be written
     * @access public
     */
    public function writeSitemap() {
        if (!isset($this->sitemaps))
            throw new BadMethodCallException("To write sitemap, call createSitemap function first.");
        if (isset($this->sitemapIndex)) {
            $this->_writeFile($this->sitemapIndex[1], $this->basePath, $this->sitemapIndex[0]);
            foreach ($this->sitemaps as $sitemap) {
                $this->_writeGZipFile($sitemap[1], $this->basePath, $sitemap[0]);
            }
            return;
        }
        list($fileName, $document) = $this->sitemaps[0];
        $this->_writeFile($document, $this->basePath, $fileName);
        if ($this->createGZipFile)
            $this->_writeGZipFile($document, $this->basePath, $fileName.".gz");
    }
    /**
     * If the robots.txt file exists, replaces its Sitemap entries with the newly created sitemaps.
     * If there is no robots.txt, creates one that allows everything and lists the sitemaps.
     * @throws BadMethodCallException when createSitemap() was not called yet
     * @throws RuntimeException when robots.txt can not be read or written
     * @access public
     */
    public function updateRobots() {
        if (!isset($this->sitemaps))
            throw new BadMethodCallException("To update robots.txt, call createSitemap function first.");
        $robotsPath = $this->basePath.$this->robotsFileName;

        // with gzip and a single sitemap both files exist; sitemapFullURL is already the .gz one
        $sitemapURLs = array();
        if ($this->createGZipFile && !isset($this->sitemapIndex))
            $sitemapURLs[] = $this->baseURL.$this->sitemaps[0][0];
        $sitemapURLs[] = $this->sitemapFullURL;
        $sitemapLines = "Sitemap: ".implode("\nSitemap: ", $sitemapURLs);

        if (file_exists($robotsPath)) {
            $current = file_get_contents($robotsPath);
            if ($current === false)
                throw new RuntimeException("Unable to read ".$robotsPath.".");
            $robotsFileContent = "";
            foreach (explode("\n", $current) as $line) {
                // drop every old Sitemap entry (the directive name is case-insensitive)
                if (stripos(ltrim($line), 'Sitemap:') === 0) continue;
                $robotsFileContent .= $line."\n";
            }
            $robotsFileContent .= $sitemapLines;
        }
        else {
            $robotsFileContent = "User-agent: *\nAllow: /\n\n".$sitemapLines;
        }
        if (file_put_contents($robotsPath, $robotsFileContent) === false)
            throw new RuntimeException("Unable to write ".$robotsPath.".");
    }
    /**
     * Will inform search engines about newly created sitemaps.
     * Every URL in the search engine list is requested once.
     * Without $yahooAppId the Yahoo ping URL is used instead of the update notification URL.
     * @param string $yahooAppId Your site Yahoo appid.
     * @return array of arrays with keys site, fullsite, http_code and message, one per search engine
     * @throws BadMethodCallException when createSitemap() was not called yet or cURL is missing
     * @access public
     */
    public function submitSitemap($yahooAppId = null) {
        if (!isset($this->sitemaps))
            throw new BadMethodCallException("To submit sitemap, call createSitemap function first.");
        if (!extension_loaded('curl'))
            throw new BadMethodCallException("cURL library is needed to do submission.");
        $searchEngines = $this->searchEngines;
        $searchEngines[0] = isset($yahooAppId)
            ? str_replace("USERID", urlencode($yahooAppId), $searchEngines[0][0])
            : $searchEngines[0][1];
        // the sitemap address travels as a query string value, so it is URL-encoded
        $encodedSitemapURL = urlencode($this->sitemapFullURL);
        $result = array();
        foreach ($searchEngines as $searchEngine) {
            $pingURL = $searchEngine.$encodedSitemapURL;
            $submitSite = curl_init($pingURL);
            curl_setopt($submitSite, CURLOPT_RETURNTRANSFER, true);
            $responseContent = curl_exec($submitSite);
            $response = curl_getinfo($submitSite);
            // a failed request has no body; report the transport error instead
            if ($responseContent === false)
                $responseContent = curl_error($submitSite);
            // "www.google.com" becomes array("com", "google", "www")
            $hostParts = array_reverse(explode(".", parse_url($searchEngine, PHP_URL_HOST)));
            $result[] = array("site"=>$hostParts[1].".".$hostParts[0],
                "fullsite"=>$pingURL,
                "http_code"=>$response['http_code'],
                "message"=>str_replace("\n", " ", strip_tags($responseContent)));
        }
        return $result;
    }
    /**
     * Builds an empty sitemap document: XML declaration, generator comments
     * and a root element bound to the sitemap namespace and schema.
     * @param string $rootElement Root element name (urlset or sitemapindex)
     * @param string $schemaFile Schema file name on sitemaps.org for that root element
     * @return string
     * @access private
     */
    private function _emptyDocument($rootElement, $schemaFile) {
        return '<?xml version="1.0" encoding="UTF-8"?>'
            .'<!-- generator="SitemapGenerator/'.$this->classVersion.'" -->'
            .'<!-- generated-on="'.date('c').'" -->'
            .'<'.$rootElement
            .' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
            .' xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9'
            .' http://www.sitemaps.org/schemas/sitemap/0.9/'.$schemaFile.'"'
            .' xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
            .'</'.$rootElement.'>';
    }
    /**
     * Escapes a value so it can be used as XML text.
     * @param string $value
     * @return string
     * @access private
     */
    private function _escapeXml($value) {
        return htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8');
    }
    /**
     * Name of the n-th compressed sitemap file, e.g. sitemap.xml gives sitemap1.xml.gz.
     * @param int $number Position of the file, starting at 1
     * @return string
     * @access private
     */
    private function _numberedFileName($number) {
        // without ".xml" in the name nothing would be replaced and all files would share one name
        if (strpos($this->sitemapFileName, ".xml") === false)
            return $this->sitemapFileName.$number.".xml.gz";
        return str_replace(".xml", $number.".xml.gz", $this->sitemapFileName);
    }
    /**
     * Save file.
     * @param string $content
     * @param string $filePath
     * @param string $fileName
     * @return bool true on success
     * @throws RuntimeException when the file can not be opened or written
     * @access private
     */
    private function _writeFile($content, $filePath, $fileName) {
        $file = fopen($filePath.$fileName, 'w');
        if ($file === false)
            throw new RuntimeException("Unable to open ".$filePath.$fileName." for writing.");
        $written = fwrite($file, $content);
        $closed = fclose($file);
        if ($written === false || !$closed)
            throw new RuntimeException("Unable to write ".$filePath.$fileName.".");
        return true;
    }
    /**
     * Save GZipped file.
     * @param string $content
     * @param string $filePath
     * @param string $fileName
     * @return bool true on success
     * @throws RuntimeException when zlib is missing or the file can not be opened or written
     * @access private
     */
    private function _writeGZipFile($content, $filePath, $fileName) {
        if (!function_exists('gzopen'))
            throw new RuntimeException("zlib extension is needed to write compressed sitemaps.");
        $file = gzopen($filePath.$fileName, 'w');
        if ($file === false)
            throw new RuntimeException("Unable to open ".$filePath.$fileName." for writing.");
        $written = gzwrite($file, $content);
        $closed = gzclose($file);
        if ($written === false || !$closed)
            throw new RuntimeException("Unable to write ".$filePath.$fileName.".");
        return true;
    }
}
349 | ?>
350 |
--------------------------------------------------------------------------------