├── .gitignore
├── demo.gif
├── data.xlsx
├── README.md
└── seo-checklist.py
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 |
--------------------------------------------------------------------------------
/demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sundios/technical-seo-checklist/HEAD/demo.gif
--------------------------------------------------------------------------------
/data.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sundios/technical-seo-checklist/HEAD/data.xlsx
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # technical-seo-checklist
2 | This automated technical SEO checklist helps you check the technical on-page SEO of a URL in a few minutes.
3 |
4 |
5 | Work in progress.
6 |
7 | # SEO Checks
8 |
9 | - Mobile Friendly
10 | - Bot Accessibility
11 | - Indexation Status
12 | - Robots meta tag
13 | - X-Robots-Tag
14 | - Canonicals
15 | - Schema
16 | - Core Web Vitals
17 |
18 | ## Usage
19 |
20 | Open the terminal and run the following:
21 |
22 | ```zsh
23 | python seo-checklist.py
24 | ```
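
The script relies on a handful of third-party packages (`requests`, `beautifulsoup4`, `pandas`, `termcolor`, `halo`, plus `openpyxl` for the Excel export). If they are not already installed, something along these lines should cover them:

```zsh
pip install requests beautifulsoup4 pandas termcolor halo openpyxl
```

You will be prompted for the URL to check; each result is printed to the terminal and the combined report is saved as `data.xlsx` in the working directory.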
25 |
26 | ## Contributing
27 |
28 | Pull requests are welcome.
29 |
30 |
31 | # Demo
32 | 
33 |
--------------------------------------------------------------------------------
/seo-checklist.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Mon Apr 17 11:04:15 2023
5 |
6 | @author: konradburchardtpizzaro-local
7 | """
8 |
9 | # HTTP client used for page fetches and the PageSpeed Insights API
10 | import requests
11 |
12 | # HTML parsing, result handling, and colored console output
13 | from bs4 import BeautifulSoup
14 | import pandas as pd
15 | from termcolor import colored
16 |
17 | # Terminal spinner shown while the checks run
18 | from halo import Halo
19 |
20 | import json
21 |
22 |
23 |
24 | #https://www.semrush.com/blog/on-page-seo-checklist
25 |
26 | # URL of the page you want to check
27 | url = input("Enter the URL of the page you want to check: ").strip()
28 |
29 |
30 |
31 | #Function that runs all other checklist functions
32 | def checklist(url):
33 |     '''
34 |     Run every check against the given URL and export the results.
35 |
36 |     Parameters
37 |     ----------
38 |     url : str
39 |         The URL to check.
40 |
41 |     Returns
42 |     -------
43 |     None. Results are printed to the terminal and written to data.xlsx.
44 |     '''
45 |
46 |     # Make sure the URL has a scheme; default to https
47 |     if not url.startswith(("http://", "https://")):
48 |         url = "https://" + url
49 |
50 |     # Create the DataFrame that will collect every check result
51 |     df = pd.DataFrame()
52 |
53 | # Set up the spinner animation
54 | spinner = Halo(text='', spinner='dots')
55 | # Start the spinner
56 | spinner.start()
57 |
58 |     # Run each checklist function; each one appends its result to df
59 | df = mobile_friendly(url,df)
60 | df = bot_accessibility(url,df)
61 | df = indexation_status(url,df)
62 | df = robots_meta_tag(url,df)
63 | df = check_x_robots_tag_noindex(url,df)
64 | df = check_canonical(url,df)
65 | df = check_schema_org(url, df)
66 | df = core_web_vitals(url, df)
67 |
68 |
69 |
70 | # Stop the spinner
71 | spinner.stop_and_persist(symbol='🤖'.encode('utf-8'), text='All Checks have been finalized!')
72 |
73 | df.to_excel('data.xlsx', index=False)
74 |
75 | print(df)
76 |
77 |
78 |
79 | # =============================================================================
80 | # Mobile Friendly
81 | # =============================================================================
82 |
83 |
84 | def mobile_friendly(url,df):
85 | '''
86 | Function that checks if URL is mobile friendly. It uses viewport
87 |
88 | Parameters
89 | ----------
90 | url (str): The URL to check.
91 | df (pandas.DataFrame): The pandas DataFrame to append the result to.
92 |
93 | Returns
94 | -------
95 | pandas.DataFrame: The updated pandas DataFrame.
96 |
97 | '''
98 | print(colored("- Is the Page Mobile Friendly?" ,'black',attrs=['bold']))
99 | try:
100 | # Send a GET request to the URL
101 | response = requests.get(url)
102 |
103 | # Parse the HTML content of the response
104 | soup = BeautifulSoup(response.content, 'html.parser')
105 |
106 | # Check if the meta viewport tag exists
107 | viewport_tag = soup.find('meta', attrs={'name': 'viewport'})
108 | if viewport_tag is None:
109 | print(f"{url} is not mobile-friendly ❌")
110 | a = f"{url} is not mobile-friendly ❌ "
111 | else:
112 | a = f"{url} is mobile-friendly ✅"
113 | print(f"{url} is mobile-friendly ✅ ")
114 |
115 | # Create a new DataFrame with the row(s) to append
116 | new_row = pd.DataFrame({'URL': [url], 'Mobile Friendly': [a]})
117 |
118 | # Concatenate the new DataFrame with the existing DataFrame
119 | df = pd.concat([df, new_row], ignore_index=True)
120 |
121 | except Exception as e:
122 | # Handle the exception
123 | print(f"Mobile Friendly Check failed with error: {e}🚫🚫🚫🚫")
124 |
125 | # Create a new DataFrame with the row(s) to append
126 | new_row = pd.DataFrame({'URL': [url], 'Mobile Friendly': f'Mobile Friendly Check failed with error: {e} '})
127 |
128 | # Concatenate the new DataFrame with the existing DataFrame
129 | df = pd.concat([df, new_row], ignore_index=True)
130 |
131 | return df
132 |
133 |
134 | # =============================================================================
135 | # Core Web Vitals
136 | # =============================================================================
137 |
138 | def cwv_threshold(value, threshold1, threshold2):
139 | '''
140 | Given a value and two thresholds, return a string indicating the quality of the value.
141 |
142 | Parameters
143 | ----------
144 | value : int or float
145 | The value to be evaluated.
146 | threshold1 : int or float
147 | The lower threshold value.
148 | threshold2 : int or float
149 | The upper threshold value.
150 |
151 | Returns
152 | -------
153 | str
154 | A string indicating the quality of the value, which can be one of:
155 | - "good ✅" if the value is less than threshold1.
156 | - "needs improvement ⚠️" if the value is between threshold1 and threshold2 (inclusive).
157 | - "poor ❌" if the value is greater than threshold2.
158 | - "invalid input" if the value is not a number.
159 |
160 | '''
161 | if value < threshold1:
162 | return "good ✅"
163 |     elif threshold1 <= value <= threshold2:
164 | return "needs improvement ⚠️"
165 | elif value > threshold2:
166 | return "poor ❌"
167 | else:
168 | return "invalid input"
169 |
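# A quick illustration with hypothetical values: cwv_threshold(2100, 2500, 4000) returns
# "good ✅", while cwv_threshold(4500, 2500, 4000) returns "poor ❌". The threshold pairs
# passed in core_web_vitals() below follow Google's published boundaries for each metric
# (for example 2500/4000 ms for LCP and 0.1/0.25 for CLS).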
170 |
171 | def core_web_vitals(url,df):
172 |
173 | '''
174 | Function that checks the core web vitals of a URL using the PageSpeed Insights API.
175 |
176 | Parameters
177 | ----------
178 | url (str): The URL to check.
179 | df (pandas.DataFrame): The pandas DataFrame to append the result to.
180 |
181 | Returns
182 | -------
183 | pandas.DataFrame: The updated pandas DataFrame.
184 |
185 | '''
186 |
187 | # Define the endpoint URL for the PageSpeed Insights API
188 | endpoint = 'https://www.googleapis.com/pagespeedonline/v5/runPagespeed'
189 |
190 | # Define the parameters for the API request
191 | params = {
192 | 'url': url,
193 | 'strategy': 'mobile' # or 'desktop'
194 | }
195 |
196 | # Make the API request
197 | response = requests.get(endpoint, params=params)
198 |
199 | # Check the response status code
200 | if response.status_code == 200:
201 | # Parse the response JSON
202 | data = response.json()
203 | # Extract the performance score for DF
204 | lcp = data['lighthouseResult']['audits']['largest-contentful-paint']['displayValue']
205 | cls = data['lighthouseResult']['audits']['cumulative-layout-shift']['displayValue']
206 | si = data['lighthouseResult']['audits']['speed-index']['displayValue']
207 | fcp = data['lighthouseResult']['audits']['first-contentful-paint']['displayValue']
208 | tbt = data['lighthouseResult']['audits']['total-blocking-time']['displayValue']
209 | #tti = data['lighthouseResult']['audits']['interactive']['displayValue']
210 | #score = data['lighthouseResult']['categories']['performance']['score']
211 |
212 | #Extract the scores for thresholds.
213 | lcp_int = data['lighthouseResult']['audits']['largest-contentful-paint']['numericValue']
214 | cls_int = data['lighthouseResult']['audits']['cumulative-layout-shift']['numericValue']
215 | si_int = data['lighthouseResult']['audits']['speed-index']['numericValue']
216 | fcp_int = data['lighthouseResult']['audits']['first-contentful-paint']['numericValue']
217 | tbt_int = data['lighthouseResult']['audits']['total-blocking-time']['numericValue']
218 | #tti_int= data['lighthouseResult']['audits']['interactive']['numericValue']
219 |
220 |
221 |
222 |         # Evaluate each metric against its good / needs-improvement / poor thresholds
223 | lcp_row = [cwv_threshold(lcp_int,2500,4000), lcp ]
224 | cls_row = [cwv_threshold(cls_int,0.1,0.25), cls]
225 | si_row = [cwv_threshold(si_int,3400,5800), si]
226 | fcp_row = [cwv_threshold(fcp_int,1800,3000), fcp]
227 | tbt_row = [cwv_threshold(tbt_int,200,600), tbt]
228 |
229 |
230 | # Create a new DataFrame with the row(s) to append
231 | cwv_new_row = pd.DataFrame({'Largest Contentful Paint': [lcp_row[1]], 'LCP Result': [lcp_row[0]],
232 | 'Cumulative Layout Shift': [cls_row[1]], 'CLS Results': [cls_row[0]],
233 | 'Speed Index': [si_row[1]], 'SI Result': [si_row[0]],
234 | 'First Contentful Paint': [fcp_row[1]], 'FCP Result': [fcp_row[0]],
235 | 'Total Blocking Time': [tbt_row[1]], 'TBT Result': [tbt_row[0]]})
236 |
237 | # Concatenate the new DataFrame with the existing DataFrame
238 | df = pd.concat([df, cwv_new_row], axis=1)
239 | print(colored(f"- Core Web Vitals Performance score for {url}:" ,'black',attrs=['bold']))
240 | print(f"- Largest Contentful Paint: {lcp} - {lcp_row[0]}")
241 | print(f'- Cumulative Layout Shift: {cls} - {cls_row[0]}')
242 | print(f'- Speed Index: {si} - {si_row[0]}')
243 | print(f'- First Contentful Paint: {fcp} - {fcp_row[0]}')
244 | print(f'- Total Blocking Time: {tbt} - {tbt_row[0]}')
245 | else:
246 | print(f"Error {response.status_code}: {response.text}")
247 |
248 | return df
249 |
250 |
251 |
252 | # =============================================================================
253 | # Check indexation Status of URL
254 | # =============================================================================
255 |
256 | def indexation_status(url,df):
257 |
258 | '''
259 |     Function that checks if the URL is currently indexed on Google by scraping a site: search. This is a heuristic and may be blocked or rate-limited by Google.
260 |
261 | Parameters
262 | ----------
263 | url (str): The URL to check.
264 | df (pandas.DataFrame): The pandas DataFrame to append the result to.
265 |
266 | Returns
267 | -------
268 | pandas.DataFrame: The updated pandas DataFrame.
269 |
270 | '''
271 | print(colored("- Is the Page indexed in Google?" ,'black',attrs=['bold']))
272 | try:
273 | # Define the search query
274 | query = f"site:{url}"
275 |
276 | # Define the URL for the Google search results page
277 | google_url = f"https://www.google.com/search?q={query}"
278 |
279 | # Set the user agent header
280 | headers = {
281 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"}
282 |
283 | # Make the HTTP GET request to Google with the user agent header
284 | response = requests.get(google_url, headers=headers)
285 |
286 |
287 | # Parse the HTML using BS4
288 | soup = BeautifulSoup(response.text, "html.parser")
289 |
290 | # Check for search results containing the URL
291 | search_results = soup.find_all('a', href=True)
292 |
293 | url_indexed = any(url in link['href'] for link in search_results)
294 |
295 |         # Print the results
296 | if url_indexed:
297 | print(f"{url} is indexed in Google. ✅")
298 | a = f"{url} is indexed in Google. ✅"
299 | else:
300 | print(f"{url} is not indexed in Google.❌")
301 | a = f"{url} is not indexed in Google.❌"
302 |
303 |
304 | # Create a new DataFrame with the row(s) to append
305 | new_row = pd.DataFrame({'Indexation': [a]})
306 |
307 | # Concatenate the new DataFrame with the existing DataFrame
308 | df = pd.concat([df, new_row], axis=1)
309 |
310 | except Exception as e:
311 | # Handle the exception
312 | print(f"Indexation Check failed with error: {e}🚫🚫🚫🚫")
313 |
314 | # Create a new DataFrame with the row(s) to append
315 |         new_row = pd.DataFrame({'Indexation': [f'Indexation Check failed with error: {e}']})
316 |
317 | # Concatenate the new DataFrame with the existing DataFrame
318 | df = pd.concat([df, new_row], axis=1)
319 |
320 | return df
321 |
322 | def bot_accessibility(url,df):
323 |     '''Check whether the URL returns HTTP 200 for a set of common search engine bot user agents.'''
324 | user_agents = {
325 | "GoogleBot": "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
326 | "Bingbot":"Mozilla/5.0 (compatible; Bingbot/2.0; +http://www.bing.com/bingbot.htm)",
327 | "Yahoo Slurp":"Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)",
328 | "DuckDuckGo":"DuckDuckBot/1.0; (+http://duckduckgo.com/duckduckbot.html)",
329 | "Baidu":"Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)",
330 | "Yandex":"Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)",
331 |         "Applebot":"Mozilla/5.0 (Device; OS_version) AppleWebKit/WebKit_version (KHTML, like Gecko)"  # Apple's documented UA pattern; placeholders left verbatim
332 | }
333 |
334 |     print(colored("- Is the page accessible for bots?", 'black', attrs=['bold']))
335 |
336 |
337 | for key, user_agent in user_agents.items():
338 | try:
339 | response = requests.get(url, headers={"User-Agent": user_agent})
340 | print(key, response)
341 |
342 |             if response.status_code == 200:
343 |                 print(f"{url} is accessible for {key} ✅")
344 |                 a = f"Response {response.status_code}. {url} is accessible for {key} ✅"
345 |             else:
346 |                 print(f"The page {url} is not accessible for {key} ❌")
347 |                 a = f"Response {response.status_code}. {url} is not accessible for {key} ❌"
348 |
349 | # Create a new DataFrame with the row(s) to append
350 | new_row = pd.DataFrame({key: [a]})
351 |
352 | # Concatenate the new DataFrame with the existing DataFrame
353 | df = pd.concat([df, new_row], axis=1)
354 |
355 |
356 | except Exception as e:
357 | # Handle the exception
358 | print(f"Bot Accessibility Check failed with error: {e}🚫🚫🚫🚫")
359 |
360 | # Create a new DataFrame with the row(s) to append
361 |             new_row = pd.DataFrame({'Bot Accessibility': [f'Bot Accessibility Check failed with error: {e}']})
362 |
363 | # Concatenate the new DataFrame with the existing DataFrame
364 | df = pd.concat([df, new_row], axis=1)
365 |
366 |
367 | return df
368 |
369 |
370 | def robots_meta_tag(url, df):
371 |     '''Check whether the page's robots meta tag contains noindex.'''
372 |
373 |     print(colored("- Indexability #1 - Does the page contain a noindex robots meta tag in the <head>?", 'black', attrs=['bold']))
374 | try:
375 | response = requests.get(url)
376 |
377 | soup = BeautifulSoup(response.text, 'html.parser')
378 | meta_robots = soup.find('meta', attrs={'name': 'robots'})
379 |
380 | if meta_robots and 'noindex' in meta_robots.get('content', ''):
381 |             print(f'The URL {url} is not indexable: its robots meta tag contains noindex. ❌')
382 |             a = f'The URL {url} is not indexable: its robots meta tag contains noindex. ❌'
383 |
384 |         else:
385 |             print(f'The URL {url} does not contain a noindex robots meta tag in the <head>. ✅')
386 |             a = f'The URL {url} does not contain a noindex robots meta tag in the <head>. ✅'
387 |
388 | # Create a new DataFrame with the row(s) to append
389 | new_row = pd.DataFrame({"No index Meta Tag": [a]})
390 |
391 | # Concatenate the new DataFrame with the existing DataFrame
392 | df = pd.concat([df, new_row], axis=1)
393 |
394 | except requests.exceptions.RequestException as e:
395 | # Handle the exception
396 | print(f"No index test failed with errors: {e}🚫🚫🚫🚫")
397 |
398 | # Create a new DataFrame with the row(s) to append
399 |         new_row = pd.DataFrame({'No index Meta Tag': [f'No index test failed with errors: {e}']})
400 |
401 | # Concatenate the new DataFrame with the existing DataFrame
402 | df = pd.concat([df, new_row], axis=1)
403 | return df
404 |
405 |
406 |
407 |
408 | def check_x_robots_tag_noindex(url,df):
409 |     '''Check whether the HTTP response carries an X-Robots-Tag header with noindex or none.'''
410 |     print(colored("- Indexability #2 - Does the page return an X-Robots-Tag: noindex HTTP response header?", 'black', attrs=['bold']))
411 |
412 | try:
413 | response = requests.get(url)
414 |
415 | x_robots_tag = response.headers.get('X-Robots-Tag')
416 |
417 | if x_robots_tag and ('noindex' in x_robots_tag or 'none' in x_robots_tag):
418 | print(f'The URL {url} is not indexable. It contains the HTTP response header: X-Robots-Tag: noindex ❌')
419 | a = f"The URL {url} is not indexable. It contains the HTTP response header: X-Robots-Tag: noindex ❌"
420 | else:
421 |             print(f'The URL {url} is indexable. It does not contain the HTTP response header X-Robots-Tag: noindex ✅')
422 |             a = f'The URL {url} is indexable. It does not contain the HTTP response header X-Robots-Tag: noindex ✅'
423 |
424 | # Create a new DataFrame with the row(s) to append
425 | new_row = pd.DataFrame({"No index Response Header": [a]})
426 |
427 | # Concatenate the new DataFrame with the existing DataFrame
428 | df = pd.concat([df, new_row], axis=1)
429 |
430 | except requests.exceptions.RequestException as e:
431 | # Handle the exception
432 | print(f"No index Response header test failed with errors: {e}🚫🚫🚫🚫")
433 |
434 | # Create a new DataFrame with the row(s) to append
435 |         new_row = pd.DataFrame({'No index Response Header': [f'No index response header check failed with errors: {e}']})
436 |
437 | # Concatenate the new DataFrame with the existing DataFrame
438 | df = pd.concat([df, new_row], axis=1)
439 |
440 | return df
441 |
442 |
443 | def check_canonical(url,df):
444 |     '''Check whether the page declares a canonical URL and whether it is self-referencing.'''
445 |     print(colored("- Indexability #3 - Is the page self-canonical?", 'black', attrs=['bold']))
446 | try:
447 | response = requests.get(url)
448 | if response.status_code == 200:
449 | soup = BeautifulSoup(response.content, 'html.parser')
450 | canonical_tag = soup.find('link', {'rel': 'canonical'})
451 |             canonical_url = canonical_tag.get('href') if canonical_tag else None
452 |
453 | if canonical_url == url:
454 | print(f'The URL {url} is indexable. The url is self canonicalized. {url} = {canonical_url} ✅')
455 | a = f'The URL {url} is indexable. The url is self canonicalized. {url} = {canonical_url} ✅'
456 |
457 | else:
458 |                 print(f'The URL {url} is not indexable. The canonical URL ({canonical_url}) differs from the page URL: {url} ≠ {canonical_url} ❌')
459 |                 a = f'The URL {url} is not indexable. The canonical URL ({canonical_url}) differs from the page URL: {url} ≠ {canonical_url} ❌'
460 |
461 | #Create a new DataFrame with the row(s) to append
462 | new_row = pd.DataFrame({"Canonical": [a]})
463 |
464 | # Concatenate the new DataFrame with the existing DataFrame
465 | df = pd.concat([df, new_row], axis=1)
466 |
467 | else:
468 |             print(f'The URL {url} is not indexable. The page has a status code of {response.status_code} ❌')
469 |             a = f'The URL {url} is not indexable. The page has a status code of {response.status_code} ❌'
470 |
471 | #Create a new DataFrame with the row(s) to append
472 | new_row = pd.DataFrame({"Canonical": [a]})
473 |
474 | # Concatenate the new DataFrame with the existing DataFrame
475 | df = pd.concat([df, new_row], axis=1)
476 |
477 |
478 |
479 |     except requests.exceptions.RequestException as e:
480 |         # Handle the exception
481 |         print(f"Canonical check failed with errors: {e}🚫🚫🚫🚫")
482 |
483 |         # Create a new DataFrame with the row(s) to append
484 |         new_row = pd.DataFrame({'Canonical': [f'Canonical check failed with errors: {e}']})
485 |
486 |         # Concatenate the new DataFrame with the existing DataFrame
487 |         df = pd.concat([df, new_row], axis=1)
488 |
489 | return df
490 |
491 | def check_schema_org(url, df):
492 | print(colored("- Schema.org Check -", 'black', attrs=['bold']))
493 | try:
494 | response = requests.get(url)
495 | if response.status_code == 200:
496 | soup = BeautifulSoup(response.content, 'html.parser')
497 | schema_types = set()
498 |
499 | # JSON-LD
500 | for script_tag in soup.find_all('script', type='application/ld+json'):
501 | try:
502 | data = json.loads(script_tag.string)
503 | if isinstance(data, list):
504 | for item in data:
505 | if '@type' in item:
506 | schema_types.add(item['@type'])
507 | elif '@type' in data:
508 | schema_types.add(data['@type'])
509 |                 except (json.JSONDecodeError, TypeError):  # script body may be empty or not valid JSON
510 | pass
511 |
512 | # Microdata
513 | for microdata_tag in soup.find_all(attrs={"itemtype": True}):
514 | schema_types.add(microdata_tag['itemtype'])
515 |
516 | # RDFa
517 | for rdfa_tag in soup.find_all(attrs={"typeof": True}):
518 | schema_types.add(rdfa_tag['typeof'])
519 |
520 | if schema_types:
521 | print(f"The URL {url} has schema.org structure(s): {', '.join(schema_types)} ✅")
522 | a = f"The URL {url} has schema.org structure(s): {', '.join(schema_types)} ✅"
523 | else:
524 | print(f"The URL {url} does not have any identifiable schema.org structures ❌")
525 | a = f"The URL {url} does not have any identifiable schema.org structures ❌"
526 | else:
527 | print(f"The URL {url} could not be accessed. The page has a status code of {response.status_code} ❌")
528 | a = f"The URL {url} could not be accessed. The page has a status code of {response.status_code} ❌"
529 |
530 | except requests.exceptions.RequestException as e:
531 | print(f"Schema.org check failed with errors: {e} 🚫")
532 | a = f"Schema.org check failed with errors: {e} 🚫"
533 |
534 | # Create a new DataFrame with the row(s) to append
535 | new_row = pd.DataFrame({"Schema.org": [a]})
536 |
537 | # Concatenate the new DataFrame with the existing DataFrame
538 | df = pd.concat([df, new_row], axis=1)
539 |
540 | return df
541 |
542 |
543 |
544 |
545 |
546 |
547 | checklist(url)
548 |
549 |
550 |
551 |
--------------------------------------------------------------------------------