├── .gitignore
├── demo.gif
├── data.xlsx
├── README.md
└── seo-checklist.py

/.gitignore:
--------------------------------------------------------------------------------
.DS_Store
--------------------------------------------------------------------------------
/demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sundios/technical-seo-checklist/HEAD/demo.gif
--------------------------------------------------------------------------------
/data.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sundios/technical-seo-checklist/HEAD/data.xlsx
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# technical-seo-checklist

This automated technical SEO checklist helps you check the on-page SEO of a URL in a few minutes.

Work in progress.

# SEO Checks

- Mobile Friendly
- Bot Accessibility
- Indexation Status
- Robots meta tag
- X-Robots-Tag
- Canonicals
- Schema.org
- Core Web Vitals

## Usage

Open a terminal and run the following (dependencies are listed below):

```zsh
python seo-checklist.py
```
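
The dependencies are not pinned anywhere in the repo, so the following install line is a best guess based on the script's imports (`requests`, `bs4`, `pandas`, `termcolor`, `halo`) plus `openpyxl`, which pandas needs to write the `data.xlsx` export:

```zsh
pip install requests beautifulsoup4 pandas termcolor halo openpyxl
```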

## Contributing

Pull requests are welcome.

# Demo

![](demo.gif)
--------------------------------------------------------------------------------
/seo-checklist.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Apr 17 11:04:15 2023

@author: konradburchardtpizzaro-local
"""

# HTTP requests (page fetches and the PageSpeed Insights API)
import requests

# HTML parsing and the results table
from bs4 import BeautifulSoup
import pandas as pd
from termcolor import colored

# Spinner animation while the checks run
from halo import Halo

import json


# Reference: https://www.semrush.com/blog/on-page-seo-checklist

# URL of the page you want to check
url = input("Enter the URL of the page you want to check: ")


# Function that runs all the other checklist functions
def checklist(url):
    '''
    Run every check against the given URL, print the results and export them.

    Parameters
    ----------
    url (str): The URL to check.

    Returns
    -------
    None. The combined results are printed and written to data.xlsx.
    '''

    # Make sure the URL has https
    if not url.startswith("https://"):
        url = "https://" + url

    # Create the DataFrame where we will store all checks
    df = pd.DataFrame(columns=[])

    # Set up the spinner animation
    spinner = Halo(text='', spinner='dots')
    # Start the spinner
    spinner.start()

    # Checklist functions start here
    df = mobile_friendly(url, df)
    df = bot_accessibility(url, df)
    df = indexation_status(url, df)
    df = robots_meta_tag(url, df)
    df = check_x_robots_tag_noindex(url, df)
    df = check_canonical(url, df)
    df = check_schema_org(url, df)
    df = core_web_vitals(url, df)

    # Stop the spinner
    spinner.stop_and_persist(symbol='🤖', text='All checks have been finalized!')

    df.to_excel('data.xlsx', index=False)

    print(df)

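
# Note: each check function appends its result as one or more columns to df, so the
# exported data.xlsx ends up as a single row per run. Roughly (illustrative values):
#
#   URL                  | Mobile Friendly                           | GoogleBot            | ... | LCP Result
#   https://example.com  | https://example.com is mobile-friendly ✅ | Response 200. ... ✅ | ... | good ✅
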

# =============================================================================
# Mobile Friendly
# =============================================================================


def mobile_friendly(url, df):
    '''
    Function that checks if a URL is mobile friendly by looking for a viewport meta tag.

    Parameters
    ----------
    url (str): The URL to check.
    df (pandas.DataFrame): The pandas DataFrame to append the result to.

    Returns
    -------
    pandas.DataFrame: The updated pandas DataFrame.

    '''
    print(colored("- Is the page mobile friendly?", 'black', attrs=['bold']))
    try:
        # Send a GET request to the URL
        response = requests.get(url)

        # Parse the HTML content of the response
        soup = BeautifulSoup(response.content, 'html.parser')

        # Check if the meta viewport tag exists
        viewport_tag = soup.find('meta', attrs={'name': 'viewport'})
        if viewport_tag is None:
            print(f"{url} is not mobile-friendly ❌")
            a = f"{url} is not mobile-friendly ❌"
        else:
            print(f"{url} is mobile-friendly ✅")
            a = f"{url} is mobile-friendly ✅"

        # Create a new DataFrame with the row to append
        new_row = pd.DataFrame({'URL': [url], 'Mobile Friendly': [a]})

        # Concatenate the new DataFrame with the existing DataFrame
        df = pd.concat([df, new_row], ignore_index=True)

    except Exception as e:
        # Handle the exception
        print(f"Mobile friendly check failed with error: {e} 🚫🚫🚫🚫")

        # Create a new DataFrame with the row to append
        new_row = pd.DataFrame({'URL': [url], 'Mobile Friendly': [f'Mobile friendly check failed with error: {e}']})

        # Concatenate the new DataFrame with the existing DataFrame
        df = pd.concat([df, new_row], ignore_index=True)

    return df


# =============================================================================
# Core Web Vitals
# =============================================================================

def cwv_threshold(value, threshold1, threshold2):
    '''
    Given a value and two thresholds, return a string indicating the quality of the value.

    Parameters
    ----------
    value : int or float
        The value to be evaluated.
    threshold1 : int or float
        The lower threshold value.
    threshold2 : int or float
        The upper threshold value.

    Returns
    -------
    str
        A string indicating the quality of the value, which is one of:
        - "good ✅" if the value is less than threshold1.
        - "needs improvement ⚠️" if the value is between threshold1 and threshold2 (inclusive).
        - "poor ❌" if the value is greater than threshold2.
        - "invalid input" if none of the comparisons hold (e.g. the value is NaN).

    '''
    if value < threshold1:
        return "good ✅"
    elif value >= threshold1 and value <= threshold2:
        return "needs improvement ⚠️"
    elif value > threshold2:
        return "poor ❌"
    else:
        return "invalid input"

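
# A few illustrative calls, using the LCP thresholds (2500 ms / 4000 ms) that
# core_web_vitals() passes in further down:
#   cwv_threshold(1200, 2500, 4000)  -> "good ✅"
#   cwv_threshold(3000, 2500, 4000)  -> "needs improvement ⚠️"
#   cwv_threshold(5000, 2500, 4000)  -> "poor ❌"
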

def core_web_vitals(url, df):
    '''
    Function that checks the Core Web Vitals of a URL using the PageSpeed Insights API.

    Parameters
    ----------
    url (str): The URL to check.
    df (pandas.DataFrame): The pandas DataFrame to append the result to.

    Returns
    -------
    pandas.DataFrame: The updated pandas DataFrame.

    '''

    # Define the endpoint URL for the PageSpeed Insights API
    endpoint = 'https://www.googleapis.com/pagespeedonline/v5/runPagespeed'

    # Define the parameters for the API request
    params = {
        'url': url,
        'strategy': 'mobile'  # or 'desktop'
    }

    # Make the API request
    response = requests.get(endpoint, params=params)

    # Check the response status code
    if response.status_code == 200:
        # Parse the response JSON
        data = response.json()

        # Extract the display values for the DataFrame
        lcp = data['lighthouseResult']['audits']['largest-contentful-paint']['displayValue']
        cls = data['lighthouseResult']['audits']['cumulative-layout-shift']['displayValue']
        si = data['lighthouseResult']['audits']['speed-index']['displayValue']
        fcp = data['lighthouseResult']['audits']['first-contentful-paint']['displayValue']
        tbt = data['lighthouseResult']['audits']['total-blocking-time']['displayValue']
        #tti = data['lighthouseResult']['audits']['interactive']['displayValue']
        #score = data['lighthouseResult']['categories']['performance']['score']

        # Extract the numeric values for the threshold checks
        lcp_int = data['lighthouseResult']['audits']['largest-contentful-paint']['numericValue']
        cls_int = data['lighthouseResult']['audits']['cumulative-layout-shift']['numericValue']
        si_int = data['lighthouseResult']['audits']['speed-index']['numericValue']
        fcp_int = data['lighthouseResult']['audits']['first-contentful-paint']['numericValue']
        tbt_int = data['lighthouseResult']['audits']['total-blocking-time']['numericValue']
        #tti_int = data['lighthouseResult']['audits']['interactive']['numericValue']

        # Check each value against its thresholds
        lcp_row = [cwv_threshold(lcp_int, 2500, 4000), lcp]
        cls_row = [cwv_threshold(cls_int, 0.1, 0.25), cls]
        si_row = [cwv_threshold(si_int, 3400, 5800), si]
        fcp_row = [cwv_threshold(fcp_int, 1800, 3000), fcp]
        tbt_row = [cwv_threshold(tbt_int, 200, 600), tbt]

        # Create a new DataFrame with the row to append
        cwv_new_row = pd.DataFrame({'Largest Contentful Paint': [lcp_row[1]], 'LCP Result': [lcp_row[0]],
                                    'Cumulative Layout Shift': [cls_row[1]], 'CLS Result': [cls_row[0]],
                                    'Speed Index': [si_row[1]], 'SI Result': [si_row[0]],
                                    'First Contentful Paint': [fcp_row[1]], 'FCP Result': [fcp_row[0]],
                                    'Total Blocking Time': [tbt_row[1]], 'TBT Result': [tbt_row[0]]})

        # Concatenate the new DataFrame with the existing DataFrame
        df = pd.concat([df, cwv_new_row], axis=1)

        print(colored(f"- Core Web Vitals performance for {url}:", 'black', attrs=['bold']))
        print(f"- Largest Contentful Paint: {lcp} - {lcp_row[0]}")
        print(f'- Cumulative Layout Shift: {cls} - {cls_row[0]}')
        print(f'- Speed Index: {si} - {si_row[0]}')
        print(f'- First Contentful Paint: {fcp} - {fcp_row[0]}')
        print(f'- Total Blocking Time: {tbt} - {tbt_row[0]}')
    else:
        print(f"Error {response.status_code}: {response.text}")

    return df

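
# Note: the PageSpeed Insights request above is sent without an API key, which works for
# occasional manual runs but is rate limited by Google. For heavier use you could create
# an API key in the Google Cloud console and add it to the request parameters, e.g.
#   params['key'] = 'YOUR_API_KEY'  # hypothetical placeholder, not part of this repo
# before the requests.get(endpoint, params=params) call.
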

# =============================================================================
# Check indexation status of URL
# =============================================================================

def indexation_status(url, df):
    '''
    Function that checks if a URL is currently indexed on Google by scraping a site: search.

    Parameters
    ----------
    url (str): The URL to check.
    df (pandas.DataFrame): The pandas DataFrame to append the result to.

    Returns
    -------
    pandas.DataFrame: The updated pandas DataFrame.

    '''
    print(colored("- Is the page indexed in Google?", 'black', attrs=['bold']))
    try:
        # Define the search query
        query = f"site:{url}"

        # Define the URL for the Google search results page
        google_url = f"https://www.google.com/search?q={query}"

        # Set the user agent header
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"}

        # Make the HTTP GET request to Google with the user agent header
        response = requests.get(google_url, headers=headers)

        # Parse the HTML using BeautifulSoup
        soup = BeautifulSoup(response.text, "html.parser")

        # Check for search results containing the URL
        search_results = soup.find_all('a', href=True)

        url_indexed = any(url in link['href'] for link in search_results)

        # Print the results
        if url_indexed:
            print(f"{url} is indexed in Google. ✅")
            a = f"{url} is indexed in Google. ✅"
        else:
            print(f"{url} is not indexed in Google. ❌")
            a = f"{url} is not indexed in Google. ❌"

        # Create a new DataFrame with the row to append
        new_row = pd.DataFrame({'Indexation': [a]})

        # Concatenate the new DataFrame with the existing DataFrame
        df = pd.concat([df, new_row], axis=1)

    except Exception as e:
        # Handle the exception
        print(f"Indexation check failed with error: {e} 🚫🚫🚫🚫")

        # Create a new DataFrame with the row to append
        new_row = pd.DataFrame({'Indexation': [f'Indexation check failed with error: {e}']})

        # Concatenate the new DataFrame with the existing DataFrame
        df = pd.concat([df, new_row], axis=1)

    return df


def bot_accessibility(url, df):
    '''Check whether the URL returns a 200 for a set of common search engine crawler user agents.'''

    # Set the user agents for the crawlers we want to test
    user_agents = {
        "GoogleBot": "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
        "Bingbot": "Mozilla/5.0 (compatible; Bingbot/2.0; +http://www.bing.com/bingbot.htm)",
        "Yahoo Slurp": "Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)",
        "DuckDuckGo": "DuckDuckBot/1.0; (+http://duckduckgo.com/duckduckbot.html)",
        "Baidu": "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)",
        "Yandex": "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)",
        "Applebot": "Mozilla/5.0 (Device; OS_version) AppleWebKit/WebKit_version (KHTML, like Gecko)"
    }

    print(colored("- Is the page accessible to bots?", 'black', attrs=['bold']))

    for key, user_agent in user_agents.items():
        try:
            response = requests.get(url, headers={"User-Agent": user_agent})
            print(key, response)

            if response.status_code == 200:
                print(f"{url} is accessible for", key, "✅")
                a = f"Response {response.status_code}. {url} is accessible for {key} ✅"
            else:
                print(f"The page {url} is not accessible for", key, "❌")
                a = f"Response {response.status_code}. {url} is not accessible for {key} ❌"

            # Create a new DataFrame with the row to append
            new_row = pd.DataFrame({key: [a]})

            # Concatenate the new DataFrame with the existing DataFrame
            df = pd.concat([df, new_row], axis=1)

        except Exception as e:
            # Handle the exception
            print(f"Bot accessibility check failed with error: {e} 🚫🚫🚫🚫")

            # Create a new DataFrame with the row to append
            new_row = pd.DataFrame({'Bot Accessibility': [f'Bot accessibility check failed with error: {e}']})

            # Concatenate the new DataFrame with the existing DataFrame
            df = pd.concat([df, new_row], axis=1)

    return df

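
# Note: a 200 response for a crawler user agent only shows that the server does not block
# that user agent; robots.txt is not consulted anywhere in this script. A minimal
# standard-library sketch of such a check (not wired into checklist()) could look like:
#
#   from urllib.robotparser import RobotFileParser
#   from urllib.parse import urljoin
#
#   def robots_txt_allows(url, user_agent="Googlebot"):
#       parser = RobotFileParser()
#       parser.set_url(urljoin(url, "/robots.txt"))
#       parser.read()
#       return parser.can_fetch(user_agent, url)
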

def robots_meta_tag(url, df):
    '''Indexability check #1: does the page's HTML contain a noindex robots meta tag?'''

    print(colored("- Indexability #1 - Does the page contain a noindex robots meta tag?", 'black', attrs=['bold']))
    try:
        response = requests.get(url)

        soup = BeautifulSoup(response.text, 'html.parser')
        meta_robots = soup.find('meta', attrs={'name': 'robots'})

        if meta_robots and 'noindex' in meta_robots.get('content', ''):
            print(f'The URL {url} is not indexable: it contains a noindex robots meta tag. ❌')
            a = f'The URL {url} is not indexable: it contains a noindex robots meta tag. ❌'
        else:
            print(f'The URL {url} does not contain a noindex robots meta tag. ✅')
            a = f'The URL {url} does not contain a noindex robots meta tag. ✅'

        # Create a new DataFrame with the row to append
        new_row = pd.DataFrame({"No Index Meta Tag": [a]})

        # Concatenate the new DataFrame with the existing DataFrame
        df = pd.concat([df, new_row], axis=1)

    except requests.exceptions.RequestException as e:
        # Handle the exception
        print(f"Noindex meta tag test failed with errors: {e} 🚫🚫🚫🚫")

        # Create a new DataFrame with the row to append
        new_row = pd.DataFrame({'No Index Meta Tag': [f'Noindex meta tag test failed with errors: {e}']})

        # Concatenate the new DataFrame with the existing DataFrame
        df = pd.concat([df, new_row], axis=1)

    return df


def check_x_robots_tag_noindex(url, df):
    '''Indexability check #2: does the page return an X-Robots-Tag: noindex HTTP response header?'''

    print(colored("- Indexability #2 - Does the page return an X-Robots-Tag: noindex HTTP response header?", 'black', attrs=['bold']))

    try:
        response = requests.get(url)

        x_robots_tag = response.headers.get('X-Robots-Tag')

        if x_robots_tag and ('noindex' in x_robots_tag or 'none' in x_robots_tag):
            print(f'The URL {url} is not indexable. It returns the HTTP response header X-Robots-Tag: noindex ❌')
            a = f'The URL {url} is not indexable. It returns the HTTP response header X-Robots-Tag: noindex ❌'
        else:
            print(f'The URL {url} is indexable. It does not return the HTTP response header X-Robots-Tag: noindex ✅')
            a = f'The URL {url} is indexable. It does not return the HTTP response header X-Robots-Tag: noindex ✅'

        # Create a new DataFrame with the row to append
        new_row = pd.DataFrame({"No Index Response Header": [a]})

        # Concatenate the new DataFrame with the existing DataFrame
        df = pd.concat([df, new_row], axis=1)

    except requests.exceptions.RequestException as e:
        # Handle the exception
        print(f"Noindex response header test failed with errors: {e} 🚫🚫🚫🚫")

        # Create a new DataFrame with the row to append
        new_row = pd.DataFrame({'No Index Response Header': [f'Noindex response header test failed with errors: {e}']})

        # Concatenate the new DataFrame with the existing DataFrame
        df = pd.concat([df, new_row], axis=1)

    return df


def check_canonical(url, df):
    '''Indexability check #3: does the page canonicalize to itself?'''

    print(colored("- Indexability #3 - Is the page self canonical?", 'black', attrs=['bold']))
    try:
        response = requests.get(url)
        if response.status_code == 200:
            soup = BeautifulSoup(response.content, 'html.parser')
            canonical_tag = soup.find('link', {'rel': 'canonical'})
            canonical_url = canonical_tag.get('href') if canonical_tag else None

            if canonical_url is None:
                print(f'The URL {url} has no canonical tag. ⚠️')
                a = f'The URL {url} has no canonical tag. ⚠️'
            elif canonical_url == url:
                print(f'The URL {url} is indexable. The URL is self canonicalized. {url} = {canonical_url} ✅')
                a = f'The URL {url} is indexable. The URL is self canonicalized. {url} = {canonical_url} ✅'
            else:
                print(f'The URL {url} is not self canonical. The canonical URL ( {canonical_url} ) is different from the page URL. {url} ≠ {canonical_url} ❌')
                a = f'The URL {url} is not self canonical. The canonical URL ( {canonical_url} ) is different from the page URL. {url} ≠ {canonical_url} ❌'

            # Create a new DataFrame with the row to append
            new_row = pd.DataFrame({"Canonical": [a]})

            # Concatenate the new DataFrame with the existing DataFrame
            df = pd.concat([df, new_row], axis=1)

        else:
            print(f'The URL {url} is not indexable. The page has a status code of {response.status_code} ❌')
            a = f'The URL {url} is not indexable. The page has a status code of {response.status_code} ❌'

            # Create a new DataFrame with the row to append
            new_row = pd.DataFrame({"Canonical": [a]})

            # Concatenate the new DataFrame with the existing DataFrame
            df = pd.concat([df, new_row], axis=1)

    except requests.exceptions.RequestException as e:
        # Handle the exception
        print(f"Canonical check failed with errors: {e} 🚫🚫🚫🚫")

        # Create a new DataFrame with the row to append
        new_row = pd.DataFrame({'Canonical': [f'Canonical check failed with errors: {e}']})

        # Concatenate the new DataFrame with the existing DataFrame
        df = pd.concat([df, new_row], axis=1)

    return df

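
# For reference, the JSON-LD flavour of structured data that check_schema_org() below
# looks for is embedded in the page roughly like this (illustrative example):
#
#   <script type="application/ld+json">
#   {"@context": "https://schema.org", "@type": "Article", "headline": "..."}
#   </script>
#
# Microdata (itemtype="...") and RDFa (typeof="...") attributes are picked up as well.
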

def check_schema_org(url, df):
    '''Check which schema.org structured data types (JSON-LD, Microdata, RDFa) the page declares.'''

    print(colored("- Schema.org Check -", 'black', attrs=['bold']))
    try:
        response = requests.get(url)
        if response.status_code == 200:
            soup = BeautifulSoup(response.content, 'html.parser')
            schema_types = set()

            # JSON-LD
            for script_tag in soup.find_all('script', type='application/ld+json'):
                try:
                    data = json.loads(script_tag.string)
                    if isinstance(data, list):
                        for item in data:
                            if '@type' in item:
                                schema_types.add(item['@type'])
                    elif '@type' in data:
                        schema_types.add(data['@type'])
                except (json.JSONDecodeError, TypeError):
                    # Skip empty or malformed JSON-LD blocks
                    pass

            # Microdata
            for microdata_tag in soup.find_all(attrs={"itemtype": True}):
                schema_types.add(microdata_tag['itemtype'])

            # RDFa
            for rdfa_tag in soup.find_all(attrs={"typeof": True}):
                schema_types.add(rdfa_tag['typeof'])

            if schema_types:
                print(f"The URL {url} has schema.org structure(s): {', '.join(schema_types)} ✅")
                a = f"The URL {url} has schema.org structure(s): {', '.join(schema_types)} ✅"
            else:
                print(f"The URL {url} does not have any identifiable schema.org structures ❌")
                a = f"The URL {url} does not have any identifiable schema.org structures ❌"
        else:
            print(f"The URL {url} could not be accessed. The page has a status code of {response.status_code} ❌")
            a = f"The URL {url} could not be accessed. The page has a status code of {response.status_code} ❌"

    except requests.exceptions.RequestException as e:
        print(f"Schema.org check failed with errors: {e} 🚫")
        a = f"Schema.org check failed with errors: {e} 🚫"

    # Create a new DataFrame with the row to append
    new_row = pd.DataFrame({"Schema.org": [a]})

    # Concatenate the new DataFrame with the existing DataFrame
    df = pd.concat([df, new_row], axis=1)

    return df


checklist(url)

--------------------------------------------------------------------------------