├── .github ├── og-preview.png └── workflows │ └── test.yml ├── .gitignore ├── Dockerfile ├── README.md ├── action.yml ├── check_links.py ├── requirements.txt └── test.md /.github/og-preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paramt/url-checker/8b4bd2318ece05363245764d3c3adce63b537f66/.github/og-preview.png -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test LinkChecker 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | 7 | 8 | jobs: 9 | build: 10 | 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - name: Checkout 15 | uses: actions/checkout@v3 16 | - name: Run LinkChecker 17 | id: self-check 18 | uses: ./ 19 | with: 20 | files: test.md 21 | blacklist: https://www.github.com/paramt/this-doesnt-exist 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:latest 2 | 3 | ADD check_links.py /check_links.py 4 | ADD requirements.txt /requirements.txt 5 | 6 | RUN pip install -r requirements.txt 7 | RUN chmod +x check_links.py 8 | ENTRYPOINT ["/check_links.py"] 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # URL Checker 2 | A GitHub action to test for broken links on markdown files 3 | 4 | ### Sample Workflow 5 | ```yml 6 | name: Sample Workflow 7 | 8 | on: [push] 9 | 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout 15 | uses: actions/checkout@v3 16 | - name: Check URLs 17 | uses: paramt/url-checker@master 18 | with: 19 | files: "README.md,SUPPORT.md" 20 | ``` 21 | 22 | ### Arguments 23 | - `files`: A comma-separated list of files to check. Defaults to README.md 24 | - `blacklist`: A comma-separated list of URLs to ignore 25 | 26 | ### Sample Output 27 | [![Example](https://i.imgur.com/35zldHS.png)](https://github.com/paramt/url-checker/commit/093ef6cb5f7e9eff8300887f07eb0c3a55f4aa82/checks) 28 | -------------------------------------------------------------------------------- /action.yml: -------------------------------------------------------------------------------- 1 | name: 'URL Checker' 2 | author: 'Param Thakkar' 3 | description: 'Automatically check for broken links in markdown files' 4 | inputs: 5 | files: 6 | description: 'A list of all the markdown files in the repo, separated by commas. Defaults to README.md' 7 | required: false 8 | default: 'README.md' 9 | blacklist: 10 | description: 'A comma-separated list of URLs to skip over.' 11 | required: false 12 | default: '' 13 | runs: 14 | using: 'docker' 15 | image: 'Dockerfile' 16 | branding: 17 | icon: 'link' 18 | color: 'green' 19 | -------------------------------------------------------------------------------- /check_links.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: UTF-8 -*- 3 | 4 | import os 5 | from urlextract import URLExtract 6 | import requests 7 | 8 | # URLs to skip over 9 | blacklisted = os.getenv("INPUT_BLACKLIST", "").split(",") 10 | 11 | files = os.getenv('INPUT_FILES').split(",") 12 | repo = os.getenv("GITHUB_REPOSITORY") 13 | links = [] 14 | exit_status = 0 15 | 16 | def remove_duplicates(urls): 17 | return list(set(urls)) 18 | 19 | 20 | def get_text_from_file(file): 21 | # Assume the local file has been checked out in the action 22 | try: 23 | with open('./' + file) as f: 24 | text = f.readlines() 25 | print("Found file in the locally checked out repo") 26 | return ' '.join(text) 27 | except FileNotFoundError as e: 28 | print("Could not find file checked out locally, falling back to using public link") 29 | 30 | # Fall-back to pulling from the public URL for backawards comaptibility 31 | try: 32 | filepath = "https://raw.githubusercontent.com/" + repo + "/master/" + file 33 | r = requests.get(filepath) 34 | r.raise_for_status() 35 | return r.text 36 | except requests.exceptions.HTTPError as err: 37 | print("Could not find file using fallback public link") 38 | 39 | 40 | for file in files: 41 | 42 | text = get_text_from_file(file) 43 | 44 | extractor = URLExtract() 45 | file_links = extractor.find_urls(text) 46 | 47 | # Remove mailto links 48 | links = [url for url in file_links if "mailto://" not in url] 49 | linksToRequest = [] 50 | 51 | # Remove blacklisted links 52 | for link in links: 53 | if link in blacklisted: 54 | print(f"Removed {link}") 55 | else: 56 | linksToRequest.append(link) 57 | 58 | print(f"Checking URLs from {file}") 59 | 60 | # Remove Duplicate links 61 | linksToRequest = remove_duplicates(linksToRequest) 62 | 63 | print(f"Removing duplicate URLs from {file}") 64 | 65 | for url in linksToRequest: 66 | try: 67 | request = requests.get(url) 68 | if request.status_code == 200: 69 | print(f"✓ 200 {url}") 70 | elif request.status_code >= 400: 71 | print(f"✕ {request.status_code} {url}") 72 | exit_status = 1 73 | else: 74 | print(f"⚪ {request.status_code} {url}") 75 | 76 | except: 77 | print(f"✕ ERR {url}") 78 | 79 | # Continue through all URLs but fail test at the end 80 | exit_status = 1 81 | continue 82 | 83 | # Newline to separate URLs from different files 84 | print() 85 | 86 | exit(exit_status) 87 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests==2.31.0 2 | urlextract==1.8.0 3 | -------------------------------------------------------------------------------- /test.md: -------------------------------------------------------------------------------- 1 | ### Test links 2 | This repo uses url-checker to test for broken links. This file has been added using the `files` input, so all the links in this file will be checked. 3 | 4 | #### These links are all working: 5 | - [My Website](https://www.param.me) 6 | - [My GitHub profile](https://github.com/paramt) 7 | 8 | 9 | #### This link is broken: 10 | - [Non-existing repository](https://www.github.com/paramt/this-doesnt-exist) 11 | 12 | The broken link is blacklisted using the `blacklist` argument, so the workflow on this repo should pass 13 | --------------------------------------------------------------------------------