$lychee_config
192 | scheme = ["https", "http"]
193 | accept = [200, 201, 202, 203, 204, 206, 302, 429]
194 | timeout = $timeout
195 | retry_wait_time = $retry_wait_time
196 | max_retries = 2
197 | insecure = true
198 | skip_missing = true
199 | include_mail = false
200 | include_verbatim = true
201 | exclude_all_private = true
202 | no_progress = true
203 | cache = false
204 | EOF
205 | # Set variables used in later steps.
206 | echo "lychee_config=$lychee_config" >> $GITHUB_ENV
207 | echo "lychee_output=.git/lychee-report.md" >> $GITHUB_ENV
208 |
209 | # Implementation notes:
210 | # - This purposefully doesn't use lychee's caching facility, because
211 | # turning it on results in lychee NOT reporting the ORIGINAL error when
212 | # a cached URL is encountered. This is very unhelpful in this context.
213 | #
214 | # - More information about optional settings for the lychee-action GHA
215 | # can be found at https://github.com/lycheeverse/lychee-action
216 | #
217 | # - The documented exit codes for lychee are as follows (based on
218 | # https://github.com/lycheeverse/lychee#exit-codes as of 2023-12-14):
219 | # 0 = success (links checked successfully or skipped as configured)
220 | # 1 = missing inputs, unexpected runtime failure, or config error
221 | # 2 = link check failures (if any non-excluded link failed the check)
222 | # 3 = errors in the config file
223 |
- name: Run link checker to test URLs inside Markdown files.
  if: env.continue == 'true'
  # The script below inspects lychee's exit status itself, so the shell is
  # given as a plain "bash {0}" template rather than the "bash" shorthand.
  shell: bash {0}
  continue-on-error: true
  run: |
    # Run lychee over the files under consideration, sending its Markdown
    # report to a scratch file, and publish its exit status for later steps.
    raw_report="$(mktemp)"
    changed_files=${{env.considered}}
    lychee -c ${{env.lychee_config}} -o $raw_report -f markdown ${changed_files[@]}
    status=$?
    echo "lychee_exit_code=$status" >> $GITHUB_ENV

    case $status in
      2)
        # Link failures: build the issue body from lychee's report. The
        # sed filters drop the summary heading and table rows, trim
        # trailing "{...}" details, and wrap bracketed text in backticks.
        # A blank line and a pointer back to this workflow run follow.
        run_url="https://github.com/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}?check_suite_focus=true"
        {
          sed -e 's/^## Summary//' \
              -e 's/^|.*//g' \
              -e 's/^## Errors per input//' \
              -e 's/{.*$//g' \
              -e 's/| Failed:/– Failed:/g' \
              -e 's/| Timeout:/– Timeout:/g' \
              -e 's/\(.*\)\[\(.*\)\]\(.*\)/\1[`\2`]\3/' \
              < $raw_report
          echo
          echo "This content was produced by a [GitHub Action]($run_url)."
        } > ${{env.lychee_output}}
        ;;
      1|3)
        # Run-time or configuration failure: tell the user, then bail
        # out of this step with an error status.
        msg="Run-time error
    Baler encountered an exception. This was most likely caused by a
    bug in Baler itself. Please report this to the developers. You can
    report it by opening an issue in the GitHub repository at

    https://github.com/caltechlibrary/baler.
    "
        echo "$msg" >> $GITHUB_STEP_SUMMARY
        exit 2
        ;;
    esac
260 |
- name: Check if we've already opened a duplicate issue.
  if: env.continue == 'true' && env.lychee_exit_code != 0
  shell: bash
  run: |
    # First, save the current issue text body, to use for comparisons.
    # Strip the end of the body b/c it has this workflow's unique run
    # id, which can never match any other issue. Also, sort the body;
    # this is a way to normalize the contents to avoid mismatches
    # when, e.g., two runs end up putting URLs in different orders.
    sed_cmd='s/\(.*\)This content was produced by.*/\1/'
    current=$(sed "$sed_cmd" < ${{env.lychee_output}} | sort)

    # Get the issue numbers for the last N issues, where N = lookback.
    # The "list repository issues" endpoint filters on "state=<value>";
    # the "q=state:open" form is search-API syntax and is not a
    # parameter of this endpoint.
    endpoint="https://api.github.com/repos/${{github.repository}}/issues"
    query="state=open&per_page=${{inputs.lookback}}"
    accept="Accept: application/vnd.github+json"
    auth="Authorization: Bearer ${{github.token}}"
    issues=$(curl -s -H "$accept" -H "$auth" "$endpoint?$query")
    issue_numbers=($(jq '.[].number' <<<"$issues"))

    # Iterate over the issues & compare the bodies. Each stored body is
    # normalized the same way as the current report before comparing.
    for number in "${issue_numbers[@]}"; do
      previous=$(curl -s -H "$accept" -H "$auth" "$endpoint/$number" | jq -r '.body')
      previous=$(echo "$previous" | sed 's/\\n/\n/g' | sed "$sed_cmd" | sort)
      if [[ "$current" == "$previous" ]]; then
        link="https://github.com/${{github.repository}}/issues/$number"

        # Report that we found a match, pointing at the existing issue.
        msg="Bad links found, but issue creation skipped
    One or more invalid URL(s) have been found; however, no new
    issue has been opened in the repository because the same URL(s)
    were reported in issue #$number ($link).
    "
        echo "$msg" >> "$GITHUB_STEP_SUMMARY"

        # Skip the rest of the workflow.
        echo "continue=false" >> "$GITHUB_ENV"
        break
      fi
    done
298 |
# File a new issue whose body is the report written by the link-checker
# step. The step id lets a later step read the resulting issue number
# from this step's outputs.
- name: Open a new issue/ticket to report the problems.
  id: create-issue
  if: env.continue == 'true' && env.lychee_exit_code != 0
  uses: peter-evans/create-issue-from-file@v5.0.0
  with:
    token: ${{github.token}}
    title: Invalid URLs in Markdown files
    labels: ${{env.labels}}
    content-filepath: ${{env.lychee_output}}
308 |
- name: Put a link to the issue in the workflow output.
  if: env.continue == 'true' && env.lychee_exit_code != 0
  env:
    issue-number: ${{steps.create-issue.outputs.issue-number}}
  shell: bash
  run: |
    # Build the URL of the issue created by the "create-issue" step and
    # include it in the summary, so that the workflow output really does
    # point the reader at the new ticket.
    number="${{env.issue-number}}"
    link="https://github.com/${{github.repository}}/issues/$number"
    msg="
    Invalid URLs found
    Ticket #$number has been
    created: $link
    "
    echo "$msg" >> "$GITHUB_STEP_SUMMARY"
    # Report a failure.
    exit 1
323 |
- name: Log internal variable values in debug mode or if a failure occurs.
  if: inputs.debug == 'true' || failure()
  shell: bash
  run: |
    # Dump the action inputs and the values accumulated in the environment
    # by earlier steps, to help diagnose a failed or surprising run.
    echo "Summary of run-time variables"
    echo "============================="
    echo "repository_owner = ${{github.repository_owner}}"
    echo "repository = ${{github.event.repository.name}}"
    echo "event name = ${{github.event_name}}"
    echo "files: ${{inputs.files}}"
    echo "ignore: ${{inputs.ignore}}"
    echo "labels: ${{inputs.labels}}"
    echo "lookback: ${{inputs.lookback}}"
    echo "timeout: ${{inputs.timeout}}"
    echo "debug: ${{inputs.debug}}"
    echo "depth_needed (for git history) = ${{env.depth_needed}}"
    echo "files considered = ${{env.considered}}"
    echo "continue = ${{env.continue}}"
    echo "env.lychee_exit_code = ${{env.lychee_exit_code}}"
    echo ""
    echo "Content of lychee config file"
    echo "============================="
    # This step can run after an early failure, when the config file may
    # not have been written yet. Guard the cat so that an unset or missing
    # path neither makes cat read stdin nor fails this diagnostic step.
    if [[ -f "${lychee_config:-}" ]]; then
      cat "$lychee_config"
    else
      echo "(lychee config file not available)"
    fi
    echo "============================="
348 |
# Runs only when the workflow was not cut short and lychee exited with
# status 0; appends a short success note to the job summary.
- name: Report if checks were successful.
  shell: bash
  if: env.continue == 'true' && env.lychee_exit_code == 0
  run: |
    summary="No invalid URLs found
    No bad links were found in the files matched by this pattern:
    ${{inputs.files}}
    "
    echo "$summary" >> $GITHUB_STEP_SUMMARY
357 |
358 |
# Action metadata: author credit and the branding icon/color.
author: "Michael Hucka – https://github.com/mhucka"
branding:
  color: orange
  icon: thumbs-down
363 |
--------------------------------------------------------------------------------
/assets/css/style.scss:
--------------------------------------------------------------------------------
---
---
/* NOTE: the two triple-dash lines above must be left in place!

   Summary: allow GitHub Pages version of README to work in dark mode.

   This stylesheet exists so that the GitHub Pages rendering of the README
   follows the reader's dark/light mode preference, something the default
   GitHub Pages theme doesn't do. It was originally based on the approach
   implemented by user "ggorlen" in https://github.com/ggorlen/resources
   and referenced in a Stack Overflow comment on 2023-07-10.
*/

@import "https://cdnjs.cloudflare.com/ajax/libs/github-markdown-css/5.5.1/github-markdown.min.css";

/* Page background follows the color-scheme preference. */
body {
  margin: 0;

  @media (prefers-color-scheme: light) {
    background-color: #ffffff;
  }

  @media (prefers-color-scheme: dark) {
    background-color: #0d1117;
  }
}

/* Centered content column; padding shrinks on narrow viewports. */
.markdown-body {
  box-sizing: border-box;
  min-width: 200px;
  max-width: 980px;
  margin: 0 auto;
  padding: 45px;

  @media (max-width: 767px) {
    padding: 15px;
  }
}
46 |
--------------------------------------------------------------------------------
/codemeta.json:
--------------------------------------------------------------------------------
1 | {
2 | "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
3 | "@type": "SoftwareSourceCode",
4 | "name": "Baler – BAd Link reportER",
5 | "identifier": "baler",
6 | "description": "Baler is a GitHub Action that tests the URLs inside Markdown files in your GitHub repository and opens an issue if it finds any problems.",
7 | "version": "2.0.4",
8 | "datePublished": "2024-02-09",
9 | "dateCreated": "2023-12-11",
10 | "author": [
11 | {
12 | "@type": "Person",
13 | "givenName": "Michael",
14 | "familyName": "Hucka",
15 | "affiliation": {
16 | "@type": "Organization",
17 | "name": "California Institute of Technology Library"
18 | },
19 | "email": "mhucka@caltech.edu",
20 | "@id": "https://orcid.org/0000-0001-9105-5960"
21 | }
22 | ],
23 | "maintainer": [
24 | {
25 | "@type": "Person",
26 | "givenName": "Michael",
27 | "familyName": "Hucka",
28 | "affiliation": {
29 | "@type": "Organization",
30 | "name": "California Institute of Technology Library"
31 | },
32 | "email": "mhucka@caltech.edu",
33 | "@id": "https://orcid.org/0000-0001-9105-5960"
34 | }
35 | ],
36 | "funder": {
37 | "@id": "https://ror.org/05dxps055",
38 | "@type": "Organization",
39 | "name": "California Institute of Technology Library"
40 | },
41 | "copyrightHolder": [
42 | {
43 | "@id": "https://ror.org/05dxps055",
44 | "@type": "Organization",
45 | "name": "California Institute of Technology"
46 | }
47 | ],
48 | "copyrightYear": 2024,
49 | "license": "https://github.com/caltechlibrary/baler/blob/main/LICENSE",
50 | "isAccessibleForFree": true,
51 | "url": "https://caltechlibrary.github.io/baler",
52 | "codeRepository": "https://github.com/caltechlibrary/baler",
53 | "readme": "https://github.com/caltechlibrary/baler/blob/main/README.md",
54 | "releaseNotes": "https://github.com/caltechlibrary/baler/blob/main/CHANGES.md",
55 | "issueTracker": "https://github.com/caltechlibrary/baler/issues",
56 | "downloadUrl": "https://github.com/caltechlibrary/baler/releases",
57 | "relatedLink": "https://data.caltech.edu/records/j6vdk-0y403",
58 | "keywords": [
59 | "software",
60 | "automation",
61 | "GitHub Actions",
62 | "GitHub Automation"
63 | ],
64 | "developmentStatus": "active"
65 | }
66 |
--------------------------------------------------------------------------------
/sample-workflow.yml:
--------------------------------------------------------------------------------
# Baler (BAd Link reportER) version 2.0.4 — GitHub Actions workflow.
# Distributed as the file "sample-workflow.yml" in the Baler source
# code repository: https://github.com/caltechlibrary/baler

name: Bad Link Reporter

# Configure this section ─────────────────────────────────────────────

env:
  # Files to check. (One pattern per line, without a leading dash.)
  files: |
    **/*.md

  # Label to put on issues opened by this workflow:
  labels: bug

  # How many previous issues to examine for duplicate reports.
  lookback: 10

  # Seconds to wait on an unresponsive URL before trying once more.
  timeout: 15

  # Optional file listing URLs to ignore, one per line:
  ignore: .github/workflows/ignored-urls.txt

on:
  schedule:
    # Cron fields are: "min hr day-of-month month day-of-week"
    - cron: '00 04 * * 1'
  push:
    paths:
      - '**.md'
  workflow_dispatch:

# The rest of this file should be left as-is ─────────────────────────

run-name: Test links in Markdown files
jobs:
  Baler:
    name: Link checker and reporter
    runs-on: ubuntu-latest
    permissions:
      issues: write
    steps:
      # Each setting takes the event input when one is supplied and
      # otherwise falls back to the value configured under "env" above.
      - uses: caltechlibrary/baler@v2
        with:
          files: ${{github.event.inputs.files || env.files}}
          labels: ${{github.event.inputs.labels || env.labels}}
          ignore: ${{github.event.inputs.ignore || env.ignore}}
          timeout: ${{github.event.inputs.timeout || env.timeout}}
          lookback: ${{github.event.inputs.lookback || env.lookback}}
--------------------------------------------------------------------------------