├── .editorconfig ├── .gitattributes ├── .github ├── SECURITY.md └── workflows │ ├── bad-link-reporter.yml │ ├── iga.yml │ ├── ignored-urls.txt │ ├── markdown-linter.yml │ ├── update-release-tag.yml │ ├── waystation.yml │ └── yaml-linter.yml ├── .gitignore ├── .graphics ├── baler.jpg ├── caltech-round.png ├── caltech-round.svg ├── github-run-workflow-button-dark.png ├── github-run-workflow-button.png ├── github-run-workflow-dark.png ├── github-run-workflow.png ├── github-workflow-run-button-dark.png ├── github-workflow-run-button.png ├── github-workflow-running-dark.png └── github-workflow-running.png ├── .jsonlintrc.json ├── .markdownlint.json ├── .yamllint.yml ├── CHANGES.md ├── CITATION.cff ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── SUPPORT.md ├── action.yml ├── assets └── css │ └── style.scss ├── codemeta.json └── sample-workflow.yml /.editorconfig: -------------------------------------------------------------------------------- 1 | # Summary: EditorConfig file for this project. -*- conf -*- 2 | # 3 | # For more information, see https://EditorConfig.org 4 | # 5 | # Copyright 2024 California Institute of Technology. 6 | # License: Modified BSD 3-clause – see file "LICENSE" in the project website. 7 | # Website: https://github.com/caltechlibrary/baler 8 | 9 | root = true 10 | 11 | [*] 12 | charset = utf-8 13 | end_of_line = lf 14 | indent_size = 4 15 | indent_style = space 16 | insert_final_newline = true 17 | max_line_length = 90 18 | tab_width = 4 19 | trim_trailing_whitespace = true 20 | 21 | [*.cfg] 22 | indent_size = 2 23 | 24 | [*.json] 25 | indent_size = 2 26 | 27 | [*.{yml,yaml}] 28 | indent_size = 2 29 | 30 | # Shell scripts on Windows. 
31 | [*.{cmd,bat}] 32 | end_of_line = crlf 33 | 34 | [{Makefile,makefile}] 35 | indent_size = 4 36 | indent_style = tab 37 | tab_width = 8 38 | 39 | [*.applescript] 40 | indent_size = 4 41 | indent_style = tab 42 | tab_width = 4 43 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Summary: repository-specific file attributes assignments for git. 2 | # 3 | # Copyright 2024 California Institute of Technology. 4 | # License: Modified BSD 3-clause – see file "LICENSE" in the project website. 5 | # Website: https://github.com/caltechlibrary/baler 6 | 7 | # Set default interpretation of line endings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 8 | 9 | * text=auto 10 | 11 | # Interpretation of common text files ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 12 | 13 | *.bat text eol=crlf 14 | *.cff text 15 | *.cfg text 16 | *.css text diff=css 17 | *.env text 18 | *.html text diff=html 19 | *.ini text 20 | *.ipynb text eol=lf 21 | *.js text 22 | *.json text 23 | *.md text diff=markdown 24 | *.py text diff=python 25 | *.rst text 26 | *.sh text 27 | *.sql text 28 | *.svg text 29 | *.toml text 30 | *.tex text diff=tex 31 | *.txt text 32 | *.yaml text merge=yaml 33 | *.yml text merge=yaml 34 | 35 | # RC files like .babelrc, .eslintrc, etc. 
36 | *.*rc text 37 | 38 | LICENSE text 39 | Makefile text 40 | 41 | *.gitattributes text 42 | *.*ignore text 43 | .gitconfig text 44 | 45 | # Interpretation of common binary files ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 46 | 47 | *.bz binary 48 | *.DOC binary 49 | *.doc binary 50 | *.DOCX binary 51 | *.docx binary 52 | *.DOT binary 53 | *.dot binary 54 | *.gz binary 55 | *.jpeg binary 56 | *.jpg binary 57 | *.PDF binary 58 | *.pdf binary 59 | *.RTF binary 60 | *.rtf binary 61 | *.tar binary 62 | *.tgz binary 63 | *.tif binary 64 | *.tiff binary 65 | *.xls binary 66 | *.zip binary 67 | 68 | # Special case for CSV files ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 69 | # This avoids compatibility issues between Windows and Mac. 70 | 71 | *.csv text eol=crlf 72 | -------------------------------------------------------------------------------- /.github/SECURITY.md: -------------------------------------------------------------------------------- 1 | # Reporting security issues 2 | 3 | Please report security issues using the [issue tracker](https://github.com/caltechlibrary/baler/issues) for this repository. 4 | -------------------------------------------------------------------------------- /.github/workflows/bad-link-reporter.yml: -------------------------------------------------------------------------------- 1 | # ╭─────────────────── Notice ── Notice ── Notice ───────────────────╮ 2 | # │ This is a custom Baler workflow file. It is different from the │ 3 | # │ sample workflow suggested for users because it is designed to │ 4 | # │ allow testing Baler. DO NOT COPY THIS FILE; instead, use the │ 5 | # │ sample workflow file named "sample-workflow.yml" from the Baler │ 6 | # │ source repository at https://github.com/caltechlibrary/baler/. 
│ 7 | # ╰─────────────────── Notice ── Notice ── Notice ───────────────────╯ 8 | 9 | name: Bad Link Reporter 10 | 11 | # Configure this section ───────────────────────────────────────────── 12 | 13 | env: 14 | # Files examined by the workflow: 15 | files: '*.md' 16 | 17 | # Label assigned to issues created by this workflow: 18 | labels: bug 19 | 20 | # Number of previous issues to check for duplicate reports. 21 | lookback: 10 22 | 23 | # Time (sec) to wait on an unresponsive URL before trying once more. 24 | timeout: 15 25 | 26 | # Optional file containing a list of URLs to ignore, one per line: 27 | ignore: .github/workflows/ignored-urls.txt 28 | 29 | on: 30 | schedule: # Cron syntax is: "min hr day-of-month month day-of-week" 31 | - cron: 00 04 * * 1 32 | push: 33 | paths: 34 | - .github/workflows/bad-link-reporter.yml 35 | - .github/workflows/ignored-urls.txt 36 | workflow_dispatch: 37 | inputs: 38 | files: 39 | description: Comma-separated paths or regexp's 40 | default: '*.md' 41 | labels: 42 | description: Comma-separated issue labels 43 | default: bug 44 | ignore: 45 | description: File containing URLs to ignore 46 | default: .github/workflows/ignored-urls.txt 47 | lookback: 48 | description: No. 
of previous issues to check 49 | default: 10 50 | debug: 51 | description: Run in debug mode 52 | type: boolean 53 | 54 | # The rest of this file should be left as-is ───────────────────────── 55 | 56 | run-name: Test links in Markdown files 57 | jobs: 58 | Baler: 59 | name: Link checker and reporter 60 | runs-on: ubuntu-latest 61 | permissions: 62 | issues: write 63 | steps: 64 | - uses: caltechlibrary/baler@main 65 | with: 66 | files: ${{github.event.inputs.files || env.files}} 67 | labels: ${{github.event.inputs.labels || env.labels}} 68 | ignore: ${{github.event.inputs.ignore || env.ignore}} 69 | timeout: ${{github.event.inputs.timeout || env.timeout}} 70 | lookback: ${{github.event.inputs.lookback || env.lookback}} 71 | -------------------------------------------------------------------------------- /.github/workflows/iga.yml: -------------------------------------------------------------------------------- 1 | # GitHub Actions workflow for InvenioRDM GitHub Archiver version 1.2.2 2 | # This is available as the file "sample-workflow.yml" from the open- 3 | # source repository for IGA at https://github.com/caltechlibrary/iga/. 4 | 5 | # ╭────────────────────────────────────────────╮ 6 | # │ Configure this section │ 7 | # ╰────────────────────────────────────────────╯ 8 | 9 | env: 10 | INVENIO_SERVER: https://data.caltech.edu 11 | 12 | # Set to an InvenioRDM record ID to mark release as a new version. 13 | parent_record: kyxhk-bpa60 14 | 15 | # The variables below are other IGA options. Please see the docs. 
16 | community: none 17 | draft: false 18 | all_assets: false 19 | all_metadata: false 20 | debug: false 21 | 22 | # ╭────────────────────────────────────────────╮ 23 | # │ The rest of this file should be left as-is │ 24 | # ╰────────────────────────────────────────────╯ 25 | 26 | name: InvenioRDM GitHub Archiver 27 | on: 28 | release: 29 | types: [published] 30 | workflow_dispatch: 31 | inputs: 32 | release_tag: 33 | description: The release tag (empty = latest) 34 | parent_record: 35 | description: ID of parent record (for versioning) 36 | community: 37 | description: Name of InvenioRDM community (if any) 38 | draft: 39 | description: Mark the record as a draft 40 | type: boolean 41 | all_assets: 42 | description: Attach all GitHub assets 43 | type: boolean 44 | all_metadata: 45 | description: Include additional GitHub metadata 46 | type: boolean 47 | debug: 48 | description: Print debug info in the GitHub log 49 | type: boolean 50 | 51 | run-name: Archive ${{inputs.release_tag || 'latest release'}} in InvenioRDM 52 | jobs: 53 | run_iga: 54 | name: Send to ${{needs.get_repository.outputs.server}} 55 | runs-on: ubuntu-latest 56 | needs: get_repository 57 | steps: 58 | - uses: caltechlibrary/iga@main 59 | with: 60 | INVENIO_SERVER: ${{env.INVENIO_SERVER}} 61 | INVENIO_TOKEN: ${{secrets.INVENIO_TOKEN}} 62 | all_assets: ${{github.event.inputs.all_assets || env.all_assets}} 63 | all_metadata: ${{github.event.inputs.all_metadata || env.all_metadata}} 64 | debug: ${{github.event.inputs.debug || env.debug}} 65 | draft: ${{github.event.inputs.draft || env.draft}} 66 | community: ${{github.event.inputs.community || env.community}} 67 | parent_record: ${{github.event.inputs.parent_record || env.parent_record}} 68 | release_tag: ${{github.event.inputs.release_tag || 'latest'}} 69 | get_repository: 70 | name: Get repository name 71 | runs-on: ubuntu-latest 72 | outputs: 73 | server: ${{steps.parse.outputs.host}} 74 | steps: 75 | - name: Extract name from INVENIO_SERVER 76 | 
id: parse 77 | run: echo "host=$(cut -d'/' -f3 <<< ${{env.INVENIO_SERVER}} | cut -d':' -f1)" >> $GITHUB_OUTPUT 78 | -------------------------------------------------------------------------------- /.github/workflows/ignored-urls.txt: -------------------------------------------------------------------------------- 1 | https://example/ 2 | -------------------------------------------------------------------------------- /.github/workflows/markdown-linter.yml: -------------------------------------------------------------------------------- 1 | # Summary: GitHub Actions workflow to run a Markdown linter on .md files. 2 | # 3 | # Copyright 2024 California Institute of Technology. 4 | # License: Modified BSD 3-clause – see file "LICENSE" in the project website. 5 | # Website: https://github.com/caltechlibrary/baler 6 | 7 | name: Markdown file linter 8 | run-name: Lint Markdown files after ${{github.event_name}} by ${{github.actor}} 9 | 10 | on: 11 | push: 12 | branches: 13 | - main 14 | paths: 15 | - '*.md' 16 | pull_request: 17 | branches: 18 | - main 19 | paths: 20 | - '*.md' 21 | workflow_dispatch: 22 | # Manual runs take no path filter: "paths" is only valid for the push 23 | # and pull_request events, so none is given here. 24 | 25 | jobs: 26 | lint: 27 | name: Run Markdown linter 28 | runs-on: ubuntu-latest 29 | steps: 30 | - name: Check out copy of git repository. 31 | uses: actions/checkout@v4 32 | 33 | - name: Run Markdownlint on .md files. 34 | uses: DavidAnson/markdownlint-cli2-action@v15 35 | with: 36 | config: .markdownlint.json 37 | globs: | 38 | *.md 39 | -------------------------------------------------------------------------------- /.github/workflows/update-release-tag.yml: -------------------------------------------------------------------------------- 1 | # Summary: move the tag for the major version number to the latest release. 
2 | # This workflow originally came from the 2024-02-09 example shown at 3 | # https://github.com/marketplace/actions/release-tag-tracker 4 | 5 | name: Release tag tracker 6 | 7 | # Run only when started manually; there is no tag-push trigger here 8 | on: 9 | workflow_dispatch: 10 | 11 | jobs: 12 | update-release-tags: 13 | name: Update tag for latest release 14 | runs-on: ubuntu-latest 15 | permissions: 16 | contents: write 17 | steps: 18 | - name: Check out source repository. 19 | uses: actions/checkout@v4 20 | with: 21 | # Get complete history 22 | fetch-depth: 0 23 | 24 | - name: Update major version and latest tags. 25 | uses: bewuethr/release-tracker-action@v1 26 | env: 27 | # GitHub token to enable pushing tags 28 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} 29 | with: 30 | # Don't move the "latest" tag 31 | update-latest: false 32 | # Don't update the vX.Y tags 33 | update-minor: false 34 | # Expect vX.Y.Z format (default) 35 | prepend-v: true 36 | -------------------------------------------------------------------------------- /.github/workflows/waystation.yml: -------------------------------------------------------------------------------- 1 | # GitHub Actions workflow for Waystation version 1.8.0. 
2 | # Available as the file "sample-workflow.yml" from the software 3 | # repository at https://github.com/caltechlibrary/waystation 4 | 5 | name: Archive GitHub Pages 6 | run-name: Archive GitHub Pages in the Wayback Machine 7 | 8 | on: 9 | release: 10 | types: [published] 11 | workflow_dispatch: 12 | inputs: 13 | dry_run: 14 | description: Run without actually sending URLs 15 | type: boolean 16 | 17 | jobs: 18 | run-waystation: 19 | name: Run Waystation 20 | runs-on: ubuntu-latest 21 | steps: 22 | - uses: caltechlibrary/waystation@v1.8 23 | with: 24 | dry_run: ${{github.event.inputs.dry_run || false}} 25 | -------------------------------------------------------------------------------- /.github/workflows/yaml-linter.yml: -------------------------------------------------------------------------------- 1 | # Summary: GitHub Actions workflow to run a YAML linter on .yml files. 2 | # 3 | # Copyright 2024 California Institute of Technology. 4 | # License: Modified BSD 3-clause – see file "LICENSE" in the project website. 5 | # Website: https://github.com/caltechlibrary/baler 6 | 7 | name: YAML file linter 8 | 9 | on: 10 | pull_request: 11 | types: [opened, synchronize] 12 | paths: 13 | - '**.yml' 14 | - '**.yaml' 15 | push: 16 | branches: 17 | - main 18 | paths: 19 | - '**.yml' 20 | - '**.yaml' 21 | 22 | run-name: Run linter on YAML files 23 | jobs: 24 | Yamllint: 25 | name: GitHub YAMLlint 26 | runs-on: ubuntu-latest 27 | steps: 28 | - name: Check out copy of git repository 29 | uses: actions/checkout@v4 30 | 31 | - name: Run YAMLlint 32 | uses: ibiqlik/action-yamllint@v3.1.1 33 | with: 34 | config_file: .yamllint.yml 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Summary: rules for files and subdirectories to be ignored by git. 2 | # 3 | # Copyright 2024 California Institute of Technology. 
4 | # License: Modified BSD 3-clause – see file "LICENSE" in the project website. 5 | # Website: https://github.com/caltechlibrary/baler 6 | # 7 | # ╭─────────────────────── Notice ── Notice ── Notice ────────────────────────╮ 8 | # │ The recommended approach is to add ONLY project-specific rules to the │ 9 | # │ .gitignore of a repo. Users can put rules that apply to their individual │ 10 | # │ ways of doing things into global git ignore files that they set up using │ 11 | # │ (e.g.) "git config --global core.excludesfile ~/.gitignore_global". For │ 12 | # │ example, a number of files such as Emacs checkpoint and backup files are │ 13 | # │ things that are not specific to a given project; rather, Emacs creates │ 14 | # │ them everywhere, in all projects, because they're a byproduct of how it │ 15 | # │ works. Thus, rules to ignore them belong in users' own global .gitignore │ 16 | # │ files, not in a project's .gitignore. │ 17 | # │ │ 18 | # │ A useful starting point for global .gitignore file contents can be found │ 19 | # │ at https://github.com/github/gitignore/tree/main/Global (as of 2022-07-14)│ 20 | # ╰───────────────────────────────────────────────────────────────────────────╯ 21 | 22 | *.bak 23 | .tern-port 24 | -------------------------------------------------------------------------------- /.graphics/baler.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/baler/d767415f95e0a68b84b4615f37379c70c36604da/.graphics/baler.jpg -------------------------------------------------------------------------------- /.graphics/caltech-round.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/baler/d767415f95e0a68b84b4615f37379c70c36604da/.graphics/caltech-round.png -------------------------------------------------------------------------------- /.graphics/caltech-round.svg: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | Caltech Icon 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /.graphics/github-run-workflow-button-dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/baler/d767415f95e0a68b84b4615f37379c70c36604da/.graphics/github-run-workflow-button-dark.png -------------------------------------------------------------------------------- /.graphics/github-run-workflow-button.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/baler/d767415f95e0a68b84b4615f37379c70c36604da/.graphics/github-run-workflow-button.png -------------------------------------------------------------------------------- /.graphics/github-run-workflow-dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/baler/d767415f95e0a68b84b4615f37379c70c36604da/.graphics/github-run-workflow-dark.png -------------------------------------------------------------------------------- /.graphics/github-run-workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/baler/d767415f95e0a68b84b4615f37379c70c36604da/.graphics/github-run-workflow.png -------------------------------------------------------------------------------- /.graphics/github-workflow-run-button-dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/baler/d767415f95e0a68b84b4615f37379c70c36604da/.graphics/github-workflow-run-button-dark.png -------------------------------------------------------------------------------- /.graphics/github-workflow-run-button.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/baler/d767415f95e0a68b84b4615f37379c70c36604da/.graphics/github-workflow-run-button.png -------------------------------------------------------------------------------- /.graphics/github-workflow-running-dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/baler/d767415f95e0a68b84b4615f37379c70c36604da/.graphics/github-workflow-running-dark.png -------------------------------------------------------------------------------- /.graphics/github-workflow-running.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/baler/d767415f95e0a68b84b4615f37379c70c36604da/.graphics/github-workflow-running.png -------------------------------------------------------------------------------- /.jsonlintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "comments": false, 3 | "trailing-commas": false, 4 | "duplicate-keys": false, 5 | "log-files": false, 6 | "compact": true, 7 | "continue": true, 8 | "patterns": ["**/*.json"] 9 | } 10 | -------------------------------------------------------------------------------- /.markdownlint.json: -------------------------------------------------------------------------------- 1 | { 2 | "blank_lines": { 3 | "maximum": 2 4 | }, 5 | "html": { 6 | "allowed_elements": [ 7 | "a", 8 | "b", 9 | "br", 10 | "code", 11 | "details", 12 | "div", 13 | "em", 14 | "i", 15 | "img", 16 | "ins", 17 | "kbd", 18 | "p", 19 | "picture", 20 | "source", 21 | "span", 22 | "sup", 23 | "summary" 24 | ] 25 | }, 26 | "line-length": { 27 | "line_length": 10000 28 | }, 29 | "no-alt-text": true, 30 | "no-duplicate-heading": { 31 | "allow_different_nesting": true 32 | } 33 | } 34 | 
-------------------------------------------------------------------------------- /.yamllint.yml: -------------------------------------------------------------------------------- 1 | # Summary: configuration file for .github/workflows/yaml-linter.yml. 2 | # 3 | # Copyright 2024 California Institute of Technology. 4 | # License: Modified BSD 3-clause – see file "LICENSE" in the project website. 5 | # Website: https://github.com/caltechlibrary/baler 6 | 7 | rules: 8 | colons: 9 | max-spaces-after: -1 10 | quoted-strings: 11 | required: only-when-needed 12 | document-start: 13 | present: false 14 | document-end: 15 | present: false 16 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | # Change log for Baler 2 | 3 | ## Version 2.0.4 (2024-02-09) 4 | 5 | Changes in this version: 6 | 7 | * Fixed problem in which the sample workflow embedded in the GitHub Pages site (<https://caltechlibrary.github.io/baler>) was damaged because the contents were interpreted by Jekyll as variable references. 8 | * Fixed a bug in the Makefile preventing the post-release workflow from running. 9 | * Added a call to `jsonlint` in the `make lint` action, and a `jsonlint` config file. 10 | * Switched to a different CLI for `markdownlint` that has more options. 11 | 12 | 13 | ## Version 2.0.3 (2024-02-06) 14 | 15 | Changes in this version: 16 | 17 | * Fix a minor bug in the Makefile that prevented `post-release` from being invoked automatically. 18 | * Replace relative links to images in `README.md` with absolute URLs to the files in raw.githubusercontent.com, to solve broken images in the GitHub Pages version. 19 | 20 | 21 | ## Version 2.0.2 (2024-01-31) 22 | 23 | Changes in this version: 24 | 25 | * The documentation did not adequately explain how to specify more than one path/pattern for the `files` parameter. Now fixed (hopefully), with new examples in `README.md`. 
26 | 27 | 28 | ## Version 2.0.1 (2024-01-30) 29 | 30 | Changes in this version: 31 | 32 | * Use v5 of [peter-evans/create-issue-from-file](https://github.com/peter-evans/create-issue-from-file) to solve warning about Node version deprecation. 33 | * Update more repository workflows to their latest versions. 34 | * Added a `.editorconfig` file for good measure. 35 | * Add more repo metadata fields to `CITATION.cff` and `codemeta.json`. 36 | * In `README.md`, use a better URL that always points to the latest version of the archived copy in our InvenioRDM server. 37 | 38 | 39 | ## Version 2.0.0 40 | 41 | Changes in this version: 42 | 43 | * Use just-released v20 of [tj-actions/glob](https://github.com/tj-actions/glob) to solve warning about Node version deprecation in v19 of the action. 44 | 45 | 46 | ## Version 1.0.0 47 | 48 | First full release. 49 | 50 | 51 | ## Version 0.0.2 52 | 53 | This version features overhauled logic, updated sample workflow, and updated documentation. 54 | 55 | 56 | ## Version 0.0.1 57 | 58 | First complete version for testing. 59 | 60 | 61 | ## Version 0.0.0 62 | 63 | Created this repository. 64 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: If you use this software, please cite it as below. 3 | authors: 4 | - family-names: Hucka 5 | given-names: Michael 6 | email: mhucka@caltech.edu 7 | orcid: https://orcid.org/0000-0001-9105-5960 8 | title: Baler – BAd Link reportER 9 | abstract: Baler is a GitHub Action that tests the URLs inside Markdown files in your GitHub repository and opens an issue if it finds any problems. 
10 | version: 2.0.4 11 | date-released: 2024-02-09 12 | url: https://caltechlibrary.github.io/baler 13 | repository-code: https://github.com/caltechlibrary/baler 14 | license-url: https://github.com/caltechlibrary/baler/blob/main/LICENSE 15 | doi: 10.22002/j6vdk-0y403 16 | type: software 17 | keywords: 18 | - automation 19 | - software 20 | - GitHub Actions 21 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 
6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project contributors are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 28 | 29 | Project contributors have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project contributors. 
34 | 35 | ## Enforcement 36 | 37 | If a contributor engages in harassing behavior, the project organizers may take any action they deem appropriate, including warning the offender or expelling them from online forums, online project resources, face-to-face meetings, or any other project-related activity or resource. 38 | 39 | If you are being harassed, notice that someone else is being harassed, or have any other concerns, please contact a member of the project team immediately. Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 40 | 41 | ## Attribution 42 | 43 | Portions of this Code of Conduct were adapted from Electron's [Contributor Covenant Code of Conduct](https://github.com/electron/electron/blob/master/CODE_OF_CONDUCT.md), which itself was adapted from the [Contributor Covenant](http://contributor-covenant.org/version/1/4), version 1.4. 44 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Guidelines for contributing to this project 2 | 3 | Any constructive contributions – bug reports, pull requests (code or documentation), suggestions for improvements, and more – are welcome. 4 | 5 | ## Conduct 6 | 7 | Everyone is asked to read and respect the [code of conduct](CODE_OF_CONDUCT.md) before participating in this project. 
8 | 9 | ## Coordinating work 10 | 11 | A quick way to find out what is currently in the near-term plans for this project is to look at the [GitHub issue tracker](https://github.com/caltechlibrary/baler/issues), but the possibilities are not limited to what you see there – if you have ideas for new features and enhancements, please feel free to write them up as a new issue or contact the developers directly! 12 | 13 | ## Submitting contributions 14 | 15 | Please feel free to contact the author directly, or even better, jump right in and use the standard GitHub approach of forking the repo and creating a pull request. When committing code changes and submitting pull requests, please write a clear log message for your commits. 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2023-2024, Caltech 2 | All rights not granted herein are expressly reserved by Caltech. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | 3. Neither the name of the copyright holder nor the names of its contributors 15 | may be used to endorse or promote products derived from this software without 16 | specific prior written permission. 
17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 22 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 | POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for developing and releasing Baler. 2 | # Run "make" or "make help" to get a list of commands in this makefile. 3 | # 4 | # ╭──────────────────────── Notice ── Notice ── Notice ───────────────────────╮ 5 | # │ The codemeta.json file is considered the master source for version and │ 6 | # │ other info. Information is pulled out of codemeta.json to update other │ 7 | # │ files like setup.cfg, the README, and others. Maintainers should update │ 8 | # │ codemeta.json and not edit other files to update version numbers & URLs. 
│ 9 | # │ │ 10 | # │ The parts involving the DOI in this makefile make 3 assumptions: │ 11 | # │ * The DOI identifies the released version of this software by │ 12 | # │ referencing a copy in a research data repository (RDM) system │ 13 | # │ * The RDM server used is based on InvenioRDM (roughly same as Zenodo) │ 14 | # │ * The codemeta.json file contains a "relatedLink" field whose value │ 15 | # │ contains the URL of a copy of this software stored in the RDM server. │ 16 | # │ With these assumptions, we can automatically get the latest DOI for a │ 17 | # │ release in RDM (because given any release, RDM can be queried for the │ 18 | # │ latest one) and we don't have to hardwire URLs or id's in this makefile. │ 19 | # ╰───────────────────────────────────────────────────────────────────────────╯ 20 | # 21 | # Copyright 2024 California Institute of Technology. 22 | # License: Modified BSD 3-clause – see file "LICENSE" in the project website. 23 | # Website: https://github.com/caltechlibrary/baler 24 | 25 | SHELL=/bin/bash 26 | .ONESHELL: # Run all commands in the same shell. 27 | .SHELLFLAGS += -e # Exit at the first error. 28 | 29 | # This Makefile uses syntax that needs at least GNU Make version 3.82. 30 | # The following test is based on the approach posted by Eldar Abusalimov to 31 | # Stack Overflow in 2012 at https://stackoverflow.com/a/12231321/743730 32 | 33 | ifeq ($(filter undefine,$(value .FEATURES)),) 34 | $(error Unsupported version of Make. \ 35 | This Makefile does not work properly with GNU Make $(MAKE_VERSION); \ 36 | it needs GNU Make version 3.82 or later) 37 | endif 38 | 39 | # Before we go any further, test if certain programs are available. 
40 | # The following is based on the approach posted by Jonathan Ben-Avraham to 41 | # Stack Overflow in 2014 at https://stackoverflow.com/a/25668869 42 | 43 | programs_needed = curl git gh jq jsonlint yamllint markdownlint 44 | TEST := $(foreach p,$(programs_needed),\ 45 | $(if $(shell which $(p)),_,$(error Cannot find program "$(p)"))) 46 | 47 | # Set some basic variables. These are quick to set; we set additional ones 48 | # using the dependency named "vars" but only when the others are needed. 49 | 50 | name := $(strip $(shell jq -r .name codemeta.json)) 51 | progname := $(strip $(shell jq -r '.identifier | ascii_downcase' codemeta.json)) 52 | version := $(strip $(shell jq -r .version codemeta.json)) 53 | repo := $(shell git ls-remote --get-url | sed -e 's/.*:\(.*\).git/\1/') 54 | repo_url := https://github.com/$(repo) 55 | branch := $(shell git rev-parse --abbrev-ref HEAD) 56 | today := $(shell date "+%F") 57 | 58 | 59 | # Print help if no command is given ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 60 | 61 | # The help scheme works by looking for lines beginning with "#:" above make 62 | # targets in this file. Originally based on code posted to Stack Overflow on 63 | # 2019-11-28 by Richard Kiefer at https://stackoverflow.com/a/59087509/743730 64 | 65 | #: Print a summary of available commands. 66 | help: 67 | @echo "This is the Makefile for $(bright)$(name)$(reset)." 68 | @echo "Available commands:" 69 | @echo 70 | @grep -B1 -E "^[a-zA-Z0-9_-]+\:([^\=]|$$)" $(MAKEFILE_LIST) \ 71 | | grep -v -- -- \ 72 | | sed 'N;s/\n/###/' \ 73 | | sed -n 's/^#: \(.*\)###\(.*\):.*/$(color)\2$(reset):###\1/p' \ 74 | | column -t -s '###' 75 | 76 | #: Summarize how to do a release using this makefile. 
77 | instructions:; 78 | $(info $(instructions_text)) 79 | 80 | define instructions_text = 81 | ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ 82 | ┃ Steps for doing a release ┃ 83 | ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ 84 | 1. Run $(color)make lint$(reset), fix any problems, and commit any changes. 85 | 2. Run $(color)make tests$(reset) fix any problems, and commit any changes. 86 | 3. Update the version number in codemeta.json. 87 | 4. Update CHANGES.md if needed & commit changes. 88 | 5. Check the output of $(color)make report$(reset) (ignoring current id & DOI). 89 | 6. Run $(color)make clean$(reset). 90 | 7. Run $(color)make release$(reset); after some steps, it will open a file 91 | in your editor to write GitHub release notes. Copy the notes 92 | from CHANGES.md. Save the opened file to finish the process. 93 | 8. Wait for the IGA GitHub Action to finish uploading to InvenioRDM 94 | 9. Check that everything looks okay with the GitHub release at 95 | $(link)$(repo_url)/releases$(reset) 96 | endef 97 | 98 | 99 | # Gather additional values we sometimes need ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 100 | 101 | # These variables take longer to compute, and for some actions like "make help" 102 | # they are unnecessary and annoying to wait for. 103 | vars: doi-vars 104 | $(eval url := $(strip $(shell jq -r '.url // empty' codemeta.json))) 105 | $(eval url := $(or $(url),$(repo_url))) 106 | $(eval license := $(strip $(shell jq -r .license codemeta.json))) 107 | $(eval desc := $(strip $(shell jq -r .description codemeta.json))) 108 | $(eval author := \ 109 | $(strip $(shell jq -r '.author[0].givenName + " " + .author[0].familyName' codemeta.json))) 110 | $(eval email := $(strip $(shell jq -r .author[0].email codemeta.json))) 111 | 112 | # If this software isn't getting archived in InvenioRDM, the next rule will 113 | # leave rdm_id & new_doi undefined. Other rules in this makefile test for that. 
114 | .SILENT: doi-vars 115 | doi-vars: 116 | $(eval rdm_link := \ 117 | $(strip $(shell jq -r '.relatedLink | if type == "array" then .[0] else . end' codemeta.json))) 118 | ifneq ($(rdm_link),null) 119 | $(eval rdm_url := $(shell cut -d'/' -f 1-3 <<< $(rdm_link))) 120 | $(eval rdm_id := $(shell sed -r 's|.*/(.*)$$|\1|' <<< $(rdm_link))) 121 | $(eval vers_url := $(rdm_url)/api/records/$(rdm_id)/versions/latest) 122 | $(eval latest_doi := $(shell curl -L -s $(vers_url) | jq -r .pids.doi.identifier)) 123 | endif 124 | 125 | #: Print variables set in this Makefile from various sources. 126 | .SILENT: report 127 | report: vars 128 | echo "$(color)name$(reset) = $(name)" | expand -t 21 129 | echo "$(color)progname$(reset) = $(progname)" | expand -t 21 130 | echo "$(color)url$(reset) = $(url)" | expand -t 21 131 | echo "$(color)desc$(reset) = $(desc)" | expand -t 21 132 | echo "$(color)version$(reset) = $(version)" | expand -t 21 133 | echo "$(color)author$(reset) = $(author)" | expand -t 21 134 | echo "$(color)email$(reset) = $(email)" | expand -t 21 135 | echo "$(color)license$(reset) = $(license)" | expand -t 21 136 | echo "$(color)url$(reset) = $(url)" | expand -t 21 137 | echo "$(color)repo url$(reset) = $(repo_url)" | expand -t 21 138 | echo "$(color)branch$(reset) = $(branch)" | expand -t 21 139 | echo "$(color)rdm_id$(reset) = $(rdm_id)" | expand -t 21 140 | echo "$(color)latest_doi$(reset) = $(latest_doi)" | expand -t 21 141 | 142 | 143 | # make lint & make test ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 144 | 145 | #: Run code and other files through linters. 146 | lint: 147 | markdownlint $(shell find . -name '*.md') 148 | yamllint CITATION.cff $(shell find . -name '*.yml') 149 | jsonlint -q codemeta.json 150 | 151 | #: Run unit tests and coverage tests. 152 | test tests:; 153 | $(error "There are no tests in this repo yet. 
They need to be added.") 154 | 155 | 156 | # make release ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 157 | 158 | #: Make a release on GitHub. 159 | release: | test-branch confirm-release release-on-github wait-on-iga update-doi 160 | 161 | test-branch: 162 | ifneq ($(branch),main) 163 | $(error Current git branch != main. Merge changes into main first!) 164 | endif 165 | 166 | confirm-release: 167 | @read -p "Have you updated the version number? [y/N] " ans && : $${ans:=N} ;\ 168 | if [ $${ans::1} != y ]; then \ 169 | echo ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ 170 | echo ┃ Update the version number in codemeta.json first. ┃ 171 | echo ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ 172 | exit 1 173 | fi 174 | 175 | update-all: update-meta update-citation update-example 176 | 177 | # Note that this doesn't replace "version" in codemeta.json, because that's the 178 | # variable from which this makefile gets its version number in the first place. 
179 | update-meta: 180 | @sed -i .bak -e '/"datePublished"/ s|: ".*"|: "$(today)"|' codemeta.json 181 | @echo codemeta.json updated ✨ 182 | 183 | update-citation: vars 184 | @sed -i .bak -e '/^url:/ s|:.*|: $(url)|' CITATION.cff 185 | @sed -i .bak -e '/^title:/ s|:.*|: $(name)|' CITATION.cff 186 | @sed -i .bak -e '/^version:/ s|:.*|: $(version)|' CITATION.cff 187 | @sed -i .bak -e '/^abstract:/ s|:.*|: $(desc)|' CITATION.cff 188 | @sed -i .bak -e '/^license-url:/ s|:.*|: $(license)|' CITATION.cff 189 | @sed -i .bak -e '/^date-released:/ s|:.*|: $(today)|' CITATION.cff 190 | @sed -i .bak -e '/^repository-code:/ s|:.*|: $(repo_url)|' CITATION.cff 191 | @echo CITATION.cff updated ✨ 192 | 193 | update-example: 194 | @sed -i .bak -E -e "/.* version [0-9].[0-9]+.[0-9]+/ s/[0-9].[0-9]+.[0-9]+/$(version)/" sample-workflow.yml 195 | @sed -i .bak -E -e "/.* version [0-9].[0-9]+.[0-9]+/ s/[0-9].[0-9]+.[0-9]+/$(version)/" README.md 196 | @echo sample-workflow.yml updated ✨ 197 | 198 | edited := codemeta.json CITATION.cff sample-workflow.yml README.md 199 | 200 | commit-updates: 201 | git add $(edited) 202 | git diff-index --quiet HEAD $(edited) || \ 203 | git commit -m"chore: update stored version number" $(edited) 204 | 205 | release-on-github: | update-all commit-updates 206 | $(eval tmp_file := $(shell mktemp /tmp/release-notes-$(progname).XXXX)) 207 | $(eval tag := "v$(shell tr -d '()' <<< "$(version)" | tr ' ' '-')") 208 | git push -v --all 209 | git push -v --tags 210 | @$(info ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓) 211 | @$(info ┃ Write release notes in the file that gets opened in your ┃) 212 | @$(info ┃ editor. Close the editor to complete the release process. 
┃) 213 | @$(info ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛) 214 | sleep 2 215 | $(EDITOR) $(tmp_file) 216 | gh release create $(tag) -t "Release $(version)" -F $(tmp_file) 217 | gh release edit $(tag) --latest 218 | 219 | wait-on-iga: 220 | @$(info ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓) 221 | @$(info ┃ Wait for the archiving workflow to finish on GitHub ┃) 222 | @$(info ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛) 223 | sleep 2 224 | $(eval pid := $(shell gh run list --workflow=iga.yml --limit 1 | tail -1 | awk -F $$'\t' '{print $$7}')) 225 | gh run watch $(pid) 226 | $(MAKE) post-release 227 | 228 | print-next-steps: vars 229 | @$(info ┏━━━━━━━━━━━━┓) 230 | @$(info ┃ Next steps ┃) 231 | @$(info ┗━━━━━━━━━━━━┛) 232 | @$(info Next steps: ) 233 | @$(info 1. Check $(repo_url)/releases ) 234 | @$(info 2. Run "make post-release" ) 235 | @$(info 3. Update the GitHub Marketplace version ) 236 | 237 | # We only do the following steps if this is software we archive in InvenioRDM. 238 | # 239 | # The DOI badge in README.md uses a URL that gets redirected automatically by 240 | # InvenioRDM to the latest release. However, the DOI in CITATION.cff and the 241 | # field relatedLink in codemeta.json need to point to the release we just made. 
242 | 243 | post-release: update-citation-doi update-codemeta-link push-updates 244 | 245 | update-citation-doi: vars 246 | @if [ -n "$(latest_doi)" ]; then 247 | sed -i .bak -e '/doi:/ s|doi: .*|doi: $(latest_doi)|' CITATION.cff 248 | git add CITATION.cff 249 | git diff-index --quiet HEAD CITATION.cff || \ 250 | git commit -m"chore: update DOI in CITATION.cff" CITATION.cff 251 | fi 252 | 253 | update-codemeta-link: vars 254 | @if [ -n "$(latest_doi)" ]; then 255 | $(eval new_id := $(shell cut -f'2' -d'/' <<< $(latest_doi))) 256 | $(eval new_link := $(rdm_url)/records/$(new_id)) 257 | @sed -i .bak -e '/"relatedLink"/ s|: ".*"|: "$(new_link)"|' codemeta.json 258 | git add codemeta.json 259 | git diff-index --quiet HEAD codemeta.json || \ 260 | git commit -m"chore: update relatedLink in codemeta.json" codemeta.json 261 | fi 262 | # latest_doi is only set when the doi-vars recipe runs, so it must be tested in the shell at run time; a parse-time ifdef never sees it. 263 | push-updates: vars 264 | @if [ -n "$(latest_doi)" ]; then 265 | git push -v --all 266 | fi 267 | 268 | 269 | # Cleanup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 270 | 271 | #: Clean this directory of temporary and backup files. 272 | clean: clean-release 273 | @echo 🧼 Cleaned! 🧽 274 | # Remove the .bak files left behind by the "sed -i .bak" edits made in the update-* rules. 275 | clean-release:; 276 | rm -rf codemeta.json.bak CITATION.cff.bak README.md.bak sample-workflow.yml.bak 277 | 278 | 279 | # Miscellaneous directives ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 280 | 281 | #: Print a random joke from https://icanhazdadjoke.com/. 282 | joke: 283 | @echo "$(shell curl -s https://icanhazdadjoke.com/)" 284 | 285 | # Color codes used in messages.
286 | color := $(shell tput bold; tput setaf 6) 287 | bright := $(shell tput bold; tput setaf 15) 288 | dim := $(shell tput setaf 66) 289 | link := $(shell tput setaf 111) 290 | reset := $(shell tput sgr0) 291 | 292 | .PHONY: help vars report release test-branch test tests update-all \ 293 | update-init update-meta update-citation update-example commit-updates \ 294 | update-setup release-on-github print-instructions update-doi \ 295 | packages test-pypi pypi clean really-clean completely-clean \ 296 | clean-dist really-clean-dist clean-build really-clean-build \ 297 | clean-release clean-other 298 | 299 | .SILENT: clean clean-dist clean-build clean-release clean-other really-clean \ 300 | really-clean-dist really-clean-build completely-clean vars 301 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BalerA baler making bales of hay on a farm 2 | 3 | Baler (bad link reporter) is a [GitHub Action](https://docs.github.com/actions) that tests the URLs inside Markdown files of your GitHub repository. If any of them are invalid, Baler automatically opens a GitHub issue to report the problem(s). 
4 | 5 | [![License](https://img.shields.io/badge/License-BSD--like-lightgrey?style=flat-square)](https://github.com/caltechlibrary/baler/blob/main/LICENSE) 6 | ![GitHub](https://img.shields.io/badge/GitHub-%23000000.svg?logo=github&label=Actions&logoColor=white&style=flat-square) 7 | [![Latest release](https://img.shields.io/github/v/release/caltechlibrary/baler.svg?color=b44e88&label=Release&style=flat-square)](https://github.com/caltechlibrary/baler/releases) 8 | [![DOI](https://img.shields.io/badge/dynamic/json.svg?label=DOI&style=flat-square&colorA=gray&colorB=navy&query=$.pids.doi.identifier&uri=https://data.caltech.edu/api/records/0qetp-p3g60/versions/latest)](https://data.caltech.edu/records/0qetp-p3g60/latest) 9 | [![GitHub marketplace](https://img.shields.io/badge/marketplace-Baler-green?logo=github&color=e4722f&style=flat-square&label=Marketplace)](https://github.com/marketplace/actions/baler-bad-link-reporter) 10 | 11 | 12 | ## Table of contents 13 | 14 | * [Introduction](#introduction) 15 | * [Installation](#installation) 16 | * [Quick start](#quick-start) 17 | * [Usage](#usage) 18 | * [Known issues and limitations](#known-issues-and-limitations) 19 | * [Getting help](#getting-help) 20 | * [Contributing](#contributing) 21 | * [License](#license) 22 | * [Acknowledgments](#acknowledgments) 23 | 24 | 25 | ## Introduction 26 | 27 | The URLs of hyperlinks inside Markdown files may be invalid for any number of reasons, including inaccuracies, typographical errors, and destinations that disappear over time. Manually testing the validity of links on a regular basis is laborious and error-prone. This is clearly a situation where automation helps, and that's where Baler comes in. 28 | 29 | Baler (Bad link reporter) is a [GitHub Action](https://docs.github.com/actions) for automatically testing the links inside Markdown files in your repository, and filing issue reports when problems are found. 
It's designed to run when changes are pushed to a repository as well as on a regular schedule; the latter helps detect when previously-valid links stop working because of [link rot](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0115253) or other problems. Though it’s not the only GitHub Action available for this purpose, some features set Baler apart from the others: 30 | 31 | * _Simplicity_: a single, short workflow handles both testing files and opening an issue. 32 | * _Smart issue handling_: before it opens a new issue, Baler looks at open issues in the repository. If any previously reported the same URLs, Baler doesn't open a new issue. 33 | * _Informative issue reports_: the issues opened by Baler not only list the URLs that failed; they also describe the reasons for the failures. 34 | * _Simple exclusion list_: fake URLs meant as examples, or real URLs that nevertheless fail when tested from GitHub’s cloud runners, can be skipped by adding them to a file in your repository. 35 | 36 | Baler lets you follow the proverb [“make hay while the sun shines”](https://grammarist.com/make-hay/) – take advantage of opportunities (in this case, easy automated testing) when they’re available. 37 | 38 | 39 | ## Installation 40 | 41 | To use Baler, you need to create a GitHub Actions workflow file in your repository. Follow these simple steps: 42 | 43 | 1. In the main branch of your repository, create a `.github/workflows` directory if one does not already exist. 44 | 2. In the `.github/workflows` directory, create a file named `bad-link-reporter.yml`. 45 | 3. Copy and paste the [contents of `sample-workflow.yml`](https://raw.githubusercontent.com/caltechlibrary/baler/main/sample-workflow.yml) into your `bad-link-reporter.yml` file: 46 | 47 | ```yml 48 | # GitHub Actions workflow for Baler (BAd Link reportER) version 2.0.4. 
49 | # This is available as the file "sample-workflow.yml" from the source 50 | # code repository for Baler: https://github.com/caltechlibrary/baler 51 | 52 | name: Bad Link Reporter 53 | 54 | # Configure this section ───────────────────────────────────────────── 55 | 56 | env: 57 | # Files to check. (Put patterns on separate lines, no leading dash.) 58 | files: | 59 | **/*.md 60 | 61 | # Label assigned to issues created by this workflow: 62 | labels: bug 63 | 64 | # Number of previous issues to check for duplicate reports. 65 | lookback: 10 66 | 67 | # Time (sec) to wait on an unresponsive URL before trying once more. 68 | timeout: 15 69 | 70 | # Optional file containing a list of URLs to ignore, one per line: 71 | ignore: .github/workflows/ignored-urls.txt 72 | 73 | on: 74 | schedule: # Cron syntax is: "min hr day-of-month month day-of-week" 75 | - cron: 00 04 * * 1 76 | push: 77 | paths: ['**.md'] 78 | workflow_dispatch: 79 | 80 | # The rest of this file should be left as-is ───────────────────────── 81 | 82 | run-name: Test links in Markdown files 83 | jobs: 84 | Baler: 85 | name: Link checker and reporter 86 | runs-on: ubuntu-latest 87 | permissions: 88 | issues: write 89 | steps: 90 | - uses: caltechlibrary/baler@v2 91 | with: 92 | files: ${{github.event.inputs.files || env.files}} 93 | labels: ${{github.event.inputs.labels || env.labels}} 94 | ignore: ${{github.event.inputs.ignore || env.ignore}} 95 | timeout: ${{github.event.inputs.timeout || env.timeout}} 96 | lookback: ${{github.event.inputs.lookback || env.lookback}} 97 | ``` 98 | 99 | 4. Save the file, add it to your git repository, and commit the changes. 100 | 5. (If you did the steps above outside of GitHub) Push your repository changes to GitHub. 101 | 102 | 103 | ## Quick start 104 | 105 | Once the workflow is installed in your repository on GitHub, Baler will run whenever a configured trigger event occurs. 
The trigger conditions are specified in the `on` statement of the `bad-link-reporter.yml` workflow file. The default workflow sets the conditions to be pushes that change Markdown files, a scheduled run once a week, and manual execution. 106 | 107 | Right after installing the workflow in your GitHub repository, it's wise to do a manual test run in order to check that things are working as expected. 108 | 109 | 1. Go to the _Actions_ tab in your repository and click on the workflow named "Bad Link Reporter" in the sidebar on the left: 110 |

Screenshot of GitHub actions workflow list 111 |

112 | 2. In the page shown by GitHub next, click the Run workflow button in the right-hand side of the blue strip: 113 |

Screenshot of GitHub Actions workflow run button 114 |

115 | 3. In the pull-down, click the green Run workflow button near the bottom: 116 |

Screenshot of GitHub Actions workflow run menu

117 | 4. Refresh the web page and a new line will be shown named after your workflow file: 118 |

Screenshot of GitHub Actions running

119 | 5. Click the title of that running workflow to make GitHub show the progress and results. 120 | 121 | At the conclusion of the run, if any invalid or unreachable URLs were found in your repository's Markdown files, Baler will have opened a new issue to report the problems. If Baler found no problems, it will only print a message to that effect in the job results page. 122 | 123 | 124 | ## Usage 125 | 126 | Baler’s behavior is controlled by the `bad-link-reporter.yml` workflow file. There are two aspects of the behavior: (a) events that cause Baler to run, and (b) characteristics that can be controlled by setting parameter values in the workflow file. 127 | 128 | 129 | ### Triggers that cause workflow execution 130 | 131 | The default triggers in the sample workflow are: 132 | 133 | * push requests that involve `.md` files 134 | * weekly scheduled runs 135 | * manual dispatch execution of the workflow 136 | 137 | Triggering the workflow on pushes is typically the expected behavior: when you save changes to a file, you would probably like to be notified if it contains a broken link. Triggering on pushes also supports users who edit files on GitHub and use pull requests to add their changes, because it ensures the workflow is only executed once and not twice (which would happen if it also used `pull_request` as a trigger). When triggered this way, Baler only tests links in `.md` files that actually changed in the push compared to the versions of those files in the destination branch. 138 | 139 | Triggering on pushes does have a downside: if you make several edits in a row, the workflow will run on each push (or each file save, if editing on GitHub). If there is a bad link in the file, it could lead to multiple identical issues being filed – except that it won't, because Baler is smart enough to check if a past issue already reported the same URLs. 
So although each push will trigger a workflow run, no new issues will be opened if nothing has changed in terms of bad links. 140 | 141 | A once-a-week cron/scheduled execution is an important way to find links that worked in the past but stopped working due to link rot or other problems. If the Markdown files in your repository are not edited for an extended period of time, no pushes will occur to cause Baler to run; thus, it makes sense to run it periodically irrespective of editing activity, to make sure that links in the Markdown files are still valid. When invoked by cron, the workflow tests all `.md` files matched by the pattern defined by `files`, regardless of whether the files were modified in the most recent commit. 142 | 143 | Finally, the manual dispatch lets you start the workflow manually. When invoked this way, the workflow again tests all `.md` files matched by the pattern defined by `files`, regardless of whether the files were modified in the latest commit. Rationale: if you're invoking the action manually, you probably intend to test all the files as they exist in the repository now, and not just the files changed in the last commit. 144 | 145 | For more information about schedule-based execution, please see the GitHub document ["Workflow syntax for GitHub Actions"](https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#onschedule). For more information about other triggers you can use, please see the GitHub document ["Triggering a workflow"](https://docs.github.com/en/actions/using-workflows/triggering-a-workflow). 146 | 147 | 148 | ### Parameters that affect Baler's behavior 149 | 150 | A few parameters control the behavior of Baler, as described below. 151 | 152 | #### `files` 153 | 154 | The input parameter `files` sets the file name pattern that identifies the Markdown files Baler examines. 
A `*` in a pattern matches zero or more characters but does not match `/`; a `**` in a pattern matches zero or more characters including `/`. For example, the following matches any `.md` file anywhere at the top level and in any subdirectory: 155 | 156 | ```yml 157 | files: | 158 | **/*.md 159 | ``` 160 | 161 | The following matches only top-level `.md` files and those at or below the top-level directory `docs`, but not other subdirectories: 162 | 163 | ```yml 164 | files: | 165 | *.md 166 | docs/**/*.md 167 | ``` 168 | 169 | 170 | #### `labels` 171 | 172 | When Baler opens a new issue after it finds problems, it can optionally assign a label to the issue. The value of this input parameter should be the names of one or more labels that are already defined in the GitHub repository's issue system. Multiple issue labels can be written, with commas between them. The default label is `bug`. 173 | 174 | #### `lookback` 175 | 176 | If Baler finds invalid URLs, then before it opens an issue, it checks the previous open issues in the repository and compares the reports. If a previous issue looks like a duplicate of what the new issue _would_ be, Baler does not create a new issue. The number of previous issues to check is determined by the input parameter `lookback`. The maximum number is 100. 177 | 178 | #### `timeout` 179 | 180 | The time (in seconds) that Baler should wait on a URL that doesn't respond before giving up and trying to reach the URL one more time. Baler will wait 15 seconds between attempts. It will try an unresponsive URL a total of two times before reporting a timeout. 181 | 182 | #### `ignore` 183 | 184 | The value of the input parameter `ignore` should be a plain text file containing URLs that Baler should ignore. The default value is `.github/workflows/ignored-urls.txt`. The file does not have to exist; if it doesn't exist, this parameter simply has no effect. The parameter can only reference a file in the repository and not an external file.
Each URL should be written alone on a separate line of the file. They can be written as regular expressions; e.g., `https://example\.(com|org)`. 185 | 186 | Telling Baler to ignore certain URLs is useful if some of your files contain fake URLs used as examples in documentation, or when certain real URLs are repeatedly flagged as unreachable when the workflow runs in GitHub's computing environment (see [next section below](#known-issues-and-limitations)). 187 | 188 | 189 | ## Known issues and limitations 190 | 191 | Baler is designed to test only URLs that use the scheme `https` or `http`. 192 | 193 | When Baler runs on GitHub, it will sometimes mysteriously report a link as unreachable even though you can access it without trouble from your local computer. It's not yet clear what causes this. My current best guess is that it's due to network routing or DNS issues in the environment where the link checker actually runs (i.e., GitHub's computing environment). 194 | 195 | Baler may take a long time to run if one or more links in a file are timing out. The reason is that it has to wait for a timeout to, well, _time out_, and then wait some more before trying one more time (and then wait for another timeout period if the retry fails). It has to do this for every link that times out. The more URLs that do this, the longer the overall process will take. If you encounter links that time out when Baler runs as a GitHub Action but that resolve properly when you visit them from your browser, you can add those URLs to the ignore list (see [above](#ignore)) to skip testing them in the future. 196 | 197 | Adding problematic URLs to the `ignore` file is a simple workaround, but there is a downside. If they are never tested, then Baler can't report if they really _do_ go stale in the future. 
A better solution would be to implement an adaptive algorithm by which Baler remembers timeout failures and stops testing the problematic URLs only for a time, then resumes testing them again automatically in the future. Unfortunately, this can't be implemented until the problem described in the first paragraph (that some URLs time out _only_ when Baler runs on GitHub) is resolved. 198 | 199 | Finally, some sites deliberately block access from GitHub, presumably in an attempt to block scrapers or bots or other processes running from people's GitHub actions. This will typically show up in Baler's reports as HTTP code 403, "Failed: Network error: Forbidden". The only thing to do in such cases is to double-check that the URLs are valid from your local computer, and if they are, add them to the ignore list (see [above](#ignore)). 200 | 201 | 202 | ## Getting help 203 | 204 | If you find an issue, please submit it in [the GitHub issue tracker](https://github.com/caltechlibrary/baler/issues) for this repository. 205 | 206 | 207 | ## Contributing 208 | 209 | Your help and participation in enhancing Baler is welcome! Please visit the [guidelines for contributing](https://github.com/caltechlibrary/baler/blob/main/CONTRIBUTING.md) for some tips on getting started. 210 | 211 | 212 | ## License 213 | 214 | Software produced by the Caltech Library is Copyright © 2024 California Institute of Technology. This software is freely distributed under a BSD-style license. Please see the [LICENSE](LICENSE) file for more information. 215 | 216 | 217 | ## Acknowledgments 218 | 219 | The image of a baler used at the top of this README file was obtained from [Wikimedia Commons](https://commons.wikimedia.org/wiki/File:Baling_Small_Square_Bales_with_Accumulator.jpg) on 2023-12-11. 
The photo was taken and contributed by [Glendon Kuhns](https://commons.wikimedia.org/wiki/User:Gkuhns) and made available under the [Creative Commons CC0 1.0 license](https://commons.wikimedia.org/wiki/File:Baling_Small_Square_Bales_with_Accumulator.jpg#Licensing). 220 | 221 | Numerous other broken link checkers similar to Baler can be found in GitHub. Some of them served as sources of ideas for what to do in Baler, and I want to acknowledge this debt. The following are notable programs that I looked at (and if you are the author of another one not listed here, please don't feel slighted – I probably missed it simply due to limited time, inadequate or incomplete search, or lack of serendipity): 222 | 223 | * [Broken Link Checker Action](https://github.com/marketplace/actions/broken-link-checker-action) 224 | * [GitHub Repo README.md Dead Link Finder](https://github.com/MrCull/GitHub-Repo-ReadMe-Dead-Link-Finder) 225 | * [linksnitch](https://github.com/marketplace/actions/linksnitch-action) 226 | * [Markdown link check](https://github.com/gaurav-nelson/github-action-markdown-link-check) 227 | * [md-links](https://github.com/raulingg/md-links) 228 | * [My Broken Link Checker](https://github.com/marketplace/actions/my-broken-link-checker) 229 | 230 | Baler makes use of the following excellent software packages and GitHub Actions: 231 | 232 | * [lychee](https://github.com/lycheeverse/lychee) – fast, async, stream-based link checker written in Rust 233 | * [peter-evans/create-issue-from-file](https://github.com/peter-evans/create-issue-from-file) – A GitHub action to create an issue 234 | * [tj-actions/changed-files](https://github.com/tj-actions/changed-files) – GitHub action to retrieve files and directories 235 | * [tj-actions/glob](https://github.com/tj-actions/glob) – GitHub action to match file glob patterns 236 | 237 | This work was funded by the California Institute of Technology Library. 238 | 239 |
240 |
241 | 242 | Caltech logo 243 | 244 |
245 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | Support 2 | ======= 3 | 4 | Thank you for your interest in this project. If you are experiencing problems or have questions, the following are the preferred methods of reaching someone: 5 | 6 | 1. Report a new issue using the [issue tracker](https://github.com/caltechlibrary/baler/issues). 7 | 2. Send email to the Caltech Library: [helpdesk@library.caltech.edu](mailto:helpdesk@library.caltech.edu). 8 | 3. Send email to an individual involved in the project. People's names appear in the top-level `README.md` file in the source code repository. 9 | -------------------------------------------------------------------------------- /action.yml: -------------------------------------------------------------------------------- 1 | # Summary: GitHub Action definition file for Baler – the Bad Link Reporter. 2 | # 3 | # Copyright 2024 California Institute of Technology. 4 | # License: Modified BSD 3-clause – see file "LICENSE" in the project website. 5 | # Website: https://github.com/caltechlibrary/baler 6 | 7 | name: Baler – BAd Link reportER 8 | description: Test URLs in Markdown files and open an issue for problems found 9 | 10 | inputs: 11 | 12 | # User-level parameters. 13 | 14 | files: 15 | description: Regexes for files to test. Can be comma-separated list. 16 | default: '*.md' 17 | required: true 18 | type: string 19 | ignore: 20 | description: File containing a list of URLs to ignore, one per line. 21 | default: .github/workflows/ignored-urls.txt 22 | required: false 23 | type: string 24 | labels: 25 | description: Labels assigned to GitHub issues created by this workflow. 26 | default: bug 27 | required: false 28 | type: string 29 | lookback: 30 | description: How many previous issues to check for duplicate reports. 
31 | default: 5 32 | required: false 33 | type: number 34 | timeout: 35 | description: Number of seconds to wait on unresponsive URL. 36 | default: 15 37 | required: false 38 | type: number 39 | 40 | # Advanced parameters. 41 | 42 | debug: 43 | description: Run in debug mode. 44 | default: false 45 | required: false 46 | type: boolean 47 | 48 | # Internal parameters. 49 | 50 | lyVersion: 51 | description: Version of Lychee link checker program. 52 | default: 0.14.1 53 | required: false 54 | lyDownloadBase: 55 | description: Base download URL for Lychee binaries. 56 | default: https://github.com/lycheeverse/lychee/releases/download 57 | required: false 58 | lyRetryWaitTime: 59 | description: Time to wait between retries, in seconds. 60 | default: 10 61 | required: false 62 | 63 | # Explanation for how different GitHub events are handled, and why: 64 | # 65 | # workflow_dispatch: Tests all .md files matched by ${{inputs.files}}, 66 | # regardless of whether they have been modified in the latest commit. 67 | # Rationale: if you're invoking the action manually, you probably intend 68 | # to test the files as they exist in the repository now, and not relative 69 | # to a past commit or other past event. 70 | # 71 | # schedule: Tests all .md files matched by ${{inputs.files}}, regardless 72 | # of whether they have been modified in the latest commit. Rationale: 73 | # (1) it wouldn't make sense to have a nightly run test only the files 74 | # modified in the latest commit, because a previous commit might also 75 | # have modified some Markdown files, which means the latest commit is 76 | # not a good reference point for this purpose; and (2) regularly testing 77 | # all Markdown files, regardless of recent edits, is good for discovering 78 | # links that worked in the past but stopped working due to (e.g.) link 79 | # rot. In case #2, there are no local file changes to trigger on. 
80 | # 81 | # all other events: Test the .md files that are being changed as a result of 82 | # the request, compared to the versions of the files in the destination 83 | # branch. The trigger/invocation is controlled by the trigger rules of 84 | # the invoking workflow. (So, for example, in the sample workflow 85 | # provided for users, push events result in testing .md files that were 86 | # modified by the push.) 87 | 88 | runs: 89 | using: composite 90 | steps: 91 | - name: Set preliminary run-time configuration variables. 92 | shell: bash 93 | run: | 94 | if [[ "${{github.event_name}}" == "push" ]]; then 95 | echo "depth_needed=2" >> $GITHUB_ENV 96 | elif [[ "${{github.event_name}}" == "pull_request" ]]; then 97 | echo "depth_needed=1" >> $GITHUB_ENV 98 | else 99 | echo "depth_needed=0" >> $GITHUB_ENV 100 | fi 101 | 102 | - name: Check out source repository. 103 | uses: actions/checkout@v4 104 | with: 105 | fetch-depth: ${{env.depth_needed}} 106 | persist-credentials: true 107 | 108 | - name: Get list of all files matching the desired pattern. 109 | uses: tj-actions/glob@v20 110 | id: match-files 111 | with: 112 | files: ${{inputs.files}} 113 | 114 | - name: Check if any files matching the pattern have been modified. 115 | id: test-changes 116 | uses: tj-actions/changed-files@v42 117 | with: 118 | files: ${{inputs.files}} 119 | 120 | - name: Construct list of files to act on. 121 | shell: bash 122 | run: | 123 | event=${{github.event_name}} 124 | if [[ "$event" == "workflow_dispatch" || "$event" == "schedule" ]]; then 125 | echo "considered=(${{steps.match-files.outputs.paths}})" >> $GITHUB_ENV 126 | elif [[ "${{steps.test-changes.outputs.any_changed}}" == "true" ]]; then 127 | echo "considered=(${{steps.test-changes.outputs.all_changed_files}})" >> $GITHUB_ENV 128 | fi 129 | 130 | - name: Decide whether to proceed. 
131 | shell: bash 132 | run: | 133 | if [[ "${{steps.test-changes.outputs.any_changed}}" == "true" \ 134 | || "${{github.event_name}}" =~ ^(workflow_dispatch|schedule)$ ]]; then 135 | echo "Continuing workflow." 136 | echo "continue=true" >> $GITHUB_ENV 137 | else 138 | echo "Stopping workflow." 139 | 140 | # Report that we have nothing to do. 141 | msg="

Relevant files are unchanged – nothing to do

142 |

Link testing was skipped because the relevant 143 | files have not changed. The files that were considered are the 144 | following:
${{inputs.files}}.

" 145 | echo "$msg" >> $GITHUB_STEP_SUMMARY 146 | 147 | # Skip the rest of the workflow. 148 | echo "continue=false" >> $GITHUB_ENV 149 | fi 150 | 151 | - name: Install link checker program. 152 | if: env.continue == 'true' 153 | shell: bash 154 | run: | 155 | echo "Downloading and installing lychee binary." 156 | tarball=lychee-v${{inputs.lyVersion}}-x86_64-unknown-linux-gnu.tar.gz 157 | # First clean up artifacts from previous run in case it crashed. 158 | rm -rf $tarball lychee 159 | curl -fsSLO "${{inputs.lyDownloadBase}}/v${{inputs.lyVersion}}/$tarball" 160 | tar -xzf $tarball 161 | rm -f $tarball 162 | install -t "$HOME/.local/bin" -D lychee 163 | rm lychee 164 | echo "$HOME/.local/bin" >> "$GITHUB_PATH" 165 | 166 | - name: Configure link checker. 167 | if: env.continue == 'true' 168 | shell: bash 169 | run: | 170 | # Copy the URL ignore list (input "ignore") to the name lychee expects. 171 | if [ -e "${{inputs.ignore}}" ]; then 172 | cp -f "${{inputs.ignore}}" .lycheeignore 173 | echo "Using file of URLs to ignore: ${{inputs.ignore}}" 174 | else 175 | echo "File of URLs to ignore does not exist: ${{inputs.ignore}}" 176 | fi 177 | # If debug is on, shorten timeouts. 178 | if [[ "${{inputs.debug}}" == "true" ]]; then 179 | timeout=1 180 | retry_wait_time=1 181 | else 182 | timeout=${{inputs.timeout}} 183 | retry_wait_time=${{inputs.lyRetryWaitTime}} 184 | fi 185 | # Create configuration file for lychee. 186 | # Note: the name "max_retries" implies that it controls how many times 187 | # a URL is tested AFTER an initial failure (that's the meaning of 188 | # "retry", after all). However, I looked at the lychee code, and in 189 | # fact, max_retries is the total number of times it tests the URL. 
190 | lychee_config=.git/lychee.toml 191 | cat <<EOF > $lychee_config 192 | scheme = ["https", "http"] 193 | accept = [200, 201, 202, 203, 204, 206, 302, 429] 194 | timeout = $timeout 195 | retry_wait_time = $retry_wait_time 196 | max_retries = 2 197 | insecure = true 198 | skip_missing = true 199 | include_mail = false 200 | include_verbatim = true 201 | exclude_all_private = true 202 | no_progress = true 203 | cache = false 204 | EOF 205 | # Set variables used in later steps. 206 | echo "lychee_config=$lychee_config" >> $GITHUB_ENV 207 | echo "lychee_output=.git/lychee-report.md" >> $GITHUB_ENV 208 | 209 | # Implementation notes: 210 | # - This purposefully doesn't use lychee's caching facility, because 211 | # turning it on results in lychee NOT reporting the ORIGINAL error when 212 | # a cached URL is encountered. This is very unhelpful in this context. 213 | # 214 | # - More information about optional settings for the lychee-action GHA 215 | # can be found at https://github.com/lycheeverse/lychee-action 216 | # 217 | # - The documented exit codes for lychee are as follows (based on 218 | # https://github.com/lycheeverse/lychee#exit-codes as of 2023-12-14): 219 | # 0 = success (links checked successfully or skipped as configured) 220 | # 1 = missing inputs, unexpected runtime failure, or config error 221 | # 2 = link check failures (if any non-excluded link failed the check) 222 | # 3 = errors in the config file 223 | 224 | - name: Run link checker to test URLs inside Markdown files. 225 | if: env.continue == 'true' 226 | shell: bash {0} 227 | continue-on-error: true 228 | run: | 229 | lychee_tmp="$(mktemp)" 230 | changed_files=${{env.considered}} 231 | lychee -c ${{env.lychee_config}} -o $lychee_tmp -f markdown ${changed_files[@]} 232 | exit_code=$? 
233 | echo "lychee_exit_code=$exit_code" >> $GITHUB_ENV 234 | if [[ $exit_code == 2 ]]; then 235 | sed -e 's/^## Summary//' \ 236 | -e 's/^|.*//g' \ 237 | -e 's/^## Errors per input//' \ 238 | -e 's/{.*$//g' \ 239 | -e 's/| Failed:/– Failed:/g' \ 240 | -e 's/| Timeout:/– Timeout:/g' \ 241 | -e 's/\(.*\)\[\(.*\)\]\(.*\)/\1[`\2`]\3/' \ 242 | < $lychee_tmp > ${{env.lychee_output}} 243 | echo >> ${{env.lychee_output}} 244 | endpoint="https://github.com/${GITHUB_REPOSITORY}/actions/runs" 245 | workflow="$endpoint/${GITHUB_RUN_ID}?check_suite_focus=true" 246 | note="This content was produced by a [GitHub Action]($workflow)." 247 | echo "$note" >> ${{env.lychee_output}} 248 | elif [[ $exit_code == 1 || $exit_code == 3 ]]; then 249 | # Inform the user. 250 | msg="

Run-time error

251 |

Baler encountered an exception. This was most likely caused by a 252 | bug in Baler itself. Please report this to the developers. You can 253 | report it by opening an issue in the GitHub repository at 254 | 255 | https://github.com/caltechlibrary/baler.

" 256 | echo "$msg" >> $GITHUB_STEP_SUMMARY 257 | # Bail with an error. 258 | exit 2 259 | fi 260 | 261 | - name: Check if we've already opened a duplicate issue. 262 | if: env.continue == 'true' && env.lychee_exit_code != 0 263 | shell: bash 264 | run: | 265 | # First, save the current issue text body, to use for comparisons. 266 | # Strip the end of the body b/c it has this workflow's unique run 267 | # id, which can never match any other issue. Also, sort the body; 268 | # this is a way to normalize the contents to avoid false positives 269 | # when, e.g., two runs end up putting URLs in different orders. 270 | sed_cmd='s/\(.*\)This content was produced by.*/\1/' 271 | current=$(sed "$sed_cmd" < ${{env.lychee_output}} | sort) 272 | # Get the issue numbers for the last N issues, where N = lookback. 273 | endpoint="https://api.github.com/repos/${{github.repository}}/issues" 274 | query="q=state:open&per_page=${{inputs.lookback}}" 275 | accept="Accept: application/vnd.github+json" 276 | auth="Authorization: Bearer ${{github.token}}" 277 | issues=$(curl -s -H "$accept" -H "$auth" "$endpoint?$query") 278 | issue_numbers=($(jq '.[].number' <<<"$issues")) 279 | # Iterate over the issues & compare the bodies. 280 | for number in ${issue_numbers[@]}; do 281 | previous=$(curl -s -H "$accept" -H "$auth" "$endpoint/$number" | jq -r '.body') 282 | previous=$(echo "$previous" | sed 's/\\n/\n/g' | sed "$sed_cmd" | sort) 283 | if [[ "$current" == "$previous" ]]; then 284 | link="https://github.com/${{github.repository}}/issues/$number" 285 | 286 | # Report that we found a match. 287 | msg="

Bad links found, but issue creation skipped

288 |

One or more invalid URL(s) have been found; however, no new 289 | issue has been opened in the repository because the same URL(s) 290 | were reported in issue #$number.

" 291 | echo "$msg" >> $GITHUB_STEP_SUMMARY 292 | 293 | # Skip the rest of the workflow. 294 | echo "continue=false" >> $GITHUB_ENV 295 | break 296 | fi 297 | done 298 | 299 | - name: Open a new issue/ticket to report the problems. 300 | if: env.continue == 'true' && env.lychee_exit_code != 0 301 | id: create-issue 302 | uses: peter-evans/create-issue-from-file@v5.0.0 303 | with: 304 | title: Invalid URLs in Markdown files 305 | content-filepath: ${{env.lychee_output}} 306 | labels: ${{inputs.labels}} 307 | token: ${{github.token}} 308 | 309 | - name: Put a link to the issue in the workflow output. 310 | if: env.continue == 'true' && env.lychee_exit_code != 0 311 | env: 312 | issue-number: ${{steps.create-issue.outputs.issue-number}} 313 | shell: bash 314 | run: | 315 | number="${{env.issue-number}}" 316 | link="https://github.com/${{github.repository}}/issues/$number" 317 | msg="

Invalid URLs found

318 |

Ticket #${{env.issue-number}} has been 319 | created.

" 320 | echo "$msg" >> $GITHUB_STEP_SUMMARY 321 | # Report a failure. 322 | exit 1 323 | 324 | - name: Log internal variable values in debug mode or if a failure occurs. 325 | if: inputs.debug == 'true' || failure() 326 | shell: bash 327 | run: | 328 | echo "Summary of run-time variables" 329 | echo "=============================" 330 | echo "repository_owner = ${{github.repository_owner}}" 331 | echo "repository = ${{github.event.repository.name}}" 332 | echo "event name = ${{github.event_name}}" 333 | echo "files: ${{inputs.files}}" 334 | echo "ignore: ${{inputs.ignore}}" 335 | echo "labels: ${{inputs.labels}}" 336 | echo "lookback: ${{inputs.lookback}}" 337 | echo "timeout: ${{inputs.timeout}}" 338 | echo "debug: ${{inputs.debug}}" 339 | echo "depth_needed (for git history) = ${{env.depth_needed}}" 340 | echo "files considered = ${{env.considered}}" 341 | echo "continue = ${{env.continue}}" 342 | echo "env.lychee_exit_code = ${{env.lychee_exit_code}}" 343 | echo "" 344 | echo "Content of lychee config file" 345 | echo "=============================" 346 | cat ${lychee_config} 347 | echo "=============================" 348 | 349 | - name: Report if checks were successful. 350 | if: env.continue == 'true' && env.lychee_exit_code == 0 351 | shell: bash 352 | run: | 353 | msg="

No invalid URLs found

354 |

No bad links were found in the files matched by this pattern:
355 | ${{inputs.files}}

" 356 | echo "$msg" >> $GITHUB_STEP_SUMMARY 357 | 358 | 359 | author: Michael Hucka – https://github.com/mhucka 360 | branding: 361 | icon: thumbs-down 362 | color: orange 363 | -------------------------------------------------------------------------------- /assets/css/style.scss: -------------------------------------------------------------------------------- 1 | --- 2 | --- 3 | /* ↖︎ Make sure to leave the two triple-dash lines above! 4 | 5 | Summary: allow GitHub Pages version of README to work in dark mode. 6 | 7 | The reason for the existence of this file is to allow the GitHub Pages 8 | version of the README file to respect the user's dark/light mode settings, 9 | which is something that the default GitHub Pages theme doesn't do. This 10 | file was originally based on the approach implemented by user "ggorlen" in 11 | https://github.com/ggorlen/resources and referenced in a Stack Overflow 12 | comment on 2023-07-10. 13 | */ 14 | 15 | @import "https://cdnjs.cloudflare.com/ajax/libs/github-markdown-css/5.5.1/github-markdown.min.css"; 16 | 17 | body { 18 | margin: 0; 19 | } 20 | 21 | @media (prefers-color-scheme: light) { 22 | body { 23 | background-color: #ffffff; 24 | } 25 | } 26 | 27 | @media (prefers-color-scheme: dark) { 28 | body { 29 | background-color: #0d1117; 30 | } 31 | } 32 | 33 | .markdown-body { 34 | box-sizing: border-box; 35 | min-width: 200px; 36 | max-width: 980px; 37 | margin: 0 auto; 38 | padding: 45px; 39 | } 40 | 41 | @media (max-width: 767px) { 42 | .markdown-body { 43 | padding: 15px; 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /codemeta.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", 3 | "@type": "SoftwareSourceCode", 4 | "name": "Baler – BAd Link reportER", 5 | "identifier": "baler", 6 | "description": "Baler is a GitHub Action that tests the URLs inside Markdown files in 
your GitHub repository and opens an issue if it finds any problems.", 7 | "version": "2.0.4", 8 | "datePublished": "2024-02-09", 9 | "dateCreated": "2023-12-11", 10 | "author": [ 11 | { 12 | "@type": "Person", 13 | "givenName": "Michael", 14 | "familyName": "Hucka", 15 | "affiliation": { 16 | "@type": "Organization", 17 | "name": "California Institute of Technology Library" 18 | }, 19 | "email": "mhucka@caltech.edu", 20 | "@id": "https://orcid.org/0000-0001-9105-5960" 21 | } 22 | ], 23 | "maintainer": [ 24 | { 25 | "@type": "Person", 26 | "givenName": "Michael", 27 | "familyName": "Hucka", 28 | "affiliation": { 29 | "@type": "Organization", 30 | "name": "California Institute of Technology Library" 31 | }, 32 | "email": "mhucka@caltech.edu", 33 | "@id": "https://orcid.org/0000-0001-9105-5960" 34 | } 35 | ], 36 | "funder": { 37 | "@id": "https://ror.org/05dxps055", 38 | "@type": "Organization", 39 | "name": "California Institute of Technology Library" 40 | }, 41 | "copyrightHolder": [ 42 | { 43 | "@id": "https://ror.org/05dxps055", 44 | "@type": "Organization", 45 | "name": "California Institute of Technology" 46 | } 47 | ], 48 | "copyrightYear": 2024, 49 | "license": "https://github.com/caltechlibrary/baler/blob/main/LICENSE", 50 | "isAccessibleForFree": true, 51 | "url": "https://caltechlibrary.github.io/baler", 52 | "codeRepository": "https://github.com/caltechlibrary/baler", 53 | "readme": "https://github.com/caltechlibrary/baler/blob/main/README.md", 54 | "releaseNotes": "https://github.com/caltechlibrary/baler/blob/main/CHANGES.md", 55 | "issueTracker": "https://github.com/caltechlibrary/baler/issues", 56 | "downloadUrl": "https://github.com/caltechlibrary/baler/releases", 57 | "relatedLink": "https://data.caltech.edu/records/j6vdk-0y403", 58 | "keywords": [ 59 | "software", 60 | "automation", 61 | "GitHub Actions", 62 | "GitHub Automation" 63 | ], 64 | "developmentStatus": "active" 65 | } 66 | 
-------------------------------------------------------------------------------- /sample-workflow.yml: -------------------------------------------------------------------------------- 1 | # GitHub Actions workflow for Baler (BAd Link reportER) version 2.0.4. 2 | # This is available as the file "sample-workflow.yml" from the source 3 | # code repository for Baler: https://github.com/caltechlibrary/baler 4 | 5 | name: Bad Link Reporter 6 | 7 | # Configure this section ───────────────────────────────────────────── 8 | 9 | env: 10 | # Files to check. (Put patterns on separate lines, no leading dash.) 11 | files: | 12 | **/*.md 13 | 14 | # Label assigned to issues created by this workflow: 15 | labels: bug 16 | 17 | # Number of previous issues to check for duplicate reports. 18 | lookback: 10 19 | 20 | # Time (sec) to wait on an unresponsive URL before trying once more. 21 | timeout: 15 22 | 23 | # Optional file containing a list of URLs to ignore, one per line: 24 | ignore: .github/workflows/ignored-urls.txt 25 | 26 | on: 27 | schedule: # Cron syntax is: "min hr day-of-month month day-of-week" 28 | - cron: 00 04 * * 1 29 | push: 30 | paths: ['**.md'] 31 | workflow_dispatch: 32 | 33 | # The rest of this file should be left as-is ───────────────────────── 34 | 35 | run-name: Test links in Markdown files 36 | jobs: 37 | Baler: 38 | name: Link checker and reporter 39 | runs-on: ubuntu-latest 40 | permissions: 41 | issues: write 42 | steps: 43 | - uses: caltechlibrary/baler@v2 44 | with: 45 | files: ${{github.event.inputs.files || env.files}} 46 | labels: ${{github.event.inputs.labels || env.labels}} 47 | ignore: ${{github.event.inputs.ignore || env.ignore}} 48 | timeout: ${{github.event.inputs.timeout || env.timeout}} 49 | lookback: ${{github.event.inputs.lookback || env.lookback}} 50 | --------------------------------------------------------------------------------