├── .dockerignore ├── .shellcheckrc ├── tex ├── .texqc ├── .latexmkrc ├── aspell.en.pws ├── .gitignore ├── .texsc ├── README.md └── Makefile ├── .gitmodules ├── renovate.json ├── requirements.txt ├── .yamllint.yml ├── typos_config.toml ├── metrics ├── ir.sh ├── irc.sh ├── irloc.sh ├── irca.sh ├── authors.sh ├── cloc.sh ├── hoc.sh ├── aocih.sh ├── jpeek.sh ├── bug_discover.sh ├── raf.sh ├── rfvc.sh ├── rfvh.sh ├── pmd.sh ├── README.md └── multimetric.sh ├── sonar-project.properties ├── help ├── is-macos.sh ├── is-linux.sh ├── to-csv.sh ├── parallel.sh ├── sudo.sh ├── assert-tool.sh ├── float.sh ├── check-make.sh ├── gnu-utils.sh ├── texlive-bin.sh └── tdiff.sh ├── .gitignore ├── fixtures └── jaxec │ ├── src │ └── main │ │ └── java │ │ └── com │ │ └── yegor256 │ │ └── Foo.java │ └── pom.xml ├── tests ├── help │ ├── test-texlive-bin.sh │ ├── test-to-csv.sh │ ├── test-assert-tool.sh │ ├── test-parallel.sh │ ├── test-tdiff.sh │ └── test-float.sh ├── metrics │ ├── test-cloc.sh │ ├── test-bug_discover.sh │ ├── test-authors.sh │ ├── test-aocih.sh │ ├── test-multimetric.sh │ ├── test-pmd.sh │ ├── test-hoc.sh │ ├── test-jpeek.sh │ ├── test-rfvh.sh │ ├── test-ir.sh │ ├── test-raf.sh │ ├── test-irc.sh │ ├── test-rfvc.sh │ └── test-irca.sh ├── steps │ ├── test-discover.sh │ ├── test-jpeek.sh │ ├── test-clone-repo.sh │ ├── test-unregister.sh │ ├── test-discover-repos.sh │ ├── test-polish.sh │ ├── test-clone.sh │ ├── test-aggregate-repo.sh │ ├── test-aggregate-join.sh │ ├── test-jpeek-repo.sh │ ├── test-aggregate.sh │ ├── test-zip.sh │ ├── test-filter.sh │ ├── aggregation-functions │ │ ├── test-90-percentile.sh │ │ ├── test-mean.sh │ │ └── test-median.sh │ ├── test-measure.sh │ ├── test-summarize.sh │ └── test-report.sh ├── filters │ ├── test-030-delete-tests.sh │ ├── test-021-delete-module-info.sh │ ├── test-020-delete-package-info.sh │ ├── test-080-delete-symlinks.sh │ ├── test-090-delete-empty-directories.sh │ ├── test-010-delete-non-java-files.sh │ ├── 
test-999-move-gits-back.sh │ ├── test-060-delete-non-classes.sh │ ├── test-delete-unparsable.sh │ ├── test-001-move-gits-to-temp.sh │ ├── test-040-delete-unparsable.sh │ ├── test-070-delete-invalid-files.sh │ ├── test-delete-wrong-encoding.sh │ ├── test-delete-non-classes.sh │ └── test-050-delete-long-lines.sh ├── before │ ├── test-layout.sh │ └── test-executability.sh └── after │ └── test-integration.sh ├── .github └── workflows │ ├── bibcop.yml │ ├── hadolint.yml │ ├── copyrights.yml │ ├── reuse.yml │ ├── xcop.yml │ ├── pdd.yml │ ├── checkmake.yml │ ├── shellcheck.yml │ ├── cff-validator.yml │ ├── markdown-lint.yml │ ├── typos.yml │ ├── yamllint.yml │ ├── pylint.yml │ ├── flake8.yml │ ├── bashate.yml │ ├── sonarcloud.yml │ ├── actionlint.yml │ ├── make.yml │ ├── up.yml │ └── vulture.yml ├── .rubocop.yml ├── installs ├── install-pip.sh ├── install-gems.sh ├── install-poppler.sh ├── install-pmd.sh ├── install-gradle.sh ├── install-jpeek.sh ├── install-texlive-base.sh └── install-texlive-depends.sh ├── mypy.ini ├── filters ├── 999-move-gits-back.sh ├── 021-delete-module-info.sh ├── 020-delete-package-info.sh ├── 001-move-gits-to-temp.sh ├── 080-delete-symlinks.sh ├── 010-delete-non-java-files.sh ├── 090-delete-empty-directories.sh ├── delete-invalid-files.py ├── delete-non-classes.py ├── delete-unparsable.py ├── 030-delete-tests.sh ├── delete-wrong-encoding.py ├── 040-delete-unparsable.sh ├── 060-delete-non-classes.sh ├── 050-delete-long-lines.sh ├── 070-delete-invalid-files.sh └── 031-delete-wrong-encoding.sh ├── CITATION.cff ├── steps ├── aggregation-functions │ ├── mean.sh │ ├── median.sh │ └── 90-percentile.sh ├── unregister.sh ├── jpeek.sh ├── clone.sh ├── README.md ├── measure-file.sh ├── clone-repo.sh ├── measure.sh ├── zip.sh ├── env.sh ├── tests.sh ├── aggregate-join.sh ├── polish.sh ├── lint.sh ├── filter.sh ├── discover.sh ├── summarize.sh ├── aggregate-repo.sh └── aggregate.sh ├── .rultor.yml ├── DEPENDS.txt ├── LICENSE.txt ├── LICENSES └── MIT.txt ├── 
REUSE.toml ├── pylint_plugins └── custom_checkers.py └── Dockerfile /.dockerignore: -------------------------------------------------------------------------------- 1 | dataset 2 | dataset/**/* 3 | -------------------------------------------------------------------------------- /.shellcheckrc: -------------------------------------------------------------------------------- 1 | disable=SC2030,SC2031 2 | -------------------------------------------------------------------------------- /tex/.texqc: -------------------------------------------------------------------------------- 1 | --ignore=You have called 2 | -------------------------------------------------------------------------------- /tex/.latexmkrc: -------------------------------------------------------------------------------- 1 | $pdflatex = 'pdflatex %O -halt-on-error -interaction=batchmode -shell-escape %S'; 2 | -------------------------------------------------------------------------------- /tex/aspell.en.pws: -------------------------------------------------------------------------------- 1 | personal_ws-1.1 en 741 utf-8 2 | Yegor 3 | Bugayenko 4 | yegor 5 | github 6 | repo 7 | printf 8 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "tex/bibliography"] 2 | path = tex/bibliography 3 | url = https://github.com/yegor256/bibliography 4 | -------------------------------------------------------------------------------- /renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://docs.renovatebot.com/renovate-schema.json", 3 | "extends": [ 4 | "config:base" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | javalang==0.13.0 2 | flake8==7.0.0 3 | 
pylint==4.0.4 4 | multimetric==1.3.0 5 | chardet==5.2.0 6 | mypy==1.12.1 7 | cffconvert==2.0.0 8 | samples-filter==0.5.1 9 | -------------------------------------------------------------------------------- /.yamllint.yml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 2 | # SPDX-License-Identifier: MIT 3 | --- 4 | rules: 5 | line-length: 6 | max: 999 7 | allow-non-breakable-words: true 8 | -------------------------------------------------------------------------------- /typos_config.toml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 2 | # SPDX-License-Identifier: MIT 3 | [default.extend-words] 4 | ## Project-specific acronym 5 | astroid = "astroid" # library name -------------------------------------------------------------------------------- /tex/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | exec.tex 3 | _minted-* 4 | *.pdf 5 | *.bbl 6 | *.bcf 7 | *.blg 8 | *.fdb_latexmk 9 | *.fls 10 | *.log 11 | *.run.xml 12 | *.aux 13 | *.pyg 14 | *.out 15 | *.synctex.gz 16 | *.ret 17 | *.tmp 18 | -------------------------------------------------------------------------------- /metrics/ir.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | 6 | output=$(realpath "$2") 7 | 8 | echo "IR 0.000 " >"${output}" 9 | -------------------------------------------------------------------------------- /sonar-project.properties: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 2 | # SPDX-License-Identifier: MIT 3 | 4 | 
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT
set -e -o pipefail

# Platform probe: exits with 0 when ${OSTYPE} starts with "linux-gnu",
# and with 1 for any other value. Prints nothing.
case "${OSTYPE}" in
  "linux-gnu"*)
    exit 0
    ;;
  *)
    exit 1
    ;;
esac
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT
set -e -o pipefail

# Take the STDIN and send it to STDOUT, while making it
# suitable for CSV: every comma is prefixed with a backslash.
#
# Trailing newlines of the input are dropped (command substitution strips
# them) and no newline is appended to the output.

data=$(cat)

# The replacement is kept in a variable so that the backslash is taken
# literally, without depending on quoting rules inside the expansion.
escaped_comma='\,'

# A builtin substitution is enough for a single string; no need to fork sed.
printf '%s' "${data//,/${escaped_comma}}"
Copyright (c) 2021-2025 Yegor Bugayenko 2 | # SPDX-License-Identifier: MIT 3 | --- 4 | # yamllint disable rule:line-length 5 | name: hadolint 6 | 'on': 7 | push: 8 | pull_request: 9 | jobs: 10 | hadolint: 11 | timeout-minutes: 15 12 | runs-on: ubuntu-24.04 13 | steps: 14 | - uses: actions/checkout@v4 15 | - uses: hadolint/hadolint-action@v3.3.0 16 | -------------------------------------------------------------------------------- /.github/workflows/copyrights.yml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 2 | # SPDX-License-Identifier: MIT 3 | --- 4 | # yamllint disable rule:line-length 5 | name: copyrights 6 | 'on': 7 | push: 8 | pull_request: 9 | jobs: 10 | copyrights: 11 | timeout-minutes: 15 12 | runs-on: ubuntu-24.04 13 | steps: 14 | - uses: actions/checkout@v4 15 | - uses: yegor256/copyrights-action@0.0.12 16 | -------------------------------------------------------------------------------- /tex/.texsc: -------------------------------------------------------------------------------- 1 | --pws=aspell.en.pws 2 | --ignore=nospell 3 | --ignore=equation* 4 | --ignore=ffcode 5 | --ignore=setminted 6 | --ignore=opt,grp,T,V,few,RE 7 | --ignore=newminted:opp 8 | --ignore=newtcbox:pp 9 | --ignore=settopmatter 10 | --ignore=CJK 11 | --ignore=lref:p,lrefs:pp 12 | --ignore=tikzstyle,tikzpicture,usetikzlibrary,ingraph 13 | --ignore=tikz:op 14 | --ignore=textcolor:pp 15 | --ignore=newmdenv:op 16 | --ignore=f,code,nospell,citet,citep 17 | -------------------------------------------------------------------------------- /.rubocop.yml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 2 | # SPDX-License-Identifier: MIT 3 | --- 4 | AllCops: 5 | SuggestExtensions: false 6 | NewCops: enable 7 | Exclude: 8 | - 'venv/**/*' 9 | 10 | Layout/EmptyLineAfterGuardClause: 11 | 
Enabled: false 12 | Metrics/BlockLength: 13 | Max: 30 14 | Layout/ElseAlignment: 15 | Enabled: false 16 | Layout/EndAlignment: 17 | Enabled: false 18 | Layout/IndentationWidth: 19 | Enabled: false 20 | -------------------------------------------------------------------------------- /.github/workflows/reuse.yml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 2 | # SPDX-License-Identifier: MIT 3 | --- 4 | # yamllint disable rule:line-length 5 | name: reuse 6 | 'on': 7 | push: 8 | branches: 9 | - master 10 | pull_request: 11 | branches: 12 | - master 13 | jobs: 14 | reuse: 15 | timeout-minutes: 15 16 | runs-on: ubuntu-24.04 17 | steps: 18 | - uses: actions/checkout@v4 19 | - uses: fsfe/reuse-action@v6 20 | -------------------------------------------------------------------------------- /.github/workflows/xcop.yml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 2 | # SPDX-License-Identifier: MIT 3 | --- 4 | # yamllint disable rule:line-length 5 | name: xcop 6 | 'on': 7 | push: 8 | branches: 9 | - master 10 | pull_request: 11 | branches: 12 | - master 13 | jobs: 14 | xcop: 15 | timeout-minutes: 15 16 | runs-on: ubuntu-24.04 17 | steps: 18 | - uses: actions/checkout@v4 19 | - uses: g4s8/xcop-action@master 20 | -------------------------------------------------------------------------------- /.github/workflows/pdd.yml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 2 | # SPDX-License-Identifier: MIT 3 | --- 4 | # yamllint disable rule:line-length 5 | name: pdd 6 | 'on': 7 | push: 8 | branches: 9 | - master 10 | pull_request: 11 | branches: 12 | - master 13 | jobs: 14 | pdd: 15 | timeout-minutes: 15 16 | runs-on: ubuntu-24.04 17 | steps: 18 | - uses: 
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT
set -e -o pipefail

# Test of help/assert-tool.sh.
# Arguments: $2 - path of the log file that collects the output of the
#                 commands under test.
stdout=$2

# A command that exists ("echo hello") must pass the assertion.
{
  "${LOCAL}/help/assert-tool.sh" echo hello
} > "${stdout}" 2>&1
echo "👍🏻 Positive assertion works"

# A command that doesn't exist must be rejected. The log is appended to
# here, not truncated, so the output of the first check stays available
# when this one fails.
{
  if "${LOCAL}/help/assert-tool.sh" bla-bla-bla; then
    exit 1
  fi
} >> "${stdout}" 2>&1
echo "👍🏻 Negative assertion works"
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT

set -e -o pipefail

# Runs cloc on a single file and writes three metrics, one per line, in the
# "<name> <value> <description>" format.
# Arguments: $1 - path of the file to measure
#            $2 - path of the file to write the metrics to
java=$1
output=$2

# Only the last row of the CSV report is used; it is split on commas and its
# fields 2, 3 and 4 are reported as NoBL, NoCL and LoC respectively.
out=$(cloc --timeout 0 --quiet --csv "${java}" | tail -1)
IFS=',' read -r -a M <<< "${out}"

# When cloc prints nothing the fields are absent; report zeros then, instead
# of lines with a missing value.
cat <<EOT > "${output}"
NoBL ${M[2]:-0} Number of Blank Lines
NoCL ${M[3]:-0} Number of Commenting Lines
LoC ${M[4]:-0} Total physical lines of source code, including commenting lines and blank lines
EOT
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT

set -e -o pipefail

# Installs the Ruby gems the project needs, unless they are already present.
# Requires: ${LOCAL} pointing to the root of the repository; ruby and gem
# available on the PATH.

"${LOCAL}/help/assert-tool.sh" ruby -v
"${LOCAL}/help/assert-tool.sh" gem -v

#######################################
# Installs a gem of the given version if no gem with exactly this name
# is installed yet.
# Arguments: $1 - name of the gem
#            $2 - version to install
# Outputs:   whatever "gem list -i" and "gem install" print
#######################################
ensure_gem() {
  local name=$1
  local version=$2
  # "gem list" treats its argument as a regular expression, so the name
  # is anchored: otherwise "rubocop" would also be satisfied by, say,
  # "rubocop-ast" alone.
  if ! gem list -i "^${name}\$"; then
    gem install --no-document "${name}" -v "${version}"
  fi
}

ensure_gem rubocop 1.56.3
ensure_gem octokit 4.21.0
ensure_gem slop 4.9.1
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT
set -e -o pipefail

# Runs the given command with the right privileges.
#
# Usage: sudo.sh [--as-user] command [args...]
#
# On Linux and macOS the script itself must be started by root, otherwise
# it prints a hint and exits with 1. There, the command runs as root by
# default; with --as-user it runs as the user who invoked sudo. On any other
# platform the command is simply passed to sudo.
#
# Requires: ${LOCAL} pointing to the root of the repository.

if "${LOCAL}/help/is-linux.sh" || "${LOCAL}/help/is-macos.sh"; then
  if [ ! "$(id -u)" = 0 ]; then
    echo "You should run it as root: 'sudo make install'"
    exit 1
  fi

  if [ "$1" == "--as-user" ]; then
    shift
    if [ -n "${SUDO_USER:-}" ]; then
      sudo -u "${SUDO_USER}" "$@"
    else
      # Not started through sudo (a plain root shell, for example), so
      # there is no original user to switch to: 'sudo -u ""' would only
      # fail, hence the command runs as the current user.
      "$@"
    fi
  else
    # If no flag is provided, root is the default option
    "$@"
  fi
else
  sudo "$@"
fi
19 | exit 1 20 | fi 21 | -------------------------------------------------------------------------------- /.github/workflows/pylint.yml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 2 | # SPDX-License-Identifier: MIT 3 | name: pylint 4 | "on": 5 | push: 6 | branches: 7 | - master 8 | pull_request: 9 | branches: 10 | - master 11 | jobs: 12 | pylint: 13 | timeout-minutes: 15 14 | runs-on: ubuntu-24.04 15 | steps: 16 | - uses: actions/checkout@v4 17 | - uses: actions/setup-python@v6 18 | with: 19 | python-version: 3.11 20 | - run: | 21 | pip install pylint 22 | pylint --load-plugins=custom_checkers -d all -e C0411 . 23 | -------------------------------------------------------------------------------- /.github/workflows/flake8.yml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 2 | # SPDX-License-Identifier: MIT 3 | --- 4 | # yamllint disable rule:line-length 5 | name: flake8 6 | 'on': 7 | push: 8 | branches: 9 | - master 10 | pull_request: 11 | branches: 12 | - master 13 | jobs: 14 | flake8: 15 | timeout-minutes: 15 16 | runs-on: ubuntu-24.04 17 | steps: 18 | - uses: actions/checkout@v4 19 | - uses: actions/setup-python@v6 20 | with: 21 | python-version: 3.11 22 | - run: pip install flake8 23 | - run: flake8 --max-line-length=120 . 
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT
set -e -o pipefail

# Calculates the "HoC" metric of a single file from its Git history and
# writes it in the "<name> <value> <description>" format.
# Arguments: $1 - path of the file to measure
#            $2 - path of the file to write the metric to
java=$1

# Resolved before the directory changes below, so a relative path keeps
# pointing to the same place.
output=$(realpath "$2")

cd "$(dirname "${java}")"
base=$(basename "${java}")

# To check that file was added in commit any time
if git status > /dev/null 2>&1 && test -n "$(git log --oneline -- "${base}")"; then
  # The file is addressed by its base name: the current directory is already
  # the one that holds it, so the path in ${java}, if relative, would not
  # resolve from here anymore.
  # The pipeline counts the lines of the log that start with "+" or "-",
  # leaving out the "--- <name>" and "+++ <name>" headers.
  hoc=$(git log -L:"class\s:${base}" | grep -E "^[+-].*$" | grep -Ev "^\-\-\-\s\S+$" | grep -Evc "^\+\+\+\s\S+$")
else
  # Not in a Git repository, or the file has no commits
  hoc=0
fi

echo "HoC ${hoc} Hits Of Code for file" > "${output}"
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT
set -e -o pipefail

# Test of help/parallel.sh.
# Arguments: $2 - path of the log file to append the output to.
# Requires:  ${LOCAL} (root of the repository) and ${TARGET} (directory
#            where the files with jobs are created).
stdout=$2

# Ten jobs, each printing one letter without a newline. The file is written
# through a single truncating redirect, so lines left in ${TARGET} by
# a previous run can't add extra jobs and break the length check below.
jobs=${TARGET}/jobs.txt
for i in A B C D E F G H I J; do
  echo "echo -n ${i}"
done > "${jobs}"
msg=$("${LOCAL}/help/parallel.sh" "${jobs}")
echo "${msg}" >> "${stdout}"
test "${#msg}" = '10'
echo "👍🏻 Correctly ran parallel"

# A thousand jobs that print nothing: the combined output must be empty.
jobs=${TARGET}/jobs-2.txt
for (( q = 0; q < 1000; q++ )); do
  echo 'printf ""'
done > "${jobs}"
test "$("${LOCAL}/help/parallel.sh" "${jobs}")" = ''
echo "👍🏻 Correctly ran parallel with large input"
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT
#
# Reads one number from stdin and prints it truncated to three decimals.
#   empty or whitespace-only input -> "0.000", exit code 0
#   the literal "NaN"              -> "NaN" without a trailing newline
#   any other non-numeric input    -> "0.000", exit code 1
set -e -o pipefail

# Succeeds when $1 is an optionally signed decimal number.
is_float() {
    local pattern='^-?[0-9]*\.?[0-9]+$'
    [[ $1 =~ ${pattern} ]]
}

value=$(cat)

if [[ "${value}" =~ ^[[:space:]]*$ ]]; then
    echo "0.000"
    exit
fi

case "${value}" in
    NaN)
        printf '%s' "${value}"
        exit
        ;;
esac

if ! is_float "${value}"; then
    echo "0.000"
    exit 1
fi

# bc runs with its default scale of 0 here, so "/ 1" cuts off everything
# beyond the third decimal of the original value.
thousandths=$(bc <<< "${value} * 1000 / 1")
LC_NUMERIC=C printf '%.3f' "$(bc -l <<< "${thousandths} / 1000")" 2>/dev/null || echo "0.000"
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT
#
# Deletes every module-info.java file below a directory and prints
# a one-sentence summary (TeX markup) to stdout.
#
# Arguments:
#   $1 - directory to clean
#   $2 - directory for temporary files; the list of deleted files is kept
#        in filter-lists/module-info-files.txt inside it

set -e -o pipefail

home=$1
temp=$2

deleted=${temp}/filter-lists/module-info-files.txt

# The list doubles as a marker: if it exists, do nothing and print nothing.
if [[ -e "${deleted}" ]]; then
    exit
fi

mkdir -p "${deleted%/*}"
find "${home}" -type f -a -name 'module-info.java' -print > "${deleted}"
while IFS= read -r path; do
    rm -f "${path}"
done < "${deleted}"

if [[ ! -s "${deleted}" ]]; then
    printf "There were no files named \\\ff{module-info.java}, that's why nothing was deleted"
else
    count=$(wc -l < "${deleted}" | xargs)
    printf "%'d files named as \\\ff{module-info.java} were deleted" "${count}"
fi
-------------------------------------------------------------------------------- /.github/workflows/sonarcloud.yml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 2 | # SPDX-License-Identifier: MIT 3 | --- 4 | # yamllint disable rule:line-length 5 | name: sonarcloud 6 | 'on': 7 | push: 8 | branches: 9 | - master 10 | jobs: 11 | sonarcloud: 12 | if: github.repository == 'yegor256/cam' && 13 | github.event_name != 'pull_request' || 14 | github.event.pull_request.head.repo.full_name == github.repository 15 | runs-on: ubuntu-24.04 16 | steps: 17 | - uses: actions/checkout@v4 18 | with: 19 | fetch-depth: 0 20 | - uses: SonarSource/sonarqube-scan-action@master 21 | env: 22 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 23 | SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} 24 | -------------------------------------------------------------------------------- /tests/metrics/test-bug_discover.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | 6 | temp=$1 7 | stdout=$2 8 | 9 | { 10 | pmd --version 11 | xmllint --version 12 | } > "${stdout}" 2>&1 13 | echo "👍🏻 PMD dependencies are installed" 14 | 15 | { 16 | java="${temp}/Foo long 'weird' name (--).java" 17 | echo " 18 | public class StaticField { 19 | static int x; 20 | public FinalFields(int y) { 21 | x = y; // unsafe 22 | } 23 | }" > "${java}" 24 | "${LOCAL}/metrics/bug_discover.sh" "${java}" "${temp}/stdout" 25 | grep "BugNum 1 " "${temp}/stdout" 26 | } > "${stdout}" 2>&1 27 | echo "👍🏻 Correctly calculates the bug discovery metric" 28 | -------------------------------------------------------------------------------- /.github/workflows/actionlint.yml: -------------------------------------------------------------------------------- 1 | # 
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT
#
# Checks steps/discover.sh in two modes: a limited number of repositories
# (TOTAL) and one explicitly named repository (REPO).
#
# Arguments:
#   $2 - file that collects the output of the test
# Environment:
#   LOCAL  - root of the repository
#   TARGET - directory where discover.sh leaves its results
set -e -o pipefail

stdout=$2

csv=${TARGET}/repositories.csv
details=${TARGET}/temp/repo-details.tex

# Prints the number of lines in file $1, without padding.
rows() {
    wc -l < "$1" | xargs
}

{
    rm -f "${csv}"
    TOTAL=3 "${LOCAL}/steps/discover.sh"
    [[ -e "${csv}" ]]
    [[ -s "${details}" ]]
    # three repositories plus one more line
    [[ "$(rows "${csv}")" == '4' ]]
} > "${stdout}" 2>&1
echo "👍🏻 A few repositories discovered correctly"

{
    rm -f "${csv}"
    REPO=yegor256/jaxec "${LOCAL}/steps/discover.sh"
    [[ -e "${csv}" ]]
    [[ -s "${details}" ]]
    [[ "$(rows "${csv}")" == '2' ]]
} > "${stdout}" 2>&1
echo "👍🏻 A single repository discovered correctly"
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT
#
# Removes every symbolic link below a directory and prints a one-sentence
# summary to stdout.
#
# Arguments:
#   $1 - directory to clean
#   $2 - directory for temporary files; deleted links are appended to
#        deleted-symlinks.txt inside it

set -e -o pipefail

home=$1
temp=$2

journal=${temp}/deleted-symlinks.txt
batch=${temp}/symlinks-to-delete.txt
mkdir -p "$(dirname "${journal}")"
touch "${journal}"

# Scan, delete, scan again: the loop stops at the first scan that finds
# no symlinks.
while :; do
    find "${home}" -mindepth 1 -type l -print > "${batch}"
    [[ -s "${batch}" ]] || break
    while IFS= read -r symlink; do
        rm "${symlink}"
        echo "${symlink}" >> "${journal}"
    done < "${batch}"
done

deleted=$(wc -l < "${journal}" | xargs)
if [ "${deleted}" -ne 0 ]; then
    printf "%'d symlinks were deleted" "${deleted}"
else
    printf "There were no symlinks"
fi
"${value}" = 'NaN' 21 | } > "${stdout}" 2>&1 22 | echo "👍🏻 A simple repo analyzed with jpeek correctly" 23 | -------------------------------------------------------------------------------- /filters/010-delete-non-java-files.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | 5 | set -e -o pipefail 6 | 7 | home=$1 8 | temp=$2 9 | 10 | total=$(find "${home}" -type f | wc -l) 11 | 12 | list=${temp}/filter-lists/non-java-files.txt 13 | if [ -e "${list}" ]; then 14 | exit 15 | fi 16 | 17 | mkdir -p "$(dirname "${list}")" 18 | find "${home}" -type f -not -name '*.java' -print > "${list}" 19 | while IFS= read -r f; do 20 | rm -f "${f}" 21 | done < "${list}" 22 | 23 | if [ -s "${list}" ]; then 24 | printf "%'d files out of %'d without the \\\ff{.java} extension were deleted" \ 25 | "$(wc -l < "${list}" | xargs)" "${total}" 26 | else 27 | printf "%'d files were \\\ff{.java} files, nothing was deleted" \ 28 | "${total}" 29 | fi 30 | -------------------------------------------------------------------------------- /tests/filters/test-030-delete-tests.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | 6 | temp=$1 7 | stdout=$2 8 | 9 | { 10 | list=${temp}/temp/filter-lists/test-files.txt 11 | java="${temp}/foo/dir (with) _ long & and 'weird' \"name\" /FooTest.java" 12 | mkdir -p "$(dirname "${java}")" 13 | echo "class FooTest {}" > "${java}" 14 | rm -f "${list}" 15 | msg=$("${LOCAL}/filters/030-delete-tests.sh" "${temp}" "${temp}/temp") 16 | echo "${msg}" 17 | echo "${msg}" | grep "1 files out of 2 with \\\ff{Test} or \\\ff{ITCase} suffixes were deleted" 18 | test ! 
-e "${java}" 19 | test -e "${list}" 20 | test "$(wc -l < "${list}" | xargs)" = 1 21 | } > "${stdout}" 2>&1 22 | echo "👍🏻 A Java test file was deleted" 23 | -------------------------------------------------------------------------------- /tests/steps/test-clone-repo.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | 6 | temp=$1 7 | stdout=$2 8 | 9 | { 10 | name=foo@ 11 | uri=${temp}/${name} 12 | git init --quiet "${uri}" 13 | cd "${uri}" 14 | git config user.email 'foo@example.com' 15 | git config user.name 'Foo' 16 | touch test.txt 17 | git add . 18 | git config commit.gpgsign false 19 | git commit --no-verify --quiet -am test 20 | rm -rf "${TARGET}/github" 21 | "${LOCAL}/steps/clone-repo.sh" "${uri}" . 1 1 22 | test -e "${TARGET}/github/files/${name}/test.txt" 23 | echo "👍🏻 A repo cloned correctly" 24 | "${LOCAL}/steps/clone-repo.sh" "${uri}" . 1 1 25 | } > "${stdout}" 2>&1 26 | echo "👍🏻 A re-clone worked correctly" 27 | -------------------------------------------------------------------------------- /tests/filters/test-021-delete-module-info.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | 6 | temp=$1 7 | stdout=$2 8 | 9 | { 10 | list=${temp}/temp/filter-lists/module-info-files.txt 11 | info="${temp}/foo/dir (with) _ long & and 'weird' \"name\" /module-info.java" 12 | mkdir -p "$(dirname "${info}")" 13 | echo "module foo;" > "${info}" 14 | rm -f "${list}" 15 | msg=$("${LOCAL}/filters/021-delete-module-info.sh" "${temp}" "${temp}/temp") 16 | echo "${msg}" 17 | echo "${msg}" | grep "1 files named as \\\ff{module-info.java} were deleted" 18 | test ! 
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT
#
# Removes empty directories below a directory, repeating until none is
# left, and prints a one-sentence summary to stdout.
#
# Arguments:
#   $1 - directory to clean
#   $2 - directory for temporary files; deleted directories are appended
#        to deleted-empty-directories.txt inside it

set -e -o pipefail

home=$1
temp=$2

journal=${temp}/deleted-empty-directories.txt
batch=${temp}/empty-directories-to-delete.txt
mkdir -p "$(dirname "${journal}")"
touch "${journal}"

# Deleting a directory may leave its parent empty, hence the repeated scans;
# the loop ends with the first scan that finds nothing.
while :; do
    find "${home}" -mindepth 1 -type d -empty -print > "${batch}"
    [[ -s "${batch}" ]] || break
    while IFS= read -r empty; do
        rm -r "${empty}"
        echo "${empty}" >> "${journal}"
    done < "${batch}"
done

deleted=$(wc -l < "${journal}" | xargs)
if [ "${deleted}" -ne 0 ]; then
    printf "%'d empty directories were deleted" "${deleted}"
else
    printf "There were no empty directories"
fi
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT
#
# Fails with exit code 1 and an upgrade hint when the installed `make`
# is older than version 4.
#
# Environment:
#   LOCAL - root of the repository (help/is-macos.sh is called from it)

set -e -o pipefail

# Third word of the first line printed by `make --version`.
version=$(make --version | awk 'NR==1 {print $3}')
# Everything before the first dot.
major=${version%%.*}

if [ "${major}" -lt 4 ]; then
    echo "Make version must be 4 or higher. Current: ${version}"
    if ! "${LOCAL}/help/is-macos.sh"; then
        echo "Try to update it with \"sudo apt install make\""
        exit 1
    fi
    echo "Try to update it with \"brew install make\""
    # The directory to put on PATH depends on the CPU architecture.
    case "$(uname -m)" in
        arm64)
            gnubin=/opt/homebrew/opt/make/libexec/gnubin
            ;;
        *)
            gnubin=/usr/local/opt/make/libexec/gnubin
            ;;
    esac
    echo "Make sure you have \"${gnubin}\" in your PATH"
    exit 1
fi
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT
#
# Defines and exports shell functions that shadow `sed`, `realpath` and
# `date`. Each one asks help/is-macos.sh which platform it runs on and then
# calls the g-prefixed tool (gsed, grealpath, gdate) on macOS or the regular
# command everywhere else.
#
# Environment:
#   LOCAL - root of the repository
set -e -o pipefail

# sed: gsed on macOS, the system sed otherwise.
sed() {
    if ! "${LOCAL}/help/is-macos.sh"; then
        command sed "$@"
        return
    fi
    gsed "$@"
}
export -f sed

# realpath: grealpath on macOS, the system realpath otherwise.
realpath() {
    if ! "${LOCAL}/help/is-macos.sh"; then
        command realpath "$@"
        return
    fi
    grealpath "$@"
}
export -f realpath

# date: gdate on macOS, the system date otherwise.
date() {
    if ! "${LOCAL}/help/is-macos.sh"; then
        command date "$@"
        return
    fi
    gdate "$@"
}
export -f date
-e /usr/local ]; then 20 | echo "The directory /usr/local must exist" 21 | exit 1 22 | fi 23 | 24 | gradle_version=7.4 25 | cd /usr/local 26 | wget --quiet https://services.gradle.org/distributions/gradle-${gradle_version}-bin.zip 27 | unzip -qq gradle-${gradle_version}-bin.zip 28 | rm gradle-${gradle_version}-bin.zip 29 | mv gradle-${gradle_version} gradle 30 | ln -s /usr/local/gradle/bin/gradle /usr/local/bin/gradle 31 | echo "Gradle installed into /usr/local/gradle" 32 | gradle --version 33 | -------------------------------------------------------------------------------- /tests/metrics/test-authors.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | 6 | temp=$1 7 | stdout=$2 8 | 9 | { 10 | tmp=$(mktemp -d /tmp/XXXX) 11 | "${LOCAL}/metrics/authors.sh" "${tmp}" "${temp}/stdout" 12 | grep "NoGA 0 " "${temp}/stdout" 13 | } > "${stdout}" 2>&1 14 | echo "👍🏻 Didn't fail in non-git directory" 15 | 16 | { 17 | mkdir -p "${temp}/foo" 18 | cd "${temp}/foo" 19 | git init --quiet . 
#!/usr/bin/env python3
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT

"""
Deletes a Java file that declares no classes.

The file is parsed with javalang; when the syntax tree contains no
ClassDeclaration node, the file is removed and its path is appended
to the list file.
"""

import sys
import os
from typing import Final
import javalang

if __name__ == '__main__':
    if len(sys.argv) != 3:
        # Name both arguments, otherwise the message does not tell the
        # caller what is missing.
        print("Usage: python delete-non-classes.py <java-file> <list-file>")
        sys.exit(1)

    java: Final[str] = sys.argv[1]
    lst: Final[str] = sys.argv[2]
    try:
        with open(java, encoding='utf-8') as f:
            raw = javalang.parse.parse(f.read())
        # filter() is lazy; any() stops at the first class it yields.
        has_class = any(True for _ in raw.filter(javalang.tree.ClassDeclaration))
        if not has_class:
            os.remove(java)
            with open(lst, 'a+', encoding='utf-8') as others:
                others.write(java + "\n")
    except Exception:
        # Best effort on purpose: a file that cannot be read or parsed is
        # left in place. NOTE(review): presumably such files are handled by
        # the separate "delete-unparsable" filter; confirm.
        pass
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT
#
# Calculates the "Age of Class in Hours" (AoCiH) metric: the time between
# the first commit of the repository and the commit that added the file.
# When the age cannot be determined, 0 is reported and a message goes to
# stderr.
#
# Arguments:
#   $1 - path to the .java file
#   $2 - path of the file that receives the metric line

set -e -o pipefail

java_file=$(realpath "$1")
output=$(realpath "$2")

hours=0
cd "$(dirname "${java_file}")"

if ! git status > /dev/null 2>&1; then
    echo "Error: Not a Git repository or unable to access it." >&2
else
    # Author timestamps (seconds since the epoch). "|| true" keeps a failing
    # pipeline from aborting the script; the value then stays empty.
    repo_born=$(git log --reverse --format=%at | head -n 1 || true)
    file_born=$(git log --diff-filter=A --format=%at -- "$java_file" | tail -n 1 || true)
    if [[ -z "${repo_born}" || -z "${file_born}" ]]; then
        echo "Warning: Unable to calculate commit age for $java_file" >&2
    else
        hours=$(( (file_born - repo_born) / 3600 ))
    fi
fi

echo "AoCiH ${hours} Age of Class in Hours" > "${output}"
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT
#
# Calculates the arithmetic mean of one metric and stores it, formatted
# with three decimals, in "<output folder>/<metric name>.mean.csv".
#
# Arguments:
#   $1 - CSV file with a header row; the values are read from column 3
#   $2 - folder for the result (created when missing)
#   $3 - name of the metric
#
# Cells that are not plain decimal numbers (empty, "NaN", text) are
# skipped. When no usable value remains, a stale result file is removed.

set -e -o pipefail

metric_file=$1
output_folder=$2
metric_name=$3

mkdir -p "${output_folder}"

# Column 3 without the header row, trimmed, numbers only. Anything else
# would be a syntax error for bc and would still be counted, so it is
# filtered out before it can distort the mean.
values=$(awk -F, '
    NR > 1 {
        v = $3
        gsub(/^[ \t\r]+|[ \t\r]+$/, "", v)
        if (v ~ /^-?[0-9]*\.?[0-9]+$/) {
            print v
        }
    }' "${metric_file}")

sum=0
count=0
if [ -n "${values}" ]; then
    count=$(wc -l <<< "${values}" | xargs)
    # One bc process for the whole column instead of one per value: the
    # values become "s += <value>" statements and the final "s" prints
    # the total.
    sum=$({
        echo 's = 0'
        command sed 's/^/s += /' <<< "${values}"
        echo 's'
    } | bc)
fi

output_file="${output_folder}/${metric_name}.mean.csv"

if ((count > 0)); then
    mean=$(echo "scale=3; $sum / $count" | bc)
    # bc always prints a dot as the decimal separator; pin the locale so
    # printf accepts it everywhere.
    formatted_mean=$(LC_NUMERIC=C printf "%0.3f" "$mean")
    echo "$formatted_mean" > "${output_file}"
    echo "Aggregated mean for ${metric_name}: ${formatted_mean}"
else
    rm -f "${output_file}"
    echo "No valid data to aggregate for ${metric_name}"
fi
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT

# Checks the text printed by help/tdiff.sh for start times placed a known
# number of nanoseconds in the past.
#   $2 - file that receives the first result
# Reads ${LOCAL} from the environment.
set -e -o pipefail

stdout=$2

# 5 seconds ago
start=$(( $(date +%s%N) - 5 * 1000 * 1000 * 1000 ))
diff=$("${LOCAL}/help/tdiff.sh" "${start}")
test "${diff}" = ', in 5s'
echo "${diff}" >> "${stdout}"
echo "👍🏻 Correctly calculated seconds"

# 7 minutes and 15 seconds ago
start=$(( $(date +%s%N) - 7 * 60 * 1000 * 1000 * 1000 - 15 * 1000 * 1000 * 1000 ))
test "$("${LOCAL}/help/tdiff.sh" "${start}")" = ', in 7m15s'
echo "👍🏻 Correctly calculated minutes"

# 3 hours ago
start=$(( $(date +%s%N) - 3 * 60 * 60 * 1000 * 1000 * 1000 ))
test "$("${LOCAL}/help/tdiff.sh" "${start}")" = ', in 3h0m'
echo "👍🏻 Correctly calculated hours"

# 3 hours minus 25 minutes ago
start=$(( $(date +%s%N) - 3 * 60 * 60 * 1000 * 1000 * 1000 + 25 * 60 * 1000 * 1000 * 1000 ))
test "$("${LOCAL}/help/tdiff.sh" "${start}")" = ', in 2h35m'
echo "👍🏻 Correctly calculated hours and minutes"

--------------------------------------------------------------------------------
/.rultor.yml:
--------------------------------------------------------------------------------
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT
---
# yamllint disable rule:line-length
docker:
  image: yegor256/ruby
assets:
  pwd: yegor256/home#assets/docker-password
install: |
  pdd --file=/dev/null
merge:
  script: |
    sudo make install -f "$(pwd)/Makefile"
    make env lint test
release:
  pre: false
  script: |
    [[ "${tag}" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]
    sed -i "s/0\.0\.0/${tag}/g" Makefile
    curl -s -L "https://download.docker.com/linux/static/stable/x86_64/docker-18.06.3-ce.tgz" | tar -xz -C /tmp
    sudo mv /tmp/docker/docker /usr/bin/
    repo=yegor256/cam
    sudo docker build --no-cache --tag 
"${repo}:${tag}" "$(pwd)" 24 | mkdir /tmp/dataset 25 | sudo docker run --rm "${repo}:${tag}" make install env lint test 26 | cat ../pwd | sudo docker login --password-stdin --username yegor256 27 | sudo docker push "${repo}:${tag}" 28 | -------------------------------------------------------------------------------- /tests/before/test-layout.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | 6 | stdout=$2 7 | 8 | { 9 | dirs=$(find "${LOCAL}/tests" -mindepth 1 -maxdepth 1 -type d -not -name "before" -not -name "after" -exec basename {} \;) 10 | echo "${dirs}" | while IFS= read -r d; do 11 | tests=$(find "${LOCAL}/tests/${d}" -mindepth 1 -maxdepth 1 -type f -name '*.sh' -exec basename {} \;) 12 | echo "${tests}" | while IFS= read -r t; do 13 | tail=${t:5:100} 14 | base=${tail%.*} 15 | name=${d}/${tail} 16 | if [ "$(find "${LOCAL}/${d}" -name "${base}.*" 2>/dev/null | wc -l)" -eq 0 ]; then 17 | echo "Script '${name}' doesn't exist, but its test exists in '${LOCAL}/tests/${d}/${t}'" 18 | exit 1 19 | fi 20 | done 21 | done 22 | } > "${stdout}" 2>&1 23 | echo "👍🏻 All test scripts have their live counterparts" 24 | -------------------------------------------------------------------------------- /filters/delete-unparsable.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | 5 | import sys 6 | import os 7 | from typing import Final 8 | import javalang 9 | 10 | if __name__ == '__main__': 11 | if len(sys.argv) != 3: 12 | print("Usage: python delete-unparsable.py ") 13 | sys.exit(1) 14 | 15 | java: Final[str] = sys.argv[1] 16 | lst: Final[str] = sys.argv[2] 17 | try: 18 | with open(java, encoding='utf-8') as f: 19 | try: 20 | 
raw = javalang.parse.parse(f.read())
                tree = raw.filter(javalang.tree.ClassDeclaration)
                # Walk the whole filter result so that lazy errors surface here.
                list((value for value in tree))
            except Exception:
                os.remove(java)
                with open(lst, 'a+', encoding='utf-8') as others:
                    others.write(java + "\n")
    except Exception:
        pass

--------------------------------------------------------------------------------
/filters/030-delete-tests.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT

# Deletes test sources (*Test.java, *Tests.java, *ITCase.java and anything
# under src/test/java) and prints a one-line summary.
#   $1 - directory to clean
#   $2 - temp directory; the deleted paths are listed in
#        filter-lists/test-files.txt inside it
# Does nothing when that list already exists.

set -e -o pipefail

home=$1
temp=$2

total=$(find "${home}" -type f | wc -l | xargs)

list=${temp}/filter-lists/test-files.txt
if [ -e "${list}" ]; then
  exit
fi

mkdir -p "$(dirname "${list}")"

# One traversal lists every file once, even when it matches several patterns
# (e.g. FooTest.java under src/test/java), so the reported count is exact.
find "${home}" -type f \( \
  -name '*Test.java' -o \
  -name '*ITCase.java' -o \
  -name '*Tests.java' -o \
  -path '**/src/test/java/**/*.java' \
  \) -print > "${list}"
while IFS= read -r f; do
  rm -f "${f}"
done < "${list}"

if [ -s "${list}" ]; then
  printf "%'d files out of %'d with \\\ff{Test} or \\\ff{ITCase} suffixes were deleted" \
    "$(wc -l < "${list}" | xargs)" "${total}"
else
  printf "There were no test files among %d files seen" "${total}"
fi

--------------------------------------------------------------------------------
/filters/delete-wrong-encoding.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT

import sys
import os
from typing import Final
import chardet

if __name__ == '__main__':
    if len(sys.argv) != 3:
        print("Usage: python 
delete-wrong-encoding.py ")
        sys.exit(1)

    java: Final[str] = sys.argv[1]
    lst: Final[str] = sys.argv[2]

    try:
        with open(java, 'rb') as f:
            rawdata = f.read()
        result = chardet.detect(rawdata)
        encoding = result['encoding']
        confidence = result['confidence']

        # Detectors differ in the case of the reported name ("utf-8" vs
        # "UTF-8"), and the name is None when nothing was detected, so the
        # comparison is done on a lower-cased, never-None string.
        accepted = (encoding or '').lower() in ('ascii', 'utf-8')
        # Keep only ASCII/UTF-8 files detected with at least 99% confidence.
        if not (accepted and 1.0 - confidence < 0.01):
            os.remove(java)
            with open(lst, 'a+', encoding='utf-8') as others:
                others.write(java + "\n")

    except Exception:
        pass

--------------------------------------------------------------------------------
/steps/unregister.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT

# Rewrites ${TARGET}/repositories.csv so that it keeps only the rows whose
# clone exists in ${TARGET}/github.
set -e -o pipefail

csv=${TARGET}/repositories.csv

if [ ! -e "${csv}" ]; then
  echo "Nothing to unregister, the CSV is absent: ${csv}"
  exit
fi

# Keep the data rows aside, then start the CSV over with its header only.
before="${TARGET}/temp/repositories-before-unregister.txt"
mkdir -p "$(dirname "${before}")"
tail -n +2 "${csv}" > "${before}"

head=$(head -1 "${csv}")
rm -f "${csv}"
echo "${head}" > "${csv}"

declare -i total=0
declare -i good=0
while IFS=',' read -r r tag tail; do
  if [ -z "${r}" ]; then
    continue;
  fi
  total=$((total+1))
  if [ ! 
-e "${TARGET}/github/${r}" ]; then
    echo "The clone of ${r} is absent, unregistered"
  else
    printf "%s,%s,%s\n" "${r}" "${tag}" "${tail}" >> "${csv}"
    good=$((good+1))
  fi
done < "${before}"
echo "All ${total} repositories checked, ${good} are good"

--------------------------------------------------------------------------------
/help/tdiff.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT

# Prints the time elapsed since a start moment as ", in <duration>".
#   $1 - start time in nanoseconds, as printed by `date +%s%N`
# Prints nothing when less than 100 microseconds have passed.
set -e -o pipefail

start=$1
if [ -z "${start}" ]; then
  # stderr, because callers embed the stdout of this script into messages
  echo 'One argument is required' >&2
  exit 1
fi

pfx=', in '
# Elapsed time in microseconds.
mks=$(echo "($(date +%s%N) - ${start}) / 1000" | bc)
if ((mks < 100)); then
  exit
elif ((mks < 1000)); then
  printf '%s%dμs' "${pfx}" "${mks}"
elif ((mks < 1000 * 1000)); then
  printf '%s%dms' "${pfx}" "$((mks / 1000))"
elif ((mks < 60 * 1000 * 1000)); then
  printf '%s%ds' "${pfx}" "$((mks / (1000 * 1000)))"
elif ((mks < 60 * 60 * 1000 * 1000)); then
  minutes=$((mks / (60 * 1000 * 1000)))
  seconds=$(((mks - minutes * 60 * 1000 * 1000) / (1000 * 1000)))
  printf '%s%dm%ds' "${pfx}" "${minutes}" "${seconds}"
else
  hours=$((mks / (60 * 60 * 1000 * 1000)))
  minutes=$(((mks - hours * 60 * 60 * 1000 * 1000) / (60 * 1000 * 1000)))
  printf '%s%dh%dm' "${pfx}" "${hours}" "${minutes}"
fi

--------------------------------------------------------------------------------
/tests/filters/test-080-delete-symlinks.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT
set -e -o pipefail

temp=$1
stdout=$2

{
  msg=$("${LOCAL}/filters/080-delete-symlinks.sh" "${temp}" "${temp}/temp")
  echo 
"${msg}" 12 | echo "${msg}" | grep "There were no symlinks" 13 | } > "${stdout}" 2>&1 14 | echo "👍🏻 An empty directory didn't crash it" 15 | 16 | { 17 | link="${temp}/foo/dir (with) _ long & and 'weird' \"name\" /a/b/c/Foo.java" 18 | mkdir -p "$(dirname "${link}")" 19 | file="${temp}/x/y- ;/dir (with) ; ' _ l/Bar.java" 20 | mkdir -p "$(dirname "${file}")" 21 | echo > "${file}" 22 | ln -s "${file}" "${link}" 23 | ln -s "${file}" "${temp}/another-link" 24 | msg=$("${LOCAL}/filters/080-delete-symlinks.sh" "${temp}" "${temp}/temp") 25 | echo "${msg}" 26 | echo "${msg}" | grep "2 symlinks were deleted" 27 | test ! -e "${link}" 28 | test -e "${file}" 29 | } > "${stdout}" 2>&1 30 | echo "👍🏻 A few symlinks were deleted" 31 | -------------------------------------------------------------------------------- /DEPENDS.txt: -------------------------------------------------------------------------------- 1 | hard acmart 2 | hard algorithmicx 3 | hard algpseudocodex 4 | hard anyfontsize 5 | hard babel-russian 6 | hard bibcop 7 | hard biber 8 | hard biblatex 9 | hard booktabs 10 | hard cancel 11 | hard catchfile 12 | hard changepage 13 | hard cjk 14 | hard cleveref 15 | hard cm-super 16 | hard cmap 17 | hard comment 18 | hard csquotes 19 | hard currfile 20 | hard cyrillic 21 | hard datetime 22 | hard doi 23 | hard enumitem 24 | hard environ 25 | hard everyshi 26 | hard fdsymbol 27 | hard ffcode 28 | hard float 29 | hard fmtcount 30 | hard footmisc 31 | hard framed 32 | hard fvextra 33 | hard href-ul 34 | hard huawei 35 | hard hyperxmp 36 | hard hyphen-russian 37 | hard iexec 38 | hard ifmtarg 39 | hard lastpage 40 | hard latexmk 41 | hard lh 42 | hard libertine 43 | hard makecell 44 | hard ncctools 45 | hard paralist 46 | hard preprint 47 | hard silence 48 | hard stmaryrd 49 | hard svg 50 | hard textcase 51 | hard textpos 52 | hard titlesec 53 | hard titling 54 | hard to-be-determined 55 | hard totpages 56 | hard transparent 57 | hard trimspaces 58 | hard upquote 59 | hard wrapfig 
hard xstring

--------------------------------------------------------------------------------
/installs/install-texlive-base.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT

# Installs a minimal TeX Live when help/texlive-bin.sh cannot locate one,
# puts tlmgr on PATH and installs the collection-latex package set.
# Reads ${LOCAL} from the environment.

set -ex
set -o pipefail

if ! "${LOCAL}/help/texlive-bin.sh"; then
  if [ ! -f "install-tl.zip" ]; then
    wget --quiet https://ftp.snt.utwente.nl/pub/software/tex/systems/texlive/tlnet/install-tl.zip
  fi
  rm -rf install-tl
  unzip install-tl.zip -d install-tl
  # The archive unpacks into a directory named install-tl-<something>.
  name=$(find install-tl/ -type d -name "install-tl-*" -exec basename {} \;)
  perl "./install-tl/${name}/install-tl" --scheme=scheme-minimal --no-interaction --repository=https://ftp.snt.utwente.nl/pub/software/tex/systems/texlive/tlnet
  rm -rf install-tl install-tl.zip
fi

if ! tlmgr --version >/dev/null 2>&1; then
  if "${LOCAL}/help/is-linux.sh" || "${LOCAL}/help/is-macos.sh"; then
    PATH=$PATH:$("${LOCAL}/help/texlive-bin.sh")
    export PATH
  else
    "${LOCAL}/help/assert-tool.sh" tlmgr --version
  fi
fi

"${LOCAL}/help/sudo.sh" tlmgr install collection-latex
pdflatex -v

--------------------------------------------------------------------------------
/tests/filters/test-090-delete-empty-directories.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT
set -e -o pipefail

temp=$1
stdout=$2

# One empty directory tree and one tree that holds a Java file.
{
  empty="${temp}/foo/dir (with) _ long & and 'weird' \"name\" /a/b/c"
  mkdir -p "${empty}"
  full="${temp}/x/ ; 'w' \"nx\" /f/d/k"
  mkdir -p "${full}"
  java=${full}/Foo.java
  touch "${java}"
  msg=$("${LOCAL}/filters/090-delete-empty-directories.sh" "${temp}" "${temp}/temp")
echo "${msg}"
  echo "${msg}" | grep "6 empty directories were deleted"
  test ! -e "${empty}"
  test -e "${full}"
  test -e "${java}"
} > "${stdout}" 2>&1
echo "👍🏻 A empty directory was deleted"

# A deep chain of empty directories must be removed up to its top.
{
  mkdir -p "${temp}/bar/a/b/c/d/e/f"
  msg=$("${LOCAL}/filters/090-delete-empty-directories.sh" "${temp}" "${temp}/temp")
  echo "${msg}"
  echo "${msg}" | grep "13 empty directories were deleted"
  test ! -e "${temp}/bar"
} > "${stdout}" 2>&1
echo "👍🏻 All empty directories deleted recursively"

--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
(The MIT License)

Copyright (c) 2021-2025 Yegor Bugayenko

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the 'Software'), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
22 | -------------------------------------------------------------------------------- /LICENSES/MIT.txt: -------------------------------------------------------------------------------- 1 | (The MIT License) 2 | 3 | Copyright (c) 2021-2025 Yegor Bugayenko 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the 'Software'), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /tests/filters/test-010-delete-non-java-files.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | 6 | temp=$1 7 | stdout=$2 8 | 9 | { 10 | mkdir -p "${temp}/foo" 11 | msg=$("${LOCAL}/filters/010-delete-non-java-files.sh" "${temp}/foo" "${temp}/temp") 12 | echo "${msg}" 13 | echo "${msg}" | grep "nothing was deleted" 14 | } > "${stdout}" 2>&1 15 | echo "👍🏻 An empty directory didn't crash it" 16 | 17 | { 18 | list=${temp}/temp/filter-lists/non-java-files.txt 19 | png="${temp}/foo/dir (with) _ long & and 'java' \"name\" /test.png" 20 | mkdir -p "$(dirname "${png}")" 21 | echo "" > "${png}" 22 | mkdir -p "$(dirname "${list}")" 23 | rm -f "${list}" 24 | msg=$("${LOCAL}/filters/010-delete-non-java-files.sh" "${temp}/foo" "${temp}/temp") 25 | echo "${msg}" 26 | echo "${msg}" | grep "1 files out of 1 without the \\\ff{.java} extension were deleted" 27 | test ! -e "${png}" 28 | test -e "${list}" 29 | test "$(wc -l < "${list}" | xargs)" = 1 30 | } > "${stdout}" 2>&1 31 | echo "👍🏻 A binary non-Java file was deleted" 32 | -------------------------------------------------------------------------------- /tex/Makefile: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 2 | # SPDX-License-Identifier: MIT 3 | 4 | .SHELLFLAGS = -e -x -c 5 | .ONESHELL: 6 | SHELL=bash 7 | 8 | TLROOT=$$(kpsewhich -var-value TEXMFDIST) 9 | PACKAGES=ffcode to-be-determined href-ul bibcop iexec 10 | REPO=yegor256/cam 11 | 12 | zip: *.tex 13 | rm -rf package 14 | mkdir package 15 | cd package 16 | cp ../paper.tex . 17 | cp ../main.bib . 
18 | for p in $(PACKAGES); do cp $(TLROOT)/tex/latex/$${p}/$${p}.sty .; done 19 | version=$$(curl --silent -H "Accept: application/vnd.github.v3+json" https://api.github.com/repos/$(REPO)/releases/latest | jq -r '.tag_name') 20 | echo "Version is: $${version}" 21 | gsed -i "s|0\.0\.0|$${version}|g" paper.tex 22 | gsed -i "s|REPOSITORY|$(REPO)|g" paper.tex 23 | pdflatex -shell-escape -halt-on-error paper.tex > /dev/null 24 | bibtex paper 25 | pdflatex -halt-on-error paper.tex > /dev/null 26 | pdflatex -halt-on-error paper.tex > /dev/null 27 | rm -rf *.aux *.bcf *.blg *.fdb_latexmk *.fls *.log *.run.xml *.out *.exc 28 | zip -x paper.pdf -r paper-$${version}.zip * 29 | mv paper-$${version}.zip .. 30 | cd .. 31 | 32 | clean: 33 | git clean -dfX 34 | -------------------------------------------------------------------------------- /steps/jpeek.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | 6 | start=$(date +%s%N) 7 | 8 | jobs=${TARGET}/temp/jobs/jpeek-jobs.txt 9 | rm -rf "${jobs}" 10 | mkdir -p "$(dirname "${jobs}")" 11 | touch "${jobs}" 12 | 13 | repos=$(find "${TARGET}/github" -maxdepth 2 -mindepth 2 -type d -print) 14 | total=$(echo "${repos}" | wc -l | xargs) 15 | 16 | : > "${TARGET}/temp/jpeek_failure.log" 17 | : > "${TARGET}/temp/jpeek_success.log" 18 | dir=${TARGET}/temp/jpeek/all 19 | mkdir -p "${dir}" 20 | 21 | declare -i repo=0 22 | sh="$(dirname "$0")/jpeek-repo.sh" 23 | for d in ${repos}; do 24 | r=$(realpath --relative-to="${TARGET}/github" "${d}" ) 25 | repo=$((repo+1)) 26 | printf "timeout 1h %s %s %s %s || true\n" "${sh@Q}" "${r@Q}" "${repo@Q}" "${total@Q}" >> "${jobs}" 27 | done 28 | 29 | "${LOCAL}/help/parallel.sh" "${jobs}" 30 | wait 31 | 32 | done=$(find "${dir}" -maxdepth 2 -mindepth 2 -type d -print | wc -l | xargs) 33 | 34 | echo "All ${total} repositories 
passed through jPeek, ${done} of them produced data, in $(nproc) threads$("${LOCAL}/help/tdiff.sh" "${start}")"

--------------------------------------------------------------------------------
/tests/metrics/test-aocih.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT

# Tests metrics/aocih.sh outside of a git repository and inside a fresh one.
#   $1 - scratch directory
#   $2 - file that receives the log
set -e -o pipefail

temp=$1
stdout=$2

{
  tmp=$(mktemp -d /tmp/XXXX)
  "${LOCAL}/metrics/aocih.sh" "${tmp}/Test.java" "${temp}/stdout"
  grep -q "AoCiH 0 " "${temp}/stdout"
  # The directory lives outside of ${temp}, so it is removed here.
  rm -rf -- "${tmp}"
} > "${stdout}" 2>&1
echo "👍🏻 Didn't fail in non-git directory"

{
  mkdir -p "${temp}/foo"
  cd "${temp}/foo"
  git init --quiet .
  git config user.email 'foo@example.com'
  git config user.name 'Foo'
  git config commit.gpgsign false
  touch empty.file
  git add "empty.file"
  git commit --no-verify --quiet -am "Initial commit"
  java="Test.java"
  echo "class Foo {}" > "${java}"
  git add "${java}"
  git commit --no-verify --quiet -am "Second commit"
  # Re-date the second commit to one hour from now.
  git commit --no-verify --amend --no-edit --date="$(date -d "+1 hour" --rfc-2822)"
  "${LOCAL}/metrics/aocih.sh" "${java}" stdout
  grep -q "AoCiH 1" stdout
} > "${stdout}" 2>&1
echo "👍🏻 Correctly calculated AoCiH in the repository"

--------------------------------------------------------------------------------
/installs/install-texlive-depends.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT

# Installs the TeX Live packages named in ${LOCAL}/DEPENDS.txt through tlmgr.

set -ex
set -o pipefail

if ! tlmgr --version >/dev/null 2>&1; then
  PATH=$PATH:$("${LOCAL}/help/texlive-bin.sh")
  export PATH
fi

if "${LOCAL}/help/is-macos.sh"; then
  if [ ! 
-e "${HOME}/Library/texmf" ] && [ ! -e "${HOME}/texmf/tlpkg/texlive.tlpdb" ]; then
    "${LOCAL}/help/sudo.sh" tlmgr init-usertree
  fi
elif "${LOCAL}/help/is-linux.sh"; then
  if [ ! -e "${HOME}/texmf" ]; then
    "${LOCAL}/help/sudo.sh" tlmgr init-usertree
  fi
fi
"${LOCAL}/help/sudo.sh" tlmgr option repository https://ftp.snt.utwente.nl/pub/software/tex/systems/texlive/tlnet
"${LOCAL}/help/sudo.sh" tlmgr --verify-repo=none update --self
# The package name is the second space-separated field of every line.
packages=()
while IFS= read -r p; do
  packages+=( "${p}" )
done < <( cut -d' ' -f2 "${LOCAL}/DEPENDS.txt" | uniq )
"${LOCAL}/help/sudo.sh" tlmgr --verify-repo=none install "${packages[@]}"
"${LOCAL}/help/sudo.sh" tlmgr --verify-repo=none update --no-auto-remove "${packages[@]}" || echo 'Failed to update'

--------------------------------------------------------------------------------
/filters/040-delete-unparsable.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT

# Runs filters/delete-unparsable.py on every *.java file and prints a
# one-line summary.
#   $1 - directory to scan
#   $2 - temp directory for the job file and the list of deleted files
# Does nothing when filter-lists/unparsable-files.txt already exists.

set -e -o pipefail

home=$1
temp=$2

list=${temp}/filter-lists/unparsable-files.txt
if [ -e "${list}" ]; then
  exit
fi

mkdir -p "$(dirname "${list}")"
touch "${list}"

jobs=${temp}/jobs/delete-unparsable.txt
rm -rf "${jobs}"
mkdir -p "$(dirname "${jobs}")"
touch "${jobs}"

candidates=${temp}/files-to-parse.txt
mkdir -p "$(dirname "${candidates}")"
find "${home}" -type f -name '*.java' -print > "${candidates}"
py=${LOCAL}/filters/delete-unparsable.py
# One shell-quoted command line per candidate file.
while IFS= read -r f; do
  printf "python3 %s %s %s\n" "${py@Q}" "${f@Q}" "${list@Q}" >> "${jobs}"
done < "${candidates}"
"${LOCAL}/help/parallel.sh" "${jobs}"
wait

total=$(wc -l < "${candidates}" | xargs)
if [ -s "${list}" ]; then
  printf "%'d files out of 
%'d with an unparsable Java syntax were deleted" \
    "$(wc -l < "${list}" | xargs)" "${total}"
else
  printf "No files out of %'d had an unparsable Java syntax" \
    "${total}"
fi

--------------------------------------------------------------------------------
/tests/metrics/test-multimetric.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT
set -e -o pipefail

temp=$1
stdout=$2

# To make sure it is installed
multimetric --help >/dev/null

{
  java="${temp}/Foo long 'weird' name (--).java"
  # NOTE(review): everything between the input redirection and the closing
  # redirection of this group is missing from this copy (presumably a
  # here-document with Java code plus the assertions); restore it from the
  # repository before running this test.
  cat > "${java}" < "${stdout}" 2>&1
echo "👍🏻 Correctly counted a few metrics"

--------------------------------------------------------------------------------
/steps/clone.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT

# Writes one "clone-repo.sh" job for every repository of
# ${TARGET}/repositories.csv that is not yet present in ${TARGET}/github and
# runs the jobs through help/parallel.sh.
set -e -o pipefail

start=$(date +%s%N)

jobs=${TARGET}/temp/jobs/clone-jobs.txt
rm -rf "${jobs}"
mkdir -p "$(dirname "${jobs}")"
touch "${jobs}"

# The CSV without its header row.
repos="${TARGET}/temp/repositories.txt"
mkdir -p "$(dirname "${repos}")"
tail -n +2 "${TARGET}/repositories.csv" > "${repos}"
total=$(wc -l < "${repos}" | xargs)

"${LOCAL}/help/assert-tool.sh" git --version

declare -i repo=0
sh="$(dirname "$0")/clone-repo.sh"
while IFS=',' read -r r tag tail; do
  repo=$((repo+1))
  # An empty tag and a "." tag both fall back to "master".
  if [ -z "${tag}" ]; then
    tag='master';
  fi
  if [ "${tag}" = '.' 
]; then
    tag='master';
  fi
  if [ -e "${TARGET}/github/${r}" ]; then
    echo "${r}: Git repo is already here (${tail})"
  else
    printf "%s %s %s %s %s\n" "${sh@Q}" "${r@Q}" "${tag@Q}" "${repo@Q}" "${total@Q}" >> "${jobs}"
  fi
done < "${repos}"

"${LOCAL}/help/parallel.sh" "${jobs}" 8
wait

echo "Cloned ${total} repositories in $(nproc) threads$("${LOCAL}/help/tdiff.sh" "${start}")"

--------------------------------------------------------------------------------
/filters/060-delete-non-classes.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT

# Runs filters/delete-non-classes.py on every *.java file and prints a
# one-line summary.
#   $1 - directory to scan
#   $2 - temp directory for the job file and the list of deleted files
# Does nothing when filter-lists/non-class-files.txt already exists.

set -e -o pipefail

home=$1
temp=$2

list=${temp}/filter-lists/non-class-files.txt
if [ -e "${list}" ]; then
  exit
fi

mkdir -p "$(dirname "${list}")"
touch "${list}"

jobs=${temp}/jobs/delete-non-classes.txt
rm -rf "${jobs}"
mkdir -p "$(dirname "${jobs}")"
touch "${jobs}"

candidates=${temp}/classes-to-challenge.txt
mkdir -p "$(dirname "${candidates}")"
find "${home}" -type f -name '*.java' -print > "${candidates}"
py=${LOCAL}/filters/delete-non-classes.py
# One shell-quoted command line per candidate file.
while IFS= read -r f; do
  printf "python3 %s %s %s\n" "${py@Q}" "${f@Q}" "${list@Q}" >> "${jobs}"
done < "${candidates}"
"${LOCAL}/help/parallel.sh" "${jobs}"
wait

total=$(wc -l < "${candidates}" | xargs)
if [ -s "${list}" ]; then
  printf "%'d files out of %'d with interfaces or enums (instead of classes) inside were deleted" \
    "$(wc -l < "${list}" | xargs)" "${total}"
else
  printf "All %d files are Java classes, nothing to delete" \
    "${total}"
fi

--------------------------------------------------------------------------------
/metrics/bug_discover.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT

# Runs "pmd check" with a generated ruleset on a copy of one Java file.
#   $1 - path of the Java file
#   $2 - path of the metrics file
# Reads ${JVM_OPTS} from the environment.

set -e -o pipefail

java=$1
output=$(realpath "$2")

tmp=$(mktemp -d)
mkdir -p "${tmp}"

# NOTE(review): the XML markup of this ruleset is missing from this copy; only
# the text of its description survived. Restore the here-document body from
# the repository before use.
cat <<EOT > "${tmp}/config.xml"



Bug discovering


EOT
cp "${java}" "${tmp}/foo.java"

export PMD_JAVA_OPTS=${JVM_OPTS}
# We don't use --cache here, because it becomes too big and leads to "Out Of Memory" error
pmd check -R "${tmp}/config.xml" -d "${tmp}" --format xml --no-fail-on-error --no-fail-on-violation > "${tmp}/result.xml" 2> "${tmp}/stderr.txt" || (cat "${tmp}/stderr.txt"; exit 1)

# NOTE(review): the awk program and the statement that writes ${output} are
# truncated in this copy; the line below is kept as found.
violation_num=$(awk '/ "${output}"

--------------------------------------------------------------------------------
/tests/before/test-executability.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT

# Fails when a *.sh or *.py file under ${LOCAL} lacks the executable bit.
#   $2 - file that receives the log
set -e -o pipefail

stdout=$2

# Directories that are not checked.
declare -a excludes=()
for e in dataset lib pylint_plugins venv; do
  excludes+=(-not -path "${LOCAL}/${e}/**")
done

{
  scripts=$(find "${LOCAL}" "${excludes[@]}" -type f -name '*.sh')
  echo "${scripts}" | while IFS= read -r sh; do
    if [ ! -x "${sh}" ]; then
      echo "Script '${sh}' is not executable, try running 'chmod +x ${sh}'"
      exit 1
    fi
  done
  echo "All $(echo "${scripts}" | wc -w | xargs) scripts are executable, it's OK"
} > "${stdout}" 2>&1
echo "👍🏻 All .sh scripts are executable"

{
  scripts=$(find "${LOCAL}" "${excludes[@]}" -type f -name '*.py')
  echo "${scripts}" | while IFS= read -r py; do
    if [ ! 
-x "${py}" ]; then
      echo "Script '${py}' is not executable, try running 'chmod +x ${py}'"
      exit 1
    fi
  done
  echo "All $(echo "${scripts}" | wc -w | xargs) scripts are executable, it's OK"
} > "${stdout}" 2>&1
echo "👍🏻 All .py scripts are executable"

--------------------------------------------------------------------------------
/filters/050-delete-long-lines.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT

# Deletes every *.java file that has a line longer than ${max} and prints a
# one-line summary.
#   $1 - directory to scan
#   $2 - temp directory for the list of deleted files
# Does nothing when filter-lists/files-with-long-lines.txt already exists.

set -e -o pipefail

home=$1
temp=$2

max=1024

list=${temp}/filter-lists/files-with-long-lines.txt
if [ -e "${list}" ]; then
  exit
fi

mkdir -p "$(dirname "${list}")"
touch "${list}"

candidates=${temp}/files-to-check-line-lengths.txt
find "${home}" -type f -name '*.java' -print > "${candidates}"
while IFS= read -r f; do
  # Length of the longest line; LC_ALL=C makes awk count bytes.
  length=$(LC_ALL=C awk '{ print length($0) }' < "${f}" | sort -n | tail -1)
  # An empty file yields no length at all.
  if [ -z "${length}" ]; then
    continue;
  fi
  if [ "${length}" -gt "${max}" ]; then
    echo "${f}" >> "${list}"
    rm "${f}"
  fi
done < "${candidates}"

total=$(wc -l < "${candidates}" | xargs)
if [ -s "${list}" ]; then
  printf "%'d files out of %'d with at least one line longer than %'d characters, which most probably is a symptom of an auto-generated code, were deleted" \
    "$(wc -l < "${list}" | xargs)" "${total}" "${max}"
else
  printf "No files out of %'d had lines longer than %'d characters" \
    "${total}" "${max}"
fi

--------------------------------------------------------------------------------
/tests/metrics/test-pmd.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# 
SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | 6 | temp=$1 7 | stdout=$2 8 | 9 | { 10 | pmd --version 11 | xmllint --version 12 | ruby --version 13 | } > "${stdout}" 2>&1 14 | echo "👍🏻 PMD dependencies are installed" 15 | 16 | { 17 | echo ' 18 | 21 | ' > "${temp}/config.xml" 22 | java="${temp}/Foo.java" 23 | mkdir -p "$(dirname "${java}")" 24 | echo "class Foo {}" > "${java}" 25 | pmd check -R "${temp}/config.xml" -d "$(dirname "${java}")" > "${temp}/stdout" 26 | } > "${stdout}" 2>&1 27 | echo "👍🏻 PMD tool works correctly" 28 | 29 | { 30 | java="${temp}/Foo long 'weird' name (--).java" 31 | mkdir -p "$(dirname "${java}")" 32 | echo "class Foo {}" > "${java}" 33 | "${LOCAL}/metrics/pmd.sh" "${java}" "${temp}/stdout" 34 | grep "CoCo 0 " "${temp}/stdout" 35 | } > "${stdout}" 2>&1 36 | echo "👍🏻 Correctly calculated cognitive complexity" 37 | -------------------------------------------------------------------------------- /.github/workflows/make.yml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 2 | # SPDX-License-Identifier: MIT 3 | --- 4 | # yamllint disable rule:line-length 5 | name: make 6 | 'on': 7 | push: 8 | branches: 9 | - master 10 | pull_request: 11 | branches: 12 | - master 13 | jobs: 14 | make: 15 | timeout-minutes: 30 16 | runs-on: ubuntu-24.04 17 | steps: 18 | - uses: actions/checkout@v4 19 | with: 20 | submodules: true 21 | - run: sudo make install FORCE_INSTALL=true 22 | - run: make env lint test 23 | make-macos: 24 | timeout-minutes: 30 25 | runs-on: macos-15 26 | env: 27 | PYTORCH_MPS_HIGH_WATERMARK_RATIO: '0.0' 28 | steps: 29 | - uses: actions/checkout@v4 30 | with: 31 | submodules: true 32 | - uses: actions/setup-java@v5 33 | with: 34 | distribution: 'temurin' 35 | java-version: 21 36 | - run: sudo chown -R "$(whoami)" /usr/local 37 | - run: | 38 | for c in make bash grep; do 39 | brew install "${c}" --overwrite 40 | echo "$(brew 
--prefix)/opt/${c}/libexec/gnubin" >> "$GITHUB_PATH" 41 | done 42 | - run: sudo make install FORCE_INSTALL=true 43 | - run: make env lint test 44 | -------------------------------------------------------------------------------- /steps/aggregation-functions/median.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | 5 | set -e -o pipefail 6 | 7 | metric_file=$1 8 | output_folder=$2 9 | metric_name=$3 10 | 11 | mkdir -p "${output_folder}" 12 | 13 | values=$(awk -F, 'NR > 1 {print $3}' "${metric_file}") 14 | 15 | values_array=() 16 | while IFS= read -r value; do 17 | if [[ -n "$value" ]]; then 18 | values_array+=("$value") 19 | fi 20 | done <<< "$values" 21 | 22 | mapfile -t sorted_values < <(for value in "${values_array[@]}"; do echo "$value"; done | sort -n) 23 | 24 | count=${#sorted_values[@]} 25 | output_file="${output_folder}/${metric_name}.median.csv" 26 | 27 | if ((count > 0)); then 28 | if ((count % 2 == 1)); then 29 | median="${sorted_values[$((count / 2))]}" 30 | else 31 | mid1=$((count / 2 - 1)) 32 | mid2=$((count / 2)) 33 | median=$(echo "scale=3; (${sorted_values[$mid1]} + ${sorted_values[$mid2]}) / 2" | bc) 34 | fi 35 | formatted_median=$(printf "%0.3f" "$median") 36 | echo "$formatted_median" > "${output_file}" 37 | echo "Aggregated median for ${metric_name}: $formatted_median" 38 | else 39 | echo "0.000" > "${output_file}" 40 | echo "No valid data to aggregate for ${metric_name}" 41 | fi 42 | -------------------------------------------------------------------------------- /steps/README.md: -------------------------------------------------------------------------------- 1 | # How It Works 2 | 3 | The entire process of gathering metrics from GitHub repos consists of these 4 | steps: 5 | 6 | * **Discovering**. 
Here we fetch the list of repos from GitHub and then create 7 | directories for them. 8 | * **Polishing**. Then we delete directories that don't exist in the list of 9 | required repositories. 10 | * **Unregistering**. During this step, we clean directories from the CSV 11 | register if their clones are absent. 12 | * **Cloning**. In this step we run `git clone` on the repositories found. 13 | * **JPEEK**. Here, we build those gathered repositories and run 14 | [jpeek](https://github.com/cqfn/jpeek) on them. 15 | * **Filtering**. This is where we apply 16 | [all the filters](https://github.com/yegor256/cam/tree/master/filters) 17 | in order to get rid of irrelevant classes (such as `*Test`, `*ITCase`, invalid 18 | files and so on). The whole filtering process is printed in the final report; 19 | you can check it [here](http://cam.yegor256.com/cam-2024-03-02.pdf). 20 | * **Measuring**. We calculate metrics for each file using these 21 | [metrics](https://github.com/yegor256/cam/tree/master/metrics). 22 | * **Aggregating**. We aggregate all metrics in summary CSV files. 23 | * **Summarization**. Generate summary statistics (count, sum, average, etc.) 24 | for each metric and save them in `data/summary/{metric}.csv`.
25 | -------------------------------------------------------------------------------- /steps/measure-file.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | 6 | java=$1 7 | javam=$2 8 | pos=$3 9 | total=$4 10 | 11 | start=$(date +%s%N) 12 | 13 | mkdir -p "$(dirname "${javam}")" 14 | touch "${javam}" 15 | metrics=$(find "${LOCAL}/metrics/" -type f -exec test -x {} \; -exec basename {} \;) 16 | echo "${metrics}" | { 17 | sum=0 18 | while IFS= read -r m; do 19 | if timeout 30m "metrics/${m}" "${java}" "${javam}"; then 20 | while IFS= read -r t; do 21 | IFS=' ' read -r -ra M <<< "${t}" 22 | value=$(echo "${M[1]}" | "${LOCAL}/help/float.sh") 23 | echo "${value}" > "${javam}.${M[0]}" 24 | if [ ! "${value}" = "NaN" ]; then 25 | sum=$(echo "${sum} + ${value}" | bc | "${LOCAL}/help/float.sh") 26 | fi 27 | done < "${javam}" 28 | else 29 | echo "Failed to collect ${m} for ${java}" 30 | fi 31 | done 32 | echo "$(echo "${metrics}" | wc -w | xargs) scripts \ 33 | collected $(find "$(dirname "${javam}")" -type f -name "$(basename "${javam}").*" | wc -l | xargs) metrics (sum=${sum}) \ 34 | for: $(basename "${java}") (${pos}/${total})$("${LOCAL}/help/tdiff.sh" "${start}")" 35 | } 36 | -------------------------------------------------------------------------------- /REUSE.toml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 Yegor Bugayenko 2 | # SPDX-License-Identifier: MIT 3 | 4 | version = 1 5 | [[annotations]] 6 | path = [ 7 | ".dockerignore", 8 | ".DS_Store", 9 | ".gitattributes", 10 | ".gitignore", 11 | ".gitmodules", 12 | ".latexmkrc", 13 | ".shellcheckrc", 14 | ".texqc", 15 | ".texsc", 16 | "**.csv", 17 | "**.jpg", 18 | "**.json", 19 | "**.md", 20 | "**.pdf", 21 | "**.png", 22 | "**.svg", 23 | "**.txt", 24 | 
"**/.dockerignore", 25 | "**/.DS_Store", 26 | "**/.gitignore", 27 | "**/.gitmodules", 28 | "**/.latexmkrc", 29 | "**/.shellcheckrc", 30 | "**/.texqc", 31 | "**/.texsc", 32 | "**/*.csv", 33 | "**/*.jpg", 34 | "**/*.json", 35 | "**/*.md", 36 | "**/*.pdf", 37 | "**/*.png", 38 | "**/*.svg", 39 | "**/*.txt", 40 | "**/*.vm", 41 | "**/aspell.en.pws", 42 | "**/CITATION.cff", 43 | "**/CNAME", 44 | "**/DEPENDS.txt", 45 | "aspell.en.pws", 46 | "CITATION.cff", 47 | "DEPENDS.txt", 48 | "README.md", 49 | "renovate.json", 50 | "tex/.latexmkrc", 51 | "tex/.texqc", 52 | "tex/.texsc", 53 | "tex/aspell.en.pws", 54 | ] 55 | precedence = "override" 56 | SPDX-FileCopyrightText = "Copyright (c) 2025 Yegor Bugayenko" 57 | SPDX-License-Identifier = "MIT" 58 | -------------------------------------------------------------------------------- /steps/aggregation-functions/90-percentile.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | 5 | set -e -o pipefail 6 | 7 | metric_file=$1 8 | output_folder=$2 9 | metric_name=$3 10 | 11 | mkdir -p "${output_folder}" 12 | 13 | values=$(awk -F, 'NR > 1 {print $3}' "${metric_file}") 14 | 15 | values_array=() 16 | while IFS= read -r value; do 17 | if [[ -n "$value" ]]; then 18 | values_array+=("$value") 19 | fi 20 | done <<< "$values" 21 | 22 | mapfile -t sorted_values < <(for value in "${values_array[@]}"; do echo "$value"; done | sort -n) 23 | 24 | count=${#sorted_values[@]} 25 | output_file="${output_folder}/${metric_name}.90th_percentile.csv" 26 | 27 | if ((count > 0)); then 28 | percentile_rank=$(echo "scale=0; $count * 0.9" | bc) 29 | percentile_rank=${percentile_rank%.*} 30 | if ((percentile_rank == 0)); then 31 | percentile_rank=1 32 | elif ((percentile_rank >= count)); then 33 | percentile_rank=$count 34 | fi 35 | percentile_value="${sorted_values[$((percentile_rank - 1))]}" 36 | echo 
"$percentile_value" > "${output_file}" 37 | echo "Aggregated 90th percentile for ${metric_name}: $percentile_value" 38 | else 39 | echo "0.000" > "${output_file}" 40 | echo "No valid data to aggregate for ${metric_name}" 41 | fi 42 | -------------------------------------------------------------------------------- /filters/070-delete-invalid-files.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | 5 | set -e -o pipefail 6 | 7 | home=$1 8 | temp=$2 9 | 10 | list=${temp}/filter-lists/invalid-files.txt 11 | if [ -e "${list}" ]; then 12 | exit 13 | fi 14 | 15 | mkdir -p "$(dirname "${list}")" 16 | touch "${list}" 17 | 18 | jobs=${temp}/jobs/delete-invalid-files.txt 19 | rm -rf "${jobs}" 20 | mkdir -p "$(dirname "${jobs}")" 21 | touch "${jobs}" 22 | 23 | candidates=${temp}/classes-to-filter.txt 24 | mkdir -p "$(dirname "${candidates}")" 25 | find "${home}" -type f -name '*.java' -print > "${candidates}" 26 | py=${LOCAL}/filters/delete-invalid-files.py 27 | while IFS= read -r f; do 28 | printf "python3 %s %s %s\n" "${py@Q}" "${f@Q}" "${list@Q}" >> "${jobs}" 29 | done < "${candidates}" 30 | "${LOCAL}/help/parallel.sh" "${jobs}" 31 | wait 32 | 33 | total=$(wc -l < "${candidates}" | xargs) 34 | if [ -s "${list}" ]; then 35 | printf "%'d files out of %'d with more than one Java class inside were deleted" \ 36 | "$(wc -l < "${list}")" "${total}" 37 | else 38 | if [ "${total}" -eq 0 ]; then 39 | printf "There were no Java classes, nothing to delete" 40 | else 41 | printf "All %'d files are Java classes, nothing to delete" \ 42 | "${total}" 43 | fi 44 | fi 45 | -------------------------------------------------------------------------------- /pylint_plugins/custom_checkers.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # SPDX-FileCopyrightText: 
Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | 5 | from astroid import nodes 6 | from pylint.checkers import BaseChecker 7 | from pylint.lint import PyLinter 8 | 9 | 10 | class ConstantChecker(BaseChecker): 11 | """Class for checking the correct location of constants. 12 | Variables in UPPERCASE should only be declared at the top level of the module 13 | """ 14 | name = 'constant-checker' 15 | priority = -1 16 | msgs = { 17 | 'E1019': ( 18 | 'Constant "%s" should be defined at the top of the module', 19 | 'non-top-level-constant', 20 | 'Emitted when a constant is not defined in the top level' 21 | ) 22 | } 23 | 24 | def visit_assignname(self, node: nodes.AssignName) -> None: 25 | """Called when a variable definition is node in the ast tree 26 | """ 27 | if node.name.isupper() and not isinstance(node.parent.parent, nodes.Module): 28 | self.add_message('non-top-level-constant', node=node, args=(node.name,)) 29 | 30 | 31 | def register(linter: PyLinter) -> None: 32 | """Function to register extra pylint checkers. 
33 | If you want to write your own checker don't forget to register it here 34 | """ 35 | linter.register_checker(ConstantChecker(linter)) 36 | -------------------------------------------------------------------------------- /filters/031-delete-wrong-encoding.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | 5 | set -e -o pipefail 6 | 7 | home=$1 8 | temp=$2 9 | 10 | list=${temp}/filter-lists/wrong-encoding.txt 11 | if [ -e "${list}" ]; then 12 | exit 13 | fi 14 | 15 | mkdir -p "$(dirname "${list}")" 16 | touch "${list}" 17 | 18 | jobs=${temp}/jobs/delete-wrong-encoding.txt 19 | rm -rf "${jobs}" 20 | mkdir -p "$(dirname "${jobs}")" 21 | touch "${jobs}" 22 | 23 | candidates=${temp}/classes-to-filter.txt 24 | mkdir -p "$(dirname "${candidates}")" 25 | find "${home}" -type f -name '*.java' -print > "${candidates}" 26 | py=${LOCAL}/filters/delete-wrong-encoding.py 27 | while IFS= read -r f; do 28 | printf "python3 %s %s %s\n" "${py@Q}" "${f@Q}" "${list@Q}" >> "${jobs}" 29 | done < "${candidates}" 30 | "${LOCAL}/help/parallel.sh" "${jobs}" 31 | wait 32 | 33 | total=$(wc -l < "${candidates}" | xargs) 34 | if [ -s "${list}" ]; then 35 | printf "%'d files out of %'d with wrong encoding were deleted" \ 36 | "$(wc -l < "${list}")" "${total}" 37 | else 38 | if [ "${total}" -eq 0 ]; then 39 | printf "There were no Java classes with wrong encoding, nothing to delete" 40 | else 41 | printf "All %'d files are with correct encoding (utf-8/ascii), nothing to delete" \ 42 | "${total}" 43 | fi 44 | fi 45 | -------------------------------------------------------------------------------- /tests/metrics/test-hoc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 
4 | set -e -o pipefail 5 | 6 | temp=$1 7 | stdout=$2 8 | 9 | { 10 | hoc_script_path="${LOCAL}/metrics/hoc.sh" 11 | cd "${temp}" 12 | 13 | rm -rf ./* 14 | rm -rf .git 15 | 16 | git init --quiet . 17 | git config user.email 'foo@example.com' 18 | git config user.name 'Foo' 19 | git config commit.gpgsign false 20 | 21 | java_dir="./foo/dir/" 22 | java="FooTest.java" 23 | 24 | mkdir -p "${java_dir}" 25 | cd ${java_dir} 26 | 27 | touch "${java}" 28 | touch "stdout" 29 | 30 | printf "class Foo {}" > "${java}" 31 | git add "${java}" 32 | git commit --no-verify --quiet -m "first commit" 33 | ${hoc_script_path} "${java}" "stdout" 34 | grep "HoC 1" "stdout" 35 | 36 | printf "class Foo {\n\tint x;\n\tbool y;\n}\n" > "${java}" 37 | git add "${java}" 38 | git commit --no-verify --quiet -m "+second commit" 39 | ${hoc_script_path} "${java}" "stdout" 40 | grep "HoC 6" "stdout" 41 | 42 | printf "class Foo {\n\tbool z;\n}\n" > "${java}" 43 | git add "${java}" 44 | git commit --no-verify --quiet -m "-third commit" 45 | ${hoc_script_path} "${java}" "stdout" 46 | grep "HoC 9" "stdout" 47 | } > "${stdout}" 2>&1 48 | echo "👍🏻 Correctly calculated hits of code" 49 | -------------------------------------------------------------------------------- /tests/filters/test-999-move-gits-back.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | 5 | set -e -o pipefail 6 | 7 | temp=$1 8 | stdout=$2 9 | 10 | { 11 | rm -rf "${temp}/gits" 12 | msg=$("${LOCAL}/filters/999-move-gits-back.sh" "${temp}" "${temp}") 13 | echo "${msg}" 14 | } > "${stdout}" 2>&1 15 | echo "👍🏻 An absent directory didn't crash it" 16 | 17 | { 18 | rm -rf "${temp}/gits" 19 | mkdir -p "${temp}/gits" 20 | msg=$("${LOCAL}/filters/999-move-gits-back.sh" "${temp}" "${temp}") 21 | echo "${msg}" 22 | } > "${stdout}" 2>&1 23 | echo "👍🏻 An empty directory didn't crash it" 24 | 
25 | { 26 | rm -rf "${temp}/gits" 27 | mkdir -p "${temp}/gits/foo/bar" 28 | msg=$("${LOCAL}/filters/999-move-gits-back.sh" "${temp}" "${temp}") 29 | echo "${msg}" 30 | } > "${stdout}" 2>&1 31 | echo "👍🏻 An empty repo directory didn't crash it" 32 | 33 | { 34 | repo=foo/bar 35 | mkdir -p "${TARGET}/github/${repo}" 36 | mkdir -p "${temp}/gits/${repo}/.git/foo.txt" 37 | msg=$("${LOCAL}/filters/999-move-gits-back.sh" "${temp}" "${temp}") 38 | echo "${msg}" 39 | ls -al "${TARGET}/github/${repo}" 40 | test -e "${TARGET}/github/${repo}/.git" 41 | ls -al "${temp}/gits" 42 | test ! -e "${temp}/gits/${repo}/test.txt" 43 | } > "${stdout}" 2>&1 44 | echo "👍🏻 A git repository was moved back" 45 | -------------------------------------------------------------------------------- /metrics/raf.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | 6 | java=$1 7 | output=$(realpath "$2") 8 | 9 | cd "$(dirname "${java}")" 10 | base=$(basename "${java}") 11 | 12 | # To check that file was added in commit any time 13 | if git status > /dev/null 2>&1 && test -n "$(git log --oneline -- "${base}")"; then 14 | file_creation=$(git log --pretty=format:"%ci" --date=default -- "${base}" | tail -n 1) 15 | repo_creation=$(git log --reverse --format="format:%ci" | sed -n 1p) 16 | current_time=$(date "+%Y-%m-%d %H:%M:%S %z") 17 | file_creation_timestamp=$(date -d "$file_creation" +%s) 18 | repo_creation_timestamp=$(date -d "$repo_creation" +%s) 19 | current_time_timestamp=$(date -d "$current_time" +%s) 20 | from_file_creation=$((current_time_timestamp - file_creation_timestamp)) 21 | from_repo_creation=$((current_time_timestamp - repo_creation_timestamp)) 22 | raf=$(echo 'import sys; print(round(float(sys.argv[1])/float(sys.argv[2]), 1) if float(sys.argv[2]) != 0 else 1.0)' | python3 - $((from_file_creation)) 
$((from_repo_creation))) 23 | else 24 | raf=0 25 | fi 26 | 27 | echo "RAF ${raf} Relative Age of File (in the entire timeframe of repository existence), \ 28 | where 0.0 means the file was added in the first commit and 1.0 means that \ 29 | the file was added in the last commit" > "${output}" 30 | -------------------------------------------------------------------------------- /tests/metrics/test-jpeek.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | 6 | temp=$1 7 | stdout=$2 8 | 9 | { 10 | if ! javac -version; then 11 | echo "Java is not installed, that's why can't run this test" 12 | exit 1 13 | fi 14 | if ! mvn --version; then 15 | echo "Maven is not installed, that's why can't run this test" 16 | exit 1 17 | fi 18 | if ! gradle --version; then 19 | echo "Gradle is not installed, that's why can't run this test" 20 | exit 1 21 | fi 22 | if ! java -jar "${JPEEK}" --help; then 23 | echo "jPeek JAR is not available at '${JPEEK}'" 24 | exit 1 25 | fi 26 | } > "${stdout}" 2>&1 27 | echo "👍🏻 jPeek dependencies are installed" 28 | 29 | { 30 | java="${TARGET}/github/foo/bar/Foo.java" 31 | mkdir -p "$(dirname "${java}")" 32 | echo "class Foo {}" > "${java}" 33 | "${LOCAL}/metrics/jpeek.sh" "${java}" "${temp}/stdout" 34 | test ! 
-e "${temp}/stdout" 35 | } > "${stdout}" 2>&1 36 | echo "👍🏻 Correctly ignored metrics generation" 37 | 38 | { 39 | java="${TARGET}/temp/Test.java" 40 | mkdir -p "$(dirname "${java}")" 41 | echo "class Foo {}" > "${java}" 42 | "${LOCAL}/metrics/jpeek.sh" "${java}" "${temp}/stdout" 43 | test -e "${temp}/stdout" 44 | } > "${stdout}" 2>&1 45 | echo "👍🏻 Correctly generated metrics description" 46 | -------------------------------------------------------------------------------- /.github/workflows/up.yml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 2 | # SPDX-License-Identifier: MIT 3 | --- 4 | # yamllint disable rule:line-length 5 | name: up 6 | 'on': 7 | push: 8 | branches: 9 | - master 10 | tags: 11 | - '*' 12 | concurrency: 13 | group: up-${{ github.ref }} 14 | cancel-in-progress: true 15 | jobs: 16 | up: 17 | timeout-minutes: 15 18 | runs-on: ubuntu-24.04 19 | steps: 20 | - uses: actions/checkout@v4 21 | - run: |- 22 | git fetch --tags --force && \ 23 | latest=$(git tag --sort=creatordate | tail -1) && \ 24 | sed -E -i "s|yegor256/cam:[^ ]+|yegor256/cam:${latest}|g" README.md 25 | sed -E -i "s|yegor256/cam:[^ ]+|yegor256/cam:${latest}|g" \ 26 | .github/workflows/make.yml 27 | sed -E -i "s|^version: [^ ]+|version: ${latest}|g" CITATION.cff 28 | date=$(git tag --sort=creatordate \ 29 | --format="%(creatordate:short)" | tail -1) 30 | sed -E -i "s|^date-released: [^ ]+|date-released: ${date}|g" \ 31 | CITATION.cff 32 | - uses: peter-evans/create-pull-request@v7 33 | with: 34 | sign-commits: true 35 | branch: version-up 36 | commit-message: 'new version in README and CITATION.cff' 37 | delete-branch: true 38 | title: 'New version in README and CITATION.cff' 39 | assignees: yegor256 40 | base: master 41 | -------------------------------------------------------------------------------- /tests/metrics/test-rfvh.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | 6 | temp=$1 7 | stdout=$2 8 | 9 | { 10 | metric_script_path="${LOCAL}/metrics/rfvh.sh" 11 | cd "${temp}" 12 | 13 | rm -rf ./* 14 | rm -rf .git 15 | 16 | git init --quiet . 17 | git config user.email 'foo@example.com' 18 | git config user.name 'Foo' 19 | git config commit.gpgsign false 20 | 21 | java_dir="./foo/dir/" 22 | java1="FooTest.java" 23 | java2="FooTest2.java" 24 | java3="FooTest3.java" 25 | 26 | mkdir -p "${java_dir}" 27 | cd ${java_dir} 28 | 29 | touch "${java1}" 30 | touch "stdout" 31 | 32 | printf "class Foo {}" > "${java1}" 33 | git add "${java1}" 34 | git commit --no-verify --quiet -m "first commit" 35 | ${metric_script_path} "${java1}" "stdout" 36 | grep "RFVH 1" "stdout" 37 | 38 | printf "class Foo {}" > "${java2}" 39 | git add "${java2}" 40 | git commit --no-verify --quiet -m "+second commit" 41 | ${metric_script_path} "${java1}" "stdout" 42 | grep "RFVH 0.5" "stdout" 43 | 44 | printf "class Foo {}" > "${java3}" 45 | git add "${java3}" 46 | git commit --no-verify --quiet -m "-third commit" 47 | ${metric_script_path} "${java1}" "stdout" 48 | grep "RFVH 0.33" "stdout" 49 | } > "${stdout}" 2>&1 50 | echo "👍🏻 Correctly calculated relative file volatility by hits" 51 | -------------------------------------------------------------------------------- /tests/steps/test-discover-repos.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | 6 | stdout=$2 7 | 8 | csv=${TARGET}/foo.csv 9 | tex=${TARGET}/foo.tex 10 | 11 | { 12 | rm -f "${csv}" 13 | msg=$("${LOCAL}/steps/discover-repos.rb" --dry --pause=0 --total=3 --page-size=1 --min-stars=100 
--max-stars=1000 "--csv=${csv}" "--tex=${tex}") 14 | echo "${msg}" 15 | echo "${msg}" | grep "Completed querying for year $(date +%Y). Found 3 repositories so far." 16 | echo "${msg}" | grep "Found 3 total repositories in GitHub" 17 | test -e "${csv}" 18 | test -s "${tex}" 19 | test "$(wc -l < "${csv}" | xargs)" = '4' 20 | test "$(head -1 "${csv}" | tr "," "\n" | wc -l | xargs)" = '9' 21 | } > "${stdout}" 2>&1 22 | echo "👍🏻 Small repositories discovery test is succeed" 23 | 24 | { 25 | rm -f "${csv}" 26 | msg=$("${LOCAL}/steps/discover-repos.rb" --dry --pause=0 --total=35 --page-size=30 --min-stars=100 --max-stars=1000 "--csv=${csv}" "--tex=${tex}") 27 | echo "${msg}" 28 | echo "${msg}" | grep "Found 60 total repositories in GitHub" 29 | echo "${msg}" | grep "We will use only the first 35 repositories" 30 | test -e "${csv}" 31 | test -s "${tex}" 32 | test "$(wc -l < "${csv}" | xargs)" = '36' 33 | test "$(head -1 "${csv}" | tr "," "\n" | wc -l | xargs)" = '9' 34 | } > "${stdout}" 2>&1 35 | echo "👍🏻 Medium repositories discovery test is succeed" 36 | -------------------------------------------------------------------------------- /tests/metrics/test-ir.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | # shellcheck disable=SC2317 5 | set -e -o pipefail 6 | 7 | # TODO: #259 ENABLE THIS TESTS RIGHT AFTER IMPLEMENTING ir.sh VIA REMOVING `exit 0` AND REMOVE `shellcheck disable=SC2317` on the top RIGHT AFTER IMPLEMENTING ir.sh 8 | exit 0 9 | 10 | temp=$1 11 | stdout=$2 12 | 13 | { 14 | tmp=$(mktemp -d /tmp/XXXX) 15 | cd "${tmp}" 16 | mkdir -p "${LOCAL}/${temp}" 17 | if ! 
"${LOCAL}/metrics/ir.sh" ./ "${LOCAL}/${temp}/stdout"; then 18 | exit 1 19 | fi 20 | } >"${stdout}" 2>&1 21 | echo "👍🏻 Failed in non-git directory" 22 | 23 | { 24 | tmp=$(mktemp -d /tmp/XXXX) 25 | cd "${tmp}" 26 | rm -rf ./* 27 | rm -rf .git 28 | git init --quiet . 29 | git config user.email 'foo@example.com' 30 | git config user.name 'Foo' 31 | if ! "${LOCAL}/metrics/ir.sh" ./ "t0"; then 32 | exit 1 33 | fi 34 | file1="one.java" 35 | file2="two.java" 36 | touch "${file1}" 37 | git add "${file1}" 38 | git config commit.gpgsign false 39 | git commit --no-verify --quiet -m "first file" 40 | "${LOCAL}/metrics/ir.sh" ./ "t1" 41 | touch "${file2}" 42 | git add "${file2}" 43 | git commit --no-verify --quiet -m "second file" 44 | "${LOCAL}/metrics/ir.sh" ./ "t2" 45 | grep "IR 1" "t1" # Single file in repo 46 | grep "IR 0.5 " "t2" # Two files in repo 47 | } >"${stdout}" 2>&1 48 | echo "👍🏻 Correctly calculated the IR (Impact Ratio)" 49 | -------------------------------------------------------------------------------- /steps/clone-repo.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | 6 | repo=$1 7 | tag=$2 8 | pos=$3 9 | total=$4 10 | 11 | start=$(date +%s%N) 12 | 13 | if [[ "${repo}" =~ '@' ]]; then 14 | uri=file://${repo} 15 | repo=files/$(basename "${repo}") 16 | else 17 | uri=https://github.com/${repo} 18 | fi 19 | 20 | dir=${TARGET}/github/${repo} 21 | 22 | if [ -e "${dir}" ]; then 23 | echo "The repo directory #${pos}/${total} is already here: ${dir} ($(du -sh "${dir}" | cut -f1 | xargs))" 24 | exit 25 | fi 26 | 27 | declare -a args=('--quiet') 28 | if [ ! "${tag}" = '.' ]; then 29 | args+=("--branch=${tag}") 30 | fi 31 | 32 | echo "${repo} (${pos}/${total}): trying to clone it..." 
33 | declare -i re=0 34 | until timeout 1h git clone "${args[@]}" "${uri}" "${dir}"; do 35 | if [ "${re}" -gt 5 ]; then 36 | echo "Too many failures (${re}) for ${repo}" 37 | exit 1 38 | fi 39 | re=$((re+1)) 40 | rm -rf "${dir}" 41 | echo "Retry #${re} for ${repo}..." 42 | sleep "${re}" 43 | done 44 | 45 | hashes=${TARGET}/hashes.csv 46 | if [ ! -e "${hashes}" ]; then 47 | printf "repo,hash\n" > "${hashes}" 48 | fi 49 | printf "%s,%s\n" "$(echo "${repo}" | "${LOCAL}/help/to-csv.sh")" "$(git --git-dir "${dir}/.git" rev-parse HEAD)" >> "${hashes}" 50 | 51 | echo "${repo} cloned (${pos}/${total}), $(du -sh "${dir}" | cut -f1 | xargs)$("${LOCAL}/help/tdiff.sh" "${start}")" 52 | -------------------------------------------------------------------------------- /steps/measure.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | 6 | start=$(date +%s%N) 7 | 8 | echo "Searching for all .java files in ${TARGET}/github (may take some time, stay calm...)" 9 | 10 | javas=$(find "${TARGET}/github" -name '*.java' -type f -print) 11 | total=$(echo "${javas}" | wc -l | xargs) 12 | echo "Found ${total} Java files, starting to collect metrics..." 
13 | 14 | jobs=${TARGET}/temp/jobs/measure-jobs.txt 15 | rm -rf "${jobs}" 16 | mkdir -p "$(dirname "${jobs}")" 17 | touch "${jobs}" 18 | 19 | declare -i file=0 20 | sh="$(dirname "$0")/measure-file.sh" 21 | pstart=$(date +%s%N) 22 | echo "${javas}" | while IFS= read -r java; do 23 | file=$((file+1)) 24 | rel=$(realpath --relative-to="${TARGET}/github" "${java}") 25 | javam=${TARGET}/measurements/${rel}.m 26 | if [ -e "${javam}" ]; then 27 | echo "Metrics already exist for $(basename "${java}") (${file}/${total})" 28 | continue 29 | fi 30 | printf "%s %s %s %s %s\n" "${sh@Q}" "${java@Q}" "${javam@Q}" "${file@Q}" "${total@Q}" >> "${jobs}" 31 | if [ "${file: -4}" = '0000' ]; then 32 | echo "Prepared ${file} jobs out of ${total}$("${LOCAL}/help/tdiff.sh" "${pstart}")..." 33 | pstart=$(date +%s%N) 34 | fi 35 | done 36 | 37 | "${LOCAL}/help/parallel.sh" "${jobs}" 38 | wait 39 | 40 | echo "All metrics calculated in ${total} files in $(nproc) threads$("${LOCAL}/help/tdiff.sh" "${start}")" 41 | -------------------------------------------------------------------------------- /tests/steps/test-polish.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | 6 | stdout=$2 7 | 8 | echo -e 'repo,branch\nfoo/bar,master,44,55' > "${TARGET}/repositories.csv" 9 | rm -rf "${TARGET}/github" 10 | mkdir -p "${TARGET}/github/foo/bar" 11 | msg=$("${LOCAL}/steps/polish.sh") 12 | test -e "${TARGET}/github/foo/bar" 13 | { 14 | echo "${msg}" 15 | echo "${msg}" | grep "foo/bar is already here" 16 | } > "${stdout}" 2>&1 17 | echo "👍🏻 A correct directory was not deleted" 18 | 19 | touch "${TARGET}/repositories.csv" 20 | rm -rf "${TARGET}/github" 21 | mkdir -p "${TARGET}/github/foo/bar" 22 | msg=$("${LOCAL}/steps/polish.sh") 23 | { 24 | echo "${msg}" 25 | echo "${msg}" | grep -v "foo/bar is obsolete and was deleted" 
26 | echo "${msg}" | grep "All 1 repo directories" 27 | } > "${stdout}" 2>&1 28 | echo "👍🏻 An obsolete directory was deleted" 29 | 30 | touch "${TARGET}/repositories.csv" 31 | rm -rf "${TARGET}/github" 32 | mkdir -p "${TARGET}/github" 33 | msg=$("${LOCAL}/steps/polish.sh") 34 | { 35 | echo "${msg}" 36 | echo "${msg}" | grep "No repo directories" 37 | } > "${stdout}" 2>&1 38 | echo "👍🏻 An empty directory was checked" 39 | 40 | TARGET=${TARGET}/dir-is-absent 41 | msg=$("${LOCAL}/steps/polish.sh") 42 | { 43 | echo "${msg}" 44 | echo "${msg}" | grep "Nothing to polish, the directory is absent" 45 | } > "${stdout}" 2>&1 46 | echo "👍🏻 An absent directory passed filtering" 47 | -------------------------------------------------------------------------------- /tests/steps/test-clone.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | 6 | temp=$1 7 | stdout=$2 8 | 9 | { 10 | name=foo@ 11 | uri=${temp}/${name} 12 | git init --quiet --initial-branch=master "${uri}" 13 | cd "${uri}" 14 | git config user.email 'foo@example.com' 15 | git config user.name 'Foo' 16 | touch test.txt 17 | git add test.txt 18 | git config commit.gpgsign false 19 | git commit --no-verify --quiet -am test 20 | echo -e "name\n${uri},master,4,5,5,6" > "${TARGET}/repositories.csv" 21 | rm -rf "${TARGET}/github" 22 | "${LOCAL}/steps/clone.sh" 23 | test -e "${TARGET}/github/files/${name}/test.txt" 24 | } > "${stdout}" 2>&1 25 | echo "👍🏻 A repo cloned correctly" 26 | 27 | { 28 | name=bar@ 29 | uri=${temp}/${name} 30 | git init --quiet --initial-branch=master "${uri}" 31 | cd "${uri}" 32 | git config user.email 'something@example.com' 33 | git config user.name 'Bar' 34 | touch test.txt 35 | git add test.txt 36 | git config commit.gpgsign false 37 | git commit --no-verify --quiet -am test 38 | TARGET="${TARGET}/another/ж\"' 
() привет /t" 39 | mkdir -p "${TARGET}" 40 | echo -e "name\n${uri}" > "${TARGET}/repositories.csv" 41 | rm -rf "${TARGET}/github" 42 | "${LOCAL}/steps/clone.sh" 43 | test -e "${TARGET}/github/files/${name}/test.txt" 44 | } > "${stdout}" 2>&1 45 | echo "👍🏻 A repo cloned correctly into weird directory" 46 | -------------------------------------------------------------------------------- /tests/steps/test-aggregate-repo.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | 6 | stdout=$2 7 | 8 | { 9 | repo="foo/bar test, ; " 10 | dir="${TARGET}/measurements/${repo}/a, ; -" 11 | mkdir -p "${dir}" 12 | m="Foo,- ;Bar.java.m" 13 | touch "${dir}/${m}" 14 | echo ".75" > "${dir}/${m}.nhd" 15 | echo "42" > "${dir}/${m}.loc" 16 | msg=$("${LOCAL}/steps/aggregate-repo.sh" "${repo}" 1 1 'loc nhd') 17 | test "$(echo "${msg}" | grep -c "sum=0")" == 0 18 | test "$(echo "${msg}" | grep -c "files=0")" == 0 19 | test -e "${TARGET}/data/${repo}/all.csv" 20 | grep "/a\\\\, ; -/Foo\\\\,- ;Bar.java,42.000,0.750" "${TARGET}/data/${repo}/all.csv" 21 | grep "java_file,loc,nhd" "${TARGET}/data/${repo}/all.csv" 22 | test -e "${TARGET}/data/${repo}/loc.csv" 23 | grep "java_file,loc" "${TARGET}/data/${repo}/loc.csv" 24 | grep "/a\\\\, ; -/Foo\\\\,- ;Bar.java,42" "${TARGET}/data/${repo}/loc.csv" 25 | test -e "${TARGET}/data/${repo}/nhd.csv" 26 | grep ",42" "${TARGET}/data/${repo}/loc.csv" 27 | } > "${stdout}" 2>&1 28 | echo "👍🏻 A repo aggregated correctly" 29 | 30 | { 31 | repo="dog/cat" 32 | dir="${TARGET}/data/${repo}" 33 | mkdir -p "${dir}" 34 | touch "${dir}/loc.csv" 35 | msg=$("${LOCAL}/steps/aggregate-repo.sh" "${repo}" 1 1 'loc nhd') 36 | echo "${msg}" | grep "Not all 2 metrics aggregated" 37 | } > "${stdout}" 2>&1 38 | echo "👍🏻 A partially aggregated repo processed correctly" 39 | 
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT

# Calculates RFVC (Relative File Volatility by Commits): the number of commits
# that touched the file divided by the sum of such numbers for all files that
# "git ls-tree -r" lists from the directory of the file.
# Arguments: $1 - path of the file to measure, $2 - path of the output file.
# Output: one line "RFVC <value> <description>" written to $2; the value is 0
# when the file is not inside a git repository or was never committed.
set -e -o pipefail

java=$1
output=$(realpath "$2")

cd "$(dirname "${java}")"
base=$(basename "${java}")

# To check that file was added in commit any time
if git status > /dev/null 2>&1 && test -n "$(git log --oneline -- "${base}")"; then
    # The path is given relative to the new working directory and after "--":
    # the original "${java}" may be a relative path that is no longer valid
    # after "cd", and without "--" git refuses paths absent from the work tree.
    my_rvc=$(git rev-list --count HEAD -- "${base}")
    all_rvcs=0
    # HEAD is used instead of the branch name, which is empty on a detached
    # HEAD; "-z" gives raw NUL-terminated names, while the default output
    # quotes and escapes unusual file names.
    while IFS= read -r -d '' file; do
        rvc=$(git rev-list --count HEAD -- "${file}")
        all_rvcs=$((all_rvcs + rvc))
    done < <(git ls-tree -r -z --name-only HEAD)
    if [ "${all_rvcs}" -gt 0 ]; then
        rfvc=$(python3 -c "print(${my_rvc} / ${all_rvcs})")
    else
        # Nothing is listed in HEAD here (e.g. the file was removed later)
        rfvc=0
    fi
else
    rfvc=0
fi

echo "RFVC ${rfvc} Relative File Volatility by Commits for file" > "${output}"
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT

# Tests steps/aggregate-join.sh: per-repository CSV files must be merged into
# "${TARGET}/data/<metric>.csv" with the repository name as the first column.
# Arguments: $1 - temp directory (unused), $2 - file to collect the log in.
set -e -o pipefail

stdout=$2
script="${LOCAL}/steps/aggregate-join.sh"

{
    repo="foo/bar,1"
    dir="${TARGET}/data/${repo}"
    joined="${TARGET}/data/loc.csv"
    mkdir -p "${dir}"
    printf 'java_file,loc\nFoo.java,42\nBar.java,256\n' > "${dir}/loc.csv"
    msg=$("${script}" "${repo}" "${dir}" 1 1)
    echo "${msg}"
    test "$(echo "${msg}" | grep -c "sum=0")" = 0
    test -e "${joined}"
    grep "repo,java_file,loc" "${joined}"
    # The comma inside the name of the repo must be escaped with a backslash
    grep "foo/bar\\\\,1,Foo.java,42" "${joined}"
} > "${stdout}" 2>&1
echo "👍🏻 A data joined correctly"

{
    # Each item is: name of the repo, name of the class, value of LCOM5
    samples=('first/a First 42' 'second/b Second 256' 'third/c Third 0')
    for sample in "${samples[@]}"; do
        read -r name class value <<< "${sample}"
        mkdir -p "${TARGET}/data/${name}"
    done
    for sample in "${samples[@]}"; do
        read -r name class value <<< "${sample}"
        printf 'java_file,LCOM5\n%s.java,%s\n' "${class}" "${value}" > "${TARGET}/data/${name}/LCOM5.csv"
    done
    for sample in "${samples[@]}"; do
        read -r name class value <<< "${sample}"
        "${script}" "${name}" "${TARGET}/data/${name}" 1 1
    done
    for sample in "${samples[@]}"; do
        read -r name class value <<< "${sample}"
        grep "${name},${class}.java" "${TARGET}/data/LCOM5.csv"
    done
} > "${stdout}" 2>&1
echo "👍🏻 A data joined into existing file correctly"
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT

# Calculates RFVH (Relative File Volatility by Hits): the number of added and
# removed lines that "git log -L" reports for the class of the file, divided
# by the sum of such numbers for all files that "git ls-tree -r" lists from
# the directory of the file.
# Arguments: $1 - path of the file to measure, $2 - path of the output file.
# Output: one line "RFVH <value> <description>" written to $2; the value is 0
# when the file is not inside a git repository or was never committed.
set -e -o pipefail

java=$1
output=$(realpath "$2")

cd "$(dirname "${java}")"
base=$(basename "${java}")

# Prints the number of "+"/"-" lines in the history of the first "class "
# block of the file $1 (a path relative to the current directory), without
# the "---"/"+++" headers of the diffs. Every stage is allowed to fail:
# "git log -L" is fatal for a file where the pattern is not found, and grep
# exits with 1 when nothing is selected, which otherwise terminates the
# script (errexit + pipefail) instead of counting zero hits.
hits() {
    local count
    count=$(
        { git log -L:"class\s:$1" 2> /dev/null || true; } \
            | { grep -E "^[+-].*$" || true; } \
            | { grep -Ev "^\-\-\-\s\S+$" || true; } \
            | { grep -Evc "^\+\+\+\s\S+$" || true; }
    )
    echo "${count:-0}"
}

# To check that file was added in commit any time
if git status > /dev/null 2>&1 && test -n "$(git log --oneline -- "${base}")"; then
    # "${base}" instead of "${java}": a relative "${java}" is not valid
    # anymore after the "cd" above
    my_rvh=$(hits "${base}")
    all_rvhs=0
    # HEAD is used instead of the branch name, which is empty on a detached
    # HEAD; "-z" gives raw NUL-terminated names, while the default output
    # quotes and escapes unusual file names.
    while IFS= read -r -d '' file; do
        rvh=$(hits "${file}")
        all_rvhs=$((all_rvhs + rvh))
    done < <(git ls-tree -r -z --name-only HEAD)
    if [ "${all_rvhs}" -gt 0 ]; then
        rfvh=$(python3 -c "print(${my_rvh} / ${all_rvhs})")
    else
        # No hits at all, the ratio is undefined
        rfvh=0
    fi
else
    rfvh=0
fi

echo "RFVH ${rfvh} Relative File Volatility by Hits for file" > "${output}"
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT

# Tests help/float.sh: whatever comes to its stdin must be printed back as
# a number with exactly three digits after the dot (or as "NaN").
# Arguments: $1 - temp directory (unused), $2 - file to collect the log in.
set -e -o pipefail

stdout=$2
float="${LOCAL}/help/float.sh"

# Sends $1 to float.sh, stops the test unless it prints exactly $2,
# and then reports success with the message $3.
expect() {
    test "$(echo "$1" | "${float}")" = "$2"
    echo "👍🏻 $3"
}

num=$(echo '.42' | "${float}")
test "${num}" = '0.420'
echo "${num}" >> "${stdout}"
echo "👍🏻 Corrected floating point number"

expect '254.42' '254.420' "Corrected longer floating point number"

expect '256' '256.000' "Corrected integer number"

expect '09' '9.000' "Corrected integer number with leading zero"

expect '' '0.000' "Corrected integer number with empty text"

expect ' ' '0.000' "Corrected integer number with spaces"

expect 'Blank' '0.000' "Corrected integer number with text input"

expect 'NaN' 'NaN' "Corrected integer number with NaN"

expect '.000000099' '0.000' "Corrected small precision number"

expect '254' '254.000' "Printed decimal number with 3 digits"

expect '0.3' '0.300' "Printed decimal number with 3 digits"

expect '0.00023' '0.000' "Printed decimal number with 3 digits"
-e "${java}" 25 | grep "${java}" "${temp}/deleted.txt" 26 | done 27 | } > "${stdout}" 2>&1 28 | echo "👍🏻 All fixtures with broken syntax were deleted correctly" 29 | 30 | { 31 | java=${temp}/Bar.java 32 | echo "class привет { --- }" > "${java}" 33 | "${LOCAL}/filters/delete-unparsable.py" "${java}" "${temp}/deleted.txt" 34 | test ! -e "${java}" 35 | grep "${java}" "${temp}/deleted.txt" 36 | } > "${stdout}" 2>&1 37 | echo "👍🏻 Another broken syntax inside was deleted correctly" 38 | 39 | { 40 | "${LOCAL}/filters/delete-unparsable.py" "${temp}/file-is-absent.java" "${temp}/deleted.txt" 41 | grep -v "${temp}/file-is-absent.java" "${temp}/deleted.txt" 42 | } > "${stdout}" 2>&1 43 | echo "👍🏻 Absent file didn't fail the script" 44 | -------------------------------------------------------------------------------- /tests/filters/test-001-move-gits-to-temp.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | 5 | set -e -o pipefail 6 | 7 | temp=$1 8 | stdout=$2 9 | 10 | { 11 | rm -rf "${TARGET}/github" 12 | msg=$("${LOCAL}/filters/001-move-gits-to-temp.sh" "${temp}" "${temp}") 13 | echo "${msg}" 14 | } > "${stdout}" 2>&1 15 | echo "👍🏻 An empty directory didn't crash it" 16 | 17 | { 18 | rm -rf "${TARGET}/github" 19 | mkdir -p "${TARGET}/github/foo/bar" 20 | msg=$("${LOCAL}/filters/001-move-gits-to-temp.sh" "${temp}" "${temp}") 21 | echo "${msg}" 22 | } > "${stdout}" 2>&1 23 | echo "👍🏻 An empty repo directory didn't crash it" 24 | 25 | { 26 | repo=foo/bar 27 | mkdir -p "${TARGET}/github/${repo}/.git/test.txt" 28 | msg=$("${LOCAL}/filters/001-move-gits-to-temp.sh" "${temp}" "${temp}") 29 | echo "${msg}" 30 | ls -al "${TARGET}/github/${repo}" 31 | test ! 
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT

# Runs all test scripts from "${LOCAL}/tests": first the ones in "before/",
# then all others (sorted), then the ones in "after/". Each test gets its own
# fresh directory inside "${LOCAL}/test-zone" and is started as
# "TARGET=<dir>/target <test> <dir> <dir>/stdout.log". The first failing test
# stops the run with exit code 1, after its log is printed.
# Environment: LOCAL - root of the project; TEST - optional, the only test
# to run, either as "steps/test-zip.sh" or as "tests/steps/test-zip.sh".
set -e -o pipefail

temp=${LOCAL}/test-zone
mkdir -p "${temp}"

export CAMTESTS=1

dir="${LOCAL}/tests"
tests=$(
    find "${dir}" -mindepth 2 -type f -name '*.sh' -path "${dir}/before/**";
    find "${dir}" -mindepth 2 -type f -name '*.sh' -not -path "${dir}/before/**" -not -path "${dir}/after/**" | sort;
    find "${dir}" -mindepth 2 -type f -name '*.sh' -path "${dir}/after/**"
)
echo "There are $(echo "${tests}" | wc -l | xargs) tests in ${dir}"
echo "${tests}" | while IFS= read -r test; do
    # An empty list still gives one empty line, there is nothing to run then
    if [ -z "${test}" ]; then
        continue
    fi
    name=$(realpath --relative-to="${LOCAL}/tests" "${test}")
    if [ -n "${TEST}" ] && [ ! "${TEST}" = "${name}" ] && [ ! "${TEST}" = "tests/${name}" ]; then
        echo "Skipped ${name}"
        continue
    fi
    echo -e "\n${name}:"
    # The directory of the test is always created from scratch, together
    # with the target directory and the log file inside it
    t=${temp}/${name}
    rm -rf "${t:?}"
    tgt=${t}/target
    mkdir -p "${tgt}"
    stdout=${t}/stdout.log
    touch "${stdout}"
    # The stdin of the loop is the list of tests; without the redirect a test
    # that reads its stdin would swallow the names of the remaining tests
    if ! TARGET="${tgt}" "${test}" "${t}" "${stdout}" < /dev/null; then
        if [ ! -e "${stdout}" ]; then
            echo "Can't find log file after a failed test: ${stdout}"
            tree "${t}/"
        else
            cat "${stdout}"
        fi
        echo "❌ Non-zero exit code (TARGET=${tgt})"
        echo "You can run this particular test in isolation: make test TEST=tests/${name}"
        exit 1
    fi
done
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT

# Tests filters/040-delete-unparsable.sh and the usage message of
# filters/delete-unparsable.py.
# Arguments: $1 - temp directory, $2 - file to collect the log in.
set -e -o pipefail

temp=$1
stdout=$2

py="${LOCAL}/filters/delete-unparsable.py"
message="${temp}/message"

# Starts the Python script with the given arguments; if it fails,
# the usage message must be found in what it has printed.
usage_or_success() {
    "${py}" "$@" > "${message}" || grep "Usage: python delete-unparsable.py " "${message}"
}

{
    list=${temp}/temp/filter-lists/unparsable-files.txt
    folder="${temp}/foo/dir (with) _ long & and 'weird' \"name\" "
    java="${folder}/Foo.java"
    another="${folder}/Bar.java"
    mkdir -p "${folder}"
    echo "--- not java syntax at all ---" > "${java}"
    echo "class Bar {}" > "${another}"
    rm -f "${list}"
    msg=$("${LOCAL}/filters/040-delete-unparsable.sh" "${temp}" "${temp}/temp")
    echo "${msg}"
    echo "${msg}" | grep "1 files out of 2 with an unparsable Java syntax were deleted"
    # Only the broken file goes away, and only it gets into the list
    test ! -e "${java}"
    test -e "${another}"
    test -e "${list}"
    test "$(wc -l < "${list}" | xargs)" = 1
} > "${stdout}" 2>&1
echo "👍🏻 An unparsable Java file was deleted"

{
    usage_or_success

    usage_or_success "${java}"

    usage_or_success "${java}" "${temp}/stdout" "${temp}/stdout"
} > "${stdout}" 2>&1
echo "👍🏻 Usage works correctly"
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT

# Tests steps/aggregate.sh: measurements from "${TARGET}/measurements" must
# show up as CSV files in "${TARGET}/data", per repository and summarized.
# Arguments: $1 - temp directory (unused), $2 - file to collect the log in.
set -e -o pipefail

stdout=$2

# Creates the measurement file $1 (together with its directory) and then,
# for every following pair "metric value", the file "$1.<metric>".
measured() {
    local m=$1
    shift
    mkdir -p "$(dirname "${m}")"
    touch "${m}"
    while [ "$#" -gt 0 ]; do
        echo "$2" > "${m}.$1"
        shift 2
    done
}

{
    repo="foo/bar test ; "
    measured "${TARGET}/measurements/${repo}/a/Foo.java.m" loc 42
    "${LOCAL}/steps/aggregate.sh"
    for data in "${TARGET}/data/${repo}" "${TARGET}/data"; do
        test -e "${data}/all.csv"
        test -e "${data}/loc.csv"
        grep ",42" < "${data}/loc.csv"
    done
} > "${stdout}" 2>&1
echo "👍🏻 A repo aggregated correctly"

{
    rm -rf "${TARGET}/data"
    rm -rf "${TARGET}/measurements"
    measured "${TARGET}/measurements/first/a/First.java.m" LCOM5 42
    measured "${TARGET}/measurements/second/b/Second.java.m" LCOM5 7 NHD 256 loc 10000
    measured "${TARGET}/measurements/third/c/Third.java.m" LCOM5 700
    "${LOCAL}/steps/aggregate.sh"
    for repo in first/a second/b; do
        test -e "${TARGET}/data/${repo}/LCOM5.csv"
        test -e "${TARGET}/data/${repo}/all.csv"
    done
    grep ",42" < "${TARGET}/data/LCOM5.csv"
} > "${stdout}" 2>&1
echo "👍🏻 A pair of repos aggregated correctly"
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT

# This script is executed for every repository, i.e. for every directory that
# looks like `data/<org>/<repo>/`. The script finds all `.csv` files available
# there and joins them with the files in the `data/` directory. For example,
# the content of the `data/yegor256/jaxec/LCOM5.csv` file is appended to the
# content of the `data/LCOM5.csv` file.
#
# It is expected that the content of the "data/yegor256/jaxec/LCOM5.csv" looks like this (the first
# line is the CSV header):
#
# ```
# java_file,LCOM5
# /src/main/java/foo/Hello.java,32
# /src/main/java/bar/test/Another.java,14
# ```
#
# The content of the "data/LCOM5.csv" file should look like this (again, the first line
# is the CSV header):
#
# ```
# repo,java_file,LCOM5
# yegor256/jaxec,/src/main/java/foo/Hello.java,32
# yegor256/jaxec,/src/main/java/bar/test/Another.java,14
# ```
#
# Arguments: $1 - name of the repository, $2 - directory with its .csv files,
# $3 and $4 - position of the repository and the total number of them (both
# are only printed in the final message).
# Environment: TARGET, LOCAL.

set -e -o pipefail

repo=$1
dir=$2
pos=$3
total=$4

start=$(date +%s%N)

# "-maxdepth" goes first: GNU find warns when a global option follows tests
csvs=$(find "${dir}" -maxdepth 1 -type f -name '*.csv' -exec basename {} \;)

if [ -z "${csvs}" ]; then
    # Nothing to join; a loop over the empty list would still run once,
    # with an empty name of the file, and fail
    files=0
else
    # The escaped name of the repo is the same for all rows of all files
    prefix=$(echo "${repo}" | "${LOCAL}/help/to-csv.sh")
    while IFS= read -r csv; do
        join=${TARGET}/data/${csv}
        mkdir -p "$(dirname "${join}")"
        if [ ! -e "${join}" ]; then
            printf 'repo,%s\n' "$(head -1 "${dir}/${csv}")" > "${join}"
        fi
        # The second condition keeps the last row when it has no newline
        tail -n +2 "${dir}/${csv}" | while IFS= read -r t || [ -n "${t}" ]; do
            printf '%s,%s\n' "${prefix}" "${t}"
        done >> "${join}"
    done <<< "${csvs}"
    files=$(echo "${csvs}" | wc -l | xargs)
fi

echo "${files} .csv files of ${repo} joined into data/.csv (${pos}/${total})$("${LOCAL}/help/tdiff.sh" "${start}")"
0 : a.min} Minimum ${tail}\" 46 | " > "${output}" 47 | 48 | rm -rf "${tmp}" 49 | -------------------------------------------------------------------------------- /steps/polish.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | 6 | dir=${TARGET}/github 7 | if [ ! -e "${dir}" ]; then 8 | echo "Nothing to polish, the directory is absent: ${dir}" 9 | exit 10 | fi 11 | 12 | rlist=${TARGET}/temp/repos-to-polish.txt 13 | mkdir -p "$(dirname "${rlist}")" 14 | echo "Wait a bit, searching for repos in '${dir}'..." 15 | find "${dir}" -maxdepth 2 -mindepth 2 -type d -exec bash -c 'realpath --relative-to="${1}" "$2"' _ "${dir}" {} \; > "${rlist}" 16 | 17 | if [ -s "${rlist}" ]; then 18 | declare -i rtotal=0 19 | while IFS= read -r repo; do 20 | if grep "${repo}," "${TARGET}/repositories.csv"; then 21 | echo "Directory of ${repo} is already here" 22 | else 23 | rm -rf "${dir:?}/${repo}" 24 | echo "Directory of ${repo} is obsolete and was deleted" 25 | fi 26 | rtotal=$((rtotal+1)) 27 | done < "${rlist}" 28 | echo "All ${rtotal} repo directories inside ${dir} were checked" 29 | else 30 | echo "No repo directories inside ${dir}" 31 | exit 32 | fi 33 | 34 | olist=${TARGET}/temp/orgs-to-polish.txt 35 | mkdir -p "$(dirname "${olist}")" 36 | echo "Wait a bit, searching for orgs in '${dir}'..." 
37 | 38 | find "${dir}" -maxdepth 1 -mindepth 1 -type d -exec bash -c 'realpath --relative-to="${1}" "$2"' _ "${dir}" {} \; > "${olist}" 39 | 40 | if [ -s "${olist}" ]; then 41 | declare -i ototal=0 42 | while IFS= read -r org; do 43 | if [ "$(find "${dir}/${org}" -type d | wc -l | xargs)" == '0' ]; then 44 | rm -rf "${dir:?}/${org}" 45 | echo "Organization ${org} is empty and was deleted" 46 | fi 47 | ototal=$((ototal+1)) 48 | done < "${olist}" 49 | echo "All ${ototal} org directories inside ${dir} were checked" 50 | else 51 | echo "No org directories inside ${dir}" 52 | exit 53 | fi 54 | -------------------------------------------------------------------------------- /tests/after/test-integration.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | 6 | stdout=$2 7 | 8 | { 9 | set -x 10 | make clean "TARGET=${TARGET}" 11 | repo=yegor256/tojos 12 | log=$(make "TARGET=${TARGET}" "REPO=${repo}") 13 | echo "${log}" 14 | echo "${log}" | grep "Using one repo: yegor256/tojos" 15 | echo "${log}" | grep "No repo directories inside" 16 | echo "${log}" | grep "Cloned 1 repositories" 17 | echo "${log}" | grep "All 1 repositories checked" 18 | echo "${log}" | grep "All 1 repositories passed through jPeek" 19 | echo "${log}" | grep "All metrics calculated" 20 | echo "${log}" | grep "All metrics aggregated" 21 | echo "${log}" | grep "PDF report generated" 22 | echo "${log}" | grep "ZIP archive" 23 | echo "${log}" | grep "SUCCESS" 24 | echo "${log}" | grep -v "Failed to collect" 25 | test -d "${TARGET}" 26 | for f in start.txt hashes.csv report.pdf repositories.csv; do 27 | test -f "${TARGET}/${f}" 28 | done 29 | test "$(find "${TARGET}" -maxdepth 1 | wc -l | xargs)" = 11 30 | test -f "${TARGET}/data/${repo}/NCSS.csv" 31 | test -f "${TARGET}/data/${repo}/NHD.csv" 32 | test -f 
"${TARGET}/data/${repo}/SCOM-cvc.csv" 33 | test -f "${TARGET}/data/NCSS.csv" 34 | test -f "${TARGET}/data/NHD.csv" 35 | test -f "${TARGET}/data/SCOM-cvc.csv" 36 | test -d "${TARGET}/measurements/${repo}/src/main/java" 37 | test -d "${TARGET}/temp/jpeek/all/${repo}" 38 | test -d "${TARGET}/temp/jpeek/cvc/${repo}" 39 | test -f "${TARGET}/temp/reports/010-delete-non-java-files.sh.tex" 40 | test -f "${TARGET}/temp/pdf-report/report.tex" 41 | test -f "${TARGET}"/*.zip 42 | if grep "NaN" "${TARGET}/data/${repo}/NHD.csv"; then 43 | echo "NaN found in jpeek report" 44 | exit 1 45 | fi 46 | set +x 47 | } > "${stdout}" 2>&1 48 | echo "👍🏻 A full package processed correctly" 49 | -------------------------------------------------------------------------------- /tests/filters/test-070-delete-invalid-files.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | 6 | temp=$1 7 | stdout=$2 8 | 9 | list=${temp}/temp/filter-lists/invalid-files.txt 10 | 11 | { 12 | java="${temp}/foo/dir (with) _ long & and 'weird' \"name\" /Foo.java" 13 | mkdir -p "$(dirname "${java}")" 14 | echo "class Foo{} class Bar{}" > "${java}" 15 | rm -f "${list}" 16 | msg=$("${LOCAL}/filters/070-delete-invalid-files.sh" "${temp}" "${temp}/temp") 17 | echo "${msg}" 18 | echo "${msg}" | grep "1 files out of 1 with more than one Java class inside were deleted" 19 | test ! 
-e "${java}" 20 | test -e "${list}" 21 | test "$(wc -l < "${list}" | xargs)" = 1 22 | } > "${stdout}" 2>&1 23 | echo "👍🏻 An invalid Java file was deleted" 24 | 25 | { 26 | rm -f "${list}" 27 | mkdir -p "${temp}/empty" 28 | msg=$("${LOCAL}/filters/070-delete-invalid-files.sh" "${temp}/empty" "${temp}/temp") 29 | echo "${msg}" 30 | echo "${msg}" | grep "There were no Java classes, nothing to delete" 31 | } > "${stdout}" 2>&1 32 | echo "👍🏻 A empty directory didn't fail the script" 33 | 34 | { 35 | if ! "${LOCAL}/filters/delete-invalid-files.py" > "${temp}/message"; then 36 | grep "Usage: python delete-invalid-files.py " "${temp}/message" 37 | fi 38 | 39 | if ! "${LOCAL}/filters/delete-invalid-files.py" "${java}" > "${temp}/message"; then 40 | grep "Usage: python delete-invalid-files.py " "${temp}/message" 41 | fi 42 | 43 | if ! "${LOCAL}/filters/delete-invalid-files.py" "${java}" "${temp}/stdout" "${temp}/stdout" > "${temp}/message"; then 44 | grep "Usage: python delete-invalid-files.py " "${temp}/message" 45 | fi 46 | } > "${stdout}" 2>&1 47 | echo "👍🏻 Usage works correctly" 48 | -------------------------------------------------------------------------------- /tests/metrics/test-raf.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | 6 | temp=$1 7 | stdout=$2 8 | 9 | { 10 | tmp=$(mktemp -d /tmp/XXXX) 11 | cd "${tmp}" 12 | touch "temp_file" 13 | mkdir -p "${tmp}" 14 | "${LOCAL}/metrics/raf.sh" "temp_file" "${temp}/stdout" 15 | grep "RAF 0 " "${temp}/stdout" 16 | } > "${stdout}" 2>&1 17 | echo "👍🏻 Didn't fail in non-git directory" 18 | 19 | { 20 | # shellcheck source=../../help/gnu-utils.sh disable=SC1091 21 | source "${LOCAL}/help/gnu-utils.sh" 22 | tmp=$(mktemp -d /tmp/XXXX) 23 | cd "${tmp}" 24 | rm -rf ./* 25 | rm -rf .git 26 | git init --quiet . 
27 | git config user.email 'foo@example.com' 28 | git config user.name 'Foo' 29 | file1="temp_file1" 30 | file2="temp_file2" 31 | file3="temp_file3" 32 | touch "${file1}" 33 | git add "${file1}" 34 | git config commit.gpgsign false 35 | GIT_COMMITTER_DATE="$(LC_ALL=C date -d "100 minutes ago")" git commit --no-verify --date "100 minutes ago" --quiet -m "first" 36 | "${LOCAL}/metrics/raf.sh" "${file1}" ./log1 37 | touch "${file2}" 38 | git add "${file2}" 39 | GIT_COMMITTER_DATE="$(LC_ALL=C date -d "50 minutes ago")" git commit --no-verify --date "50 minutes ago" --quiet -m "second" 40 | "${LOCAL}/metrics/raf.sh" "${file2}" ./log2 41 | touch "${file3}" 42 | git add "${file3}" 43 | git commit --no-verify --quiet -m "third" 44 | "${LOCAL}/metrics/raf.sh" "${file3}" ./log3 45 | if ! grep "RAF 1.0" "log1"; then 46 | echo "The RAF metric is wrong for '${file1}' (file created first):" 47 | cat ./log1 48 | exit 1 49 | fi 50 | if ! grep "RAF 0.5" "log2"; then 51 | echo "The RAF metric is wrong for '${file2}' (file created exactly in the middle):" 52 | cat ./log2 53 | exit 1 54 | fi 55 | if ! 
grep "RAF 0.0" "log3"; then 56 | echo "The RAF metric is wrong for '${file3}' (file created last):" 57 | cat ./log3 58 | exit 1 59 | fi 60 | } > "${stdout}" 2>&1 61 | echo "👍🏻 Correctly calculated the Relative Age of File" 62 | -------------------------------------------------------------------------------- /steps/lint.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | 6 | cffconvert --validate 7 | 8 | mypy --strict "${LOCAL}/" --exclude "${TARGET}/.*" 9 | 10 | flake8 --max-line-length=140 --exclude venv "${LOCAL}/" 11 | 12 | export PYTHONPATH="${PYTHONPATH}:${LOCAL}/pylint_plugins/" 13 | 14 | find "${LOCAL}" -type f -name '*.py' -not -path "${LOCAL}/venv/**" -not -path "${TARGET}/**" -print0 | xargs -0 -n1 pylint --enable-all-extensions --load-plugins=custom_checkers \ 15 | --disable=empty-comment \ 16 | --disable=missing-module-docstring \ 17 | --disable=invalid-name \ 18 | --disable=too-many-try-statements \ 19 | --disable=broad-exception-caught \ 20 | --disable=magic-value-comparison \ 21 | --disable=line-too-long \ 22 | --disable=confusing-consecutive-elif \ 23 | --disable=use-set-for-membership \ 24 | --disable=duplicate-code 25 | 26 | rubocop 27 | 28 | if ! bibcop --version >/dev/null 2>&1; then 29 | PATH=$PATH:$("${LOCAL}/help/texlive-bin.sh") 30 | export PATH 31 | fi 32 | 33 | while IFS= read -r sh; do 34 | shellcheck --shell=bash --severity=style "${sh}" 35 | echo "${sh}: shellcheck OK" 36 | done < <(find "$(realpath "${LOCAL}")" -name '*.sh' -type f -not -path "$(realpath "${TARGET}")/**" -not -path "$(realpath "${LOCAL}")/venv/**") 37 | 38 | header="Copyright (c) 2021-$(date +%Y) Yegor Bugayenko" 39 | failed="false" 40 | for mask in *.sh *.py *.rb *.yml *.java Makefile; do 41 | while IFS= read -r file; do 42 | if ! 
grep -q "$header" "$file"; then 43 | failed="true" 44 | echo "⚠️ Copyright not found in file: $file" 45 | fi 46 | done < <(find "$(realpath "${LOCAL}")" -type f -name "${mask}" \ 47 | -not -path "$(realpath "${TARGET}")/**" \ 48 | -not -path "$(realpath "${LOCAL}")/fixtures/filters/unparsable/**" \ 49 | -not -path "$(realpath "${LOCAL}")/test-zone/**" \ 50 | -not -path "$(realpath "${LOCAL}")/venv/**") 51 | done 52 | if [[ "${failed}" = "true" ]]; then 53 | exit; 54 | fi 55 | -------------------------------------------------------------------------------- /metrics/README.md: -------------------------------------------------------------------------------- 1 | # Metrics 2 | 3 | ## How Metrics Work 4 | 5 | Every executable file in this directory is a calculator of a few 6 | metrics. They all are expected to be executed like this: 7 | 8 | ```bash 9 | ./cloc.sh Foo.java log.txt 10 | ``` 11 | 12 | Here, `Foo.java` is the path of the Java file to examine and 13 | `log.txt` is the path of the file where the output is supposed 14 | to be saved. 15 | 16 | It is expected, that the `log.txt` will contain the following 17 | text after the script finished successfully: 18 | 19 | ```text 20 | NoBL 42 Number of Blank Lines 21 | NoCL 44 Number of Commenting Lines 22 | LoC 323 Total physical lines of source code 23 | ``` 24 | 25 | There are three columns in the file. The first one should contain 26 | the name of the metric. The second one contains the value (float or integer). 27 | The third one contains the description of the metric (in general, NOT 28 | for this particular file). A space is mandatory between the first and the 29 | second column, and between the second and the third columns. 30 | 31 | ## Metrics Calculation 32 | 33 | Metrics calculation can be elegantly managed during the measure step outlined 34 | in the `Makefile`. This process initiates with the execution of `steps/measure.sh`, 35 | which performs several preparatory tasks: 36 | 37 | 1. 
Creating the necessary files and directories. 38 | 2. Collecting jobs to execute in parallel, 39 | with one job designated for each Java file to analyze metrics. 40 | 3. Running the jobs using `help/parallel.sh`. 41 | 42 | For each job, `steps/measure-file.sh` is executed, 43 | resulting in the following organized file structure: 44 | 45 | ```text 46 | dataset/ 47 | measurements/ 48 | yegor256/ 49 | cactoos/ 50 | Main.java.m 51 | Main.java.m.LOC 52 | Main.java.m.CC 53 | Main.java.m.CoCo 54 | ... 55 | ``` 56 | 57 | In this structure, `{file_name}.m` serves as a buffer 58 | for intermediate results during calculations. 59 | The files named `{file_name}.m.{metric_name}` store the calculated metrics, 60 | where `{metric_name}` corresponds to the specific metric 61 | for the file `{file_name}`. 62 | -------------------------------------------------------------------------------- /steps/filter.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | 6 | if [ -f "venv/bin/activate" ]; then 7 | # shellcheck source=venv/bin/activate disable=SC1091 8 | . venv/bin/activate 9 | else 10 | echo "Error: venv/bin/activate not found. Please make sure the virtual environment is set up." 11 | exit 1 12 | fi 13 | 14 | mkdir -p "${TARGET}/temp/reports" 15 | find "${LOCAL}/filters" -type f -name '*.sh' -exec bash -c 'realpath --relative-to="${1}" "$2"' _ "${LOCAL}/filters" {} \; | sort | while IFS= read -r filter; do 16 | tex=${TARGET}/temp/reports/${filter}.tex 17 | if [ ! -s "${tex}" ]; then 18 | echo "The ${filter} filter failed in previous run, cleaning up after it now..." 
19 | rm -f "${tex}" 20 | fi 21 | if [ -e "${tex}" ]; then 22 | echo "The ${filter} filter was already completed earlier, see report in '${tex}'" 23 | else 24 | before=$(find "${TARGET}/github" -name '*' -type f -o -type l -o -type d | wc -l | xargs) 25 | echo "Running filter ${filter}... (may take some time)" 26 | start=$(date +%s%N) 27 | "${LOCAL}/filters/${filter}" "${TARGET}/github" "${TARGET}/temp" |\ 28 | tr -d '\n\r' |\ 29 | sed "s/^/\\\\item /" |\ 30 | sed "s/$/;/" \ 31 | > "${tex}" 32 | after=$(find "${TARGET}/github" -name '*' -type f -o -type l -o -type d | wc -l | xargs) 33 | if [ "${after}" -lt "${before}" ]; then 34 | diff="deleted $(echo "${before} - ${after}" | bc) files" 35 | elif [ "${after}" -gt "${before}" ]; then 36 | diff="added $(echo "${after} - ${before}" | bc) files" 37 | else 38 | diff="didn't touch any files" 39 | fi 40 | echo "Filter ${filter} finished$("${LOCAL}/help/tdiff.sh" "${start}"), ${diff} \ 41 | and published its results to ${TARGET}/temp/reports/${filter}.tex " 42 | fi 43 | done 44 | 45 | find "${TARGET}/temp/reports" -type f -exec basename {} \; | sort | while IFS= read -r f; do 46 | echo "${f}:" 47 | cat "${TARGET}/temp/reports/${f}" 48 | echo "" 49 | done 50 | -------------------------------------------------------------------------------- /steps/discover.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | 6 | details=${TARGET}/temp/repo-details.tex 7 | mkdir -p "$(dirname "${details}")" 8 | 9 | csv=${TARGET}/repositories.csv 10 | echo "" > "${details}" 11 | if [ -e "${csv}" ]; then 12 | echo "The list of repos is already here: ${csv}" 13 | if [ -n "${REPO}" ]; then 14 | echo "Before using REPO environment variable you should delete the ${csv} file ($(wc -l < "${csv}" | xargs) lines)" 15 | clones=${TARGET}/github 16 | if [ -e "${clones}" 
]; then 17 | printf "ATTENTION: If you do this (delete the CSV file), and then run 'make' again, all cloned repositories in the '%s' directory will be deleted (%d directories). " \ 18 | "${TARGET}/github/" "$(find "${clones}" -type d -depth 2 | wc -l | xargs)" 19 | printf "After this, the dataset will not be suitable for further analysis! " 20 | printf "Think twice! If you just want to analyze one repository, do it in a different directory.\n" 21 | fi 22 | exit 1 23 | fi 24 | elif [ -n "${REPO}" ]; then 25 | echo "Using one repo: ${REPO}" 26 | echo -e "repo,\n${REPO}," > "${csv}" 27 | elif [ -z "${REPOS}" ] || [ ! -e "${REPOS}" ]; then 28 | echo "Using discover-repos.rb..." 29 | declare -a args=( \ 30 | "--token=${TOKEN}" \ 31 | "--total=${TOTAL}" \ 32 | "--csv=${csv}" \ 33 | "--tex=${TARGET}/temp/repo-details.tex" \ 34 | "--pause=2" \ 35 | "--min-stars=400" \ 36 | "--max-stars=10000" \ 37 | ) 38 | if [ -n "${CAMTESTS}" ]; then 39 | args+=('--dry' '--pause=0') 40 | fi 41 | "${LOCAL}/help/assert-tool.sh" ruby -v 42 | ruby "${LOCAL}/steps/discover-repos.rb" "${args[@]}" 43 | nosamples=${TARGET}/no-samples.csv 44 | declare -a fargs=( \ 45 | "--repositories=${csv}" \ 46 | "--out=${nosamples}" \ 47 | "--model=transformer" 48 | ) 49 | samples-filter filter "${fargs[@]}" 50 | rm "${csv}" 51 | mv "${nosamples}" "${csv}" 52 | else 53 | echo "Using the list of repositories from the '${REPOS}' file (defined by the REPOS environment variable)..." 
54 | cat "${REPOS}" > "${csv}" 55 | fi 56 | 57 | cat "${csv}" 58 | -------------------------------------------------------------------------------- /tests/filters/test-delete-wrong-encoding.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | 6 | temp=$1 7 | stdout=$2 8 | 9 | { 10 | javaTemp="${temp}/Foo1.java.temp" 11 | echo "interface Foo {}" > "${javaTemp}" 12 | java="${temp}/Foo1.java" 13 | echo "" > "${java}" 14 | iconv -f ASCII -t UTF-16 "${javaTemp}" > "${java}" 15 | "${LOCAL}/filters/delete-wrong-encoding.py" "${java}" "${temp}/deleted.txt" 16 | test ! -e "${java}" 17 | grep "${java}" "${temp}/deleted.txt" 18 | } > "${stdout}" 2>&1 19 | echo "👍🏻 A Java file with a UTF-16 encoding deleted correctly" 20 | 21 | { 22 | javaTemp="${temp}/Foo2.java.temp" 23 | echo "interface Foo {}" > "${javaTemp}" 24 | java="${temp}/Foo2.java" 25 | echo "" > "${java}" 26 | iconv -f ASCII -t UTF-32 "${javaTemp}" > "${java}" 27 | "${LOCAL}/filters/delete-wrong-encoding.py" "${java}" "${temp}/deleted.txt" 28 | test ! -e "${java}" 29 | grep "${java}" "${temp}/deleted.txt" 30 | } > "${stdout}" 2>&1 31 | echo "👍🏻 A Java file with a UTF-32 encoding deleted correctly" 32 | 33 | { 34 | java="${temp}/Foo3.java" 35 | echo "interface Foo {}" > "${java}" 36 | "${LOCAL}/filters/delete-wrong-encoding.py" "${java}" "${temp}/deleted.txt" 37 | test -e "${java}" 38 | grep -v "${java}" "${temp}/deleted.txt" 39 | } > "${stdout}" 2>&1 40 | echo "👍🏻 A Java file with a UTF-8 encoding was not deleted" 41 | 42 | { 43 | if ! "${LOCAL}/filters/delete-wrong-encoding.py" > "${temp}/message"; then 44 | grep "Usage: python delete-wrong-encoding.py " "${temp}/message" 45 | fi 46 | if ! 
"${LOCAL}/filters/delete-wrong-encoding.py" "${java}" > "${temp}/message"; then 47 | grep "Usage: python delete-wrong-encoding.py " "${temp}/message" 48 | fi 49 | if ! "${LOCAL}/filters/delete-wrong-encoding.py" "${java}" "${temp}/stdout" "${temp}/stdout" > "${temp}/message"; then 50 | grep "Usage: python delete-wrong-encoding.py " "${temp}/message" 51 | fi 52 | } > "${stdout}" 2>&1 53 | echo "👍🏻 Usage works correctly" 54 | -------------------------------------------------------------------------------- /tests/steps/test-filter.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | 6 | stdout=$2 7 | 8 | { 9 | rm -rf "${TARGET}/github" 10 | mkdir -p "${TARGET}/github/a/b" 11 | msg=$("${LOCAL}/steps/filter.sh") 12 | echo "${msg}" 13 | } > "${stdout}" 2>&1 14 | echo "👍🏻 A simple filtering ran smoothly" 15 | 16 | { 17 | rm -rf "${TARGET}/github" 18 | rm -rf "${TARGET}/temp/reports" 19 | rm -rf "${TARGET}/temp/filter-lists" 20 | dir=${TARGET}/github 21 | mkdir -p "${dir}" 22 | mkdir -p "${dir}/a/b/.git" 23 | touch "${dir}/a/b/.git/boom" 24 | echo "nothing" > "${dir}/package-info.java" 25 | echo "nothing" > "${dir}/module-info.java" 26 | echo "nothing" > "${dir}/FooTest.java" 27 | echo "class X {} class Y {} class Z {}" > "${dir}/XYZ.java" 28 | printf 'class Boom { String x = "a%.0s"; }' {1..5000} > "${dir}/Boom.java" 29 | javaTemp="${dir}/WrongEncoding.java.temp" 30 | echo 'class Foo { String x = "привет"; }' > "${javaTemp}" 31 | iconv -f UTF-8 -t UTF-16 "${javaTemp}" > "${dir}/WrongEncoding.java" 32 | ln -s "${dir}/FooTest.java" "${dir}/link.java" 33 | class="${dir}/a/b/foo - '(weird)'/привет/Foo.java" 34 | mkdir -p "$(dirname "${class}")" 35 | echo "class Foo {}" > "${class}" 36 | echo "nothing" > "${class}.bin" 37 | interface="${dir}/foo/-- \";'/тук тук/Boom.java" 38 | mkdir -p 
"$(dirname "${interface}")" 39 | echo "interface Boom {}" > "${interface}" 40 | broken="${dir}/''foo/;;'\"/вот/так/Broken-файл.java" 41 | mkdir -p "$(dirname "${broken}")" 42 | echo "broken code" > "${broken}" 43 | msg=$("${LOCAL}/steps/filter.sh") 44 | echo "${msg}" 45 | if [ ! "$(echo "${msg}" | grep -c "didn't touch any files")" -eq 0 ]; then 46 | echo "One of the filters didn't do anything, which is wrong." 47 | echo "This test is designed to trigger all available filters, without exception." 48 | echo "If you add a new filter to the filters/ directory, make sure it is triggered here too." 49 | exit 1 50 | fi 51 | test ! -e "${broken}" 52 | test ! -e "${interface}" 53 | } > "${stdout}" 2>&1 54 | echo "👍🏻 A more complex filtering ran smoothly" 55 | -------------------------------------------------------------------------------- /tests/filters/test-delete-non-classes.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | 6 | temp=$1 7 | stdout=$2 8 | 9 | { 10 | java="${temp}/Foo (x).java" 11 | echo "interface Foo {}" > "${java}" 12 | "${LOCAL}/filters/delete-non-classes.py" "${java}" "${temp}/deleted.txt" 13 | test ! -e "${java}" 14 | grep "${java}" "${temp}/deleted.txt" 15 | } > "${stdout}" 2>&1 16 | echo "👍🏻 A Java file with a small interface inside was deleted correctly" 17 | 18 | { 19 | java="${temp}/foo/dir (with) _ long & and weird name /Foo.java" 20 | mkdir -p "$(dirname "${java}")" 21 | echo "/* hello */ package foo.bar.xxx; import java.io.File; public interface Foo { File bar(); }" > "${java}" 22 | "${LOCAL}/filters/delete-non-classes.py" "${java}" "${temp}/deleted.txt" 23 | test ! 
-e "${java}" 24 | grep "${java}" "${temp}/deleted.txt" 25 | } > "${stdout}" 2>&1 26 | echo "👍🏻 A Java file with a bigger interface inside was deleted correctly" 27 | 28 | { 29 | java=${temp}/Bar.java 30 | echo "enum Bar {}" > "${java}" 31 | "${LOCAL}/filters/delete-non-classes.py" "${java}" "${temp}/deleted.txt" 32 | test ! -e "${java}" 33 | grep "${java}" "${temp}/deleted.txt" 34 | } > "${stdout}" 2>&1 35 | echo "👍🏻 A Java file with a enum inside was deleted correctly" 36 | 37 | { 38 | java=${temp}/Broken.java 39 | echo "broken syntax" > "${java}" 40 | "${LOCAL}/filters/delete-non-classes.py" "${java}" "${temp}/deleted.txt" 41 | test -e "${java}" 42 | grep -v "${java}" "${temp}/deleted.txt" 43 | } > "${stdout}" 2>&1 44 | echo "👍🏻 A Java file with broken syntax was not deleted, this is correct" 45 | 46 | { 47 | "${LOCAL}/filters/delete-non-classes.py" "${temp}/file-is-absent.java" "${temp}/deleted.txt" 48 | grep -v "${temp}/file-is-absent.java" "${temp}/deleted.txt" 49 | } > "${stdout}" 2>&1 50 | echo "👍🏻 Absent file didn't fail the script" 51 | 52 | { 53 | java=${temp}/Good.java 54 | echo "class Good {}" > "${java}" 55 | "${LOCAL}/filters/delete-non-classes.py" "${java}" "${temp}/deleted.txt" 56 | test -e "${java}" 57 | grep -v "${java}" "${temp}/deleted.txt" 58 | } > "${stdout}" 2>&1 59 | echo "👍🏻 A good Java file was not deleted, it's correct behavior" 60 | -------------------------------------------------------------------------------- /tests/filters/test-050-delete-long-lines.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | 6 | temp=$1 7 | stdout=$2 8 | 9 | list=${temp}/temp/filter-lists/files-with-long-lines.txt 10 | 11 | { 12 | java="${temp}/foo/dir (with) _ long & and 'weird' \"name\" /Foo.java" 13 | mkdir -p "$(dirname "${java}")" 14 | echo "some text in the file" > 
"${java}" 15 | rm -f "${list}" 16 | msg=$("${LOCAL}/filters/050-delete-long-lines.sh" "${temp}" "${temp}/temp") 17 | echo "${msg}" 18 | echo "${msg}" | grep "No files out of 1 had lines longer " 19 | test -e "${java}" 20 | test -e "${list}" 21 | test "$(wc -l < "${list}" | xargs)" = 0 22 | } > "${stdout}" 2>&1 23 | echo "👍🏻 A Java file with short lines wasn't deleted" 24 | 25 | { 26 | java="${temp}/foo/bb/Привет.java" 27 | mkdir -p "$(dirname "${java}")" 28 | printf 'a%.0s' {1..5000} > "${java}" 29 | rm -f "${list}" 30 | msg=$("${LOCAL}/filters/050-delete-long-lines.sh" "${temp}" "${temp}/temp") 31 | echo "${msg}" 32 | echo "${msg}" | grep "1 files out of 2 with at least one line longer " 33 | test ! -e "${java}" 34 | test -e "${list}" 35 | test "$(wc -l < "${list}" | xargs)" = 1 36 | } > "${stdout}" 2>&1 37 | echo "👍🏻 A Java file with a long line was deleted" 38 | 39 | { 40 | # see https://stackoverflow.com/questions/77169978/how-to-reproduce-awk-warning-invalid-multibyte-data-detected 41 | java="${temp}/foo/bb/привет/Привет.java" 42 | mkdir -p "$(dirname "${java}")" 43 | printf '\xC0\x80' > "${java}" 44 | rm -f "${list}" 45 | msg=$(LC_ALL=en_US.UTF-8 "${LOCAL}/filters/050-delete-long-lines.sh" "$(dirname "${java}")" "${temp}/temp" 2>&1) 46 | test "$(echo "${msg}" | grep -c "Invalid multibyte data detected")" = 0 47 | } > "${stdout}" 2>&1 48 | echo "👍🏻 A non-unicode file didn't cause awk troubles" 49 | 50 | { 51 | java="${temp}/--/empty.java" 52 | mkdir -p "$(dirname "${java}")" 53 | touch "${java}" 54 | rm -f "${list}" 55 | msg=$("${LOCAL}/filters/050-delete-long-lines.sh" "${temp}" "${temp}/temp") 56 | test -e "${java}" 57 | test -e "${list}" 58 | test "$(wc -l < "${list}" | xargs)" = 0 59 | } > "${stdout}" 2>&1 60 | echo "👍🏻 An empty Java file wasn't deleted" 61 | -------------------------------------------------------------------------------- /steps/summarize.sh: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko
# SPDX-License-Identifier: MIT

# Summarizes the measurements stored in "${TARGET}/measurements", one CSV
# file per metric.
#
# The name of a metric is the part of a `*.m.*` file name after its last dot.
# For every metric the file "${TARGET}/data/summary/<metric>.csv" is created
# (overwritten if it exists), with this header and one line for every
# repository listed in "${TARGET}/temp/repos-to-aggregate.txt" that has
# at least one file of this metric:
#
#   repository,count,sum,average,mean,min,max
#
# Only values that are non-negative integers are counted, all other
# measurement files are skipped. The column called `mean` holds the middle
# value of the sorted list (the average of the two middle values when the
# count is even); the name of the column is kept to stay compatible with
# the existing readers of the CSV. When a repository has files of the
# metric but none of them holds an integer, all numbers in its line are zeros.
#
# Environment:
#   TARGET - the directory with the dataset (mandatory)

set -e -o pipefail

# Without TARGET all paths below would silently resolve against "/"
: "${TARGET:?The TARGET environment variable must be set}"

summary_dir="${TARGET}/data/summary"
mkdir -p "${summary_dir}"
metrics=$(find "${TARGET}/measurements" -type f -name '*.m.*' -print | sed "s|^.*\.\(.*\)$|\1|" | sort | uniq)
metrics_count=0

# An array is used instead of unquoted expansion, to avoid globbing of names
mapfile -t metric_names <<< "${metrics}"

for metric in "${metric_names[@]}"; do
  if [[ -z "${metric}" ]]; then
    # This happens when no measurements are found at all
    continue
  fi
  summary_file="${summary_dir}/${metric}.csv"
  echo "repository,count,sum,average,mean,min,max" > "${summary_file}"
  # The `|| [[ -n ... ]]` part keeps the last line, even without a trailing newline
  while IFS= read -r repo || [[ -n "${repo}" ]]; do
    if [[ -z "${repo}" ]]; then
      # A blank line would otherwise aggregate all repositories together
      continue
    fi
    files=$(find "${TARGET}/measurements/${repo}" -type f -name "*.${metric}")
    if [[ -z "${files}" ]]; then
      continue
    fi
    count=0
    sum=0
    min=0
    max=0
    all_values=()
    while IFS= read -r file; do
      value=$(< "${file}")
      if [[ ! "${value}" =~ ^[0-9]+$ ]]; then
        continue
      fi
      # Force base 10, otherwise "08" and "09" are rejected as broken octals
      value=$((10#${value}))
      # The first value seeds both extremes, no magic sentinel is required
      if ((count == 0 || value < min)); then
        min=${value}
      fi
      if ((count == 0 || value > max)); then
        max=${value}
      fi
      count=$((count + 1))
      sum=$((sum + value))
      all_values+=("${value}")
    done <<< "${files}"
    if ((count > 0)); then
      average=$(echo "scale=2; ${sum} / ${count}" | bc -l)
      mapfile -t sorted_values < <(printf '%s\n' "${all_values[@]}" | sort -n)
      middle_index=$((count / 2))
      if ((count % 2 == 0)); then
        mean=$(echo "scale=2; (${sorted_values[middle_index - 1]} + ${sorted_values[middle_index]}) / 2" | bc -l)
      else
        mean=${sorted_values[middle_index]}
      fi
    else
      average=0
      mean=0
    fi
    echo "${repo},${count},${sum},${average},${mean},${min},${max}" >> "${summary_file}"
  done < "${TARGET}/temp/repos-to-aggregate.txt"
  metrics_count=$((metrics_count + 1))
  echo "Metric ${metric} summarized in ${summary_file}."
done
echo "All ${metrics_count} metrics summarized into ${summary_dir}."
62 | -------------------------------------------------------------------------------- /tests/metrics/test-irc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | 4 | # shellcheck disable=SC2317 5 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 6 | # SPDX-License-Identifier: MIT 7 | set -e -o pipefail 8 | 9 | # TODO: #279 ENABLE THIS TESTS VIA REMOVING `exit 0` AND REMOVE `shellcheck disable=SC2317` on the top RIGHT AFTER IMPLEMENTING irc.sh 10 | exit 0 11 | 12 | temp=$1 13 | stdout=$2 14 | 15 | { 16 | tmp=$(mktemp -d /tmp/XXXX) 17 | cd "${tmp}" 18 | mkdir -p "${LOCAL}/${temp}" 19 | touch "${LOCAL}/${temp}/file.java" 20 | if ! "${LOCAL}/metrics/irc.sh" "${LOCAL}/${temp}/file.java" "${LOCAL}/${temp}/stdout" 21 | then 22 | exit 1 23 | fi 24 | } > "${stdout}" 2>&1 25 | echo "👍🏻 Failed in non-git directory" 26 | 27 | { 28 | tmp=$(mktemp -d /tmp/XXXX) 29 | cd "${tmp}" 30 | rm -rf ./* 31 | rm -rf .git 32 | git init --quiet . 33 | git config user.email 'foo@example.com' 34 | git config user.name 'Foo' 35 | file1="one.java" 36 | if ! "${LOCAL}/metrics/irc.sh" "./${file1}" "t0" 37 | then 38 | exit 1 39 | fi 40 | } > "${stdout}" 2>&1 41 | echo "👍🏻 Failed in repo without given file" 42 | 43 | { 44 | tmp=$(mktemp -d /tmp/XXXX) 45 | cd "${tmp}" 46 | rm -rf ./* 47 | rm -rf .git 48 | git init --quiet . 49 | git config user.email 'foo@example.com' 50 | git config user.name 'Foo' 51 | file1="one.java" 52 | touch "${file1}" 53 | "${LOCAL}/metrics/irc.sh" "./${file1}" "t0" 54 | grep "IRC 0" "t0" # There are no commits in repo with given file 55 | } > "${stdout}" 2>&1 56 | echo "👍🏻 Didn't fail in repo without commits" 57 | 58 | { 59 | tmp=$(mktemp -d /tmp/XXXX) 60 | cd "${tmp}" 61 | rm -rf ./* 62 | rm -rf .git 63 | git init --quiet . 
64 | git config user.email 'foo@example.com' 65 | git config user.name 'Foo' 66 | # NOTE(review): the previous group greps "IRC" in upper case, this one greps "irc" - confirm which spelling irc.sh prints. 67 | file1="one.java" 68 | touch "${file1}" 69 | git add "${file1}" 70 | git config commit.gpgsign false 71 | git commit --no-verify --quiet -m "first file" 72 | "${LOCAL}/metrics/irc.sh" "./${file1}" "t2" 73 | grep "irc 1 " "t2" # There is only one commit in repo and it is for the given file 74 | 75 | file2="two.java" 76 | touch "${file2}" 77 | git add "${file2}" 78 | git commit --no-verify --quiet -m "second file" 79 | "${LOCAL}/metrics/irc.sh" "./${file2}" "t3" 80 | grep "irc 0.5 " "t3" # There are two commits in repo and one for the given file 81 | } > "${stdout}" 2>&1 82 | echo "👍🏻 Correctly calculated the IRC (Impact Ratio by Commits)" 83 | -------------------------------------------------------------------------------- /tests/steps/aggregation-functions/test-90-percentile.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | # Tests for steps/aggregation-functions/90-percentile.sh, invoked as: <input csv> <output dir> <metric name>. 5 | set -e -o pipefail 6 | # $2: file that receives stdout and stderr of every group. 7 | stdout=$2 8 | # Group 1: a single data row, so the 90th percentile has to be that row's value. 9 | { 10 | dir="${TARGET}" 11 | mkdir -p "${dir}" 12 | touch "${dir}/LCOM5.csv" 13 | echo "repo,java_file,LCOM5" > "${dir}/LCOM5.csv" 14 | echo "kek,src/main/kek,42.000" >> "${dir}/LCOM5.csv" 15 | "${LOCAL}/steps/aggregation-functions/90-percentile.sh" "${dir}/LCOM5.csv" "${TARGET}/data/aggregation" "LCOM5" 16 | test -e "${TARGET}/data/aggregation/LCOM5.90th_percentile.csv" 17 | test "$(cat "${TARGET}/data/aggregation/LCOM5.90th_percentile.csv")" = "42.000" 18 | } > "${stdout}" 2>&1 19 | echo "👍🏻 Single metric (LCOM5) 90th percentile calculated correctly" 20 | # Group 2: two metrics are aggregated independently into the same output directory. 21 | { 22 | dir1="${TARGET}" 23 | mkdir -p "${dir1}" 24 | touch "${dir1}/LCOM5.csv" 25 | echo "repo,java_file,LCOM5" > "${dir1}/LCOM5.csv" 26 | echo "kek,src/main/kek,42.000" >> "${dir1}/LCOM5.csv" 27 | touch "${dir1}/NHD.csv" 28 | echo "repo,java_file,NHD" > "${dir1}/NHD.csv" 29 | echo 
"kek,src/main/kek,1000.000" >> "${dir1}/NHD.csv" 30 | "${LOCAL}/steps/aggregation-functions/90-percentile.sh" "${dir1}/LCOM5.csv" "${TARGET}/data/aggregation" "LCOM5" 31 | "${LOCAL}/steps/aggregation-functions/90-percentile.sh" "${dir1}/NHD.csv" "${TARGET}/data/aggregation" "NHD" 32 | test -e "${TARGET}/data/aggregation/LCOM5.90th_percentile.csv" 33 | percentile_value_lcom5=$(cat "${TARGET}/data/aggregation/LCOM5.90th_percentile.csv") 34 | test "$percentile_value_lcom5" = "42.000" 35 | test -e "${TARGET}/data/aggregation/NHD.90th_percentile.csv" 36 | percentile_value_nhd=$(cat "${TARGET}/data/aggregation/NHD.90th_percentile.csv") 37 | test "$percentile_value_nhd" = "1000.000" 38 | } > "${stdout}" 2>&1 39 | echo "👍🏻 Multiple metrics (LCOM5, NHD) aggregated correctly" 40 | { 41 | dir="${TARGET}" 42 | mkdir -p "${dir}" 43 | touch "${dir}/Empty.java.m.LCOM5" 44 | echo "repo,java_file,LCOM5" > "${dir}/Empty.java.m.LCOM5" 45 | "${LOCAL}/steps/aggregation-functions/90-percentile.sh" "${dir}/Empty.java.m.LCOM5" "${TARGET}/data/aggregation" "LCOM5" 46 | test -e "${TARGET}/data/aggregation/LCOM5.90th_percentile.csv" 47 | percentile_value=$(cat "${TARGET}/data/aggregation/LCOM5.90th_percentile.csv") 48 | test "$percentile_value" = "0.000" # a header-only input is expected to yield 0.000 49 | } > "${stdout}" 2>&1 50 | echo "👍🏻 Edge case with no data handled correctly" 51 | -------------------------------------------------------------------------------- /tests/metrics/test-rfvc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | # $1: directory in which the throwaway git repository is created; $2: file that receives stdout and stderr of the group. 6 | temp=$1 7 | stdout=$2 8 | 9 | { 10 | metric_script_path="${LOCAL}/metrics/rfvc.sh" 11 | cd "${temp}" 12 | # Start from an empty directory without git history. 13 | rm -rf ./* 14 | rm -rf .git 15 | 16 | git init --quiet . 
17 | git config user.email 'foo@example.com' 18 | git config user.name 'Foo' 19 | git config commit.gpgsign false 20 | 21 | java_dir="./foo/dir/" 22 | java1="FooTest.java" 23 | java2="FooTest2.java" 24 | java3="FooTest3.java" 25 | 26 | mkdir -p "${java_dir}" 27 | cd ${java_dir} 28 | 29 | touch "${java1}" 30 | touch "stdout" 31 | 32 | printf "class Foo {}" > "${java1}" 33 | git add "${java1}" 34 | git commit --no-verify --quiet -m "first commit" 35 | ${metric_script_path} "${java1}" "stdout" 36 | grep "RFVC 1" "stdout" 37 | 38 | printf "class Foo {}" > "${java2}" 39 | git add "${java2}" 40 | git commit --no-verify --quiet -m "+second commit" 41 | 42 | ${metric_script_path} "${java1}" "stdout" 43 | grep "RFVC 0.5" "stdout" 44 | 45 | ${metric_script_path} "${java2}" "stdout" 46 | grep "RFVC 0.5" "stdout" 47 | 48 | 49 | printf "class Foo {}" > "${java3}" 50 | git add "${java3}" 51 | git commit --no-verify --quiet -m "-third commit" 52 | 53 | ${metric_script_path} "${java1}" "stdout" 54 | grep "RFVC 0.33" "stdout" 55 | 56 | ${metric_script_path} "${java2}" "stdout" 57 | grep "RFVC 0.33" "stdout" 58 | 59 | ${metric_script_path} "${java3}" "stdout" 60 | grep "RFVC 0.33" "stdout" 61 | 62 | 63 | printf "class Foo2 {}" > "${java1}" 64 | git add "${java1}" 65 | git commit --no-verify --quiet -m "forth commit" 66 | 67 | ${metric_script_path} "${java1}" "stdout" 68 | grep "RFVC 0.5" "stdout" 69 | 70 | ${metric_script_path} "${java2}" "stdout" 71 | grep "RFVC 0.25" "stdout" 72 | 73 | ${metric_script_path} "${java3}" "stdout" 74 | grep "RFVC 0.25" "stdout" 75 | 76 | 77 | printf "class Foo3 {}" > "${java1}" 78 | git add "${java1}" 79 | git commit --no-verify --quiet -m "fifth commit" 80 | 81 | ${metric_script_path} "${java1}" "stdout" 82 | grep "RFVC 0.6" "stdout" 83 | 84 | 85 | ${metric_script_path} "${java2}" "stdout" 86 | grep "RFVC 0.2" "stdout" 87 | 88 | ${metric_script_path} "${java3}" "stdout" 89 | grep "RFVC 0.2" "stdout" 90 | } > "${stdout}" 2>&1 91 | echo "👍🏻 Correctly 
calculated Relative File Volatility by Commits" 92 | -------------------------------------------------------------------------------- /tests/steps/test-measure.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | # $1: scratch directory for generated Java files; $2: file that receives stdout and stderr of every group. 6 | temp=$1 7 | stdout=$2 8 | # Group 1: repository and file names with spaces and shell metacharacters are measured by steps/measure.sh; a ".m" marker must exist and no ".m.NHD" file. 9 | { 10 | repo="foo /bar test \"; " 11 | name="dir (with) _ long & and weird ; name /hello.java/test.java/Foo.java" 12 | java="${TARGET}/github/${repo}/${name}" 13 | mkdir -p "$(dirname "${java}")" 14 | echo "class Foo {}" > "${java}" 15 | msg=$("${LOCAL}/steps/measure.sh") 16 | echo "${msg}" | grep "for: Foo.java (1/1)" 17 | echo "${msg}" | grep "All metrics calculated in 1 files" 18 | test -e "${TARGET}/measurements/${repo}/${name}.m" 19 | test ! -e "${TARGET}/measurements/${repo}/${name}.m.NHD" 20 | } > "${stdout}" 2>&1 21 | echo "👍🏻 Measured metrics correctly" 22 | # NOTE(review): the here-documents and bodies of the next two groups are missing from this listing (the numbering jumps); restore them from the repository before relying on this text. 23 | { 24 | java="${temp}/Foo(xls;)';ого привет '\".java" 25 | cat > "${java}" < "${stdout}" 2>&1 50 | echo "👍🏻 All metrics are correctly named in AllCaps format" 51 | 52 | { 53 | java="${temp}/Foo(xls;)';не привет '\".java" 54 | cat > "${java}" < "${stdout}" 2>&1 77 | echo "👍🏻 All provided metrics are named uniquely" 78 | -------------------------------------------------------------------------------- /tests/metrics/test-irca.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Tests for metrics/irca.sh; everything after the early 'exit 0' below is currently skipped. 4 | # shellcheck disable=SC2317 5 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 6 | # SPDX-License-Identifier: MIT 7 | set -e -o pipefail 8 | 9 | # ENABLE THIS TESTS RIGHT AFTER IMPLEMENTING irca.sh VIA REMOVING `exit 0` 10 | exit 0 11 | # $1: path (used under ${LOCAL}) for fixtures; $2: file that receives stdout and stderr of every group. 12 | temp=$1 13 | stdout=$2 14 | # Group 1: outside a git repository irca.sh must succeed and write "Provided non-git repo" to its output file. 15 | { 16 | tmp=$(mktemp -d /tmp/XXXX) 17 | cd "${tmp}" 18 | mkdir -p "${LOCAL}/${temp}" 19 | touch "${LOCAL}/${temp}/file.java" 20 | 
"${LOCAL}/metrics/irca.sh" "${LOCAL}/${temp}/file.java" "${LOCAL}/${temp}/stdout" 21 | grep "Provided non-git repo" "${LOCAL}/${temp}/stdout" 22 | } >"${stdout}" 2>&1 23 | echo "👍🏻 Didn't fail in non-git directory" 24 | # Group 2: the repository exists but the file does not; "File does not exist" is expected in the output file. 25 | { 26 | tmp=$(mktemp -d /tmp/XXXX) 27 | cd "${tmp}" 28 | rm -rf ./* 29 | rm -rf .git 30 | git init --quiet . 31 | git config user.email 'foo@example.com' 32 | git config user.name 'Foo' 33 | file1="one.java" 34 | "${LOCAL}/metrics/irca.sh" "./${file1}" "t0" 35 | grep "File does not exist" "t0" # The given file does not exist 36 | } >"${stdout}" 2>&1 37 | echo "👍🏻 Didn't fail in repo without given file" 38 | # Group 3: the file exists but nothing was committed; "No commits yet in repo" is expected in the output file. 39 | { 40 | tmp=$(mktemp -d /tmp/XXXX) 41 | cd "${tmp}" 42 | rm -rf ./* 43 | rm -rf .git 44 | git init --quiet . 45 | git config user.email 'foo@example.com' 46 | git config user.name 'Foo' 47 | file1="one.java" 48 | touch "${file1}" 49 | "${LOCAL}/metrics/irca.sh" "./${file1}" "t0" 50 | grep "No commits yet in repo" "t0" # There are no commits in repo with given file 51 | } >"${stdout}" 2>&1 52 | echo "👍🏻 Didn't fail in repo without commits" 53 | # Group 4: three different committers each add one file; the metric is checked after every commit. 54 | { 55 | tmp=$(mktemp -d /tmp/XXXX) 56 | cd "${tmp}" 57 | rm -rf ./* 58 | rm -rf .git 59 | git init --quiet . 
60 | git config user.email 'foo1@example.com' 61 | git config user.name 'Foo1' 62 | file1="one.java" 63 | touch "${file1}" 64 | git add "${file1}" 65 | git config commit.gpgsign false 66 | git commit --no-verify --quiet -m "added first file" 67 | "${LOCAL}/metrics/irca.sh" "./${file1}" "t1" 68 | grep "irca 1 " "t1" # There is only one committer in repo 69 | # From here on every commit is made under a new identity. 70 | git config user.email 'foo2@example.com' 71 | git config user.name 'Foo2' 72 | file2="two.java" 73 | touch "${file2}" 74 | git add "${file2}" 75 | git commit --no-verify --quiet -m "added second file" 76 | "${LOCAL}/metrics/irca.sh" "./${file2}" "t2" 77 | grep "irca 0.5 " "t2" # There are two committers in repo and one for the given file 78 | 79 | git config user.email 'foo3@example.com' 80 | git config user.name 'Foo3' 81 | file3="three.java" 82 | touch "${file3}" 83 | git add "${file3}" 84 | git commit --no-verify --quiet -m "added third file" 85 | "${LOCAL}/metrics/irca.sh" "./${file3}" "t3" 86 | grep "irca 0.33 " "t3" # There are three committers in repo and one for the given file 87 | } >"${stdout}" 2>&1 88 | echo "👍🏻 Correctly calculated the IRCA" 89 | -------------------------------------------------------------------------------- /tests/steps/aggregation-functions/test-mean.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | # Tests for steps/aggregation-functions/mean.sh, invoked as: <input csv> <output dir> <metric name>; $2 receives stdout and stderr of every group. 6 | stdout=$2 7 | # Group 1: a single row "42.0000" must be reported with three decimals. 8 | { 9 | dir="${TARGET}" 10 | mkdir -p "${dir}" 11 | touch "${dir}/LCOM5.csv" 12 | echo "repo,java_file,LCOM5" > "${dir}/LCOM5.csv" 13 | echo "kek,src/main/kek,42.0000" >> "${dir}/LCOM5.csv" 14 | "${LOCAL}/steps/aggregation-functions/mean.sh" "${dir}/LCOM5.csv" "${TARGET}/data/aggregation" "LCOM5" 15 | test -e "${TARGET}/data/aggregation/LCOM5.mean.csv" 16 | mean_value=$(cat "${TARGET}/data/aggregation/LCOM5.mean.csv") 17 | test "$mean_value" = 
"42.000" 18 | } > "${stdout}" 2>&1 19 | echo "👍🏻 Single metric (LCOM5) mean calculated correctly" 20 | # Group 2: two metrics are aggregated independently into the same output directory. 21 | { 22 | dir1="${TARGET}" 23 | mkdir -p "${dir1}" 24 | touch "${dir1}/LCOM5.csv" 25 | echo "repo,java_file,LCOM5" > "${dir1}/LCOM5.csv" 26 | echo "kek,src/main/kek,42.000" >> "${dir1}/LCOM5.csv" 27 | touch "${dir1}/NHD.csv" 28 | echo "repo,java_file,NHD" > "${dir1}/NHD.csv" 29 | echo "kek,src/main/kek,1000.000" >> "${dir1}/NHD.csv" 30 | "${LOCAL}/steps/aggregation-functions/mean.sh" "${dir1}/LCOM5.csv" "${TARGET}/data/aggregation" "LCOM5" 31 | "${LOCAL}/steps/aggregation-functions/mean.sh" "${dir1}/NHD.csv" "${TARGET}/data/aggregation" "NHD" 32 | test -e "${TARGET}/data/aggregation/LCOM5.mean.csv" 33 | mean_value_lcom5=$(cat "${TARGET}/data/aggregation/LCOM5.mean.csv") 34 | test "$mean_value_lcom5" = "42.000" 35 | test -e "${TARGET}/data/aggregation/NHD.mean.csv" 36 | mean_value_nhd=$(cat "${TARGET}/data/aggregation/NHD.mean.csv") 37 | test "$mean_value_nhd" = "1000.000" 38 | } > "${stdout}" 2>&1 39 | echo "👍🏻 Multiple metrics (LCOM5, NHD) aggregated correctly" 40 | # Group 3: three rows; (42 + 35 + 50) / 3 printed with three decimals is 42.333. 41 | { 42 | dir1="${TARGET}" 43 | mkdir -p "${dir1}" 44 | touch "${dir1}/First.java.m.LCOM5" 45 | echo "repo,java_file,LCOM5" > "${dir1}/LCOM5.csv" 46 | { 47 | echo "kek,src/main/kek,42.000" 48 | echo "kek,src/main/kek,35.000" 49 | echo "kek,src/main/kek,50.000" 50 | } >> "${dir1}/LCOM5.csv" 51 | "${LOCAL}/steps/aggregation-functions/mean.sh" "${dir1}/LCOM5.csv" "${TARGET}/data/aggregation" "LCOM5" 52 | test -e "${TARGET}/data/aggregation/LCOM5.mean.csv" 53 | mean_value=$(cat "${TARGET}/data/aggregation/LCOM5.mean.csv") 54 | test "$mean_value" = "42.333" 55 | } > "${stdout}" 2>&1 56 | echo "👍🏻 Mixed metrics aggregated correctly (LCOM5)" 57 | # Group 4: a header-only input; the result file is expected to be absent afterwards. 58 | { 59 | dir="${TARGET}" 60 | mkdir -p "${dir}" 61 | touch "${dir}/Empty.java.m.LCOM5" 62 | echo "repo,java_file,LCOM5" > "${dir}/Empty.java.m.LCOM5" 63 | "${LOCAL}/steps/aggregation-functions/mean.sh" "${dir}/Empty.java.m.LCOM5" "${TARGET}/data/aggregation" 
"LCOM5" 64 | test ! -e "${TARGET}/data/aggregation/LCOM5.mean.csv" 65 | } > "${stdout}" 2>&1 66 | echo "👍🏻 Edge case with no data handled correctly" 67 | -------------------------------------------------------------------------------- /tests/steps/test-summarize.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | # Tests for steps/summarize.sh; $2 receives stdout and stderr of every group. The two names below contain spaces and shell/regex metacharacters. 6 | stdout=$2 7 | file_name="file -p (test ) \n\?.k" 8 | repo_name="comp_.lex\$repo[info]>" 9 | # Group 1: a repository directory without measurement files must leave data/summary empty. 10 | { 11 | echo "${TARGET}" 12 | rm -rf "${TARGET}/data" 13 | rm -rf "${TARGET}/measurements" 14 | rm -rf "${TARGET}/summary" 15 | rm -rf "${TARGET}/temp" 16 | dir1="${TARGET}/measurements/${repo_name}1" 17 | mkdir -p "${dir1}" 18 | "${LOCAL}/steps/summarize.sh" 19 | test -z "$(ls -A "${TARGET}/data/summary")" 20 | } > "${stdout}" 2>&1 21 | echo "👍🏻 Summarization step handled empty repository correctly" 22 | # Group 2: two files with LOC and CYC values; the rows checked are "<repo>,2,150" (50 + 100) and "<repo>,2,30" (10 + 20). 23 | { 24 | rm -rf "${TARGET}/data" 25 | rm -rf "${TARGET}/measurements" 26 | rm -rf "${TARGET}/summary" 27 | rm -rf "${TARGET}/temp" 28 | dir1="${TARGET}/measurements/${repo_name}1" 29 | mkdir -p "${TARGET}/temp" 30 | touch "${TARGET}/temp/repos-to-aggregate.txt" 31 | echo "${repo_name}1" >> "${TARGET}/temp/repos-to-aggregate.txt" 32 | mkdir -p "${dir1}" 33 | echo "50" > "${dir1}/${file_name}1.m.LOC" 34 | echo "100" > "${dir1}/${file_name}2.m.LOC" 35 | echo "10" > "${dir1}/${file_name}1.m.CYC" 36 | echo "20" > "${dir1}/${file_name}2.m.CYC" 37 | "${LOCAL}/steps/summarize.sh" 38 | test -e "${TARGET}/data/summary/LOC.csv" 39 | test -e "${TARGET}/data/summary/CYC.csv" 40 | grep -F "${repo_name}1,2,150" < "${TARGET}/data/summary/LOC.csv" 41 | grep -F "${repo_name}1,2,30" < "${TARGET}/data/summary/CYC.csv" 42 | } > "${stdout}" 2>&1 43 | echo "👍🏻 Summarization step handled multiple metrics correctly" 44 | # Group 3: two repositories, only the first one has a CYC value. 45 | { 46 | rm -rf "${TARGET}/data" 47 | rm -rf "${TARGET}/measurements" 
48 | rm -rf "${TARGET}/summary" 49 | rm -rf "${TARGET}/temp" 50 | dir1="${TARGET}/measurements/${repo_name}1" 51 | mkdir -p "${TARGET}/temp" 52 | touch "${TARGET}/temp/repos-to-aggregate.txt" 53 | echo "${repo_name}1" >> "${TARGET}/temp/repos-to-aggregate.txt" 54 | echo "${repo_name}2" >> "${TARGET}/temp/repos-to-aggregate.txt" 55 | mkdir -p "${dir1}" 56 | echo "50" > "${dir1}/${file_name}1.m.LOC" 57 | echo "100" > "${dir1}/${file_name}2.m.LOC" 58 | echo "10" > "${dir1}/${file_name}1.m.CYC" 59 | dir2="${TARGET}/measurements/${repo_name}2" 60 | mkdir -p "${dir2}" 61 | echo "25" > "${dir2}/${file_name}1.m.LOC" 62 | "${LOCAL}/steps/summarize.sh" 63 | test -e "${TARGET}/data/summary/LOC.csv" 64 | grep -F "${repo_name}1,2,150" < "${TARGET}/data/summary/LOC.csv" 65 | grep -F "${repo_name}2,1,25" < "${TARGET}/data/summary/LOC.csv" 66 | test -e "${TARGET}/data/summary/CYC.csv" 67 | grep -F "${repo_name}1,1,10" < "${TARGET}/data/summary/CYC.csv" 68 | if grep "${repo_name}2" < "${TARGET}/data/summary/CYC.csv"; then 69 | exit 1 70 | fi 71 | } > "${stdout}" 2>&1 72 | echo "👍🏻 Summarization step handled mixed metrics across repositories correctly" 73 | -------------------------------------------------------------------------------- /steps/aggregate-repo.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | 6 | repo=$1 7 | pos=$2 8 | total=$3 9 | metrics=$4 10 | 11 | start=$(date +%s%N) 12 | 13 | dir=${TARGET}/measurements/${repo} 14 | ddir=${TARGET}/data/${repo} 15 | if [ -e "${ddir}" ]; then 16 | declare -i found=0 17 | declare -i missing=0 18 | for m in ${metrics}; do 19 | if [ -e "${ddir}/${m}.csv" ]; then 20 | found=$((found+1)) 21 | else 22 | missing=$((missing+1)) 23 | fi 24 | done 25 | if [ "${missing}" = 0 ]; then 26 | echo "All ${found} metrics in ${repo} already aggregated 
(${pos}/${total}): ${ddir}" 27 | exit 28 | fi 29 | echo "Not all $((found+missing)) metrics aggregated in ${repo} (${pos}/${total}), ${missing} missing: ${ddir}" 30 | fi 31 | # Without a measurements directory there is nothing to read; the message names the directory that was checked. 32 | if [ ! -e "${dir}" ]; then 33 | echo "Nothing to aggregate in ${repo} (${pos}/${total}), no measurements in ${dir}" 34 | exit 35 | fi 36 | # Every "*.m" file stands for one measured Java file; its values are the sibling files "<name>.m.<METRIC>". 37 | mfiles=${TARGET}/temp/mfiles/${repo}.txt 38 | mkdir -p "$(dirname "${mfiles}")" 39 | find "${dir}" -type f -name '*.m' > "${mfiles}" 40 | # Paths are cut with literal prefix/suffix expansions, so regex or glob characters in repository names cannot change the result. 41 | sum=0 42 | declare -i files=0 43 | while IFS= read -r m; do 44 | slice=${TARGET}/temp/mfiles-slice/${repo}.txt 45 | mkdir -p "$(dirname "${slice}")" 46 | find "$(dirname "${m}")" -name "$(basename "${m}").*" -type f -print > "${slice}" 47 | while IFS= read -r v; do 48 | java=${v#"${dir}"}; java=${java%%.m.*} 49 | metric=${v#"${dir}${java}.m."} 50 | csv=${ddir}/${metric}.csv 51 | mkdir -p "$(dirname "${csv}")" 52 | if [ ! -e "${csv}" ]; then 53 | printf 'java_file,%s\n' "${metric}" > "${csv}" 54 | fi 55 | printf '%s,%s\n' "$(echo "${java}" | "${LOCAL}/help/to-csv.sh")" "$(cat "${v}")" >> "${csv}" 56 | done < "${slice}" 57 | csv=${ddir}/all.csv 58 | mkdir -p "$(dirname "${csv}")" 59 | if [ ! -e "${csv}" ]; then 60 | printf 'java_file' > "${csv}" 61 | for a in ${metrics}; do 62 | printf ",%s" "${a}" >> "${csv}" 63 | done 64 | printf '\n' >> "${csv}" 65 | fi 66 | java=${m#"${dir}"}; java=${java%.m} 67 | printf '%s' "$(echo "${java}" | "${LOCAL}/help/to-csv.sh")" >> "${csv}" 68 | for a in ${metrics}; do 69 | if [ -e "${m}.${a}" ]; then 70 | value=$("${LOCAL}/help/float.sh" < "${m}.${a}") 71 | printf ",%s" "${value}" >> "${csv}" 72 | if [ ! 
"${value}" = "NaN" ]; then 73 | sum=$(echo "${sum} + ${value}" | bc | "${LOCAL}/help/float.sh") 74 | fi 75 | else 76 | printf ',-' >> "${csv}" 77 | fi 78 | done 79 | printf '\n' >> "${csv}" 80 | files=$((files+1)) 81 | done < "${mfiles}" 82 | 83 | echo "${repo} (${pos}/${total}) aggregated (.m files=${files}, sum=${sum})$("${LOCAL}/help/tdiff.sh" "${start}")" 84 | -------------------------------------------------------------------------------- /steps/aggregate.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | # Three passes: aggregate-repo.sh per repository, aggregate-join.sh per data directory, then every script in aggregation-functions/ per metric. 6 | start=$(date +%s%N) 7 | # Metric names are the text after the last dot of every "*.m.*" file, sorted, de-duplicated and joined by spaces. 8 | metrics=$(find "${TARGET}/measurements" -type f -name '*.m.*' -print | sed "s|^.*\.\(.*\)$|\1|" | sort | uniq | tr '\n' ' ') 9 | echo "All $(echo "${metrics}" | wc -w | xargs) metrics (in alphanumeric order): ${metrics}" 10 | # Repositories are the directories exactly two levels below measurements/, stored relative to it. 11 | repos=${TARGET}/temp/repos-to-aggregate.txt 12 | mkdir -p "$(dirname "${repos}")" 13 | find "${TARGET}/measurements" -maxdepth 2 -mindepth 2 -type d -exec bash -c 'realpath --relative-to="${1}" "$2"' _ "${TARGET}/measurements" {} \; > "${repos}" 14 | total=$(wc -l < "${repos}" | xargs) 15 | # One shell-quoted (@Q) command line per repository is written to the jobs file handed to help/parallel.sh. 16 | jobs=${TARGET}/temp/jobs/aggregate-jobs.txt 17 | rm -rf "${jobs}" 18 | mkdir -p "$(dirname "${jobs}")" 19 | touch "${jobs}" 20 | 21 | declare -i repo=0 22 | sh="$(dirname "$0")/aggregate-repo.sh" 23 | while IFS= read -r r; do 24 | repo=$((repo+1)) 25 | printf "%s %s %s %s %s\n" "${sh@Q}" "${r@Q}" "${repo@Q}" "${total@Q}" "${metrics@Q}" >> "${jobs}" 26 | done < "${repos}" 27 | "${LOCAL}/help/parallel.sh" "${jobs}" 28 | wait 29 | # Start data/all.csv from scratch: the glob stays outside the quotes so it expands, and the header gets one column per metric. 30 | mkdir -p "${TARGET}/data" 31 | rm -f "${TARGET}/data/"*.csv 32 | all=${TARGET}/data/all.csv 33 | printf "repository,file" >> "${all}" 34 | for a in ${metrics}; do 35 | printf ',%s' "${a}" >> "${all}" 36 | done 37 | printf "\n" >> "${all}" 38 | 39 | echo "All $(wc -l 
< "${all}" | xargs) projects aggregated$("${LOCAL}/help/tdiff.sh" "${start}")" 40 | printf "\n" 41 | # Second pass: one aggregate-join.sh job per directory two levels below data/. 42 | jobs=${TARGET}/temp/jobs/aggregate-join-jobs.txt 43 | rm -rf "${jobs}" 44 | mkdir -p "$(dirname "${jobs}")" 45 | touch "${jobs}" 46 | declare -i repo=0 47 | sh="$(dirname "$0")/aggregate-join.sh" 48 | repos=${TARGET}/temp/repos-to-join.txt 49 | mkdir -p "$(dirname "${repos}")" 50 | find "${TARGET}/data" -maxdepth 2 -mindepth 2 -type d -print > "${repos}" 51 | while IFS= read -r d; do 52 | r=$(realpath --relative-to="${TARGET}/data" "${d}" ) 53 | repo=$((repo+1)) 54 | printf "%s %s %s %s %s\n" "${sh@Q}" "${r@Q}" "${d@Q}" "${repo@Q}" "${total@Q}" >> "${jobs}" 55 | done < "${repos}" 56 | "${LOCAL}/help/parallel.sh" "${jobs}" 57 | wait 58 | # Third pass: every aggregation function is scheduled for every metric that has a data/<metric>.csv; all four fields are shell-quoted. 59 | mkdir -p "${TARGET}/data/aggregation" 60 | f_jobs=${TARGET}/temp/jobs/aggregate-function-jobs.txt 61 | rm -rf "${f_jobs}" 62 | mkdir -p "$(dirname "${f_jobs}")" 63 | touch "${f_jobs}" 64 | 65 | for metric in ${metrics}; do 66 | metric_file="${TARGET}/data/${metric}.csv" 67 | if [[ -f "${metric_file}" ]]; then 68 | output_folder="${TARGET}/data/aggregation" 69 | for sh_script in "${LOCAL}/steps/aggregation-functions/"*.sh; do 70 | if [[ -f "${sh_script}" ]]; then 71 | printf "%s %s %s %s\n" "${sh_script@Q}" "${metric_file@Q}" "${output_folder@Q}" "${metric@Q}" >> "${f_jobs}" 72 | fi 73 | done 74 | fi 75 | done 76 | "${LOCAL}/help/parallel.sh" "${f_jobs}" 77 | wait 78 | 79 | echo "All metrics aggregated and joined in ${total} repositories$("${LOCAL}/help/tdiff.sh" "${start}")" 80 | -------------------------------------------------------------------------------- /tests/steps/aggregation-functions/test-median.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | # Tests for steps/aggregation-functions/median.sh, invoked as: <input csv> <output dir> <metric name>. 5 | set -e -o pipefail 6 | # $2: file that receives stdout and stderr of every group. 7 | stdout=$2 8 | 9 | { 10 | dir="${TARGET}" 11 | mkdir -p "${dir}" 12 | touch 
"${dir}/LCOM5.csv" 13 | echo "repo,java_file,LCOM5" > "${dir}/LCOM5.csv" 14 | echo "kek,src/main/kek,42.0000" >> "${dir}/LCOM5.csv" 15 | "${LOCAL}/steps/aggregation-functions/median.sh" "${dir}/LCOM5.csv" "${TARGET}/data/aggregation" "LCOM5" 16 | test -e "${TARGET}/data/aggregation/LCOM5.median.csv" 17 | median_value=$(cat "${TARGET}/data/aggregation/LCOM5.median.csv") 18 | test "$median_value" = "42.000" 19 | } > "${stdout}" 2>&1 20 | echo "👍🏻 Single metric (LCOM5) median calculated correctly" 21 | # Group 2: two metrics are aggregated independently into the same output directory. 22 | { 23 | dir1="${TARGET}" 24 | mkdir -p "${dir1}" 25 | touch "${dir1}/LCOM5.csv" 26 | echo "repo,java_file,LCOM5" > "${dir1}/LCOM5.csv" 27 | echo "kek,src/main/kek,42.000" >> "${dir1}/LCOM5.csv" 28 | touch "${dir1}/NHD.csv" 29 | echo "repo,java_file,NHD" > "${dir1}/NHD.csv" 30 | echo "kek,src/main/kek,1000.000" >> "${dir1}/NHD.csv" 31 | "${LOCAL}/steps/aggregation-functions/median.sh" "${dir1}/LCOM5.csv" "${TARGET}/data/aggregation" "LCOM5" 32 | "${LOCAL}/steps/aggregation-functions/median.sh" "${dir1}/NHD.csv" "${TARGET}/data/aggregation" "NHD" 33 | test -e "${TARGET}/data/aggregation/LCOM5.median.csv" 34 | median_value_lcom5=$(cat "${TARGET}/data/aggregation/LCOM5.median.csv") 35 | test "$median_value_lcom5" = "42.000" 36 | test -e "${TARGET}/data/aggregation/NHD.median.csv" 37 | median_value_nhd=$(cat "${TARGET}/data/aggregation/NHD.median.csv") 38 | test "$median_value_nhd" = "1000.000" 39 | } > "${stdout}" 2>&1 40 | echo "👍🏻 Multiple metrics (LCOM5, NHD) aggregated correctly" 41 | # Group 3: three rows (42, 35, 50); the middle value 42 is expected. 42 | { 43 | dir1="${TARGET}" 44 | mkdir -p "${dir1}" 45 | touch "${dir1}/LCOM5.csv" 46 | echo "repo,java_file,LCOM5" > "${dir1}/LCOM5.csv" 47 | { 48 | echo "kek,src/main/kek,42.000" 49 | echo "kek,src/main/kek,35.000" 50 | echo "kek,src/main/kek,50.000" 51 | } >> "${dir1}/LCOM5.csv" 52 | "${LOCAL}/steps/aggregation-functions/median.sh" "${dir1}/LCOM5.csv" "${TARGET}/data/aggregation" "LCOM5" 53 | test -e "${TARGET}/data/aggregation/LCOM5.median.csv" 54 | median_value=$(cat 
"${TARGET}/data/aggregation/LCOM5.median.csv") 55 | test "$median_value" = "42.000" 56 | 57 | } > "${stdout}" 2>&1 58 | echo "👍🏻 Mixed metrics aggregated correctly (LCOM5)" 59 | # Group 4: a header-only input is expected to yield 0.000. 60 | { 61 | dir="${TARGET}" 62 | mkdir -p "${dir}" 63 | touch "${dir}/Empty.java.m.LCOM5" 64 | echo "repo,java_file,LCOM5" > "${dir}/Empty.java.m.LCOM5" 65 | "${LOCAL}/steps/aggregation-functions/median.sh" "${dir}/Empty.java.m.LCOM5" "${TARGET}/data/aggregation" "LCOM5" 66 | test -e "${TARGET}/data/aggregation/LCOM5.median.csv" 67 | median_value=$(cat "${TARGET}/data/aggregation/LCOM5.median.csv") 68 | test "${median_value}" = "0.000" 69 | 70 | } > "${stdout}" 2>&1 71 | echo "👍🏻 Edge case with no data handled correctly" 72 | -------------------------------------------------------------------------------- /tests/steps/test-report.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | set -e -o pipefail 5 | # Tests for steps/report.sh; $2 receives stdout and stderr of every group. 6 | stdout=$2 7 | # If tlmgr is not on PATH, the directory printed by help/texlive-bin.sh is appended to PATH. 8 | if ! 
tlmgr --version >/dev/null 2>&1; then 9 | PATH=$PATH:$("${LOCAL}/help/texlive-bin.sh") 10 | export PATH 11 | fi 12 | # Group 1: the three tools the report needs must be callable. 13 | { 14 | pdflatex -v 15 | pdftotext -v 16 | latexmk --version 17 | } > "${stdout}" 2>&1 18 | echo "👍🏻 Dependencies are available" 19 | # Group 2: minimal inputs for report.sh; repositories.csv is written with a real line break (the format string expands \n) and a final newline. 20 | { 21 | date +%s%N > "${TARGET}/start.txt" 22 | mkdir -p "${TARGET}/temp" 23 | printf 'repo,branch\nyegor256/jaxec,master\n' > "${TARGET}/repositories.csv" 24 | echo "nothing" > "${TARGET}/temp/repo-details.tex" 25 | mkdir -p "${TARGET}/temp/reports" 26 | mkdir -p "${TARGET}/data" 27 | mkdir -p "${TARGET}/github" 28 | echo "\\item foo" > "${TARGET}/temp/reports/foo.tex" 29 | : > "${TARGET}/temp/jpeek_failure.log" 30 | : > "${TARGET}/temp/jpeek_success.log" 31 | "${LOCAL}/steps/report.sh" 32 | test -e "${TARGET}/report.pdf" 33 | pdftotext "${TARGET}/report.pdf" "${TARGET}/report.txt" 34 | txt=$(cat "${TARGET}/report.txt") 35 | echo "${txt}" | grep "yegor256/cam" 36 | } > "${stdout}" 2>&1 37 | echo "👍🏻 A PDF report generated correctly" 38 | # Group 3: the first space-separated field of every line must look like "\item\ff{NAME}:". 39 | { 40 | while IFS= read -r t; do 41 | metric=$(echo "${t}" | cut -f1 -d' ') 42 | echo "${metric}" | grep '^\\item\\ff{[a-zA-Z0-9-]\+}:$' > /dev/null 43 | done < "${TARGET}/temp/list-of-metrics.tex.unstructured" 44 | } > "${stdout}" 2>&1 45 | echo "👍🏻 A list of metrics is properly formatted" 46 | # Group 4: a generated metric script emits Test-2..Test-5 in "Test group 0/1"; report.sh is run with LOCAL_METRICS pointing at it. 47 | { 48 | mkdir -p "${TARGET}/temp/test_metric" 49 | test_metric_sh="#!/usr/bin/env bash 50 | \n\n" 51 | test_metric_sh+="output=\$(realpath \"\$2\")\n" 52 | test_metric_sh+="for idx in {2..5}; do\n" 53 | test_metric_sh+=" echo \"Test-\${idx} 0 [Test group \$((idx % 2))] Test metrics\" >> \"\${output}\"\n" 54 | test_metric_sh+="done\n" 55 | printf "%b" "$test_metric_sh" > "${TARGET}/temp/test_metric/group_test.sh" 56 | chmod +x "${TARGET}/temp/test_metric/group_test.sh" 57 | LOCAL_METRICS="${TARGET}/temp/test_metric" "${LOCAL}/steps/report.sh" 58 | test -e "${TARGET}/report.pdf" 59 | pdftotext "${TARGET}/report.pdf" "${TARGET}/report.txt" 60 | txt=$(cat "${TARGET}/report.txt") 
61 | actual=$(echo "${txt}" | grep -c '.*Test group [0-9]\+' || true) # grep -c exits 1 on zero matches; keep going so the message below is printed 62 | if [ "$actual" != "2" ]; then 63 | echo "Exactly 2 test group names were expected, but ${actual} were actually found" 64 | exit 1 65 | fi 66 | awk ' 67 | /Test group 0/ { in_group_0 = 1; in_group_1 = 0 } 68 | /Test group 1/ { in_group_0 = 0; in_group_1 = 1 } 69 | in_group_0 && /Test-(2|4): Test metrics/ { group_0_valid++ } 70 | in_group_1 && /Test-(3|5): Test metrics/ { group_1_valid++ } 71 | END { 72 | if (group_0_valid != 2 || group_1_valid != 2) { 73 | printf "Expected 2 valid metrics in each group, but found %d in group 0 and %d in group 1\n", group_0_valid, group_1_valid 74 | exit 1 75 | } 76 | } 77 | ' <<< "$txt" 78 | } > "${stdout}" 2>&1 79 | echo "👍🏻 Grouping is properly formatted for the list of metrics." 80 | -------------------------------------------------------------------------------- /fixtures/jaxec/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 6 | 7 | 4.0.0 8 | 9 | com.jcabi 10 | parent 11 | 0.70.0 12 | 13 | com.yegor256 14 | jaxec 15 | 1.0-SNAPSHOT 16 | jar 17 | jaxec 18 | Simple command line executor from Java 19 | https://github.com/yegor256/jaxec 20 | 2023 21 | 22 | yegor256 23 | https://www.yegor256.com 24 | 25 | 26 | 27 | MIT 28 | https://raw.githubusercontent.com/yegor256/jaxec/master/LICENSE.txt 29 | site 30 | 31 | 32 | 33 | 34 | 1 35 | Yegor Bugayenko 36 | yegor256@gmail.com 37 | yegor256.com 38 | https://www.yegor256.com 39 | 40 | Architect 41 | Developer 42 | 43 | +3 44 | 45 | 46 | 47 | GitHub 48 | https://github.com/yegor256/jaxec/issues 49 | 50 | 51 | scm:git:git@github.com:yegor256/jaxec.git 52 | scm:git:git@github.com:yegor256/jaxec.git 53 | https://github.com/yegor256/jaxec 54 | 55 | 56 | rultor 57 | https://www.rultor.com/s/yegor256/jaxec 58 | 59 | 60 | 61 | github-pages 62 | https://github.com/yegor256/jaxec 63 | 64 | 65 | 66 | 67 | com.jcabi 68 | jcabi-log 69 | 0.24.3 70 | 71 | 72 | 73 | 74 | qulice 75 | 76 | 77 | 
78 | com.qulice 79 | qulice-maven-plugin 80 | 0.24.3 81 | 82 | 83 | checkstyle:/src/site/resources/.* 84 | duplicatefinder:.* 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 2 | # SPDX-License-Identifier: MIT 3 | 4 | FROM ubuntu:24.04 5 | # Keeps apt from prompting during the build. 6 | ENV DEBIAN_FRONTEND=noninteractive 7 | # Every RUN below is executed by bash with -e and pipefail. 8 | SHELL ["/bin/bash", "-eo", "pipefail", "-c"] 9 | 10 | # Build essentials that are required later. NOTE(review): build-essential=12.* is listed twice in this RUN; one entry can go. 11 | RUN apt-get update -y --fix-missing \ 12 | && apt-get -y install --no-install-recommends \ 13 | build-essential=12.* \ 14 | software-properties-common=0.* \ 15 | make=4.* \ 16 | wget=1.* \ 17 | libssl-dev=3.* \ 18 | openssl=3.* \ 19 | gpg-agent=2.* \ 20 | zip=3.* \ 21 | unzip=6.* \ 22 | tree=2.* \ 23 | parallel=* \ 24 | bc=1.* \ 25 | cloc=1.* \ 26 | jq=1.* \ 27 | shellcheck=0.* \ 28 | aspell=0.* \ 29 | xmlstarlet=1.* \ 30 | xpdf=3.* \ 31 | coreutils=* \ 32 | gawk=* \ 33 | git=1:2.* \ 34 | libxml2-utils=2.* \ 35 | build-essential=12.* \ 36 | cmake=3.* \ 37 | libfreetype-dev=* \ 38 | pkg-config=* \ 39 | libfontconfig-dev=2.* \ 40 | libjpeg-dev=* \ 41 | libopenjp2-7-dev=2.* \ 42 | && apt-get clean \ 43 | && rm -rf /var/lib/apt/lists/* 44 | 45 | # Inkscape, installed from the inkscape.dev/stable PPA 46 | RUN apt-get update -y --fix-missing \ 47 | && add-apt-repository -y ppa:inkscape.dev/stable \ 48 | && apt-get update -y \ 49 | && apt-get -y install --no-install-recommends \ 50 | inkscape=1:1.* \ 51 | && apt-get clean \ 52 | && rm -rf /var/lib/apt/lists/* 53 | 54 | # Ruby 55 | RUN apt-get update -y --fix-missing \ 56 | && apt-get -y install --no-install-recommends \ 57 | ruby-full=1:3.* \ 58 | && apt-get clean \ 59 | && rm -rf /var/lib/apt/lists/* 60 | 61 | # Java + Maven 62 | RUN apt-get update -y --fix-missing \ 63 | && apt-get -y install --no-install-recommends \ 64 | openjdk-17-jdk=17.* \ 
65 | maven=3.* \ 66 | && apt-get clean \ 67 | && rm -rf /var/lib/apt/lists/* 68 | 69 | # Python, with the deadsnakes PPA added before the install 70 | RUN add-apt-repository -y ppa:deadsnakes/ppa \ 71 | && apt-get update -y --fix-missing \ 72 | && apt-get -y install --no-install-recommends \ 73 | python3=* \ 74 | python3-venv=* \ 75 | python3-pip=* \ 76 | python3-dev=* \ 77 | && apt-get clean \ 78 | && rm -rf /var/lib/apt/lists/* 79 | # Only the files the installers need are copied first; the whole tree follows in the final COPY. NOTE(review): confirm steps/install.sh still exists, the installers used below live under installs/. 80 | WORKDIR /cam 81 | COPY Makefile /cam 82 | COPY steps/install.sh /cam/steps/ 83 | COPY help/* /cam/help/ 84 | # LOCAL is set to the same path as WORKDIR. 85 | ENV LOCAL=/cam 86 | # Each installer is copied and run in its own pair of instructions. 87 | COPY installs/install-pmd.sh installs/ 88 | RUN installs/install-pmd.sh 89 | 90 | COPY installs/install-gradle.sh installs/ 91 | RUN installs/install-gradle.sh 92 | ENV GRADLE_LOCAL=/usr/local/gradle 93 | ENV PATH=$PATH:/usr/local/gradle/bin 94 | 95 | COPY installs/install-gems.sh installs/ 96 | RUN installs/install-gems.sh 97 | 98 | COPY installs/install-jpeek.sh installs/ 99 | ENV JPEEK=/opt/app/jpeek.jar 100 | RUN installs/install-jpeek.sh 101 | 102 | COPY installs/install-poppler.sh installs/ 103 | RUN installs/install-poppler.sh 104 | 105 | COPY requirements.txt /cam 106 | COPY installs/install-pip.sh installs/ 107 | RUN installs/install-pip.sh 108 | 109 | COPY installs/install-texlive-base.sh installs/ 110 | RUN installs/install-texlive-base.sh 111 | COPY DEPENDS.txt /cam 112 | COPY installs/install-texlive-depends.sh installs/ 113 | RUN installs/install-texlive-depends.sh 114 | # The complete working tree is copied last. 115 | COPY . 
/cam 116 | -------------------------------------------------------------------------------- /metrics/multimetric.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # SPDX-FileCopyrightText: Copyright (c) 2021-2025 Yegor Bugayenko 3 | # SPDX-License-Identifier: MIT 4 | # Runs the multimetric tool on one Java file and writes five "NAME VALUE description" lines (HSD, HSE, HSV, MIdx, FOut). 5 | set -e -o pipefail 6 | # $1: Java file to analyse; $2: file that receives the metric lines. 7 | java=$1 8 | output=$2 9 | # Only the "overall" section of the JSON report is used; a copy is stored under ${TARGET}/temp. NOTE(review): that copy is not read again in this script and every run writes the same path. 10 | json=$(multimetric "${java}") 11 | body=$(echo "${json}" | jq '.overall') 12 | temp="${TARGET}/temp/multimetric.json" 13 | mkdir -p "$(dirname "${temp}")" 14 | echo "${body}" > "${temp}" 15 | cat <<EOT > "${output}" 16 | HSD $(echo "${body}" | jq '.halstead_difficulty' | "${LOCAL}/help/float.sh") \textbf{Halstead Difficulty}: This metric measures the difficulty of understanding a program based on the number of distinct operators and operands used. A higher Halstead Difficulty indicates that the program is harder to understand and maintain due to its complexity. It is calculated using the formula: \( \text{Difficulty} = \frac{n_1}{2} \times \frac{N_2}{n_2} \), where \( n_1 \) is the number of distinct operators, \( n_2 \) is the number of distinct operands, and \( N_2 \) is the total number of operands. See details: \href{https://en.wikipedia.org/wiki/Halstead_complexity_measures#Difficulty}{Halstead Difficulty on Wikipedia} 17 | HSE $(echo "${body}" | jq '.halstead_effort' | "${LOCAL}/help/float.sh") \textbf{Halstead Effort}: This metric estimates the total effort required to understand and implement a program based on its size and complexity. The higher the effort, the more difficult the program is to maintain and modify. Halstead Effort is calculated as \( \text{Effort} = \text{Difficulty} \times \text{Volume} \), where Difficulty is the Halstead Difficulty, and Volume represents the size of the program. A higher value reflects more effort required for comprehension and modification. 
See details: \href{https://en.wikipedia.org/wiki/Halstead_complexity_measures#Effort}{Halstead Effort on Wikipedia} 18 | HSV $(echo "${body}" | jq '.halstead_volume' | "${LOCAL}/help/float.sh") \textbf{Halstead Volume}: This metric measures the size of a program based on its operators and operands. Halstead Volume estimates the amount of mental effort required to understand the code. It is calculated as \( \text{Volume} = (N_1 + N_2) \times \log_2 (n_1 + n_2) \), where \( N_1 \) and \( N_2 \) are the total number of operators and operands, and \( n_1 \) and \( n_2 \) are the number of distinct operators and operands, respectively. A larger volume indicates a larger and potentially more complex program. See details: \href{https://en.wikipedia.org/wiki/Halstead_complexity_measures#Volume}{Halstead Volume on Wikipedia} 19 | MIdx $(echo "${body}" | jq '.maintainability_index' | "${LOCAL}/help/float.sh") \textbf{Maintainability Index}: This metric is used to assess the maintainability of a software system based on its complexity and readability. It is a composite measure that takes into account various code metrics such as lines of code, cyclomatic complexity, and Halstead volume. The higher the Maintainability Index, the more maintainable the code is considered to be. The index is typically calculated using the formula: \( \text{MI} = 171 - 5.2 \ln(V) - 0.23 G - 16.2 \ln(\text{LOC}) \), where \( V \) is the Halstead Volume, \( G \) is the cyclomatic complexity, and LOC is the number of lines of code. 20 | FOut $(echo "${body}" | jq '.fanout_external' | "${LOCAL}/help/float.sh") \textbf{Fan-Out}: This metric measures the extent to which a class or module depends on external components or other classes. A higher Fan-Out indicates that the class has a greater number of dependencies, which can lead to increased complexity and potential difficulties in maintaining the system. See details: \href{https://en.wikipedia.org/wiki/Fan-out_(software)}{Fan-Out on Wikipedia} 21 | EOT 22 | --------------------------------------------------------------------------------