├── tests ├── __init__.py ├── cli │ ├── __init__.py │ ├── test_formatter.py │ ├── test_meta_warning.py │ ├── test_warning_aggregator.py │ └── test_cbicov.py ├── util │ ├── __init__.py │ └── test_util.py ├── duplicates │ ├── cpu2 │ ├── cpu │ │ └── foo.cpp │ ├── gpu │ │ └── foo.cpp │ ├── __init__.py │ └── test_duplicates.py ├── metrics │ ├── __init__.py │ ├── test_divergence.py │ └── test_coverage.py ├── report │ ├── __init__.py │ └── test_summary_report.py ├── build-dir │ ├── foo.cpp │ ├── __init__.py │ └── test_build_dir.py ├── exclude │ ├── src │ │ ├── included.cpp │ │ ├── excluded_name.cpp │ │ ├── excluded_extension.f90 │ │ └── thirdparty │ │ │ └── library.cpp │ ├── __init__.py │ ├── commands.json │ └── test_exclude.py ├── comments │ ├── __init__.py │ ├── continuation.cpp │ ├── fortran.f90 │ └── test_comments.py ├── define │ ├── __init__.py │ ├── main.cpp │ └── test_define.py ├── disjoint │ ├── __init__.py │ ├── cpu_headers │ │ └── header.h │ ├── gpu_headers │ │ └── header.h │ ├── cpu.cpp │ ├── gpu.cpp │ └── test_disjoint.py ├── failure │ ├── __init__.py │ └── test_bignum.py ├── include │ ├── __init__.py │ ├── cpu_commands.json │ ├── gpu_commands.json │ ├── headers │ │ ├── cpu.h │ │ ├── gpu.h │ │ └── test.h │ ├── main.cpp │ └── test_include.py ├── lexer │ ├── __init__.py │ └── test_lexer.py ├── literals │ ├── __init__.py │ ├── main.cpp │ └── test_literals.py ├── nesting │ ├── __init__.py │ ├── main.cpp │ └── test_nesting.py ├── once │ ├── __init__.py │ ├── main.cpp │ ├── once.h │ └── test_once.py ├── parsers │ └── __init__.py ├── basic_asm │ ├── __init__.py │ ├── test.asm │ ├── test.ptx │ ├── lowercase.s │ ├── uppercase.S │ └── test_basic_asm.py ├── basic_fortran │ ├── __init__.py │ ├── test.f90 │ └── test_basic_fortran.py ├── files │ ├── __init__.py │ └── test_filetree_utils.py ├── multi_line │ ├── __init__.py │ ├── main.cpp │ └── test_multi_line.py ├── operators │ ├── __init__.py │ ├── main.cpp │ └── test_operators.py ├── preprocessor │ ├── __init__.py │ └── 
test_warnings.py ├── safe_write │ ├── __init__.py │ └── test_safe_write.py ├── schema │ ├── __init__.py │ ├── invalid_cbiconfig.toml │ ├── test.cpp │ ├── cbiconfig.toml │ ├── analysis.toml │ ├── invalid_analysis.toml │ ├── compile_commands.json │ └── invalid_compile_commands.json ├── source │ ├── __init__.py │ └── test_source.py ├── valid_path │ ├── __init__.py │ └── test_valid_path.py ├── code-base │ ├── __init__.py │ └── test_code_base.py ├── compilers │ ├── __init__.py │ └── test_actions.py ├── macro_expansion │ ├── __init__.py │ ├── macro_expansion-dendrogram.png │ ├── infinite_loop_test.cpp │ ├── defined_undefined_test.cpp │ ├── function_like_test.cpp │ └── max_level.cpp ├── source-tree │ ├── __init__.py │ └── test_source_tree.py ├── commented_directive │ ├── __init__.py │ ├── main.cpp │ └── test_commented_directive.py ├── compile-command │ ├── __init__.py │ └── test_compile_command.py └── compilation-database │ ├── __init__.py │ └── test_compilation_database.py ├── codebasin ├── coverage │ └── __init__.py ├── compilers │ ├── gnu.toml │ ├── clang.toml │ ├── nvidia.toml │ └── intel.toml ├── _detail │ └── __init__.py ├── schema │ ├── coverage.schema │ ├── analysis.schema │ ├── compilation-database.schema │ └── cbiconfig.schema ├── source.py ├── language.py └── tree.py ├── .gitignore ├── .github ├── ISSUE_TEMPLATE │ ├── config.yml │ ├── bug_report.yml │ └── feature_request.yml ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── commit-signoff-check.yml │ ├── run-precommit.yml │ ├── run-unittest.yml │ ├── coverage.yml │ └── check-coverage.py ├── docs ├── source │ ├── sample-code-base.zip │ ├── example-dendrogram.png │ ├── specialization-tree.png │ ├── notices-and-disclaimers.rst │ ├── conf.py │ ├── excluding-files.rst │ ├── sample-code-base.rst │ ├── cmd.rst │ ├── features.rst │ ├── index.rst │ ├── compilation-databases.rst │ ├── specialization.rst │ └── emulating-compiler-behavior.rst ├── sample-code-base │ └── src │ │ ├── third-party │ │ ├── library.h │ │ └── 
library.cpp │ │ ├── cpu │ │ └── foo.cpp │ │ ├── gpu │ │ └── foo.cpp │ │ ├── main.cpp │ │ └── CMakeLists.txt ├── README.md ├── Makefile └── make.bat ├── setup.py ├── SECURITY.md ├── MANIFEST.in ├── CITATION.cff ├── LICENSE ├── pyproject.toml ├── .pre-commit-config.yaml ├── README.md ├── CONTRIBUTING.md └── CODE_OF_CONDUCT.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/duplicates/cpu2: -------------------------------------------------------------------------------- 1 | cpu/ -------------------------------------------------------------------------------- /tests/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/report/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /codebasin/coverage/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/duplicates/cpu/foo.cpp: -------------------------------------------------------------------------------- 1 | void foo() {} 2 | -------------------------------------------------------------------------------- /tests/duplicates/gpu/foo.cpp: 
-------------------------------------------------------------------------------- 1 | void foo() {} 2 | -------------------------------------------------------------------------------- /tests/build-dir/foo.cpp: -------------------------------------------------------------------------------- 1 | void foo() { return; } 2 | -------------------------------------------------------------------------------- /tests/exclude/src/included.cpp: -------------------------------------------------------------------------------- 1 | #define INCLUDED 2 | -------------------------------------------------------------------------------- /tests/exclude/src/excluded_name.cpp: -------------------------------------------------------------------------------- 1 | #define EXCLUDED_NAME 2 | -------------------------------------------------------------------------------- /tests/exclude/src/excluded_extension.f90: -------------------------------------------------------------------------------- 1 | #define EXCLUDED_EXTENSION 2 | -------------------------------------------------------------------------------- /tests/exclude/src/thirdparty/library.cpp: -------------------------------------------------------------------------------- 1 | #define THIRDPARTY_LIBRARY 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.pyo 3 | *.pyc 4 | build/ 5 | dist/ 6 | *.egg-info/ 7 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: [] 3 | -------------------------------------------------------------------------------- /tests/comments/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019 Intel Corporation 2 | # 
SPDX-License-Identifier: BSD-3-Clause 3 | -------------------------------------------------------------------------------- /tests/define/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | -------------------------------------------------------------------------------- /tests/disjoint/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | -------------------------------------------------------------------------------- /tests/failure/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | -------------------------------------------------------------------------------- /tests/include/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | -------------------------------------------------------------------------------- /tests/lexer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | -------------------------------------------------------------------------------- /tests/literals/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | -------------------------------------------------------------------------------- /tests/nesting/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019 Intel Corporation 2 | # SPDX-License-Identifier: 
BSD-3-Clause 3 | -------------------------------------------------------------------------------- /tests/once/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | -------------------------------------------------------------------------------- /tests/parsers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | -------------------------------------------------------------------------------- /tests/basic_asm/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2021 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | -------------------------------------------------------------------------------- /tests/basic_fortran/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | -------------------------------------------------------------------------------- /tests/build-dir/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2021 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | -------------------------------------------------------------------------------- /tests/duplicates/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2021 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | -------------------------------------------------------------------------------- /tests/exclude/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019-2024 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 
-------------------------------------------------------------------------------- /tests/files/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019-2024 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | -------------------------------------------------------------------------------- /tests/multi_line/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | -------------------------------------------------------------------------------- /tests/operators/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | -------------------------------------------------------------------------------- /tests/preprocessor/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | -------------------------------------------------------------------------------- /tests/safe_write/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | -------------------------------------------------------------------------------- /tests/schema/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019-2024 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | -------------------------------------------------------------------------------- /tests/source/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019-2024 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 
-------------------------------------------------------------------------------- /tests/valid_path/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | -------------------------------------------------------------------------------- /tests/code-base/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019-2024 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | -------------------------------------------------------------------------------- /tests/compilers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019-2024 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | -------------------------------------------------------------------------------- /tests/macro_expansion/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | -------------------------------------------------------------------------------- /tests/schema/invalid_cbiconfig.toml: -------------------------------------------------------------------------------- 1 | [compiler.test_one] 2 | options = 1 3 | 4 | [compiler.test_two] 5 | options = 2 6 | -------------------------------------------------------------------------------- /tests/source-tree/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019-2024 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | -------------------------------------------------------------------------------- /docs/source/sample-code-base.zip: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/P3HPC/code-base-investigator/HEAD/docs/source/sample-code-base.zip -------------------------------------------------------------------------------- /tests/commented_directive/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | -------------------------------------------------------------------------------- /tests/compile-command/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019-2024 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | -------------------------------------------------------------------------------- /docs/source/example-dendrogram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/P3HPC/code-base-investigator/HEAD/docs/source/example-dendrogram.png -------------------------------------------------------------------------------- /tests/compilation-database/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019-2024 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | -------------------------------------------------------------------------------- /docs/source/specialization-tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/P3HPC/code-base-investigator/HEAD/docs/source/specialization-tree.png -------------------------------------------------------------------------------- /docs/sample-code-base/src/third-party/library.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2024 Intel Corporation 2 | // SPDX-License-Identifier: 0BSD 3 | void bar(); 4 | -------------------------------------------------------------------------------- 
/tests/disjoint/cpu_headers/header.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2019 Intel Corporation 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | void foo(); 5 | -------------------------------------------------------------------------------- /tests/disjoint/gpu_headers/header.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2019 Intel Corporation 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | void bar(); 5 | -------------------------------------------------------------------------------- /tests/schema/test.cpp: -------------------------------------------------------------------------------- 1 | // Empty test.cpp required to prevent a RuntimeError during the test. 2 | // All files referenced by the config file must exist. 3 | -------------------------------------------------------------------------------- /tests/macro_expansion/macro_expansion-dendrogram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/P3HPC/code-base-investigator/HEAD/tests/macro_expansion/macro_expansion-dendrogram.png -------------------------------------------------------------------------------- /tests/schema/cbiconfig.toml: -------------------------------------------------------------------------------- 1 | [compiler.test_one] 2 | options = [ 3 | "TEST_ONE" 4 | ] 5 | 6 | [compiler.test_two] 7 | options = [ 8 | "TEST_TWO" 9 | ] 10 | -------------------------------------------------------------------------------- /tests/include/cpu_commands.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "command": "/usr/bin/c++ -DCPU -DCOMPUTED_INCLUDE=\\\"headers/cpu.h\\\" -I headers/ main.cpp", 4 | "file": "main.cpp" 5 | } 6 | ] 7 | -------------------------------------------------------------------------------- /tests/include/gpu_commands.json: 
-------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "command": "/usr/bin/c++ -DGPU -DCOMPUTED_INCLUDE=\\\"headers/gpu.h\\\" -I headers/ main.cpp", 4 | "file": "main.cpp" 5 | } 6 | ] 7 | -------------------------------------------------------------------------------- /tests/schema/analysis.toml: -------------------------------------------------------------------------------- 1 | [codebase] 2 | exclude = [ 3 | "*.F90", 4 | "*.cu", 5 | ] 6 | 7 | [platform.one] 8 | commands = "one.json" 9 | 10 | [platform.two] 11 | commands = "two.json" 12 | -------------------------------------------------------------------------------- /tests/schema/invalid_analysis.toml: -------------------------------------------------------------------------------- 1 | [codebase] 2 | exclude = [ 3 | 1, 4 | 2, 5 | ] 6 | 7 | [platform.one] 8 | commands = "one.json" 9 | 10 | [platform.two] 11 | commands = "two.json" 12 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (C) 2019-24 Intel Corporation 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | 5 | from setuptools import setup 6 | 7 | if __name__ == "__main__": 8 | setup() 9 | -------------------------------------------------------------------------------- /tests/disjoint/cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2019 Intel Corporation 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | #include "header.h" 5 | 6 | int main(int argc, char* argv[]) 7 | { 8 | return 0; 9 | } 10 | -------------------------------------------------------------------------------- /tests/disjoint/gpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2019 Intel Corporation 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | #include 
"header.h" 5 | 6 | int main(int argc, char* argv[]) 7 | { 8 | return 0; 9 | } 10 | -------------------------------------------------------------------------------- /docs/sample-code-base/src/third-party/library.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2024 Intel Corporation 2 | // SPDX-License-Identifier: 0BSD 3 | #include 4 | 5 | void bar() 6 | { 7 | printf("Running third-party library code.\n"); 8 | } 9 | -------------------------------------------------------------------------------- /tests/once/main.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2019 Intel Corporation 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | #include "once.h" 5 | #include "once.h" 6 | 7 | void main(int argc, char* argv[]) 8 | { 9 | return 0; 10 | } 11 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Reporting a Vulnerability 4 | 5 | To report a security issue, please use the GitHub Security Advisory ["Report a Vulnerability"](https://github.com/P3HPC/code-base-investigator/security/advisories/new) tab. 
6 | -------------------------------------------------------------------------------- /tests/include/headers/cpu.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2019 Intel Corporation 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | #ifndef _CPU_H_ 5 | #define _CPU_H_ 6 | 7 | int another_cpu_specialization() 8 | { 9 | return 0; 10 | } 11 | 12 | #endif /* _CPU_H_ */ 13 | -------------------------------------------------------------------------------- /tests/once/once.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2019 Intel Corporation 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | #pragma once 5 | 6 | #ifdef PROCESSED_ONCE // this should never be true 7 | void foo() 8 | { 9 | return; 10 | }; 11 | #endif 12 | #define PROCESSED_ONCE 13 | -------------------------------------------------------------------------------- /tests/literals/main.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2019 Intel Corporation 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | #if 20110325ul >= 20100325ul 5 | void foo(); 6 | #endif 7 | 8 | #if 0xFF == 255 9 | void bar(); 10 | #endif 11 | 12 | #if 0b11 == 3 13 | void baz(); 14 | #endif 15 | -------------------------------------------------------------------------------- /codebasin/compilers/gnu.toml: -------------------------------------------------------------------------------- 1 | [compiler.gcc] 2 | 3 | [compiler."g++"] 4 | alias_of = "gcc" 5 | 6 | [[compiler.gcc.parser]] 7 | flags = ["-fopenmp"] 8 | action = "append_const" 9 | dest = "modes" 10 | const = "openmp" 11 | 12 | [[compiler.gcc.modes]] 13 | name = "openmp" 14 | defines = ["_OPENMP"] 15 | -------------------------------------------------------------------------------- /tests/macro_expansion/infinite_loop_test.cpp: -------------------------------------------------------------------------------- 
1 | // Copyright (C) 2019 Intel Corporation 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | #define BAR 1 5 | #define FOO BAR 6 | #undef BAR 7 | #define BAR FOO 8 | 9 | #if FOO == 1 10 | void my_func(); 11 | #else 12 | void other_func(); 13 | #endif 14 | -------------------------------------------------------------------------------- /tests/schema/compile_commands.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "arguments": [ 4 | "gcc", 5 | "-c", 6 | "-o", 7 | "output", 8 | "test.cpp" 9 | ], 10 | "directory": "/path/containing/source/files/", 11 | "file": "test.cpp" 12 | } 13 | ] 14 | -------------------------------------------------------------------------------- /tests/schema/invalid_compile_commands.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "arguments": [ 4 | "gcc", 5 | "-c", 6 | "-o", 7 | "output", 8 | "test.cpp" 9 | ], 10 | "directory": ["not", "a", "directory"], 11 | "file": "test.cpp" 12 | } 13 | ] 14 | -------------------------------------------------------------------------------- /tests/include/headers/gpu.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2019 Intel Corporation 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | #ifndef _GPU_H_ 5 | #define _GPU_H_ 6 | 7 | int another_gpu_specialization() 8 | { 9 | int retval = 0; // extra line to distinguish from cpu.h 10 | return retval; 11 | } 12 | 13 | #endif /* _GPU_H_ */ 14 | -------------------------------------------------------------------------------- /docs/sample-code-base/src/cpu/foo.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2024 Intel Corporation 2 | // SPDX-License-Identifier: 0BSD 3 | #include 4 | 5 | void foo() { 6 | #if __GNUC__ >= 13 7 | printf("Using a feature that is only available in GCC 13 and later.\n"); 8 | #endif 9 | printf("Running the rest of 
foo() on the CPU.\n"); 10 | } 11 | -------------------------------------------------------------------------------- /docs/sample-code-base/src/gpu/foo.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2024 Intel Corporation 2 | // SPDX-License-Identifier: 0BSD 3 | #include 4 | 5 | void foo() { 6 | #if __GNUC__ >= 13 7 | printf("Using a feature that is only available in GCC 13 and later.\n"); 8 | #endif 9 | printf("Running the rest of foo() on the GPU.\n"); 10 | } 11 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | The HTML documentation is built with Sphinx, which can be installed by 2 | following [these 3 | instructions](https://www.sphinx-doc.org/en/master/usage/installation.html). 4 | 5 | We use the [furo](https://github.com/pradyunsg/furo) theme, which must be 6 | installed separately. 7 | 8 | Other configuration options are documented in source/conf.py. 
9 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include codebasin/compilers/clang.toml 2 | include codebasin/compilers/gnu.toml 3 | include codebasin/compilers/intel.toml 4 | include codebasin/compilers/nvidia.toml 5 | include codebasin/schema/analysis.schema 6 | include codebasin/schema/compilation-database.schema 7 | include codebasin/schema/coverage.schema 8 | include codebasin/schema/cbiconfig.schema 9 | -------------------------------------------------------------------------------- /tests/include/headers/test.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2019 Intel Corporation 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | #ifndef _TEST_H_ 5 | #define _TEST_H_ 6 | 7 | #ifdef CPU 8 | int cpu_specialization() 9 | { 10 | return 0; 11 | } 12 | #else 13 | int gpu_specialization() 14 | { 15 | return 0; 16 | } 17 | #endif 18 | 19 | #endif /* _TEST_H_ */ 20 | -------------------------------------------------------------------------------- /tests/nesting/main.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2019 Intel Corporation 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | #define NESTING_TEST 5 | 6 | #ifdef CPU 7 | int foo() 8 | { 9 | #ifdef NESTING_TEST 10 | return 0; 11 | #endif 12 | } 13 | #endif 14 | 15 | #ifdef GPU 16 | int bar() 17 | { 18 | #ifdef NESTING_TEST 19 | return 1; 20 | #endif 21 | } 22 | #endif 23 | -------------------------------------------------------------------------------- /codebasin/_detail/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019-2025 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | """ 4 | This package contains implementation details that are not part of the public 5 | interface of 
Code Base Investigator. These implementation details are not 6 | intended to be used by other scripts, and should not be relied upon. 7 | """ 8 | import codebasin._detail.logging 9 | -------------------------------------------------------------------------------- /tests/basic_asm/test.asm: -------------------------------------------------------------------------------- 1 | ; Copyright (C) 2021 Intel Corporation 2 | ; SPDX-License-Identifier: BSD-3-Clause 3 | 4 | ## Another comment 5 | // Yet another comment 6 | %define something 7 | .data Something 8 | .globl Name 9 | 10 | mov %r0, %r1 11 | ;; comment 12 | pop 13 | 14 | LABEL0: 15 | // that other comment syntax 16 | add 4(%r0,10,%r1), %r15 17 | 18 | ret 19 | -------------------------------------------------------------------------------- /tests/basic_asm/test.ptx: -------------------------------------------------------------------------------- 1 | ; Copyright (C) 2021 Intel Corporation 2 | ; SPDX-License-Identifier: BSD-3-Clause 3 | 4 | ## Another comment 5 | // Yet another comment 6 | %define something 7 | .data Something 8 | .globl Name 9 | 10 | mov %r0, %r1 11 | ;; comment 12 | pop 13 | 14 | LABEL0: 15 | // that other comment syntax 16 | add 4(%r0,10,%r1), %r15 17 | 18 | ret 19 | -------------------------------------------------------------------------------- /tests/define/main.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2019 Intel Corporation 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | #define SOURCE_DEFINE 5 | #ifdef SOURCE_DEFINE 6 | int foo() // This version should be counted 7 | { 8 | return 0; 9 | } 10 | #endif 11 | 12 | #undef SOURCE_DEFINE 13 | #ifdef SOURCE_DEFINE 14 | int bar() // This version should not 15 | { 16 | return 1; 17 | } 18 | #endif 19 | -------------------------------------------------------------------------------- /tests/basic_asm/lowercase.s: 
-------------------------------------------------------------------------------- 1 | ; Copyright (C) 2021 Intel Corporation 2 | ; SPDX-License-Identifier: BSD-3-Clause 3 | 4 | ## Another comment 5 | // Yet another comment 6 | %define something 7 | .data Something 8 | .globl Name 9 | 10 | mov %r0, %r1 11 | ;; comment 12 | pop 13 | 14 | LABEL0: 15 | // that other comment syntax 16 | add 4(%r0,10,%r1), %r15 17 | 18 | ret 19 | -------------------------------------------------------------------------------- /tests/basic_asm/uppercase.S: -------------------------------------------------------------------------------- 1 | ; Copyright (C) 2021 Intel Corporation 2 | ; SPDX-License-Identifier: BSD-3-Clause 3 | 4 | ## Another comment 5 | // Yet another comment 6 | %define something 7 | .data Something 8 | .globl Name 9 | 10 | mov %r0, %r1 11 | ;; comment 12 | pop 13 | 14 | LABEL0: 15 | // that other comment syntax 16 | add 4(%r0,10,%r1), %r15 17 | 18 | ret 19 | -------------------------------------------------------------------------------- /docs/sample-code-base/src/main.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2024 Intel Corporation 2 | // SPDX-License-Identifier: 0BSD 3 | #include 4 | #include "third-party/library.h" 5 | void foo(); 6 | 7 | int main(int argc, char* argv[]) 8 | { 9 | #if !defined(GPU_OFFLOAD) 10 | printf("Running on the CPU.\n"); 11 | #else 12 | printf("Running on the GPU.\n"); 13 | #endif 14 | foo(); 15 | bar(); 16 | } 17 | -------------------------------------------------------------------------------- /tests/include/main.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2019 Intel Corporation 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | #include 5 | #include 6 | 7 | #include "test.h" 8 | #include "test.h" // repeated include to test handling of guards 9 | #include "missing.h" // missing include to test file handling 10 | 11 | 
#include COMPUTED_INCLUDE 12 | 13 | void main(int argc, char* argv[]) 14 | { 15 | return 0; 16 | } 17 | -------------------------------------------------------------------------------- /tests/multi_line/main.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2019 Intel Corporation 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | #define FOO 0 5 | #define BAR 10 6 | 7 | #if FOO < 1 \ 8 | && BAR == 10 9 | int foo() 10 | { 11 | return 0; 12 | } 13 | #endif 14 | 15 | #if FOO == 1 \ 16 | && BAR >= 2 17 | int bar() 18 | { 19 | return 1; 20 | } 21 | #endif 22 | 23 | /\ 24 | * 25 | */ # /* 26 | */ defi\ 27 | ne FO\ 28 | O 10\ 29 | 20 30 | -------------------------------------------------------------------------------- /tests/macro_expansion/defined_undefined_test.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2019 Intel Corporation 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | #define CPU 1 5 | #define GPU 2 6 | 7 | #define ARCH GPU 8 | 9 | #undef CPU 10 | #undef GPU 11 | 12 | #if ARCH == 1 13 | 14 | void my_cpu_func() { 15 | 16 | } 17 | 18 | #elif ARCH == 2 19 | 20 | void my_gpu_func() { 21 | 22 | } 23 | 24 | #else 25 | 26 | #warning "ARCH Value is unexpected." 
27 | 28 | #endif 29 | 30 | -------------------------------------------------------------------------------- /codebasin/compilers/clang.toml: -------------------------------------------------------------------------------- 1 | [compiler.clang] 2 | 3 | [compiler."clang++"] 4 | alias_of = "clang" 5 | 6 | [[compiler.clang.parser]] 7 | flags = ["-fopenmp"] 8 | action = "append_const" 9 | dest = "modes" 10 | const = "openmp" 11 | 12 | [[compiler.clang.parser]] 13 | flags = ["-fsycl-is-device"] 14 | action = "append_const" 15 | dest = "defines" 16 | const = "__SYCL_DEVICE_ONLY__" 17 | 18 | [[compiler.clang.modes]] 19 | name = "openmp" 20 | defines = ["_OPENMP"] 21 | -------------------------------------------------------------------------------- /tests/exclude/commands.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "command": "/usr/bin/g++ included.cpp", 4 | "file": "src/included.cpp" 5 | }, 6 | { 7 | "command": "/usr/bin/g++ excluded_name.cpp", 8 | "file": "src/excluded_name.cpp" 9 | }, 10 | { 11 | "command": "/usr/bin/gfortran excluded_extension.f90", 12 | "file": "src/excluded_extension.f90" 13 | }, 14 | { 15 | "command": "/usr/bin/g++ library.cpp", 16 | "file": "src/thirdparty/library.cpp" 17 | } 18 | ] 19 | -------------------------------------------------------------------------------- /tests/commented_directive/main.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2019 Intel Corporation 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | // 4 | // This test should result in 4 Nodes. One FileNode with three children. The Children are a CodeNode, followed by a DefineNode, followed by another CodeNode. 
5 | 6 | void defines_and_comments() { 7 | 8 | #define FOO 1 9 | /* ***** 10 | #define FOO 0 11 | #define BAR 2 12 | #define BAZ 3 13 | * ******/ 14 | } 15 | 16 | #define FAR 4 17 | // #define NONE "" 18 | /* 19 | Blanks 20 | */ #define FAM 5 21 | -------------------------------------------------------------------------------- /docs/sample-code-base/src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Intel Corporation 2 | # SPDX-License-Identifier: 0BSD 3 | cmake_minimum_required(VERSION 3.5) 4 | project(tutorial) 5 | 6 | set(SOURCES main.cpp third-party/library.cpp) 7 | 8 | set(CMAKE_EXPORT_COMPILE_COMMANDS ON) 9 | 10 | option(GPU_OFFLOAD "Enable GPU offload." OFF) 11 | if (GPU_OFFLOAD) 12 | add_definitions("-D GPU_OFFLOAD=1") 13 | list(APPEND SOURCES gpu/foo.cpp) 14 | else() 15 | list(APPEND SOURCES cpu/foo.cpp) 16 | endif() 17 | 18 | add_executable(tutorial ${SOURCES}) 19 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # Related issues 2 | 3 | 13 | 14 | # Proposed changes 15 | 16 | 20 | 21 | - 22 | - 23 | - 24 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /.github/workflows/commit-signoff-check.yml: -------------------------------------------------------------------------------- 1 | name: DCO commit signoff check 2 | 3 | on: 4 | pull_request: 5 | branches: [ "main" ] 6 | 7 | jobs: 8 | check-commit-message: 9 | name: Check commit message 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Searching for "Signed-off" 13 | uses: gsactions/commit-message-checker@v2 14 | with: 15 | pattern: 'Signed-off-by: \S+( \S+)* <\S+@\S+>' 16 | error: "Commit message does not include DCO signature; please see CONTRIBUTING.md." 17 | excludeDescription: 'true' 18 | excludeTitle: 'true' 19 | checkAllCommitMessages: 'true' 20 | accessToken: ${{ secrets.GITHUB_TOKEN }} 21 | permissions: read-all 22 | -------------------------------------------------------------------------------- /tests/basic_fortran/test.f90: -------------------------------------------------------------------------------- 1 | ! Copyright (C) 2019 Intel Corporation 2 | ! SPDX-License-Identifier: BSD-3-Clause 3 | 4 | program test 5 | implicit none 6 | integer :: i 7 | 8 | ! There are 3 source lines above this counted to both platforms 9 | ! And 3 directives that count to both platforms (below) 10 | #if defined(GPU) 11 | ! 3 lines here count for the GPU 12 | i = 1 13 | i = i + i 14 | i = i * i 15 | #elif defined(CPU) 16 | ! 2 lines here count for the CPU 17 | i = 2 18 | i = i**i 19 | !#else 20 | ! The above else should be ignored 21 | ! i = -1 22 | #endif 23 | 24 | ! 2 more source lines to both platforms. 
25 | write(6,*) 'i = ', i 26 | end program test 27 | -------------------------------------------------------------------------------- /docs/source/notices-and-disclaimers.rst: -------------------------------------------------------------------------------- 1 | Notices and Disclaimers 2 | ####################### 3 | 4 | © Intel Corporation. Intel, the Intel logo, and other Intel marks are 5 | trademarks of Intel Corporation or its subsidiaries. Other names and brands may 6 | be claimed as the property of others. 7 | 8 | No license (express or implied, by estoppel or otherwise) to any intellectual 9 | property rights is granted by this document, with the sole exception that code 10 | included in this document is licensed subject to the Zero-Clause BSD open 11 | source license (0BSD), http://opensource.org/licenses/0BSD. 12 | 13 | License 14 | ======= 15 | 16 | Code Base Investigator (CBI) is licensed under the BSD-3-Clause License, and 17 | made available at http://github.com/P3HPC/code-base-investigator/. 
18 | -------------------------------------------------------------------------------- /.github/workflows/run-precommit.yml: -------------------------------------------------------------------------------- 1 | name: Run pre-commit hooks 2 | 3 | on: 4 | pull_request: 5 | branches: [ "main" ] 6 | 7 | jobs: 8 | pre-commit: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v3 12 | with: 13 | fetch-depth: 0 14 | - uses: actions/setup-python@v5 15 | name: Install Python 3.12 16 | with: 17 | python-version: "3.12" 18 | cache: 'pip' 19 | - run: pip install -U pip setuptools 20 | - uses: pre-commit/action@v3.0.1 21 | name: Configure and run pre-commit on changed files 22 | with: 23 | # the intention with this is to run pre-commit only 24 | # on the diff submitted with this PR 25 | extra_args: --color=always --from-ref ${{ github.event.pull_request.base.sha }} --to-ref ${{ github.event.pull_request.head.sha }} 26 | permissions: read-all 27 | -------------------------------------------------------------------------------- /tests/operators/main.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2019 Intel Corporation 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | #define OR A || B 5 | #define AND A && B 6 | #define RSHIFT A >> B 7 | #define LSHIFT A << B 8 | #define NEQUAL A != B 9 | #define GT_EQUAL A >= B 10 | #define LT_EQUAL A <= B 11 | #define EQUAL A == B 12 | #define CONCAT A ## B 13 | #define MINUS A - B 14 | #define PLUS A + B 15 | #define NOT ! A 16 | #define MULT A * B 17 | #define DIV A / B 18 | #define BIT_OR A | B 19 | #define BIT_AND A & B 20 | #define BIT_XOR A ^ B 21 | #define BIT_COMP A ~ B 22 | #define LT A < B 23 | #define GT A > B 24 | #define TERN A > B ? A : B 25 | #define ASSIGN A = B 26 | #define MOD A % B 27 | #define STRINGIFY #A 28 | 29 | #define PARENS ( A ) 30 | #define CURLY_BRACES { B } 31 | #define SQUARE_BRACES A[B] 32 | #define COMMA , 33 | #define PERIOD . 
34 | #define SEMI_COLON ; 35 | #define SINGLE_QUOTE '' 36 | #define DOUBLE_QUOTE "" 37 | 38 | -------------------------------------------------------------------------------- /tests/comments/continuation.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2019-2020 Intel Corporation 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | int i = \ 5 | 5; // comment \ 6 | lines \ 7 | more 8 | 9 | 10 | int x = \ 11 | 1 /* now what \ 12 | comment \ 13 | // \ 14 | */ +2/\ 15 | * hahaha */+3; 16 | 17 | "long // - - - string \ 18 | and \ 19 | \ 20 | \ 21 | stuff " 22 | 23 | \ char w[] = 24 | "confusing \"\ 25 | string \" \n\ 26 | \" \" \\ \/ /* \* */ "; \ 27 | "long - - - string \ 28 | and /* \" */ \ 29 | \ 30 | \ 31 | stuff " 32 | 33 | /* big block comment 34 | ** and so on 35 | ** and so on too 36 | */ 37 | 38 | '"' 39 | 40 | ''//what about this?\ 41 | d' 42 | 43 | '/' 44 | 45 | "'\"'" 46 | 47 | int foo(); /\ 48 | * hahahaha *\ 49 | / 50 | 51 | #warning Dangerous don't do this 52 | #warning "This is more safe" 53 | 54 | /* "Strings 'r' // Fun! 
*\ 55 | / 56 | -------------------------------------------------------------------------------- /codebasin/schema/coverage.schema: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft/2020-12/schema", 3 | "$id": "https://raw.githubusercontent.com/P3HPC/p3-analysis-library/main/p3/data/coverage.schema", 4 | "title": "Coverage", 5 | "description": "Lines of code used in each file of a code base.", 6 | "type": "array", 7 | "items": { 8 | "type": "object", 9 | "properties": { 10 | "file": { 11 | "type": "string" 12 | }, 13 | "id": { 14 | "type": "string" 15 | }, 16 | "used_lines": { 17 | "type": "array", 18 | "items": { 19 | "type": "integer" 20 | } 21 | }, 22 | "unused_lines": { 23 | "type": "array", 24 | "items": { 25 | "type": "integer" 26 | } 27 | } 28 | }, 29 | "required": [ 30 | "file", 31 | "id", 32 | "used_lines", 33 | "unused_lines" 34 | ] 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 
21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 2.0.0 2 | message: "If you use this software, please cite it as below." 3 | authors: 4 | - family-names: "Sewall" 5 | given-names: "Jason" 6 | orcid: "https://orcid.org/0000-0001-9482-3558" 7 | - family-names: "Pennycook" 8 | given-names: "John" 9 | orcid: "https://orcid.org/0000-0003-0237-3823" 10 | - family-names: "Jacobsen" 11 | given-names: "Douglas" 12 | orcid: "https://orcid.org/0000-0002-3836-207X" 13 | - family-names: "Lee" 14 | given-names: "Kin Long Kelvin" 15 | orcid: "https://orcid.org/0000-0002-1903-9242" 16 | title: "Code Base Investigator" 17 | version: 2.0.0 18 | date-released: "2025-05-16" 19 | doi: "10.5281/zenodo.15422620" 20 | identifiers: 21 | - description: Archive of all previous releases. 22 | type: doi 23 | value: "10.5281/zenodo.5018973" 24 | - description: Latest release. 25 | type: doi 26 | value: "10.5281/zenodo.15422620" 27 | url: "https://github.com/P3HPC/code-base-investigator" 28 | -------------------------------------------------------------------------------- /tests/comments/fortran.f90: -------------------------------------------------------------------------------- 1 | ! Copyright (C) 2019-2020 Intel Corporation 2 | ! SPDX-License-Identifier: BSD-3-Clause 3 | 4 | program foo 5 | 6 | #define my_fortran_macro() \ 7 | /*wow a comment*/ \ 8 | a = b - c /* another */ \ 9 | + b !FOO // "neat" /* hey look a c comment*/ 10 | 11 | integer a,b,c 12 | b = b & ! Comments after continuations 13 | ! 
no comment! 14 | + b 15 | !$ A directive 16 | 17 | write(*,*) "Fortran! /*Has*/ !Unique parsing semantics" 18 | !omp$ a different directive 19 | write(*,*) "& Fortran! has complex ways of dealing with (&) //ampersands&" 20 | !omp5% not a directives 21 | write(*,*) "Fortran! \& d \n & 22 | !Can be " 23 | &'quite' complex& 24 | !Mixin 25 | &"//"& 26 | !Mixin 27 | &with quoted continuations" 28 | 29 | my_fortran_macro() 30 | 31 | #if !defined(GPU) /*something*/ 32 | write(*,*) "directives" // "appending" 33 | #endif 34 | end program foo 35 | -------------------------------------------------------------------------------- /codebasin/compilers/nvidia.toml: -------------------------------------------------------------------------------- 1 | [compiler.nvcc] 2 | options = ["-D__NVCC__", "-D__CUDACC__"] 3 | 4 | [[compiler.nvcc.parser]] 5 | flags = ["-fopenmp"] 6 | action = "append_const" 7 | dest = "modes" 8 | const = "openmp" 9 | 10 | [[compiler.nvcc.parser]] 11 | flags = ["--gpu-architecture", "--gpu-code", "-gencode"] 12 | action = "extend_match" 13 | pattern = '(?:sm_|compute_)(\d+)' 14 | format = "sm_$value" 15 | dest = "passes" 16 | default = ["sm_70"] 17 | override = true 18 | 19 | [[compiler.nvcc.modes]] 20 | name = "openmp" 21 | defines = ["_OPENMP"] 22 | 23 | [[compiler.nvcc.passes]] 24 | name = "sm_70" 25 | defines = ["__CUDA_ARCH__=700"] 26 | 27 | [[compiler.nvcc.passes]] 28 | name = "sm_75" 29 | defines = ["__CUDA_ARCH__=750"] 30 | 31 | [[compiler.nvcc.passes]] 32 | name = "sm_80" 33 | defines = ["__CUDA_ARCH__=800"] 34 | 35 | [[compiler.nvcc.passes]] 36 | name = "sm_89" 37 | defines = ["__CUDA_ARCH__=890"] 38 | 39 | [[compiler.nvcc.passes]] 40 | name = "sm_90" 41 | defines = ["__CUDA_ARCH__=900"] 42 | -------------------------------------------------------------------------------- /codebasin/schema/analysis.schema: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": 
"https://json-schema.org/draft/2020-12/schema", 3 | "$id": "https://raw.githubusercontent.com/P3HPC/code-base-investigator/main/codebasin/schema/analysis.schema", 4 | "title": "Code Base Investigator Analysis File", 5 | "description": "Analysis options for Code Base Investigator.", 6 | "type": "object", 7 | "properties": { 8 | "codebase": { 9 | "type": "object", 10 | "properties": { 11 | "exclude": { 12 | "type": "array", 13 | "items": { 14 | "type": "string" 15 | } 16 | } 17 | } 18 | }, 19 | "platform": { 20 | "type": "object", 21 | "patternProperties": { 22 | ".*": { 23 | "type": "object", 24 | "properties": { 25 | "commands": { 26 | "type": "string" 27 | } 28 | }, 29 | "additionalProperties": false 30 | } 31 | } 32 | }, 33 | "additionalProperties": false 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /codebasin/schema/compilation-database.schema: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft/2020-12/schema", 3 | "$id": "https://raw.githubusercontent.com/P3HPC/code-base-investigator/main/codebasin/schema/compilation-database.schema", 4 | "title": "Compilation Database", 5 | "description": "Compilation database format used by many projects.", 6 | "type": "array", 7 | "items": { 8 | "type": "object", 9 | "properties": { 10 | "directory": { 11 | "type": "string" 12 | }, 13 | "arguments": { 14 | "type": "array", 15 | "items": { 16 | "type": "string" 17 | } 18 | }, 19 | "file": { 20 | "type": "string" 21 | }, 22 | "command": { 23 | "type": "string" 24 | }, 25 | "output": { 26 | "type": "string" 27 | } 28 | }, 29 | "anyOf": [ 30 | { 31 | "required": [ 32 | "arguments" 33 | ] 34 | }, 35 | { 36 | "required": [ 37 | "command" 38 | ] 39 | } 40 | ] 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /tests/comments/test_comments.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (C) 2019 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 4 | import os 5 | import unittest 6 | 7 | from codebasin import file_parser 8 | 9 | 10 | class TestExampleFortranFile(unittest.TestCase): 11 | """ 12 | Test handling of freeform Fortran 13 | """ 14 | 15 | def test_fortran_comments(self): 16 | rootdir = "./tests/comments/" 17 | parser = file_parser.FileParser(os.path.join(rootdir, "fortran.f90")) 18 | 19 | tree = parser.parse_file() 20 | self.assertEqual(tree.root.total_sloc, 20) 21 | 22 | 23 | class TestExampleCFile(unittest.TestCase): 24 | """ 25 | Test handling of C comments 26 | """ 27 | 28 | def test_c_comments(self): 29 | rootdir = "./tests/comments/" 30 | parser = file_parser.FileParser( 31 | os.path.join(rootdir, "continuation.cpp"), 32 | ) 33 | 34 | tree = parser.parse_file() 35 | self.assertEqual(tree.root.total_sloc, 25) 36 | 37 | 38 | if __name__ == "__main__": 39 | unittest.main() 40 | -------------------------------------------------------------------------------- /tests/valid_path/test_valid_path.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 4 | import unittest 5 | 6 | from codebasin import util 7 | 8 | 9 | class TestValidPath(unittest.TestCase): 10 | """ 11 | Test that valid_path correctly identifies null-byte, carriage return 12 | and line feed characters. 
13 | """ 14 | 15 | def test_valid(self): 16 | """Check that a valid path is accepted""" 17 | self.assertTrue(util.valid_path("/valid/path/")) 18 | 19 | def test_null_byte(self): 20 | """Check that a null-byte character is rejected""" 21 | self.assertFalse(util.valid_path("/invalid/\x00/path/")) 22 | 23 | def test_carriage_return(self): 24 | """Check that a carriage return character is rejected""" 25 | self.assertFalse(util.valid_path("/invalid/\r/path/")) 26 | 27 | def test_line_feed(self): 28 | """Check that a line feed character is rejected""" 29 | self.assertFalse(util.valid_path("/invalid/\n/path/")) 30 | 31 | 32 | if __name__ == "__main__": 33 | unittest.main() 34 | -------------------------------------------------------------------------------- /tests/macro_expansion/function_like_test.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2019 Intel Corporation 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | #define FOO 1 5 | #define BAR 2 6 | 7 | #define MAX(a,b) (a) >= (b) ? 
(a) : (b) 8 | 9 | double a, b; 10 | 11 | #if MAX(FOO, BAR) == 0 12 | void neither_foo_nor_bar() 13 | { 14 | a = b 15 | } 16 | #else 17 | void both_foo_and_bar() 18 | { 19 | a = 10; 20 | b = a; 21 | a = 15; 22 | return; 23 | } 24 | #endif 25 | 26 | #define _GLIBC_PREREQ(x) x 27 | 28 | #if _GLIBC_PREREQ(6) 29 | #else 30 | #error "#error "Shouldn't be true" 31 | #endif 32 | 33 | #if _UNDEFINED_GLIBC_PREREQ(6) 34 | #else 35 | #error "Shouldn't be true" 36 | #endif 37 | 38 | #if defined(GPU) 39 | #define ARCH AGPU 40 | #elif defined(CPU) 41 | #define ARCH ACPU 42 | #endif 43 | 44 | #define AGPU_WIDTH 32 45 | #define ACPU_WIDTH 16 46 | #define THE_WIDTH_IMPL(X) X ## _WIDTH 47 | 48 | #define THE_WIDTH(X) THE_WIDTH_IMPL(X) 49 | 50 | #if THE_WIDTH(ARCH) == 32 51 | #warn "That's a wide width" 52 | #endif 53 | 54 | #if THE_WIDTH(ARCH) == 16 55 | #warn "That's a wide width" 56 | #warn "But not as much" 57 | #endif 58 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | name: 🐛 Bug report 2 | description: Create a report to help us improve 3 | labels: bug 4 | 5 | body: 6 | - type: textarea 7 | id: expected-behavior 8 | attributes: 9 | label: Expected behavior 10 | description: What did you expect to happen? 11 | validations: 12 | required: true 13 | - type: textarea 14 | id: actual-behavior 15 | attributes: 16 | label: Actual behavior 17 | description: Paste the stack trace, screen clips of the problem, etc. 18 | validations: 19 | required: true 20 | - type: textarea 21 | id: reproduce 22 | attributes: 23 | label: Steps to reproduce the problem 24 | description: Enumerate and/or describe the steps to reproduce this problem. 
25 | validations: 26 | required: true 27 | - type: textarea 28 | id: specifications 29 | attributes: 30 | label: Specifications 31 | description: What version of the package are you using, and on what kind of hardware/software environment? Please consider including the output of `pip freeze` here. 32 | validations: 33 | required: true 34 | -------------------------------------------------------------------------------- /.github/workflows/run-unittest.yml: -------------------------------------------------------------------------------- 1 | name: CBI unittest 2 | 3 | permissions: read-all 4 | 5 | on: 6 | push: 7 | branches: [ "main" ] 8 | paths: 9 | - 'codebasin/**' 10 | - 'etc/**' 11 | - 'tests/**' 12 | - 'MANIFEST.in' 13 | - 'setup.py' 14 | pull_request: 15 | branches: [ "main" ] 16 | paths: 17 | - 'codebasin/**' 18 | - 'etc/**' 19 | - 'tests/**' 20 | - 'MANIFEST.in' 21 | - 'setup.py' 22 | 23 | jobs: 24 | build: 25 | runs-on: ubuntu-latest 26 | strategy: 27 | matrix: 28 | # currently just one version, but later releases can be 29 | # added as they're needed 30 | python-version: ["3.12"] 31 | 32 | steps: 33 | - uses: actions/checkout@v4 34 | - name: Set up Python ${{ matrix.python-version }} 35 | uses: actions/setup-python@v4 36 | with: 37 | python-version: ${{ matrix.python-version }} 38 | - name: Install `code-base-investigator` 39 | run: | 40 | python -m pip install -U pip 41 | pip install . 42 | - name: Run `unittest` 43 | run: | 44 | python -m unittest 45 | -------------------------------------------------------------------------------- /tests/failure/test_bignum.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 4 | import unittest 5 | 6 | from codebasin import preprocessor 7 | 8 | 9 | class TestBigNum(unittest.TestCase): 10 | """ 11 | Test ability to detect expressions with big numbers and prevent DoS. 
12 | """ 13 | 14 | def test_oversized_constant(self): 15 | """oversized constant""" 16 | with self.assertRaises(OverflowError): 17 | tokens = preprocessor.Lexer( 18 | "10000000000000000000000000000000000000", 19 | ).tokenize() 20 | preprocessor.ExpressionEvaluator(tokens).evaluate() 21 | 22 | def test_overflow(self): 23 | """integer overflow""" 24 | with self.assertRaises(OverflowError): 25 | tokens = preprocessor.Lexer( 26 | "0xFFFFFFFFFFFFFFFF * 0xFFFFFFFFFFFFFFFF", 27 | ).tokenize() 28 | preprocessor.ExpressionEvaluator(tokens).evaluate() 29 | 30 | def test_power(self): 31 | """integer power""" 32 | with self.assertRaises(preprocessor.ParseError): 33 | tokens = preprocessor.Lexer("* 10").tokenize() 34 | preprocessor.ExpressionEvaluator(tokens).evaluate() 35 | -------------------------------------------------------------------------------- /tests/preprocessor/test_warnings.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019-2024 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 4 | import logging 5 | import os 6 | import tempfile 7 | import unittest 8 | from pathlib import Path 9 | 10 | from codebasin import file_parser 11 | 12 | 13 | class TestPreprocessorWarnings(unittest.TestCase): 14 | """ 15 | Test that preprocessor generates warnings for weird corner cases. 
16 | """ 17 | 18 | def setUp(self): 19 | self.cwd = os.getcwd() 20 | 21 | def tearDown(self): 22 | os.chdir(self.cwd) 23 | 24 | def test_backslash_eof(self): 25 | """Check backslash-newline at EOF is only a warning""" 26 | tmp = tempfile.TemporaryDirectory() 27 | path = Path(tmp.name) 28 | os.chdir(tmp.name) 29 | 30 | with open(path / "test.hpp", mode="w") as f: 31 | f.write("#define BAD_MACRO \\\n") 32 | 33 | parser = file_parser.FileParser(path / "test.hpp") 34 | 35 | logging.disable(logging.NOTSET) 36 | logger = logging.getLogger("codebasin") 37 | with self.assertLogs(logger, level="WARNING") as cm: 38 | _ = parser.parse_file() 39 | logging.disable() 40 | self.assertRegex(cm.output[0], "backslash-newline at end of file") 41 | 42 | tmp.cleanup() 43 | -------------------------------------------------------------------------------- /tests/metrics/test_divergence.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019-2024 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 4 | import logging 5 | import math 6 | import unittest 7 | 8 | from codebasin.report import divergence 9 | 10 | 11 | class TestDivergence(unittest.TestCase): 12 | """ 13 | Test computation of code divergence. 
14 | """ 15 | 16 | def setUp(self): 17 | logging.disable() 18 | 19 | def test_divergence(self): 20 | """Check divergence computation for simple setmap.""" 21 | setmap = { 22 | frozenset(["A"]): 1, 23 | frozenset(["B"]): 2, 24 | frozenset(["A", "B"]): 3, 25 | frozenset([]): 4, 26 | } 27 | intersection = 3 28 | union = 1 + 2 + 3 29 | 30 | expected_divergence = intersection / union 31 | self.assertEqual(divergence(setmap), expected_divergence) 32 | 33 | def test_null_divergence(self): 34 | """Check divergence computation for null cases.""" 35 | setmap = { 36 | frozenset(""): 0, 37 | } 38 | self.assertTrue(math.isnan(divergence(setmap))) 39 | 40 | setmap = { 41 | frozenset("A"): 1, 42 | } 43 | self.assertTrue(math.isnan(divergence(setmap))) 44 | 45 | 46 | if __name__ == "__main__": 47 | unittest.main() 48 | -------------------------------------------------------------------------------- /tests/util/test_util.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019-2024 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 4 | import logging 5 | import unittest 6 | 7 | from codebasin.util import ensure_ext 8 | 9 | 10 | class TestUtil(unittest.TestCase): 11 | """ 12 | Test utility functions. 
13 | """ 14 | 15 | def setUp(self): 16 | logging.disable() 17 | 18 | def test_ensure_ext_validation(self): 19 | """Check ensure_ext raises expected errors""" 20 | with self.assertRaises(TypeError): 21 | ensure_ext("path.png", 1) 22 | 23 | with self.assertRaises(TypeError): 24 | ensure_ext("path.png", [1]) 25 | 26 | with self.assertRaises(TypeError): 27 | ensure_ext("path.png", [".png", 1]) 28 | 29 | with self.assertRaises(TypeError): 30 | not_a_path = 1 31 | ensure_ext(not_a_path, [".png"]) 32 | 33 | def test_ensure_ext(self): 34 | """Check ensure_ext correctness""" 35 | with self.assertRaises(ValueError): 36 | ensure_ext("path.jpg", [".png"]) 37 | 38 | ensure_ext("path.png", ".png") 39 | ensure_ext("path.png", [".png"]) 40 | ensure_ext("path.png", [".jpg", ".png"]) 41 | ensure_ext("path.tar.gz", [".tar.gz"]) 42 | 43 | 44 | if __name__ == "__main__": 45 | unittest.main() 46 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ----------------------------------------------------- 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | 9 | project = "Code Base Investigator" 10 | copyright = ( 11 | " Intel Corporation. Intel, the Intel logo, and other Intel marks are " 12 | + "trademarks of Intel Corporation or its subsidiaries. Other names and " 13 | + "brands may be claimed as the property of others." 
14 | ) 15 | author = "Intel Corporation" 16 | version = "2.0.0" 17 | release = "2.0.0" 18 | 19 | # -- General configuration --------------------------------------------------- 20 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 21 | 22 | extensions = ["sphinx.ext.mathjax", "sphinx_inline_tabs"] 23 | 24 | templates_path = ["_templates"] 25 | exclude_patterns = [] 26 | 27 | # -- Options for HTML output ------------------------------------------------- 28 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 29 | 30 | html_theme = "furo" 31 | html_static_path = ["_static"] 32 | html_title = "Code Base Investigator" 33 | -------------------------------------------------------------------------------- /tests/source/test_source.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019-2024 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 4 | import unittest 5 | from pathlib import Path 6 | 7 | import codebasin.source as source 8 | 9 | 10 | class TestSource(unittest.TestCase): 11 | """ 12 | Test functionality in the source module. 
13 | """ 14 | 15 | def test_is_source_file_string(self): 16 | """Check source file identification for string filenames""" 17 | self.assertTrue(source.is_source_file("file.cpp")) 18 | self.assertTrue(source.is_source_file("/path/to/file.cpp")) 19 | self.assertFalse(source.is_source_file("file.o")) 20 | self.assertFalse(source.is_source_file("/path/to/file.o")) 21 | 22 | def test_is_source_file_path(self): 23 | """Check source file identification for Path filenames""" 24 | self.assertTrue(source.is_source_file(Path("file.cpp"))) 25 | self.assertTrue(source.is_source_file(Path("/path/to/file.cpp"))) 26 | self.assertFalse(source.is_source_file(Path("file.o"))) 27 | self.assertFalse(source.is_source_file(Path("/path/to/file.o"))) 28 | 29 | def test_is_source_types(self): 30 | """Check type validation for is_source""" 31 | with self.assertRaises(TypeError): 32 | source.is_source_file(1) 33 | 34 | 35 | if __name__ == "__main__": 36 | unittest.main() 37 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- 1 | name: 🚀 Feature request 2 | description: Suggest new functionality 3 | labels: enhancement 4 | body: 5 | - type: textarea 6 | validations: 7 | required: true 8 | attributes: 9 | label: Feature/behavior summary 10 | description: Describe your feature or desired package behavior succinctly. 11 | - type: checkboxes 12 | validations: 13 | required: true 14 | attributes: 15 | label: Request attributes 16 | options: 17 | - label: Would this be a refactor of existing code? 18 | - label: Does this proposal require new package dependencies? 19 | - label: Would this change break backwards compatibility? 20 | - type: textarea 21 | validations: 22 | required: false 23 | attributes: 24 | label: Related issues 25 | description: Is your feature request related to any issue(s)? If so, please provide the issue number(s). 
26 | - type: textarea 27 | validations: 28 | required: true 29 | attributes: 30 | label: Solution description 31 | description: Describe solution(s) to the problem. Write "Unknown" if you haven't identified a solution. 32 | - type: textarea 33 | validations: 34 | required: false 35 | attributes: 36 | label: Additional notes 37 | description: Any additional context, screenshots, etc. that may help with the discussion and implementation. 38 | -------------------------------------------------------------------------------- /codebasin/compilers/intel.toml: -------------------------------------------------------------------------------- 1 | [compiler.icx] 2 | 3 | [compiler.icpx] 4 | alias_of = "icx" 5 | 6 | [[compiler.icx.parser]] 7 | flags = ["-fopenmp"] 8 | action = "append_const" 9 | dest = "modes" 10 | const = "openmp" 11 | 12 | [[compiler.icx.parser]] 13 | flags = ["-fsycl"] 14 | action = "append_const" 15 | dest = "modes" 16 | const = "sycl" 17 | 18 | [[compiler.icx.parser]] 19 | flags = ["-fsycl-targets"] 20 | action = "store_split" 21 | sep = "," 22 | format = "sycl-$value" 23 | dest = "passes" 24 | default = ["sycl-spir64"] 25 | 26 | [[compiler.icx.modes]] 27 | name = "sycl" 28 | defines = ["SYCL_LANGUAGE_VERSION"] 29 | 30 | [[compiler.icx.modes]] 31 | name = "openmp" 32 | defines = ["_OPENMP"] 33 | 34 | [[compiler.icx.passes]] 35 | name = "sycl-spir64" 36 | defines = ["__SYCL_DEVICE_ONLY__", "__SPIR__", "__SPIRV__"] 37 | modes = ["sycl"] 38 | 39 | [[compiler.icx.passes]] 40 | name = "sycl-spir64_x86_64" 41 | defines = ["__SYCL_DEVICE_ONLY__", "__SPIR__", "__SPIRV__"] 42 | modes = ["sycl"] 43 | 44 | [[compiler.icx.passes]] 45 | name = "sycl-spir64_gen" 46 | defines = ["__SYCL_DEVICE_ONLY__", "__SPIR__", "__SPIRV__"] 47 | modes = ["sycl"] 48 | 49 | [[compiler.icx.passes]] 50 | name = "sycl-spir64_fpga" 51 | defines = ["__SYCL_DEVICE_ONLY__", "__SPIR__", "__SPIRV__"] 52 | modes = ["sycl"] 53 | 54 | [[compiler.icx.passes]] 55 | name = "sycl-nvptx64-nvidia-cuda" 56 
| defines = ["__SYCL_DEVICE_ONLY__", "__NVPTX__"] 57 | modes = ["sycl"] 58 | -------------------------------------------------------------------------------- /tests/report/test_summary_report.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019-2024 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 4 | import logging 5 | import unittest 6 | from io import StringIO 7 | 8 | from codebasin.report import summary 9 | 10 | 11 | class TestSummaryReport(unittest.TestCase): 12 | """ 13 | Test summary report functionality. 14 | """ 15 | 16 | def setUp(self): 17 | logging.disable() 18 | 19 | def test_output(self): 20 | """Check summary report output""" 21 | setmap = { 22 | frozenset(["X"]): 1, 23 | frozenset(["Y"]): 2, 24 | frozenset(["X", "Y"]): 3, 25 | frozenset([]): 6, 26 | } 27 | output = StringIO() 28 | summary(setmap, stream=output) 29 | expected = """ 30 | Summary 31 | ======= 32 | ┌────────────────┬───────┬─────────┐ 33 | │ Platform Set │ LOC │ % LOC │ 34 | ├────────────────┼───────┼─────────┤ 35 | │ {} │ 6 │ 50.00 │ 36 | ├────────────────┼───────┼─────────┤ 37 | │ {X} │ 1 │ 8.33 │ 38 | ├────────────────┼───────┼─────────┤ 39 | │ {Y} │ 2 │ 16.67 │ 40 | ├────────────────┼───────┼─────────┤ 41 | │ {X, Y} │ 3 │ 25.00 │ 42 | └────────────────┴───────┴─────────┘ 43 | Code Divergence: 0.50 44 | Coverage (%): 50.00 45 | Avg. 
Coverage (%): 37.50 46 | Total SLOC: 12 47 | """ 48 | self.assertEqual(expected, output.getvalue()) 49 | 50 | 51 | if __name__ == "__main__": 52 | unittest.main() 53 | -------------------------------------------------------------------------------- /.github/workflows/coverage.yml: -------------------------------------------------------------------------------- 1 | name: coverage 2 | 3 | permissions: read-all 4 | 5 | on: 6 | pull_request: 7 | branches: [ "main" ] 8 | paths: 9 | - 'codebasin/**' 10 | 11 | jobs: 12 | check-coverage: 13 | name: Ensure modified lines are tested 14 | runs-on: ubuntu-latest 15 | strategy: 16 | matrix: 17 | python-version: ["3.12"] 18 | 19 | steps: 20 | - uses: actions/checkout@v4 21 | with: 22 | fetch-depth: 0 23 | 24 | - name: Set up Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v4 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | 29 | - name: Install `code-base-investigator` 30 | run: | 31 | python -m pip install -U pip 32 | pip install . 
33 | 34 | - name: Install `coverage` 35 | run: | 36 | pip install coverage 37 | 38 | - name: Run `coverage` 39 | run: | 40 | python -m coverage run -m unittest 41 | 42 | - name: Generate coverage.json 43 | run: | 44 | python -m coverage json --include=$(git diff --name-status ${{ github.event.pull_request.base.sha }} codebasin/*.py | grep "^M" | awk '{ print $2 }' | paste -sd,) 45 | 46 | - name: Check coverage against latest commits 47 | run: | 48 | FROM=${{ github.event.pull_request.base.sha }} 49 | TO=${{ github.sha }} 50 | COMMITS=$(git rev-list $FROM..$TO) 51 | python .github/workflows/check-coverage.py coverage.json --commits $COMMITS 52 | -------------------------------------------------------------------------------- /codebasin/source.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019-2024 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 4 | import os 5 | from pathlib import Path 6 | 7 | 8 | def is_source_file(filename: str | os.PathLike) -> bool: 9 | """ 10 | Parameters 11 | ---------- 12 | filename: Union[str, os.Pathlike] 13 | The filename of a potential source file. 14 | 15 | Returns 16 | ------- 17 | bool 18 | True if the file ends in a recognized extension and False otherwise. 19 | Only files that can be parsed correctly have recognized extensions. 20 | 21 | Raises 22 | ------ 23 | TypeError 24 | If filename is not a string or Path. 
25 | """ 26 | if not (isinstance(filename, str) or isinstance(filename, Path)): 27 | raise TypeError("filename must be a string or Path") 28 | 29 | extension = Path(filename).suffix 30 | supported_extensions = [ 31 | ".f90", 32 | ".F90", 33 | ".f", 34 | ".ftn", 35 | ".fpp", 36 | ".F", 37 | ".FOR", 38 | ".FTN", 39 | ".FPP", 40 | ".c", 41 | ".h", 42 | ".c++", 43 | ".cxx", 44 | ".cpp", 45 | ".cc", 46 | ".hpp", 47 | ".hxx", 48 | ".h++", 49 | ".hh", 50 | ".inc", 51 | ".inl", 52 | ".tcc", 53 | ".icc", 54 | ".ipp", 55 | ".cu", 56 | ".cuh", 57 | ".cl", 58 | ".s", 59 | ".S", 60 | ".asm", 61 | ] 62 | return extension in supported_extensions 63 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019-2024, Intel Corporation 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /tests/once/test_once.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019-2024 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 4 | import logging 5 | import unittest 6 | from pathlib import Path 7 | 8 | from codebasin import CodeBase, finder 9 | 10 | 11 | class TestOnce(unittest.TestCase): 12 | """ 13 | Simple test of ability to obey #pragma once directives. 
class TestDefine(unittest.TestCase):
    """
    Checks that #define directives inside a file are recognized.
    """

    def setUp(self):
        logging.disable()
        self.rootdir = Path(__file__).parent.resolve()

        # Expected lines of code per platform set.
        self.expected_setmap = {
            frozenset(): 4,
            frozenset(["CPU", "GPU"]): 10,
        }

    def test_yaml(self):
        """define/define.yaml"""
        codebase = CodeBase(self.rootdir)
        main_cpp = str(self.rootdir / "main.cpp")
        # Both platforms compile the same file; only the define differs.
        configuration = {
            platform: [
                {
                    "file": main_cpp,
                    "defines": [platform],
                    "include_paths": [],
                    "include_files": [],
                },
            ]
            for platform in ("CPU", "GPU")
        }
        state = finder.find(self.rootdir, codebase, configuration)
        observed = state.get_setmap(codebase)
        self.assertDictEqual(
            observed,
            self.expected_setmap,
            "Mismatch in setmap",
        )
class TestNesting(unittest.TestCase):
    """
    Checks handling of nested definition scopes.
    """

    def setUp(self):
        logging.disable()
        self.rootdir = Path(__file__).parent.resolve()

        # Expected lines of code per platform set.
        self.expected_setmap = {
            frozenset(["CPU"]): 6,
            frozenset(["GPU"]): 6,
            frozenset(["CPU", "GPU"]): 5,
        }

    def test_yaml(self):
        """nesting/nesting.yaml"""
        codebase = CodeBase(self.rootdir)
        main_cpp = str(self.rootdir / "main.cpp")
        # Both platforms compile the same file; only the define differs.
        configuration = {
            platform: [
                {
                    "file": main_cpp,
                    "defines": [platform],
                    "include_paths": [],
                    "include_files": [],
                },
            ]
            for platform in ("CPU", "GPU")
        }
        state = finder.find(self.rootdir, codebase, configuration)
        observed = state.get_setmap(codebase)
        self.assertDictEqual(
            observed,
            self.expected_setmap,
            "Mismatch in setmap",
        )
class TestFileTreeUtils(unittest.TestCase):
    """
    Test FileTree utility/helper functions.
    """

    def setUp(self):
        # Keep log output out of the test run.
        logging.disable()

    def test_human_readable_validation(self):
        """Check that human_readable rejects non-integers."""
        with self.assertRaises(TypeError):
            _ = _human_readable("1")

    def test_human_readable(self):
        """Check that human_readable produces correct results."""
        integers = [1, 12, 123, 1234, 12345, 123456, 123456789]
        strings = ["1", "12", "123", "1.2k", "12.3k", "123.5k", "123.5M"]
        # strict=True: fail loudly if the two tables ever get out of step,
        # instead of silently skipping the unmatched cases.
        for i, expected in zip(integers, strings, strict=True):
            with self.subTest(i=i, expected=expected):
                s = _human_readable(i)
                self.assertEqual(s, expected)

    def test_strip_colors_validation(self):
        """Check that strip_colors rejects non-strings."""
        with self.assertRaises(TypeError):
            _ = _strip_colors(1)

    def test_strip_colors(self):
        """Check that strip_colors produces correct results."""
        inputs = ["\033[2mA\033[0m", "\033[1m\033[33mB\033[0m"]
        outputs = ["A", "B"]
        # The list has its own name so the loop variable does not rebind
        # the iterable that is being zipped.
        for s, expected in zip(inputs, outputs, strict=True):
            with self.subTest(s=s, expected=expected):
                stripped = _strip_colors(s)
                self.assertEqual(stripped, expected)
logging.disable() 21 | 22 | self.expected_setmap = {frozenset(["CPU"]): 6, frozenset(["GPU"]): 6} 23 | 24 | def test_yaml(self): 25 | """disjoint/disjoint.yaml""" 26 | codebase = CodeBase(self.rootdir) 27 | configuration = { 28 | "CPU": [ 29 | { 30 | "file": str(self.rootdir / "cpu.cpp"), 31 | "defines": ["CPU"], 32 | "include_paths": [str(self.rootdir / "cpu_headers")], 33 | "include_files": [], 34 | }, 35 | ], 36 | "GPU": [ 37 | { 38 | "file": str(self.rootdir / "gpu.cpp"), 39 | "defines": ["GPU"], 40 | "include_paths": [str(self.rootdir / "gpu_headers")], 41 | "include_files": [], 42 | }, 43 | ], 44 | } 45 | state = finder.find(self.rootdir, codebase, configuration) 46 | setmap = state.get_setmap(codebase) 47 | self.assertDictEqual( 48 | setmap, 49 | self.expected_setmap, 50 | "Mismatch in setmap", 51 | ) 52 | 53 | 54 | if __name__ == "__main__": 55 | unittest.main() 56 | -------------------------------------------------------------------------------- /codebasin/language.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019-2024 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | """ 4 | Contains classes and functions related to language detection 5 | and providing information about the language to other parts of 6 | code base investigator 7 | """ 8 | 9 | import logging 10 | import os 11 | 12 | log = logging.getLogger(__name__) 13 | 14 | 15 | class FileLanguage: 16 | """ 17 | Represents the language and modifiers for a given filename 18 | """ 19 | 20 | _supported_languages = ["fortran-free", "fortran-fixed", "c", "c++", "asm"] 21 | 22 | _language_extensions = {} 23 | _language_extensions["fortran-free"] = [".f90", ".F90"] 24 | _language_extensions["fortran-fixed"] = [ 25 | ".f", 26 | ".ftn", 27 | ".fpp", 28 | ".F", 29 | ".FOR", 30 | ".FTN", 31 | ".FPP", 32 | ] 33 | _language_extensions["c"] = [".c", ".h"] 34 | _language_extensions["c++"] = [ 35 | ".c++", 36 | ".cxx", 37 | ".cpp", 38 | ".cc", 
39 | ".hpp", 40 | ".hxx", 41 | ".h++", 42 | ".hh", 43 | ".inc", 44 | ".inl", 45 | ".tcc", 46 | ".icc", 47 | ".ipp", 48 | ".cu", 49 | ".cuh", 50 | ".cl", 51 | ] 52 | _language_extensions["asm"] = [".s", ".S", ".asm"] 53 | 54 | def __init__(self, filename: str): 55 | self._filename = filename 56 | self._extension = os.path.splitext(self._filename)[1] 57 | self._language = None 58 | 59 | for lang in self._supported_languages: 60 | if self._extension in self._language_extensions[lang]: 61 | self._language = lang 62 | break 63 | 64 | def get_language(self) -> str | None: 65 | return self._language 66 | -------------------------------------------------------------------------------- /.github/workflows/check-coverage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import argparse 3 | import json 4 | import subprocess # nosec B404 5 | import sys 6 | 7 | # Parse command-line arguments. 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument("--commits", nargs="+", default=[]) 10 | parser.add_argument("file", metavar="", action="store") 11 | args = parser.parse_args(sys.argv[1:]) 12 | 13 | # Read the coverage information into an object. 14 | with open(args.file, "rb") as f: 15 | coverage = json.load(f) 16 | 17 | # For each file: 18 | # - Determine which lines were not covered; 19 | # - Check when the lines were last modified; 20 | # - Print details of new, uncovered, lines. 
21 | report = {} 22 | for filename, info in coverage["files"].items(): 23 | if not isinstance(filename, str): 24 | raise TypeError("filename must be a string") 25 | 26 | missing = info["missing_lines"] 27 | if not missing: 28 | continue 29 | 30 | for lineno in missing: 31 | if not isinstance(lineno, int): 32 | raise TypeError("line numbers must be integers") 33 | cmd = [ 34 | "git", 35 | "blame", 36 | filename, 37 | "-L", 38 | f"{lineno},{lineno}", 39 | "--no-abbrev", 40 | ] 41 | completed = subprocess.run(cmd, capture_output=True) 42 | commit = completed.stdout.decode().split()[0].strip() 43 | 44 | if commit in args.commits: 45 | if filename not in report: 46 | report[filename] = [] 47 | report[filename].append(str(lineno)) 48 | 49 | for filename in report: 50 | n = len(report[filename]) 51 | print(f'{n} uncovered lines in {filename}: {",".join(report[filename])}') 52 | 53 | # Use the exit code to communicate failure to GitHub. 54 | if len(report) != 0: 55 | sys.exit(1) 56 | else: 57 | sys.exit(0) 58 | -------------------------------------------------------------------------------- /tests/basic_asm/test_basic_asm.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2021-2024 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 4 | import logging 5 | import unittest 6 | from pathlib import Path 7 | 8 | from codebasin import CodeBase, finder 9 | 10 | 11 | class TestBasicAsm(unittest.TestCase): 12 | """ 13 | Simple test of ability to handle assembly files. 
class TestFormatter(unittest.TestCase):
    """
    Checks for the Formatter class.
    """

    def setUp(self):
        logging.disable()

    def test_constructor(self):
        """Check constructor arguments"""
        enabled = Formatter(colors=True)
        disabled = Formatter(colors=False)
        default = Formatter()
        self.assertTrue(enabled.colors)
        self.assertFalse(disabled.colors)
        self.assertFalse(default.colors)

    def test_format(self):
        """Check output format"""
        # Level name paired with the color escape expected for it.
        cases = [
            ("DEBUG", "\033[39m"),
            ("INFO", "\033[39m"),
            ("WARNING", "\033[93m"),
            ("ERROR", "\033[91m"),
        ]
        for colorize in (True, False):
            for levelname, color in cases:
                formatter = Formatter(colors=colorize)
                with self.subTest(
                    colorize=colorize,
                    levelname=levelname,
                    color=color,
                ):
                    record = logging.makeLogRecord(
                        {
                            "msg": "Testing",
                            "levelname": levelname,
                        },
                    )
                    msg = record.msg
                    level = record.levelname.lower()
                    output = formatter.format(record)
                    # Info messages carry no prefix; every other level is
                    # prefixed by its name, colored only when requested.
                    if level == "info":
                        expected = msg
                    else:
                        prefix = (
                            f"\033[1m{color}{level}\033[0m"
                            if colorize
                            else f"{level}"
                        )
                        expected = f"{prefix}: {msg}"
                    self.assertEqual(output, expected)
class TestMetaWarning(unittest.TestCase):
    """
    Test MetaWarning class.
    """

    def test_constructor(self):
        """Check constructor arguments"""
        mw = MetaWarning("regex", "msg")
        # assertTrue(a, b) treats b as the failure message and only checks
        # that a is truthy; assertEqual actually compares the values.
        # NOTE(review): assumes MetaWarning stores a compiled pattern in
        # .regex (compiled patterns compare equal on same pattern/flags).
        self.assertEqual(mw.regex, re.compile("regex"))
        self.assertEqual(mw.msg, "msg")
        self.assertEqual(mw._count, 0)

    def test_inspect(self):
        """Check inspect matches records correctly"""
        mw = MetaWarning("test[0-9]", "Testing")

        # A message matching the regex is reported and counted.
        record = logging.makeLogRecord(
            {
                "msg": "test1",
                "levelname": "WARNING",
            },
        )
        self.assertTrue(mw.inspect(record))
        self.assertEqual(mw._count, 1)

        # A non-matching message leaves the count untouched.
        record = logging.makeLogRecord(
            {
                "msg": "testA",
                "levelname": "WARNING",
            },
        )
        self.assertFalse(mw.inspect(record))
        self.assertEqual(mw._count, 1)

    def test_warn(self):
        """Check warn produces expected logging messages"""
        logging.disable(logging.NOTSET)
        # Re-disable logging afterwards even if an assertion below fails.
        self.addCleanup(logging.disable)
        logger = logging.getLogger("codebasin")

        # Nothing has been inspected yet, so nothing should be logged.
        mw = MetaWarning("test[0-9]", "Testing {}")
        with self.assertNoLogs(logger):
            mw.warn(logger)

        record = logging.makeLogRecord(
            {
                "msg": "test1",
                "levelno": logging.WARNING,
            },
        )
        mw.inspect(record)
        with self.assertLogs(logger, level="WARNING") as cm:
            mw.warn(logger)
        self.assertEqual(cm.output, ["WARNING:codebasin:Testing 1"])
BSD-3-Clause 3 | 4 | import logging 5 | import unittest 6 | from pathlib import Path 7 | 8 | from codebasin import CodeBase, finder, preprocessor 9 | from codebasin.preprocessor import Platform 10 | 11 | 12 | class TestOperators(unittest.TestCase): 13 | """ 14 | Simple test of ability to recognize different operators when used 15 | within directives 16 | """ 17 | 18 | def setUp(self): 19 | self.rootdir = Path(__file__).parent.resolve() 20 | logging.disable() 21 | 22 | self.expected_setmap = {frozenset(["CPU", "GPU"]): 32} 23 | 24 | def test_operators(self): 25 | """operators/operators.yaml""" 26 | codebase = CodeBase(self.rootdir) 27 | configuration = { 28 | "CPU": [ 29 | { 30 | "file": str(self.rootdir / "main.cpp"), 31 | "defines": ["CPU"], 32 | "include_paths": [], 33 | "include_files": [], 34 | }, 35 | ], 36 | "GPU": [ 37 | { 38 | "file": str(self.rootdir / "main.cpp"), 39 | "defines": ["GPU"], 40 | "include_paths": [], 41 | "include_files": [], 42 | }, 43 | ], 44 | } 45 | state = finder.find(self.rootdir, codebase, configuration) 46 | setmap = state.get_setmap(codebase) 47 | self.assertDictEqual( 48 | setmap, 49 | self.expected_setmap, 50 | "Mismatch in setmap", 51 | ) 52 | 53 | def test_paths(self): 54 | input_str = r"FUNCTION(looks/2like/a/path/with_/bad%%identifiers)" 55 | tokens = preprocessor.Lexer(input_str).tokenize() 56 | p = Platform("Test", self.rootdir) 57 | macro = preprocessor.macro_from_definition_string("FUNCTION(x)=#x") 58 | p._definitions = {macro.name: macro} 59 | _ = preprocessor.MacroExpander(p).expand(tokens) 60 | 61 | 62 | if __name__ == "__main__": 63 | unittest.main() 64 | -------------------------------------------------------------------------------- /tests/commented_directive/test_commented_directive.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019-2024 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 4 | import logging 5 | import unittest 6 | from pathlib 
class TestCommentedDirective(unittest.TestCase):
    """
    Check how directives that appear inside comments are handled when
    main.cpp is analyzed.
    """

    def setUp(self):
        logging.disable()
        self.rootdir = Path(__file__).parent.resolve()

        # Value that get_setmap() is expected to return for main.cpp.
        self.expected_setmap = {frozenset(["CPU", "GPU"]): 5}

    def count_children_nodes(self, node):
        """Return the number of descendants (at any depth) of *node*."""
        return sum(
            1 + self.count_children_nodes(child) for child in node.children
        )

    def test_yaml(self):
        """Compare the setmap and the tree size for main.cpp"""
        codebase = CodeBase(self.rootdir)
        source = str(self.rootdir / "main.cpp")

        # Both platforms compile the same file; only the define differs.
        configuration = {
            platform: [
                {
                    "file": source,
                    "defines": [platform],
                    "include_paths": [],
                    "include_files": [],
                },
            ]
            for platform in ("CPU", "GPU")
        }

        state = finder.find(self.rootdir, codebase, configuration)
        setmap = state.get_setmap(codebase)

        # The count starts at one and adds the descendants of every
        # tree root.
        node_count = 1 + sum(
            self.count_children_nodes(state.get_tree(filename).root)
            for filename in state.get_filenames()
        )

        self.assertDictEqual(
            setmap,
            self.expected_setmap,
            "Mismatch in setmap",
        )
        self.assertEqual(
            node_count,
            6,
            f"Incorrect number of nodes in tree: {node_count}",
        )
11 | - repo: https://github.com/asottile/setup-cfg-fmt 12 | rev: v2.4.0 13 | hooks: 14 | - id: setup-cfg-fmt 15 | 16 | - repo: https://github.com/MarcoGorelli/absolufy-imports 17 | rev: v0.3.1 18 | hooks: 19 | - id: absolufy-imports 20 | 21 | - repo: https://github.com/pycqa/isort 22 | rev: 5.12.0 23 | hooks: 24 | - id: isort 25 | name: isort (python) 26 | args: ["--profile", "black", "-l", "79"] 27 | 28 | - repo: https://github.com/asottile/pyupgrade 29 | rev: v3.10.1 30 | hooks: 31 | - id: pyupgrade 32 | args: [--py39-plus, --py310-plus, --py311-plus] 33 | 34 | - repo: https://github.com/psf/black 35 | rev: 23.7.0 36 | hooks: 37 | - id: black 38 | language_version: python 39 | args: ["-l 79"] 40 | 41 | - repo: https://github.com/asottile/add-trailing-comma 42 | rev: v3.0.1 43 | hooks: 44 | - id: add-trailing-comma 45 | 46 | - repo: https://github.com/PyCQA/flake8 47 | rev: 6.1.0 48 | hooks: 49 | - id: flake8 50 | additional_dependencies: 51 | [ 52 | flake8-debugger, 53 | flake8-use-fstring, 54 | ] 55 | args: 56 | [ 57 | "--extend-ignore=S105,E203,W503,E701", 58 | "--per-file-ignores=__init__.py:F401", 59 | ] 60 | 61 | - repo: https://github.com/PyCQA/bandit 62 | rev: 1.7.5 63 | hooks: 64 | - id: bandit 65 | name: bandit 66 | args: ["-c", ".bandit"] 67 | 68 | - repo: https://github.com/pre-commit/mirrors-mypy 69 | rev: v1.17.1 70 | hooks: 71 | - id: mypy 72 | name: mypy 73 | args: ["--disallow-untyped-defs"] 74 | additional_dependencies: 75 | [ 76 | "types-jsonschema", 77 | "types-tqdm", 78 | "types-tabulate", 79 | "scipy-stubs", 80 | "matplotlib", # There are no official stubs for matplotlib 81 | ] 82 | exclude: "^tests/" 83 | -------------------------------------------------------------------------------- /tests/literals/test_literals.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019-2024 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 4 | import logging 5 | import unittest 6 | from 
class TestLiterals(unittest.TestCase):
    """
    Check handling of C-style literals such as 0x0ULL and 0b11.
    """

    def setUp(self):
        logging.disable()
        self.rootdir = Path(__file__).parent.resolve()

        # Value that get_setmap() is expected to return for main.cpp.
        self.expected_setmap = {frozenset(["CPU", "GPU"]): 9}

    def test_literals(self):
        """Analyze main.cpp for the CPU and GPU platforms"""
        codebase = CodeBase(self.rootdir)
        source = str(self.rootdir / "main.cpp")

        # Both platforms compile the same file with a different define.
        configuration = {
            platform: [
                {
                    "file": source,
                    "defines": [define],
                    "include_paths": [],
                    "include_files": [],
                },
            ]
            for platform, define in (("CPU", "USE_CPU"), ("GPU", "USE_GPU"))
        }

        state = finder.find(self.rootdir, codebase, configuration)
        self.assertDictEqual(
            state.get_setmap(codebase),
            self.expected_setmap,
            "Mismatch in setmap",
        )

    def test_strings(self):
        """Lex a quoted string that contains escaped quotes"""
        quoted = r'"L + 2-2 \"\\\" \\n\""'
        first_token = preprocessor.Lexer(quoted).tokenize()[0]

        # The reference constant holds the same text without the
        # surrounding quotes.
        reference = preprocessor.StringConstant(
            "Unknown",
            "Unknown",
            False,
            r"L + 2-2 \"\\\" \\n\"",
        )
        self.assertEqual(first_token.token, reference.token)

    def test_long_constants(self):
        """Evaluate a 64-bit hexadecimal constant with a ULL suffix"""
        lexed = preprocessor.Lexer("0xFFFFFFFFFFFFFFFFULL").tokenize()
        value = preprocessor.ExpressionEvaluator(lexed).term()
        self.assertEqual(value, np.uint64(int("0xFFFFFFFFFFFFFFFF", 16)))
class TestInclude(unittest.TestCase):
    """
    Simple test of ability to follow #include directives to additional
    files.
    """

    def setUp(self):
        self.rootdir = Path(__file__).parent.resolve()
        logging.disable()

        self.expected_setmap = {
            frozenset(["CPU"]): 11,
            frozenset(["GPU"]): 12,
            frozenset(["CPU", "GPU"]): 16,
        }

    def test_include(self):
        """Analyze the CPU and GPU compilation databases together"""
        codebase = CodeBase(self.rootdir)

        cpu_path = self.rootdir / "cpu_commands.json"
        gpu_path = self.rootdir / "gpu_commands.json"
        configuration = {
            "CPU": config.load_database(str(cpu_path), str(self.rootdir)),
            "GPU": config.load_database(str(gpu_path), str(self.rootdir)),
        }

        state = finder.find(self.rootdir, codebase, configuration)
        setmap = state.get_setmap(codebase)
        self.assertDictEqual(
            setmap,
            self.expected_setmap,
            "Mismatch in setmap",
        )

    def test_include_from_symlink(self):
        """Check included file correctly identifies its parent"""
        # The context manager removes the directory even when a statement
        # below raises, so a failing run no longer leaks it.
        with tempfile.TemporaryDirectory() as tmp:
            p = Path(tmp)
            with open(p / "test.cpp", mode="w") as f:
                f.write('#include "test.h"')
            (p / "test.h").touch()
            os.symlink(p / "test.cpp", p / "symlink.cpp")

            codebase = CodeBase(p)
            configuration = {
                "test": [
                    {
                        "file": str(p / "symlink.cpp"),
                        "defines": [],
                        "include_paths": [],
                        "include_files": [],
                    },
                ],
            }

            # NOTE(review): the first argument is self.rootdir, not the
            # temporary directory -- confirm that this is intended.
            # There is nothing to assert: the test passes if find()
            # completes without an exception.
            _ = finder.find(self.rootdir, codebase, configuration)
class TestExclude(unittest.TestCase):
    """
    Check that files can be excluded from a code base using patterns.
    """

    def setUp(self):
        logging.disable()
        self.rootdir = Path(__file__).parent.resolve()

    def _get_setmap(self, excludes):
        """Return the setmap for commands.json with *excludes* applied."""
        codebase = CodeBase(self.rootdir, exclude_patterns=excludes)
        database = str(self.rootdir / "commands.json")
        configuration = {
            "test": config.load_database(database, str(self.rootdir)),
        }
        state = finder.find(self.rootdir, codebase, configuration)
        return state.get_setmap(codebase)

    def _check(self, excludes, expected_count):
        """Assert the "test" platform maps to *expected_count*."""
        self.assertDictEqual(
            self._get_setmap(excludes),
            {frozenset(["test"]): expected_count},
            "Mismatch in setmap",
        )

    def test_exclude_nothing(self):
        """Use an empty exclude list"""
        self._check([], 4)

    def test_exclude_extension(self):
        """Exclude by file extension"""
        self._check(["*.f90"], 3)

    def test_exclude_name(self):
        """Exclude a single file by its path"""
        self._check(["src/excluded_name.cpp"], 3)

    def test_excluded_directory(self):
        """Exclude a whole directory"""
        self._check(["thirdparty/"], 3)

    def test_excludes(self):
        """Combine all of the above patterns"""
        self._check(["*.f90", "src/excluded_name.cpp", "thirdparty/"], 1)
class TestCoverage(unittest.TestCase):
    """
    Exercise the coverage and average_coverage report functions.
    """

    def setUp(self):
        logging.disable()

    def test_coverage(self):
        """Compare coverage of a small setmap against hand-computed values"""
        only_a, only_b, shared, unused = 1, 2, 3, 4
        setmap = {
            frozenset(["A"]): only_a,
            frozenset(["B"]): only_b,
            frozenset(["A", "B"]): shared,
            frozenset([]): unused,
        }
        total_sloc = only_a + only_b + shared + unused
        used_sloc = only_a + only_b + shared

        # Omitting the platform list must give the same result as
        # passing both platforms.
        expected_all = used_sloc / total_sloc * 100.0
        self.assertEqual(coverage(setmap), expected_all)
        self.assertEqual(coverage(setmap, ["A", "B"]), expected_all)

        self.assertEqual(
            coverage(setmap, ["A"]),
            (only_a + shared) / total_sloc * 100.0,
        )
        self.assertEqual(
            coverage(setmap, ["B"]),
            (only_b + shared) / total_sloc * 100.0,
        )

    def test_average_coverage(self):
        """Compare average coverage against hand-computed values"""
        only_a, only_b, shared, unused = 1, 2, 3, 4
        setmap = {
            frozenset(["A"]): only_a,
            frozenset(["B"]): only_b,
            frozenset(["A", "B"]): shared,
            frozenset([]): unused,
        }
        total_sloc = only_a + only_b + shared + unused

        # With a single platform, the average equals that platform's
        # coverage.
        coverage_a = (only_a + shared) / total_sloc * 100.0
        self.assertEqual(average_coverage(setmap, {"A"}), coverage_a)

        coverage_b = (only_b + shared) / total_sloc * 100.0
        self.assertEqual(average_coverage(setmap, {"B"}), coverage_b)

        # Omitting the subset must give the same result as passing both
        # platforms.
        mean = (coverage_a + coverage_b) / 2
        self.assertEqual(average_coverage(setmap, {"A", "B"}), mean)
        self.assertEqual(average_coverage(setmap), mean)

    def test_null_coverage(self):
        """Expect NaN from both functions when the only count is zero"""
        setmap = {
            frozenset(""): 0,
        }
        for result in (coverage(setmap), average_coverage(setmap)):
            self.assertTrue(math.isnan(result))
-------------------------------------------------------------------------------- 1 | Excluding Files 2 | =============== 3 | 4 | By default, CBI will process any file that it encounters in a compilation 5 | database (including :code:`#include` files). The lines of code in these files 6 | will be included in the code divergence calculation unless: 7 | 8 | - The file exists outside of the directory where ``codebasin`` is run. 9 | - The file is explicitly excluded from the analysis. 10 | 11 | 12 | Using the Analysis File 13 | ####################### 14 | 15 | Files can be explicitly excluded from an analysis by adding an :code:`exclude` 16 | key to the :code:`codebase` section of the TOML file. Each entry in the exclude 17 | list is a pattern to match files against: 18 | 19 | .. code-block:: toml 20 | 21 | [codebase] 22 | exclude = [ 23 | "pattern", 24 | "pattern" 25 | ] 26 | 27 | .. note:: 28 | 29 | Each pattern is a "pathspec", matching the format used by git. For more 30 | information, see the `git glossary`_. 31 | 32 | .. _`git glossary`: https://git-scm.com/docs/gitglossary 33 | 34 | 35 | For example, we can use this section to instruct CBI to ignore all files in the 36 | ``third-party/`` subdirectory, allowing us to focus on our own code: 37 | 38 | .. code-block:: toml 39 | 40 | [codebase] 41 | exclude = [ 42 | "third-party/" 43 | ] 44 | 45 | Using this new analysis file, the output of ``codebasin`` shows fewer lines 46 | shared between the cpu and gpu platforms: 47 | 48 | 49 | .. code-block:: text 50 | :emphasize-lines: 7 51 | 52 | ----------------------- 53 | Platform Set LOC % LOC 54 | ----------------------- 55 | {} 2 7.41 56 | {cpu} 7 25.93 57 | {gpu} 7 25.93 58 | {cpu, gpu} 11 40.74 59 | ----------------------- 60 | Code Divergence: 0.56 61 | Coverage (%): 92.59 62 | Avg. 
class TestCompilationDatabase(unittest.TestCase):
    """
    Test CompilationDatabase class.
    """

    def setUp(self):
        # JSON entry whose "directory" field is a list instead of a
        # string; the tests expect it to be rejected.
        self.invalid_json = [
            {
                "arguments": ["gcc", "-c", "-o", "output", "test.cpp"],
                "directory": ["not", "a", "directory"],
                "file": "test.cpp",
            },
        ]
        self.valid_json = [
            {
                "arguments": ["gcc", "-c", "-o", "output", "test.cpp"],
                "directory": "/path/containing/source/files/",
                "file": "test.cpp",
            },
        ]
        self.commands = [
            CompileCommand("foo.o", command="c++ -o foo.o foo.c"),
            CompileCommand("bar.o", command="c++ -o bar.o bar.c"),
        ]

    def test_constructor(self):
        """The constructor keeps the commands it is given"""
        database = CompilationDatabase(self.commands)
        self.assertEqual(self.commands, database.commands)

    def test_iterator(self):
        """Iterating over a database yields its commands"""
        database = CompilationDatabase(self.commands)
        self.assertEqual(self.commands, list(database))

    def test_from_json(self):
        """from_json accepts valid entries and rejects invalid ones"""
        database = CompilationDatabase.from_json(self.valid_json)
        expected = [CompileCommand.from_json(e) for e in self.valid_json]
        self.assertEqual(expected, database.commands)

        with self.assertRaises(ValueError):
            CompilationDatabase.from_json(self.invalid_json)

    def test_from_file(self):
        """from_file accepts valid files and rejects invalid ones"""
        # delete_on_close=False keeps the file on disk after close(), so
        # it can be re-opened by name inside the with block.
        with tempfile.NamedTemporaryFile(
            mode="w",
            delete_on_close=False,
        ) as valid_file:
            json.dump(self.valid_json, valid_file)
            valid_file.close()
            database = CompilationDatabase.from_file(valid_file.name)
        expected = [CompileCommand.from_json(e) for e in self.valid_json]
        self.assertEqual(expected, database.commands)

        with tempfile.NamedTemporaryFile(
            mode="w",
            delete_on_close=False,
        ) as invalid_file:
            json.dump(self.invalid_json, invalid_file)
            invalid_file.close()
            with self.assertRaises(ValueError):
                CompilationDatabase.from_file(invalid_file.name)
class TestSafeWrite(unittest.TestCase):
    """
    Check that safe_open_write_binary opens regular files for writing
    and refuses to write through symlinks.
    """

    def setUp(self):
        self.testdir = tempfile.mkdtemp()

        def in_testdir(name):
            return os.path.join(self.testdir, name)

        self.path_linkfail = in_testdir("nowrite.bin")
        self.path_link = in_testdir("link.bin")
        self.path_write = in_testdir("write.bin")
        self.path_create = in_testdir("create.bin")

        self.initial = bytes("MAGIC", "utf-8")
        self.updated = bytes("GOOD", "utf-8")

        # nowrite.bin and write.bin start with the same contents;
        # link.bin is a symlink to nowrite.bin; create.bin is left absent.
        with open(self.path_linkfail, "wb") as fp:
            fp.write(self.initial)
        shutil.copyfile(self.path_linkfail, self.path_write)
        os.symlink(self.path_linkfail, self.path_link)

    def tearDown(self):
        shutil.rmtree(self.testdir)

    def _assert_file(self, path, expected):
        """Assert *path* starts with *expected* and has no execute bits."""
        with open(path, "rb") as fp:
            self.assertEqual(fp.read(5), expected)
            mode = os.fstat(fp.fileno()).st_mode
            self.assertEqual(mode & 0o111, 0)

    def test_linkfail(self):
        """Writing through a symlink fails and leaves the target intact"""
        with self.assertRaises(os.error):
            with util.safe_open_write_binary(self.path_link) as fp:
                fp.write(bytes("BAD", "utf-8"))

        # The contents must be unchanged whether read directly or
        # through the link.
        self._assert_file(self.path_linkfail, self.initial)
        self._assert_file(self.path_link, self.initial)

    def test_write(self):
        """An existing regular file can be overwritten"""
        with util.safe_open_write_binary(self.path_write) as fp:
            fp.write(self.updated)

        self._assert_file(self.path_write, self.updated)

    def test_create(self):
        """A file that does not exist yet can be created"""
        with util.safe_open_write_binary(self.path_create) as fp:
            fp.write(self.updated)

        self._assert_file(self.path_create, self.updated)
20 | 21 | 22 | ## Table of Contents 23 | 24 | - [Dependencies](#dependencies) 25 | - [Installation](#installation) 26 | - [Getting Started](#getting-started) 27 | - [Contribute](#contribute) 28 | - [License](#license) 29 | - [Security](#security) 30 | - [Code of Conduct](#code-of-conduct) 31 | - [Citations](#citations) 32 | 33 | 34 | ## Dependencies 35 | 36 | - jsonschema 37 | - Matplotlib 38 | - NumPy 39 | - pathspec 40 | - Python 3 41 | - SciPy 42 | - tabulate 43 | - tqdm 44 | 45 | 46 | ## Installation 47 | 48 | The latest release of CBI is version 2.0.0. To download and install this 49 | release, run the following: 50 | 51 | ``` 52 | pip install git+https://github.com/P3HPC/code-base-investigator@2.0.0 53 | ``` 54 | 55 | We strongly recommend installing CBI within a [virtual 56 | environment](https://docs.python.org/3/library/venv.html). 57 | 58 | ## Getting Started 59 | 60 | After installation, run `codebasin -h` to see a complete list of options. 61 | 62 | A full tutorial can be found in the [online 63 | documentation](https://p3hpc.github.io/code-base-investigator/). 64 | 65 | 66 | ## Contribute 67 | 68 | Contributions to CBI are welcome in the form of issues and pull requests. 69 | 70 | See [CONTRIBUTING](CONTRIBUTING.md) for more information. 71 | 72 | 73 | ## License 74 | 75 | [BSD 3-Clause](./LICENSE) 76 | 77 | 78 | ## Security 79 | 80 | See [SECURITY](SECURITY.md) for more information. 81 | 82 | The main branch of CBI is the development branch, and should not be used in 83 | production. Tagged releases are available 84 | [here](https://github.com/P3HPC/code-base-investigator/releases). 85 | 86 | 87 | ## Code of Conduct 88 | 89 | We have adopted the Contributor Covenant as the Code of Conduct for this 90 | project. See [CODE OF CONDUCT](CODE_OF_CONDUCT.md) for more information. 
91 | 92 | 93 | ## Citations 94 | 95 | If your use of CBI results in a research publication, please consider citing 96 | the software and/or the papers that inspired its functionality (as 97 | appropriate). See [CITATION](CITATION.cff) for more information. 98 | -------------------------------------------------------------------------------- /docs/source/sample-code-base.rst: -------------------------------------------------------------------------------- 1 | Sample Code Base 2 | ================ 3 | 4 | This tutorial uses a sample code base designed to showcase the features of CBI. 5 | 6 | .. attention:: 7 | 8 | To follow along with the tutorial, we first need a copy of the sample code 9 | base. 10 | 11 | It can be downloaded from :download:`here <sample-code-base.zip>` or copied 12 | from the ``docs/sample-code-base`` directory `on GitHub`_. 13 | 14 | .. _on GitHub: https://github.com/P3HPC/code-base-investigator/tree/main/docs/sample-code-base/ 15 | 16 | 17 | Directory Structure 18 | ------------------- 19 | 20 | The sample code base consists of just a few source files, arranged as shown 21 | below:: 22 | 23 | src/ 24 | ├── CMakeLists.txt 25 | ├── cpu 26 | │   └── foo.cpp 27 | ├── gpu 28 | │   └── foo.cpp 29 | ├── main.cpp 30 | └── third-party 31 | ├── library.cpp 32 | └── library.h 33 | 34 | 35 | Although simple, this structure is representative of many applications that 36 | target multiple platforms. The code base contains: 37 | 38 | - A directory (``src``) containing all of the source files required to build 39 | the application. 40 | 41 | - Two subdirectories (``cpu`` and ``gpu``) containing source files that are 42 | only used when building the application to target specific platforms. 43 | 44 | - Some shared source files (``main.cpp``) that are always used when building 45 | the application to target any platform. 46 | 47 | - Some third-party source files (``third-party/library.h`` and 48 | ``third-party/library.cpp``). 49 | 50 | ..
tip:: 51 | Generally speaking, "third party source files" just means "source files 52 | maintained by somebody else". Even if we're working with a code base 53 | without any external dependencies, treating code written by other 54 | developers or other teams as "third party source files" will allow us to 55 | limit our analysis to source files that we care about. 56 | 57 | 58 | File Structure 59 | -------------- 60 | 61 | Let's take a look at one of the files in the code base, ``main.cpp``: 62 | 63 | .. code-block:: cpp 64 | :linenos: 65 | :emphasize-lines: 10,12 66 | 67 | // Copyright (c) 2024 Intel Corporation 68 | // SPDX-License-Identifier: 0BSD 69 | #include 70 | #include "third-party/library.h" 71 | void foo(); 72 | 73 | int main(int argc, char* argv[]) 74 | { 75 | #if !defined(GPU_OFFLOAD) 76 | printf("Running on the CPU.\n"); 77 | #else 78 | printf("Running on the GPU.\n"); 79 | #endif 80 | foo(); 81 | bar(); 82 | } 83 | 84 | The preprocessor directives on Lines 9, 11 and 13 (:code:`#if`, :code:`#else` 85 | and :code:`#endif`) define a specialization point, allowing Lines 10 and 12 86 | to be specialized based on the value of a preprocessor macro 87 | (:code:`GPU_OFFLOAD`). This approach is common in many C, C++ and Fortran 88 | applications. 89 | 90 | Lines 3, 4 and 5 also define potential specialization points, because the 91 | compilation commands targeting different platforms may search different 92 | include paths and/or link against different libraries. 93 | 94 | .. note:: 95 | 96 | Although this file contains 16 lines of *text*, CBI will count only 13 97 | lines of *code*. Comments and whitespace do not count. 98 | -------------------------------------------------------------------------------- /docs/source/cmd.rst: -------------------------------------------------------------------------------- 1 | Command Line Interface 2 | ====================== 3 | 4 | ..
code-block:: text 5 | 6 | codebasin [-h] [--version] [-v] [-q] [-R ] [-x ] [-p ] [] 7 | 8 | **positional arguments:** 9 | 10 | ``analysis-file`` 11 | TOML file describing the analysis to be performed, 12 | including the codebase and platform descriptions. 13 | 14 | **options:** 15 | 16 | ``-h, --help`` 17 | Show help message and exit. 18 | 19 | ``--version`` 20 | Display version information and exit. 21 | 22 | ``-v, --verbose`` 23 | Increase verbosity level. 24 | 25 | ``-q, --quiet`` 26 | Decrease verbosity level. 27 | 28 | ``--debug`` 29 | Enable debug mode. 30 | 31 | ``-R `` 32 | Generate a report of the specified type. 33 | 34 | - ``summary``: code divergence information 35 | - ``clustering``: distance matrix and dendrogram 36 | - ``duplicates``: detected duplicate files 37 | 38 | ``-x , --exclude `` 39 | Exclude files matching this pattern from the code base. 40 | May be specified multiple times. 41 | 42 | ``-p , --platform `` 43 | Include the specified platform in the analysis. 44 | May be specified multiple times. 45 | If not specified, all platforms will be included. 46 | 47 | Tree Tool 48 | --------- 49 | 50 | The tree tool generates a visualization of the code base where each file and 51 | directory is annotated with information about platform usage and coverage. 52 | 53 | .. code-block:: text 54 | 55 | cbi-tree [-h] [--version] [-x ] [-p ] [--prune] [-L ] 56 | 57 | **positional arguments:** 58 | 59 | ``analysis-file`` 60 | TOML file describing the analysis to be performed, including the codebase and platform descriptions. 61 | 62 | **options:** 63 | 64 | ``-h, --help`` 65 | Display help message and exit. 66 | 67 | ``--version`` 68 | Display version information and exit. 69 | 70 | ``-x , --exclude `` 71 | Exclude files matching this pattern from the code base. 72 | May be specified multiple times. 73 | 74 | ``-p , --platform `` 75 | Include the specified platform in the analysis. 76 | May be specified multiple times. 
class TestBuildDirectories(unittest.TestCase):
    """
    Test ability to correctly handle out-of-tree builds.
    """

    def setUp(self):
        self.rootdir = Path(__file__).parent.resolve()
        # Undo any logging.disable() call left behind by other tests, so
        # that assertLogs() can observe the warnings emitted below.
        logging.disable(logging.NOTSET)

    def _write_database(self, entries):
        """
        Write `entries` to a compilation database and return its path.

        CBI only understands how to load compilation databases from file,
        so each test writes its database to disk. The file lives in a
        temporary directory that is removed when the test finishes, even
        if the test fails.
        """
        tmpdir = tempfile.TemporaryDirectory()
        self.addCleanup(tmpdir.cleanup)
        path = Path(tmpdir.name) / "compile_commands.json"
        with open(path, "w") as f:
            json.dump(entries, f)
        return str(path)

    def test_absolute_paths(self):
        """
        Test database with build "directory" path but source "file" path.
        All "file" fields are absolute paths.
        """

        source = self.rootdir / "foo.cpp"

        # Two platforms compile the same source file from different build
        # directories.
        configuration = {}
        for name, build in [("one", "build1/"), ("two", "build2/")]:
            entries = [
                {
                    "command": f"/usr/bin/c++ -o foo.cpp.o -c {source}",
                    "directory": f"{self.rootdir / build}",
                    "file": f"{source}",
                },
            ]
            path = self._write_database(entries)
            configuration[name] = config.load_database(path, self.rootdir)

        codebase = CodeBase(self.rootdir)

        expected_setmap = {frozenset(["one", "two"]): 1}

        state = finder.find(self.rootdir, codebase, configuration)
        setmap = state.get_setmap(codebase)
        self.assertDictEqual(setmap, expected_setmap, "Mismatch in setmap")

    def test_empty_platform(self):
        """
        Check that we warn if all files from a platform are excluded.
        This may be a sign that the compilation database has incorrect paths.
        """

        source = self.rootdir / "foo.cpp"
        build = self.rootdir / "build/"

        # The "file" field is relative, unlike the absolute path used in the
        # command.
        entries = [
            {
                "command": f"/usr/bin/c++ -o foo.cpp.o -c {source}",
                "directory": f"{build}",
                "file": "foo.cpp",
            },
        ]
        path = self._write_database(entries)

        with self.assertLogs("codebasin", level="WARNING") as log:
            config.load_database(path, self.rootdir)

        # str.find() returns -1 (truthy) when the text is missing and 0
        # (falsy) when it is found at the start, so it must not be used as
        # a boolean. Use a substring test instead.
        expected = "No files found in compilation database"
        self.assertTrue(
            any(expected in msg for msg in log.output),
            f"Expected warning not found in: {log.output}",
        )
code:: text 30 | 31 | --------------------------------------------- 32 | Platform Set LOC % LOC 33 | --------------------------------------------- 34 | {} 2 4.88 35 | {GPU 1} 1 2.44 36 | {GPU 2} 1 2.44 37 | {CPU 2} 1 2.44 38 | {CPU 1} 1 2.44 39 | {FPGA} 14 34.15 40 | {GPU 2, GPU 1} 6 14.63 41 | {CPU 1, CPU 2} 6 14.63 42 | {FPGA, CPU 1, GPU 2, GPU 1, CPU 2} 9 21.95 43 | --------------------------------------------- 44 | Code Divergence: 0.55 45 | Coverage (%): 95.12 46 | Avg. Coverage (%): 42.44 47 | Total SLOC: 41 48 | 49 | For more information about these metrics, see :doc:`here `. 50 | 51 | 52 | Hierarchical Clustering 53 | ####################### 54 | 55 | Since code divergence is constructed from pair-wise distances, CBI can also 56 | produce a pair-wise distance matrix, showing the ratio of platform-specific 57 | code to code used by both platforms. 58 | 59 | .. code:: text 60 | 61 | Distance Matrix 62 | ----------------------------------- 63 | FPGA CPU 1 GPU 2 GPU 1 CPU 2 64 | ----------------------------------- 65 | FPGA 0.00 0.70 0.70 0.70 0.70 66 | CPU 1 0.70 0.00 0.61 0.61 0.12 67 | GPU 2 0.70 0.61 0.00 0.12 0.61 68 | GPU 1 0.70 0.61 0.12 0.00 0.61 69 | CPU 2 0.70 0.12 0.61 0.61 0.00 70 | ----------------------------------- 71 | 72 | These distances can also be used to produce a dendrogram, showing the result of 73 | hierarchical clustering by platform similarity. 74 | 75 | .. image:: example-dendrogram.png 76 | :alt: A dendrogram representing the distance between platforms. 77 | 78 | 79 | Visualizing Platform Coverage 80 | ############################# 81 | 82 | To assist developers in identifying exactly which parts of their code are 83 | specialized and for which platforms, CBI can produce an annotated tree showing 84 | the amount of specialization within each file. 85 | 86 | .. code:: text 87 | 88 | Legend: 89 | A: cpu 90 | B: gpu 91 | 92 | Columns: 93 | [Platforms | SLOC | Coverage (%) | Avg. 
class TestCodeBase(unittest.TestCase):
    """
    Test CodeBase class.
    """

    @classmethod
    def setUpClass(cls):
        logging.disable()

        # Create a temporary codebase spread across two directories
        cls.tmp1 = tempfile.TemporaryDirectory()
        cls.tmp2 = tempfile.TemporaryDirectory()
        p1 = Path(cls.tmp1.name)
        p2 = Path(cls.tmp2.name)
        for name in ("foo.cpp", "bar.cpp", "baz.h", "README.md"):
            (p1 / name).touch()
        for name in ("qux.cpp", "quux.h", "README.md"):
            (p2 / name).touch()

    @classmethod
    def tearDownClass(cls):
        cls.tmp1.cleanup()
        cls.tmp2.cleanup()

    def test_constructor(self):
        """Check directories and exclude_patterns are handled correctly"""
        path = Path(self.tmp1.name)
        codebase = CodeBase(path, exclude_patterns=["*.h"])
        self.assertEqual(codebase.directories, [str(path)])
        self.assertEqual(codebase.exclude_patterns, ["*.h"])

    def test_constructor_validation(self):
        """Check directories and exclude_patterns are valid"""

        with self.assertRaises(TypeError):
            CodeBase(exclude_patterns="*")

        with self.assertRaises(TypeError):
            CodeBase(1, "2", 3)

        with self.assertRaises(TypeError):
            CodeBase(exclude_patterns=[1, "2", 3])

    def test_repr(self):
        """Check implementation of __repr__"""
        path = Path(self.tmp1.name)
        codebase = CodeBase(path, exclude_patterns=["*.h"])

        # assertTrue(value, msg) only checks that `value` is truthy; the
        # second argument is the failure message, not an expected value.
        # Check the content of the repr explicitly instead.
        # NOTE(review): the exact layout of the repr is not visible from
        # this test, so only its components are checked -- tighten to an
        # exact comparison once the intended format is confirmed.
        text = repr(codebase)
        self.assertIn("CodeBase", text)
        self.assertIn(path.name, text)
        self.assertIn("*.h", text)

    def test_contains(self):
        """Check implementation of __contains__"""
        p1 = Path(self.tmp1.name)
        p2 = Path(self.tmp2.name)
        codebase = CodeBase(p1, p2, exclude_patterns=["*.h"])

        # Files in the temporary directories should be in the code base.
        self.assertIn(p1 / "foo.cpp", codebase)
        self.assertIn(p1 / "bar.cpp", codebase)
        self.assertIn(p2 / "qux.cpp", codebase)

        # Files that match exclude pattern(s) should not be in the code base.
        self.assertNotIn(p1 / "baz.h", codebase)
        self.assertNotIn(p2 / "quux.h", codebase)

        # Files that don't exist should not be in the code base.
        self.assertNotIn(p1 / "asdf.cpp", codebase)
        self.assertNotIn(p2 / "asdf.cpp", codebase)

        # The temporary directories themselves should not be in the code base.
        self.assertNotIn(p1, codebase)
        self.assertNotIn(p2, codebase)

        # Non-source files should not be in the code base.
        self.assertNotIn(p1 / "README.md", codebase)
        self.assertNotIn(p2 / "README.md", codebase)

    def test_iterator(self):
        """Check implementation of __iter__"""
        p1 = Path(self.tmp1.name)
        p2 = Path(self.tmp2.name)
        codebase = CodeBase(p1, p2, exclude_patterns=["*.h"])

        files = list(codebase)
        expected = [
            str(p1 / "bar.cpp"),
            str(p1 / "foo.cpp"),
            str(p2 / "qux.cpp"),
        ]
        self.assertCountEqual(files, expected)
52 | 53 | - Export metrics and code path information required for P3 analysis using 54 | `other tools`_. 55 | 56 | .. _other tools: https://p3hpc.github.io/p3-analysis-library/index.html 57 | 58 | 59 | Installation 60 | ############ 61 | 62 | The latest release of CBI is version 2.0.0. To download and install this 63 | release, run the following:: 64 | 65 | $ pip install git+https://github.com/P3HPC/code-base-investigator@2.0.0 66 | 67 | We strongly recommend installing CBI within a virtual environment, to simplify 68 | dependency management and improve security. Some alternative methods of 69 | creating a virtual environment are shown below. 70 | 71 | .. tab:: venv 72 | 73 | .. code-block:: text 74 | 75 | $ git clone --branch 2.0.0 https://github.com/P3HPC/code-base-investigator.git 76 | $ python3 -m venv cbi 77 | $ source cbi/bin/activate 78 | $ cd code-base-investigator 79 | $ pip install . 80 | 81 | .. tab:: uv 82 | 83 | .. code-block:: text 84 | 85 | $ git clone --branch 2.0.0 https://github.com/P3HPC/code-base-investigator.git 86 | $ cd code-base-investigator 87 | $ uv tool install . 88 | 89 | Getting Started 90 | ############### 91 | 92 | Using CBI to analyze a code base is a three step process. For more detailed 93 | information on any of these steps, we recommend that you work through the 94 | tutorial using the :doc:`sample code base`. 95 | 96 | 97 | 1. **Generate a compilation database for each platform** 98 | 99 | You can use the |CMAKE_EXPORT_COMPILE_COMMANDS option|_ with `CMake`_, 100 | intercept the compilation of an application using `Bear`_, or write a 101 | database manually. 102 | 103 | .. _`CMake`: https://cmake.org/ 104 | .. _`Bear`: https://github.com/rizsotto/Bear 105 | 106 | .. |CMAKE_EXPORT_COMPILE_COMMANDS option| replace:: :code:`CMAKE_EXPORT_COMPILE_COMMANDS` option 107 | .. _CMAKE_EXPORT_COMPILE_COMMANDS option: https://cmake.org/cmake/help/latest/variable/CMAKE_EXPORT_COMPILE_COMMANDS.html 108 | 109 | 110 | 2. 
class TestWarningAggregator(unittest.TestCase):
    """
    Test WarningAggregator class.
    """

    @staticmethod
    def _record(msg, levelno=logging.WARNING):
        """Build a log record carrying `msg` at the given level."""
        return logging.makeLogRecord(
            {
                "msg": msg,
                "levelname": logging.getLevelName(levelno),
                "levelno": levelno,
            },
        )

    @staticmethod
    def _counts(wa):
        """Return the current count of each meta-warning, in order."""
        return [meta_warning._count for meta_warning in wa.meta_warnings]

    def test_constructor(self):
        """Check constructor arguments"""
        wa = WarningAggregator()
        self.assertEqual(len(wa.meta_warnings), 3)

    def test_filter(self):
        """Check filter inspects warnings correctly"""
        wa = WarningAggregator()

        # Each step feeds one record through the filter and states the
        # meta-warning counts expected afterwards. The counts accumulate,
        # so the order of the steps matters.
        steps = [
            ("test1", logging.WARNING, [1, 0, 0]),
            ("Missing user include", logging.WARNING, [2, 1, 0]),
            ("Missing system include", logging.WARNING, [3, 1, 1]),
            # NB: This matches on message but not levelname.
            ("Missing system include", logging.ERROR, [3, 1, 1]),
        ]
        for msg, levelno, expected_counts in steps:
            record = self._record(msg, levelno)
            self.assertTrue(wa.filter(record))
            self.assertEqual(
                self._counts(wa),
                expected_counts,
                f"Unexpected counts after {msg!r} at level {levelno}",
            )

    def test_warn(self):
        """Check warn produces expected logging messages"""
        # Logging must be enabled for assertLogs/assertNoLogs to observe
        # anything. Register the matching disable as a cleanup so that a
        # failing assertion cannot leave logging enabled for later tests.
        logging.disable(logging.NOTSET)
        self.addCleanup(logging.disable)
        logger = logging.getLogger("codebasin")

        wa = WarningAggregator()
        with self.assertNoLogs(logger):
            wa.warn(logger)

        for msg in [
            "test1",
            "Missing user include",
            "Missing system include",
        ]:
            wa.filter(self._record(msg))

        with self.assertLogs(logger, level="WARNING") as cm:
            wa.warn(logger)

        self.assertRegex(cm.output[0], "3 warnings generated")
        self.assertRegex(cm.output[1], "user include files")
        self.assertRegex(cm.output[2], "system include files")
class TestSourceTree(unittest.TestCase):
    """
    Test SourceTree class.
    """

    def setUp(self):
        logging.getLogger("codebasin").disabled = False

        source = """
#if defined(FOO)
void foo();
#elif defined(BAR)
void bar();
#else
void baz();
#endif

void qux();
"""

        # TODO: Revisit this when SourceTree can be built without a file.
        with tempfile.NamedTemporaryFile(
            mode="w",
            delete_on_close=False,
            suffix=".cpp",
        ) as handle:
            handle.write(source)
            # Close the handle before parsing; the file itself is kept
            # until the end of the with block (delete_on_close=False).
            handle.close()

            # TODO: Revisit this when __str__() is more reliable.
            parser = FileParser(handle.name)
            self.tree = parser.parse_file(summarize_only=False)
            self.filename = handle.name

    def test_walk(self):
        """Check that walk() visits nodes in the expected order"""
        # One (node type, text fragment) pair per node, in walk order.
        expected = [
            (FileNode, self.filename),
            (DirectiveNode, "FOO"),
            (CodeNode, "foo"),
            (DirectiveNode, "BAR"),
            (CodeNode, "bar"),
            (DirectiveNode, "else"),
            (CodeNode, "baz"),
            (DirectiveNode, "endif"),
            (CodeNode, "qux"),
        ]
        for index, node in enumerate(self.tree.walk()):
            node_type, fragment = expected[index]
            self.assertTrue(isinstance(node, node_type))
            # Code nodes are compared via their first spelling entry; all
            # other nodes are compared via their string form.
            if isinstance(node, CodeNode):
                text = node.spelling()[0]
            else:
                text = str(node)
            self.assertTrue(fragment in text)

    def test_visit_types(self):
        """Check that visit() validates inputs"""

        # Callable objects and plain functions are both acceptable.
        class CallableVisitor:
            def __call__(self, node):
                return True

        def function_visitor(node):
            return True

        self.tree.visit(CallableVisitor())
        self.tree.visit(function_visitor)

        # Anything that cannot be called is rejected.
        class NotCallable:
            pass

        for bad_visitor in (1, NotCallable()):
            with self.assertRaises(TypeError):
                self.tree.visit(bad_visitor)

    def test_visit(self):
        """Check that visit() visits nodes as expected"""

        # Check that a trivial visitor visits all nodes.
        class CountEverything:
            def __init__(self):
                self.count = 0

            def __call__(self, node):
                self.count += 1

        everything = CountEverything()
        self.tree.visit(everything)
        self.assertEqual(everything.count, 9)

        # Check that returning NEXT_SIBLING prevents descent.
        class CountTopLevel:
            def __init__(self):
                self.count = 0

            def __call__(self, node):
                if not isinstance(node, FileNode):
                    self.count += 1
                if isinstance(node, DirectiveNode):
                    return Visit.NEXT_SIBLING
                return Visit.NEXT

        top_level = CountTopLevel()
        self.tree.visit(top_level)
        self.assertEqual(top_level.count, 5)
def test_path_validation(self):
    """Check that path arguments are validated."""
    # redirect_stdout restores whichever stream was installed before the
    # test started. Assigning sys.__stdout__ by hand would instead discard
    # any capturing stream that a test runner had put in place.
    with redirect_stdout(io.StringIO()):
        for path in ["invalid\npath", "invalid.extension"]:
            with self.subTest(path=path):
                with self.assertRaises(ValueError):
                    cli(["compute", path])
                    # NOTE(review): this call is never reached, because the
                    # call above raises first and leaves the with block.
                    # It is kept unchanged until the expected behavior for
                    # an invalid output path (with no input path) is
                    # confirmed; it then belongs in its own assertRaises.
                    cli(["compute", "-o", path])
80 | compile_commands = [ 81 | { 82 | "file": str(p / "foo.cpp"), 83 | "command": "c++ foo.cpp", 84 | }, 85 | ] 86 | with open(p / "compile_commands.json", mode="w") as f: 87 | json.dump(compile_commands, f) 88 | 89 | ipath = p / "compile_commands.json" 90 | opath = p / "coverage.json" 91 | with self.assertRaises(SystemExit): 92 | cli(["compute", "-S", str(p), "-o", str(opath), str(ipath)]) 93 | 94 | with open(p / "coverage.json") as f: 95 | coverage = json.load(f) 96 | expected_coverage = [ 97 | { 98 | "file": "bar.h", 99 | "id": "3ba8372282f8f1bafc59bb3d0472dcd7ecd5f13a54f17585c6012bfc40bfba7b9afb905f24ccea087546f4c90363bba97d988e4067ec880f619d0ab623c3a7a1", # noqa: E501 100 | "used_lines": [], 101 | "unused_lines": [1], 102 | }, 103 | { 104 | "file": "foo.cpp", 105 | "id": "1359957a144db36091624c1091ac6a47c47945a3ff63a47ace3dc5c1b13159929adac14c21733ec1054f7f1f8809ac416e643483191aab6687d7849ee17edaa0", # noqa: E501 106 | "used_lines": [1, 3, 4], 107 | "unused_lines": [2], 108 | }, 109 | ] 110 | self.assertCountEqual(coverage, expected_coverage) 111 | sys.stdout = sys.__stdout__ 112 | 113 | tmp.cleanup() 114 | 115 | 116 | if __name__ == "__main__": 117 | unittest.main() 118 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | Thank you for your interest in contributing to Code Base Investigator! We 4 | welcome community contributions. You can: 5 | 6 | - Submit your changes directly with a [pull request][1]. 7 | - File a bug or open a feature request with an [issue][2]. 8 | 9 | [1]: https://github.com/P3HPC/code-base-investigator/pulls 10 | [2]: https://github.com/P3HPC/code-base-investigator/issues 11 | 12 | # Pull Requests 13 | 14 | This project follows the [GitHub flow][3]. To submit your change directly to 15 | the repository: 16 | 17 | - Fork the repository and develop your patch. 
18 | - Make sure your code is in line with our [coding conventions][4]. 19 | - Consider adding a [test][5]. 20 | - Submit a [pull request][6] into the main branch. 21 | 22 | [3]: https://guides.github.com/introduction/flow/index.html 23 | [4]: #coding-conventions 24 | [5]: #testing 25 | [6]: https://docs.github.com/en/free-pro-team@latest/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request 26 | 27 | # Dependencies and Hooks 28 | 29 | We recommend installing the following additional dependencies: 30 | 31 | - pre-commit 32 | 33 | These packages will allow the use of several development hooks, designed to 34 | automatically enforce the project's coding conventions. 35 | 36 | To install these dependencies and configure the hooks, run: 37 | 38 | $ pip install . 39 | $ pre-commit install 40 | 41 | # Coding Conventions 42 | 43 | For code: 44 | - Follow PEP8 for Python code, with a line limit of 79 characters. 45 | 46 | For commits: 47 | - Limit the first line of Git commit messages to 50 characters. 48 | - Limit the other lines of Git commit messages to 72 characters. 49 | - Please consider following the guidelines outlined [here][7]. 50 | 51 | [7]: https://cbea.ms/git-commit/ 52 | 53 | # Testing 54 | 55 | Code Base Investigator uses the Python [unittest][8] unit testing framework. 56 | If you are contributing a new feature or bug fix, please also consider 57 | providing an associated test. Pull requests with tests are more likely to be 58 | accepted quickly. 59 | 60 | Existing tests and information about how to run them can be found in the 61 | [tests](tests) directory. 62 | 63 | [8]: https://docs.python.org/3/library/unittest.html 64 | 65 | # License 66 | 67 | Code Base Investigator is licensed under the terms in [LICENSE](LICENSE). By 68 | contributing to the project, you agree to the license and copyright terms 69 | therein and release your contribution under these terms. 
70 | 71 | # Sign Your Work 72 | 73 | Please use the sign-off line at the end of the patch. Your signature certifies 74 | that you wrote the patch or otherwise have the right to pass it on as an 75 | open-source patch. The rules are pretty simple: if you can certify 76 | the below (from [developercertificate.org](http://developercertificate.org/)): 77 | 78 | ``` 79 | Developer Certificate of Origin 80 | Version 1.1 81 | 82 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors. 83 | 660 York Street, Suite 102, 84 | San Francisco, CA 94110 USA 85 | 86 | Everyone is permitted to copy and distribute verbatim copies of this 87 | license document, but changing it is not allowed. 88 | 89 | Developer's Certificate of Origin 1.1 90 | 91 | By making a contribution to this project, I certify that: 92 | 93 | (a) The contribution was created in whole or in part by me and I 94 | have the right to submit it under the open source license 95 | indicated in the file; or 96 | 97 | (b) The contribution is based upon previous work that, to the best 98 | of my knowledge, is covered under an appropriate open source 99 | license and I have the right under that license to submit that 100 | work with modifications, whether created in whole or in part 101 | by me, under the same open source license (unless I am 102 | permitted to submit under a different license), as indicated 103 | in the file; or 104 | 105 | (c) The contribution was provided directly to me by some other 106 | person who certified (a), (b) or (c) and I have not modified 107 | it. 108 | 109 | (d) I understand and agree that this project and the contribution 110 | are public and that a record of the contribution (including all 111 | personal information I submit with it, including my sign-off) is 112 | maintained indefinitely and may be redistributed consistent with 113 | this project or the open source license(s) involved. 
class TestLexer(unittest.TestCase):
    """
    Test ability to tokenize strings correctly.
    """

    def _assert_single_token(self, text, token_type):
        """
        Check that `text` lexes to exactly one token of type `token_type`.

        Returns the token so that callers can make further checks.
        """
        tokens = preprocessor.Lexer(text).tokenize()
        self.assertEqual(len(tokens), 1)
        self.assertIsInstance(tokens[0], token_type)
        return tokens[0]

    def _assert_token_types(self, text, expected_types):
        """Check that `text` lexes to tokens of `expected_types`, in order."""
        tokens = preprocessor.Lexer(text).tokenize()
        self.assertEqual(len(tokens), len(expected_types))
        for token, expected_type in zip(tokens, expected_types):
            self.assertIsInstance(token, expected_type)

    def test_character(self):
        """characters"""
        self._assert_single_token("'c'", preprocessor.CharacterConstant)

    def test_numerical(self):
        """numbers"""
        numbers = [
            "123",
            "123ul",
            "123.4",
            "123.4e+05",
            ".123",
            "0xFF",
            "0b10",
        ]
        for number in numbers:
            # subTest names the failing input instead of stopping at the
            # first failure with no context.
            with self.subTest(number=number):
                self._assert_single_token(
                    number,
                    preprocessor.NumericalConstant,
                )

    def test_string(self):
        """strings"""
        self._assert_single_token(
            '"this is a string constant"',
            preprocessor.StringConstant,
        )

    def test_identifier(self):
        """identifiers"""
        self._assert_token_types(
            "this is a string of words",
            [preprocessor.Identifier] * 6,
        )

    def test_operator(self):
        """operators"""
        operators = ["||", "&&", ">>", "<<", "!=", ">=", "<=", "==", "##"] + [
            "-",
            "+",
            "!",
            "*",
            "/",
            "|",
            "&",
            "^",
            "<",
            ">",
            "?",
            ":",
            "~",
            "#",
            "=",
            "%",
        ]
        for op in operators:
            with self.subTest(op=op):
                token = self._assert_single_token(op, preprocessor.Operator)
                self.assertEqual(str(token.token), op)

    def test_puncuator(self):
        """punctuators"""
        punctuators = [
            "(",
            ")",
            "{",
            "}",
            "[",
            "]",
            ",",
            ".",
            ";",
            "'",
            '"',
            "\\",
        ]
        for punc in punctuators:
            with self.subTest(punc=punc):
                token = self._assert_single_token(
                    punc,
                    preprocessor.Punctuator,
                )
                self.assertEqual(str(token.token), punc)

    def test_expression(self):
        """expression"""
        self._assert_token_types(
            "foo(a,b) * 124 + 'c'",
            [
                preprocessor.Identifier,
                preprocessor.Punctuator,
                preprocessor.Identifier,
                preprocessor.Punctuator,
                preprocessor.Identifier,
                preprocessor.Punctuator,
                preprocessor.Operator,
                preprocessor.NumericalConstant,
                preprocessor.Operator,
                preprocessor.CharacterConstant,
            ],
        )

        self._assert_token_types(
            'a > b ? "true_string" : "false_string"',
            [
                preprocessor.Identifier,
                preprocessor.Operator,
                preprocessor.Identifier,
                preprocessor.Operator,
                preprocessor.StringConstant,
                preprocessor.Operator,
                preprocessor.StringConstant,
            ],
        )
17 | """ 18 | 19 | def setUp(self): 20 | self.rootdir = Path(__file__).parent.resolve() 21 | logging.disable() 22 | 23 | def test_duplicates(self): 24 | """Check that duplicate files count towards divergence.""" 25 | 26 | cpufile = str(self.rootdir / "cpu/foo.cpp") 27 | gpufile = str(self.rootdir / "gpu/foo.cpp") 28 | 29 | codebase = CodeBase(self.rootdir) 30 | 31 | configuration = { 32 | "cpu": [ 33 | { 34 | "file": cpufile, 35 | "defines": [], 36 | "include_paths": [], 37 | "include_files": [], 38 | }, 39 | ], 40 | "gpu": [ 41 | { 42 | "file": gpufile, 43 | "defines": [], 44 | "include_paths": [], 45 | "include_files": [], 46 | }, 47 | ], 48 | } 49 | 50 | expected_setmap = {frozenset(["cpu"]): 1, frozenset(["gpu"]): 1} 51 | 52 | state = finder.find(self.rootdir, codebase, configuration) 53 | setmap = state.get_setmap(codebase) 54 | self.assertDictEqual(setmap, expected_setmap, "Mismatch in setmap") 55 | 56 | def test_symlink_directories(self): 57 | """Check that symlink directories do not count towards divergence.""" 58 | 59 | cpufile = str(self.rootdir / "cpu/foo.cpp") 60 | cpu2file = str(self.rootdir / "cpu2/foo.cpp") 61 | 62 | codebase = CodeBase(self.rootdir, exclude_patterns=["gpu/"]) 63 | 64 | configuration = { 65 | "cpu": [ 66 | { 67 | "file": cpufile, 68 | "defines": [], 69 | "include_paths": [], 70 | "include_files": [], 71 | }, 72 | ], 73 | "cpu2": [ 74 | { 75 | "file": cpu2file, 76 | "defines": [], 77 | "include_paths": [], 78 | "include_files": [], 79 | }, 80 | ], 81 | } 82 | 83 | expected_setmap = {frozenset(["cpu", "cpu2"]): 1} 84 | 85 | state = finder.find(self.rootdir, codebase, configuration) 86 | setmap = state.get_setmap(codebase) 87 | self.assertDictEqual(setmap, expected_setmap, "Mismatch in setmap") 88 | 89 | def test_symlink_files(self): 90 | """Check that symlink files do not count towards divergence.""" 91 | tmp = tempfile.TemporaryDirectory() 92 | p = Path(tmp.name) 93 | with open(p / "base.cpp", mode="w") as f: 94 | f.write("void 
foo();") 95 | os.symlink(p / "base.cpp", p / "symlink.cpp") 96 | 97 | codebase = CodeBase(p) 98 | configuration = { 99 | "test": [ 100 | { 101 | "file": str(p / "base.cpp"), 102 | "defines": [], 103 | "include_paths": [], 104 | "include_files": [], 105 | }, 106 | { 107 | "file": str(p / "symlink.cpp"), 108 | "defines": [], 109 | "include_paths": [], 110 | "include_files": [], 111 | }, 112 | ], 113 | } 114 | 115 | expected_setmap = {frozenset(["test"]): 1} 116 | 117 | state = finder.find(self.rootdir, codebase, configuration) 118 | setmap = state.get_setmap(codebase) 119 | self.assertDictEqual(setmap, expected_setmap, "Mismatch in setmap") 120 | 121 | tmp.cleanup() 122 | 123 | def test_find_duplicates(self): 124 | """Check that we can correctly identify duplicate files.""" 125 | tmp = tempfile.TemporaryDirectory() 126 | path = Path(tmp.name) 127 | with open(path / "foo.cpp", mode="w") as f: 128 | f.write("void foo();") 129 | with open(path / "bar.cpp", mode="w") as f: 130 | f.write("void foo();") 131 | codebase = CodeBase(path) 132 | 133 | duplicates = find_duplicates(codebase) 134 | expected_duplicates = [{path / "foo.cpp", path / "bar.cpp"}] 135 | self.assertCountEqual(duplicates, expected_duplicates) 136 | 137 | tmp.cleanup() 138 | 139 | def test_find_duplicates_symlinks(self): 140 | """Check that we ignore symlinks when identifying duplicates.""" 141 | tmp = tempfile.TemporaryDirectory() 142 | path = Path(tmp.name) 143 | with open(path / "foo.cpp", mode="w") as f: 144 | f.write("void foo();") 145 | os.symlink(path / "foo.cpp", path / "bar.cpp") 146 | codebase = CodeBase(path) 147 | 148 | duplicates = find_duplicates(codebase) 149 | self.assertEqual(duplicates, []) 150 | 151 | tmp.cleanup() 152 | 153 | 154 | if __name__ == "__main__": 155 | unittest.main() 156 | -------------------------------------------------------------------------------- /codebasin/schema/cbiconfig.schema: -------------------------------------------------------------------------------- 1 | { 
2 | "$schema": "https://json-schema.org/draft/2020-12/schema", 3 | "$id": "https://raw.githubusercontent.com/P3HPC/code-base-investigator/main/codebasin/schema/cbiconfig.schema", 4 | "title": "Code Base Investigator Configuration File", 5 | "description": "Configuration options for Code Base Investigator.", 6 | "type": "object", 7 | "properties": { 8 | "compiler": { 9 | "type": "object", 10 | "patternProperties": { 11 | ".*": { 12 | "oneOf": [ 13 | { 14 | "type": "object", 15 | "properties": { 16 | "options": { 17 | "type": "array", 18 | "items": { 19 | "type": "string" 20 | } 21 | }, 22 | "parser": { 23 | "type": "array", 24 | "items": { 25 | "type": "object", 26 | "properties": { 27 | "flags": { 28 | "type": "array", 29 | "items": { 30 | "type": "string" 31 | } 32 | }, 33 | "action": { 34 | "type": "string" 35 | }, 36 | "dest": { 37 | "type": "string" 38 | }, 39 | "const": { 40 | "type": "string" 41 | }, 42 | "sep": { 43 | "type": "string" 44 | }, 45 | "format": { 46 | "type": "string" 47 | }, 48 | "pattern": { 49 | "type": "string" 50 | }, 51 | "default": { 52 | "oneOf": [ 53 | { 54 | "type": "string" 55 | }, 56 | { 57 | "type": "array", 58 | "items": { 59 | "type": "string" 60 | } 61 | } 62 | ] 63 | }, 64 | "override": { 65 | "type": "boolean" 66 | } 67 | }, 68 | "additionalProperties": false 69 | } 70 | }, 71 | "modes": { 72 | "type": "array", 73 | "items": { 74 | "type": "object", 75 | "properties": { 76 | "name": { 77 | "type": "string" 78 | }, 79 | "defines": { 80 | "type": "array", 81 | "items": { 82 | "type": "string" 83 | } 84 | }, 85 | "include_paths": { 86 | "type": "array", 87 | "items": { 88 | "type": "string" 89 | } 90 | }, 91 | "include_files": { 92 | "type": "array", 93 | "items": { 94 | "type": "string" 95 | } 96 | } 97 | }, 98 | "required": ["name"], 99 | "additionalProperties": false 100 | } 101 | }, 102 | "passes": { 103 | "type": "array", 104 | "items": { 105 | "type": "object", 106 | "properties": { 107 | "name": { 108 | "type": "string" 109 
| }, 110 | "defines": { 111 | "type": "array", 112 | "items": { 113 | "type": "string" 114 | } 115 | }, 116 | "include_paths": { 117 | "type": "array", 118 | "items": { 119 | "type": "string" 120 | } 121 | }, 122 | "include_files": { 123 | "type": "array", 124 | "items": { 125 | "type": "string" 126 | } 127 | }, 128 | "modes": { 129 | "type": "array", 130 | "items": { 131 | "type": "string" 132 | } 133 | } 134 | }, 135 | "required": ["name"], 136 | "additionalProperties": false 137 | } 138 | } 139 | }, 140 | "additionalProperties": false 141 | }, 142 | { 143 | "type": "object", 144 | "properties": { 145 | "alias_of": { 146 | "type": "string" 147 | } 148 | }, 149 | "additionalProperties": false 150 | } 151 | ] 152 | } 153 | } 154 | }, 155 | "additionalProperties": false 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /tests/macro_expansion/max_level.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2019 Intel Corporation 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | // Should be 207 lines 4 | 5 | #define FOO 10 6 | #define FOO1 FOO 7 | #define FOO2 FOO1 8 | #define FOO3 FOO2 9 | #define FOO4 FOO3 10 | #define FOO5 FOO4 11 | #define FOO6 FOO5 12 | #define FOO7 FOO6 13 | #define FOO8 FOO7 14 | #define FOO9 FOO8 15 | #define FOO10 FOO9 16 | #define FOO11 FOO10 17 | #define FOO12 FOO11 18 | #define FOO13 FOO12 19 | #define FOO14 FOO13 20 | #define FOO15 FOO14 21 | #define FOO16 FOO15 22 | #define FOO17 FOO16 23 | #define FOO18 FOO17 24 | #define FOO19 FOO18 25 | #define FOO20 FOO19 26 | #define FOO21 FOO20 27 | #define FOO22 FOO21 28 | #define FOO23 FOO22 29 | #define FOO24 FOO23 30 | #define FOO25 FOO24 31 | #define FOO26 FOO25 32 | #define FOO27 FOO26 33 | #define FOO28 FOO27 34 | #define FOO29 FOO28 35 | #define FOO30 FOO29 36 | #define FOO31 FOO30 37 | #define FOO32 FOO31 38 | #define FOO33 FOO32 39 | #define FOO34 FOO33 40 | #define FOO35 FOO34 41 | 
#define FOO36 FOO35 42 | #define FOO37 FOO36 43 | #define FOO38 FOO37 44 | #define FOO39 FOO38 45 | #define FOO40 FOO39 46 | #define FOO41 FOO40 47 | #define FOO42 FOO41 48 | #define FOO43 FOO42 49 | #define FOO44 FOO43 50 | #define FOO45 FOO44 51 | #define FOO46 FOO45 52 | #define FOO47 FOO46 53 | #define FOO48 FOO47 54 | #define FOO49 FOO48 55 | #define FOO50 FOO49 56 | #define FOO51 FOO50 57 | #define FOO52 FOO51 58 | #define FOO53 FOO52 59 | #define FOO54 FOO53 60 | #define FOO55 FOO54 61 | #define FOO56 FOO55 62 | #define FOO57 FOO56 63 | #define FOO58 FOO57 64 | #define FOO59 FOO58 65 | #define FOO60 FOO59 66 | #define FOO61 FOO60 67 | #define FOO62 FOO61 68 | #define FOO63 FOO62 69 | #define FOO64 FOO63 70 | #define FOO65 FOO64 71 | #define FOO66 FOO65 72 | #define FOO67 FOO66 73 | #define FOO68 FOO67 74 | #define FOO69 FOO68 75 | #define FOO70 FOO69 76 | #define FOO71 FOO70 77 | #define FOO72 FOO71 78 | #define FOO73 FOO72 79 | #define FOO74 FOO73 80 | #define FOO75 FOO74 81 | #define FOO76 FOO75 82 | #define FOO77 FOO76 83 | #define FOO78 FOO77 84 | #define FOO79 FOO78 85 | #define FOO80 FOO79 86 | #define FOO81 FOO80 87 | #define FOO82 FOO81 88 | #define FOO83 FOO82 89 | #define FOO84 FOO83 90 | #define FOO85 FOO84 91 | #define FOO86 FOO85 92 | #define FOO87 FOO86 93 | #define FOO88 FOO87 94 | #define FOO89 FOO88 95 | #define FOO90 FOO89 96 | #define FOO91 FOO90 97 | #define FOO92 FOO91 98 | #define FOO93 FOO92 99 | #define FOO94 FOO93 100 | #define FOO95 FOO94 101 | #define FOO96 FOO95 102 | #define FOO97 FOO96 103 | #define FOO98 FOO97 104 | #define FOO99 FOO98 105 | #define FOO100 FOO99 106 | #define FOO101 FOO100 107 | #define FOO102 FOO101 108 | #define FOO103 FOO102 109 | #define FOO104 FOO103 110 | #define FOO105 FOO104 111 | #define FOO106 FOO105 112 | #define FOO107 FOO106 113 | #define FOO108 FOO107 114 | #define FOO109 FOO108 115 | #define FOO110 FOO109 116 | #define FOO111 FOO110 117 | #define FOO112 FOO111 118 | #define FOO113 FOO112 119 | 
#define FOO114 FOO113 120 | #define FOO115 FOO114 121 | #define FOO116 FOO115 122 | #define FOO117 FOO116 123 | #define FOO118 FOO117 124 | #define FOO119 FOO118 125 | #define FOO120 FOO119 126 | #define FOO121 FOO120 127 | #define FOO122 FOO121 128 | #define FOO123 FOO122 129 | #define FOO124 FOO123 130 | #define FOO125 FOO124 131 | #define FOO126 FOO125 132 | #define FOO127 FOO126 133 | #define FOO128 FOO127 134 | #define FOO129 FOO128 135 | #define FOO130 FOO129 136 | #define FOO131 FOO130 137 | #define FOO132 FOO131 138 | #define FOO133 FOO132 139 | #define FOO134 FOO133 140 | #define FOO135 FOO134 141 | #define FOO136 FOO135 142 | #define FOO137 FOO136 143 | #define FOO138 FOO137 144 | #define FOO139 FOO138 145 | #define FOO140 FOO139 146 | #define FOO141 FOO140 147 | #define FOO142 FOO141 148 | #define FOO143 FOO142 149 | #define FOO144 FOO143 150 | #define FOO145 FOO144 151 | #define FOO146 FOO145 152 | #define FOO147 FOO146 153 | #define FOO148 FOO147 154 | #define FOO149 FOO148 155 | #define FOO150 FOO149 156 | #define FOO151 FOO150 157 | #define FOO152 FOO151 158 | #define FOO153 FOO152 159 | #define FOO154 FOO153 160 | #define FOO155 FOO154 161 | #define FOO156 FOO155 162 | #define FOO157 FOO156 163 | #define FOO158 FOO157 164 | #define FOO159 FOO158 165 | #define FOO160 FOO159 166 | #define FOO161 FOO160 167 | #define FOO162 FOO161 168 | #define FOO163 FOO162 169 | #define FOO164 FOO163 170 | #define FOO165 FOO164 171 | #define FOO166 FOO165 172 | #define FOO167 FOO166 173 | #define FOO168 FOO167 174 | #define FOO169 FOO168 175 | #define FOO170 FOO169 176 | #define FOO171 FOO170 177 | #define FOO172 FOO171 178 | #define FOO173 FOO172 179 | #define FOO174 FOO173 180 | #define FOO175 FOO174 181 | #define FOO176 FOO175 182 | #define FOO177 FOO176 183 | #define FOO178 FOO177 184 | #define FOO179 FOO178 185 | #define FOO180 FOO179 186 | #define FOO181 FOO180 187 | #define FOO182 FOO181 188 | #define FOO183 FOO182 189 | #define FOO184 FOO183 190 | #define 
FOO185 FOO184 191 | #define FOO186 FOO185 192 | #define FOO187 FOO186 193 | #define FOO188 FOO187 194 | #define FOO189 FOO188 195 | #define FOO190 FOO189 196 | #define FOO191 FOO190 197 | #define FOO192 FOO191 198 | #define FOO193 FOO192 199 | #define FOO194 FOO193 200 | #define FOO195 FOO194 201 | #define FOO196 FOO195 202 | #define FOO197 FOO196 203 | #define FOO198 FOO197 204 | #define FOO199 FOO198 205 | #define FOO200 FOO199 206 | 207 | #if FOO200 == 10 208 | void reached_max() { 209 | int bar = 0; 210 | int baz = 1; 211 | } 212 | #else 213 | void died_before_max() { 214 | return; 215 | } 216 | #endif 217 | -------------------------------------------------------------------------------- /docs/source/compilation-databases.rst: -------------------------------------------------------------------------------- 1 | Compilation Databases 2 | ===================== 3 | 4 | Before it can analyze a code base, CBI needs to know how each source file is 5 | compiled. Just like a compiler, CBI requires a full list of include paths, 6 | macro definitions and other options in order to identify which code is used 7 | by each platform. Rather than require all of this information to be specified 8 | manually, CBI reads it from a `compilation database`_. 9 | 10 | 11 | Generating a Compilation Database 12 | ################################# 13 | 14 | Since our sample code base is already set up with a ``CMakeLists.txt`` file, we 15 | can ask CMake to generate the compilation database for us with the 16 | :code:`CMAKE_EXPORT_COMPILE_COMMANDS` option: 17 | 18 | .. code-block:: cmake 19 | :emphasize-lines: 4 20 | 21 | cmake_minimum_required(VERSION 3.5) 22 | project(tutorial) 23 | 24 | set(CMAKE_EXPORT_COMPILE_COMMANDS ON) 25 | 26 | set(SOURCES main.cpp third-party/library.cpp) 27 | 28 | option(GPU_OFFLOAD "Enable GPU offload." 
OFF) 29 | if (GPU_OFFLOAD) 30 | add_definitions("-D GPU_OFFLOAD=1") 31 | list(APPEND SOURCES gpu/foo.cpp) 32 | else() 33 | list(APPEND SOURCES cpu/foo.cpp) 34 | endif() 35 | 36 | add_executable(tutorial ${SOURCES}) 37 | 38 | .. important:: 39 | For projects that don't use CMake, we can use `Bear`_ to intercept the 40 | commands generated by other build systems (such as GNU makefiles). Other 41 | build systems and tools that produce compilation databases should also be 42 | compatible. 43 | 44 | .. _`compilation database`: https://clang.llvm.org/docs/JSONCompilationDatabase.html 45 | .. _`Bear`: https://github.com/rizsotto/Bear 46 | 47 | 48 | CPU Compilation Commands 49 | ------------------------ 50 | 51 | Let's start by running CMake without the :code:`GPU_OFFLOAD` option enabled, to 52 | obtain a compilation database for the CPU: 53 | 54 | .. code :: sh 55 | 56 | $ mkdir build-cpu && cd build-cpu 57 | $ cmake -G Ninja ../ 58 | $ ls 59 | 60 | CMakeCache.txt CMakeFiles Makefile cmake_install.cmake compile_commands.json 61 | 62 | .. tip:: 63 | Using the "Ninja" generator is not required, but is often faster and can 64 | improve the quality of CBI's results. Other generators (such as "Unix 65 | Makefiles") may use response (:code:`.rsp`) files to pass command-line 66 | options, and any options passed this way will not be respected by CBI. 67 | You may need to install Ninja on your system (e.g., with :code:`pip install 68 | ninja` or similar). 69 | 70 | This :code:`compile_commands.json` file includes all the commands required to 71 | build the code, corresponding to the commands that would be executed if we were 72 | to actually run :code:`make`. 73 | 74 | .. attention:: 75 | CMake generates compilation databases when the ``cmake`` command is 76 | executed, allowing us to generate compilation databases without also 77 | building the application. Other tools (like Bear) may require a build. 78 | 79 | In this case, it contains: 80 | 81 | .. 
code :: json 82 | 83 | [ 84 | { 85 | "directory": "/home/username/src/build-cpu", 86 | "command": "/usr/bin/c++ -o CMakeFiles/tutorial.dir/main.cpp.o -c /home/username/src/main.cpp", 87 | "file": "/home/username/src/main.cpp" 88 | }, 89 | { 90 | "directory": "/home/username/src/build-cpu", 91 | "command": "/usr/bin/c++ -o CMakeFiles/tutorial.dir/third-party/library.cpp.o -c /home/username/src/third-party/library.cpp", 92 | "file": "/home/username/src/third-party/library.cpp" 93 | }, 94 | { 95 | "directory": "/home/username/src/build-cpu", 96 | "command": "/usr/bin/c++ -o CMakeFiles/tutorial.dir/cpu/foo.cpp.o -c /home/username/src/cpu/foo.cpp", 97 | "file": "/home/username/src/cpu/foo.cpp" 98 | } 99 | ] 100 | 101 | 102 | GPU Compilation Commands 103 | ------------------------ 104 | 105 | Repeating the exercise with :code:`GPU_OFFLOAD` enabled gives us a different 106 | compilation database for the GPU. 107 | 108 | .. warning:: 109 | The ``GPU_OFFLOAD`` option is specific to this ``CMakeLists.txt`` file, and 110 | isn't something provided by CMake. Understanding how to build an application 111 | for a specific target platform is beyond the scope of this tutorial. 112 | 113 | As expected, we can see that the compilation database refers to ``gpu/foo.cpp`` 114 | instead of ``cpu/foo.cpp``, and that the ``GPU_OFFLOAD`` macro is defined as part 115 | of each compilation command: 116 | 117 | .. 
code :: json 118 | 119 | [ 120 | { 121 | "directory": "/home/username/src/build-gpu", 122 | "command": "/usr/bin/c++ -D GPU_OFFLOAD=1 -o CMakeFiles/tutorial.dir/main.cpp.o -c /home/username/src/main.cpp", 123 | "file": "/home/username/src/main.cpp" 124 | }, 125 | { 126 | "directory": "/home/username/src/build-gpu", 127 | "command": "/usr/bin/c++ -D GPU_OFFLOAD=1 -o CMakeFiles/tutorial.dir/third-party/library.cpp.o -c /home/username/src/third-party/library.cpp", 128 | "file": "/home/username/src/third-party/library.cpp" 129 | }, 130 | { 131 | "directory": "/home/username/src/build-gpu", 132 | "command": "/usr/bin/c++ -D GPU_OFFLOAD=1 -o CMakeFiles/tutorial.dir/gpu/foo.cpp.o -c /home/username/src/gpu/foo.cpp", 133 | "file": "/home/username/src/gpu/foo.cpp" 134 | } 135 | ] 136 | 137 | These differences are the result of code divergence. We'll explore how to use 138 | ``codebasin`` to measure the *amount* of code divergence in a later tutorial. 139 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, caste, color, religion, or sexual 10 | identity and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the overall 26 | community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or advances of 31 | any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email address, 35 | without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 
55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders via e-mail. 63 | All complaints will be reviewed and investigated promptly and fairly. 64 | 65 | All community leaders are obligated to respect the privacy and security of the 66 | reporter of any incident. 67 | 68 | ## Enforcement Guidelines 69 | 70 | Community leaders will follow these Community Impact Guidelines in determining 71 | the consequences for any action they deem in violation of this Code of Conduct: 72 | 73 | ### 1. Correction 74 | 75 | **Community Impact**: Use of inappropriate language or other behavior deemed 76 | unprofessional or unwelcome in the community. 77 | 78 | **Consequence**: A private, written warning from community leaders, providing 79 | clarity around the nature of the violation and an explanation of why the 80 | behavior was inappropriate. A public apology may be requested. 81 | 82 | ### 2. Warning 83 | 84 | **Community Impact**: A violation through a single incident or series of 85 | actions. 86 | 87 | **Consequence**: A warning with consequences for continued behavior. No 88 | interaction with the people involved, including unsolicited interaction with 89 | those enforcing the Code of Conduct, for a specified period of time. This 90 | includes avoiding interactions in community spaces as well as external channels 91 | like social media. Violating these terms may lead to a temporary or permanent 92 | ban. 93 | 94 | ### 3. Temporary Ban 95 | 96 | **Community Impact**: A serious violation of community standards, including 97 | sustained inappropriate behavior. 
98 | 99 | **Consequence**: A temporary ban from any sort of interaction or public 100 | communication with the community for a specified period of time. No public or 101 | private interaction with the people involved, including unsolicited interaction 102 | with those enforcing the Code of Conduct, is allowed during this period. 103 | Violating these terms may lead to a permanent ban. 104 | 105 | ### 4. Permanent Ban 106 | 107 | **Community Impact**: Demonstrating a pattern of violation of community 108 | standards, including sustained inappropriate behavior, harassment of an 109 | individual, or aggression toward or disparagement of classes of individuals. 110 | 111 | **Consequence**: A permanent ban from any sort of public interaction within the 112 | community. 113 | 114 | ## Attribution 115 | 116 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 117 | version 2.1, available at 118 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. 119 | 120 | Community Impact Guidelines were inspired by 121 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. 122 | 123 | For answers to common questions about this code of conduct, see the FAQ at 124 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at 125 | [https://www.contributor-covenant.org/translations][translations]. 
126 | 127 | [homepage]: https://www.contributor-covenant.org 128 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html 129 | [Mozilla CoC]: https://github.com/mozilla/diversity 130 | [FAQ]: https://www.contributor-covenant.org/faq 131 | -------------------------------------------------------------------------------- /docs/source/specialization.rst: -------------------------------------------------------------------------------- 1 | Understanding Specialization 2 | ============================ 3 | 4 | The goal of CBI is to help developers to reason about how a code base 5 | uses *specialization* to adapt to the capabilities and requirements of the 6 | different platforms it supports. By measuring specialization, we can reason 7 | about its impact upon maintenance effort. 8 | 9 | 10 | Platforms 11 | ######### 12 | 13 | The definition of *platform* used by CBI was first introduced in 14 | "`Implications of a Metric for Performance Portability`_", and is shared 15 | with the `P3 Analysis Library`_: 16 | 17 | A collection of software and hardware on which an *application* may run a 18 | *problem*. 19 | 20 | .. _Implications of a Metric for Performance Portability: 21 | https://doi.org/10.1016/j.future.2017.08.007 22 | 23 | .. _P3 Analysis Library: 24 | https://p3hpc.github.io/p3-analysis-library/ 25 | 26 | This definition is deliberately very flexible, so a platform can represent 27 | **any** execution environment for which code may be specialized. A platform 28 | could be a compiler, an operating system, a micro-architecture or some 29 | combination of these options. 30 | 31 | 32 | Specialization 33 | ############## 34 | 35 | There are many forms of specialization. What they all have in common is that 36 | these *specialization points* act as branches: different code is executed 37 | on different platforms based on some set of conditions. These conditions 38 | express a platform's capabilities, properties of the input problem, or both. 
39 | 40 | The simplest form of specialization point is a run-time branch, which is easily 41 | expressed but can incur run-time overheads and prevent compiler optimizations. 42 | Compile-time specialization avoids these issues, and in practice a lot of 43 | specialization is performed using preprocessor tools or with some kind of 44 | metaprogramming. 45 | 46 | 47 | Code Divergence 48 | ############### 49 | 50 | Code divergence is a metric proposed by Harrell and Kitson in "`Effective 51 | Performance Portability`_", which uses the Jaccard distance to measure the 52 | distance between two source codes. 53 | 54 | For a given set of platforms, :math:`H`, the code divergence :math:`CD` of 55 | an application :math:`a` solving problem :math:`p` is an average of 56 | pairwise distances: 57 | 58 | .. math:: 59 | CD(a, p, H) = \binom{|H|}{2}^{-1} 60 | \sum_{\{i, j\} \in \binom{H}{2}} {d_{i, j}(a, p)} 61 | 62 | where :math:`d_{i, j}(a, p)` represents the distance between the source 63 | code required by platforms :math:`i` and :math:`j` for application 64 | :math:`a` to solve problem :math:`p`. 65 | 66 | The distance is calculated as: 67 | 68 | .. math:: 69 | d_{i, j}(a, p) = 1 - \frac{|c_i(a, p) \cap c_j(a, p)|} 70 | {|c_i(a, p) \cup c_j(a, p)|} 71 | 72 | where :math:`c_i` and :math:`c_j` are the lines of code required to compile 73 | application :math:`a` and solve problem :math:`p` using platforms :math:`i` 74 | and :math:`j`. A distance of 0 means that all code is shared between the 75 | two platforms, whereas a distance of 1 means that no code is shared. 76 | 77 | .. note:: 78 | 79 | It is sometimes useful to talk about code *convergence* instead, which is 80 | simply the code divergence subtracted from 1. 81 | 82 | .. 
_Effective Performance Portability: 83 | https://doi.org/10.1109/P3HPC.2018.00006 84 | 85 | Platform Coverage 86 | ################# 87 | 88 | Platform coverage builds on the well-established concept of "test coverage", 89 | and measures the amount of code in a code base that is utilized by a set of 90 | platforms. Computing platform coverage is straightforward: it is simply the 91 | number of lines of code used by one or more platforms expressed as a percentage 92 | of the number of lines of code in the code base. 93 | 94 | .. important:: 95 | CBI often uses "coverage" as a shorthand for "platform coverage"! 96 | 97 | Formally, for a given set of platforms, :math:`H`, the coverage for an 98 | application :math:`a` solving problem :math:`p` is: 99 | 100 | .. math:: 101 | \textrm{Coverage}(a, p, H) = \frac{\left|\bigcup_{i \in H} c_i(a,p)\right|} 102 | {\left|\bigcup_{i \in H} c_i(a,p)\right| + \left|\bigcap_{i \in H} c_i'(a,p)\right|} \times 100 103 | 104 | where :math:`c_i` is the set of lines of code required to compile application 105 | :math:`a` and solve problem :math:`p` using platform :math:`i`, and 106 | :math:`c_i'` is the complement of that set (i.e., the set of lines of code 107 | *not* required). A coverage of 0% means that none of the code is used by any 108 | platform, whereas a coverage of 100% means that all of the code is used by at 109 | least one platform. 110 | 111 | Measuring coverage can also help us to reason about differences between 112 | platforms. The *average* coverage (over platforms) allows us to reason about 113 | the amount of code covered by *all* platforms. 114 | 115 | Formally, the average coverage is: 116 | 117 | .. math:: 118 | \textrm{Average Coverage}(a, p, H) = \frac{\sum_{h \in H} \textrm{Coverage}(a, p, h)} 119 | {\left|H\right|} 120 | 121 | An average coverage of 0% means that none of the code is used by any platform, 122 | whereas an average coverage of 100% means that all of the code is used by all 123 | platforms. 
124 | 125 | .. tip:: 126 | Low average coverage does not always mean the platforms in :math:`H` share 127 | little code; low average coverage can result from a high amount of unused 128 | code. Presenting coverage alongside average coverage provides the most insight. 129 | 130 | The straightforward nature of coverage and average coverage has several 131 | advantages. First, it is very easy to intuit the impact of a code change upon 132 | coverage. Second, it is simple to use and understand in hierarchical contexts 133 | (e.g., the number of used lines for a directory is the sum of the used lines 134 | over all files in the directory). For these reasons, CBI functionality focused 135 | on understanding potential improvements to code structure tends to use coverage 136 | instead of divergence. 137 | -------------------------------------------------------------------------------- /tests/compilers/test_actions.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019-2024 Intel Corporation 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | 4 | import argparse 5 | import logging 6 | import unittest 7 | 8 | from codebasin.config import _ExtendMatchAction, _StoreSplitAction 9 | 10 | 11 | class TestActions(unittest.TestCase): 12 | """ 13 | Test that custom argparse.Action classes work as expected. 14 | These classes enable handling of complex user-defined compiler options. 
15 | """ 16 | 17 | def setUp(self): 18 | logging.disable() 19 | 20 | def test_store_split_init(self): 21 | """Check that store_split recognizes custom arguments""" 22 | action = _StoreSplitAction(["--foo"], "foo", sep=",", format="$value") 23 | self.assertEqual(action.sep, ",") 24 | self.assertEqual(action.format, "$value") 25 | 26 | action = _StoreSplitAction(["--foo"], "foo") 27 | self.assertEqual(action.sep, None) 28 | self.assertEqual(action.format, None) 29 | 30 | def test_store_split(self): 31 | """Check that argparse calls store_split correctly""" 32 | namespace = argparse.Namespace() 33 | namespace._passes = {} 34 | 35 | parser = argparse.ArgumentParser() 36 | parser.add_argument("--foo", action=_StoreSplitAction, sep=",") 37 | parser.add_argument( 38 | "--bar", 39 | action=_StoreSplitAction, 40 | sep=",", 41 | format="prefix-$value-suffix", 42 | ) 43 | parser.add_argument("--baz", action=_StoreSplitAction, type=int) 44 | parser.add_argument( 45 | "--qux", 46 | action=_StoreSplitAction, 47 | sep=",", 48 | dest="passes", 49 | ) 50 | 51 | args, _ = parser.parse_known_args(["--foo=one,two"], namespace) 52 | self.assertEqual(args.foo, ["one", "two"]) 53 | 54 | args, _ = parser.parse_known_args(["--bar=one,two"], namespace) 55 | self.assertEqual(args.bar, ["prefix-one-suffix", "prefix-two-suffix"]) 56 | 57 | with self.assertRaises(TypeError): 58 | args, _ = parser.parse_known_args(["--baz=1"], namespace) 59 | 60 | args, _ = parser.parse_known_args(["--qux=one,two"], namespace) 61 | self.assertEqual(args._passes, {"--qux": ["one", "two"]}) 62 | 63 | def test_extend_match_init(self): 64 | """Check that extend_match recognizes custom arguments""" 65 | action = _ExtendMatchAction( 66 | ["--foo"], 67 | "foo", 68 | pattern="*", 69 | format="$value", 70 | ) 71 | self.assertEqual(action.pattern, "*") 72 | self.assertEqual(action.format, "$value") 73 | 74 | action = _ExtendMatchAction(["--foo"], "foo") 75 | self.assertEqual(action.pattern, None) 76 | 
self.assertEqual(action.format, None) 77 | 78 | def test_extend_match(self): 79 | """Check that argparse calls store_split correctly""" 80 | namespace = argparse.Namespace() 81 | 82 | parser = argparse.ArgumentParser() 83 | parser.add_argument( 84 | "--foo", 85 | action=_ExtendMatchAction, 86 | pattern=r"option_(\d+)", 87 | default=[], 88 | ) 89 | parser.add_argument( 90 | "--bar", 91 | action=_ExtendMatchAction, 92 | pattern=r"option_(\d+)", 93 | format="prefix-$value-suffix", 94 | default=[], 95 | ) 96 | parser.add_argument("--baz", action=_ExtendMatchAction, type=int) 97 | parser.add_argument( 98 | "--qux", 99 | action=_ExtendMatchAction, 100 | pattern=r"option_(\d+)", 101 | dest="passes", 102 | default=["0"], 103 | override=True, 104 | ) 105 | parser.add_argument( 106 | "--one", 107 | "--two", 108 | action=_ExtendMatchAction, 109 | pattern=r"option_(\d+)", 110 | dest="passes", 111 | ) 112 | parser.add_argument( 113 | "--default-override", 114 | action=_ExtendMatchAction, 115 | pattern=r"option_(\d+)", 116 | default=["0"], 117 | dest="override", 118 | override=True, 119 | ) 120 | 121 | args, _ = parser.parse_known_args( 122 | ["--foo=option_1,option_2"], 123 | namespace, 124 | ) 125 | self.assertEqual(args.foo, ["1", "2"]) 126 | 127 | args, _ = parser.parse_known_args( 128 | ["--bar=option_1,option_2"], 129 | namespace, 130 | ) 131 | self.assertEqual(args.bar, ["prefix-1-suffix", "prefix-2-suffix"]) 132 | 133 | with self.assertRaises(TypeError): 134 | args, _ = parser.parse_known_args(["--baz=1"], namespace) 135 | 136 | # Check that the default values defined by flags always exists. 137 | # Note that the caller must initialize the default. 138 | namespace.override = ["0"] 139 | namespace._passes = {"--qux": ["0"]} 140 | args, _ = parser.parse_known_args( 141 | [], 142 | namespace, 143 | ) 144 | self.assertEqual(args.override, ["0"]) 145 | self.assertEqual(args._passes, {"--qux": ["0"]}) 146 | 147 | # Check that the default pass is overridden by use of --qux. 
148 | # Note that the caller must initialize the default. 149 | namespace.override = ["0"] 150 | namespace._passes = {"--qux": ["0"]} 151 | args, _ = parser.parse_known_args( 152 | ["--qux=option_1,option_2", "--default-override=option_1"], 153 | namespace, 154 | ) 155 | self.assertEqual(args.override, ["1"]) 156 | self.assertEqual(args._passes, {"--qux": ["1", "2"]}) 157 | 158 | namespace._passes = {} 159 | args, _ = parser.parse_known_args( 160 | ["--one=option_1", "--two=option_2"], 161 | namespace, 162 | ) 163 | self.assertEqual(args._passes, {"--one": ["1", "2"]}) 164 | 165 | 166 | if __name__ == "__main__": 167 | unittest.main() 168 | -------------------------------------------------------------------------------- /docs/source/emulating-compiler-behavior.rst: -------------------------------------------------------------------------------- 1 | Emulating Compiler Behavior 2 | =========================== 3 | 4 | When CBI processes a file, it tries to obey all of the arguments that it can 5 | see in the compilation database. Unfortunately, compilers often have behaviors 6 | that are not reflected on the command line (such as their default include 7 | paths, or compiler version macros). 8 | 9 | If we believe (or already know!) that these behaviors will impact the 10 | CBI's analysis of a code base, we can use a configuration file to append 11 | additional options when emulating certain compilers. 12 | 13 | .. attention:: 14 | 15 | If you encounter a situation that is not supported by CBI and which cannot 16 | be described by our existing configuration files, please `open an issue`_. 17 | 18 | .. _`open an issue`: https://github.com/P3HPC/code-base-investigator/issues/new/choose 19 | 20 | 21 | Motivating Example 22 | ------------------ 23 | 24 | The ``foo.cpp`` files in our sample code base include specialization that we 25 | have ignored so far, which selects a line based on the value of the 26 | :code:`__GNUC__` preprocessor macro: 27 | 28 | .. 
code-block:: cpp 29 | :linenos: 30 | :emphasize-lines: 6 31 | 32 | // Copyright (c) 2024 Intel Corporation 33 | // SPDX-License-Identifier: 0BSD 34 | #include <cstdio> 35 | 36 | void foo() { 37 | #if __GNUC__ >= 13 38 | printf("Using a feature that is only available in GCC 13 and later.\n"); 39 | #else 40 | printf("Running the rest of foo() on the CPU.\n"); 41 | #endif 42 | } 43 | 44 | This macro is defined automatically by all GNU compilers and is set based on 45 | the compiler's major version. For example, ``gcc`` version 13.0.0 would set 46 | :code:`__GNUC__` to 13. Checking the values of macros like this one can be 47 | useful when specializing code paths to work around bugs in specific compilers, 48 | or when specializing code paths to make use of functionality that is only 49 | available in newer compiler versions. 50 | 51 | Let's take another look at the compilation database entry for this file: 52 | 53 | .. code-block:: json 54 | :emphasize-lines: 14 55 | 56 | [ 57 | { 58 | "directory": "/home/username/src/build-cpu", 59 | "command": "/usr/bin/c++ -o CMakeFiles/tutorial.dir/main.cpp.o -c /home/username/src/main.cpp", 60 | "file": "/home/username/src/main.cpp" 61 | }, 62 | { 63 | "directory": "/home/username/src/build-cpu", 64 | "command": "/usr/bin/c++ -o CMakeFiles/tutorial.dir/third-party/library.cpp.o -c /home/username/src/third-party/library.cpp", 65 | "file": "/home/username/src/third-party/library.cpp" 66 | }, 67 | { 68 | "directory": "/home/username/src/build-cpu", 69 | "command": "/usr/bin/c++ -o CMakeFiles/tutorial.dir/cpu/foo.cpp.o -c /home/username/src/cpu/foo.cpp", 70 | "file": "/home/username/src/cpu/foo.cpp" 71 | } 72 | ] 73 | 74 | CBI can see that the compiler used for ``foo.cpp`` is called ``/usr/bin/c++``, 75 | but there is not enough information to decide what the value of 76 | :code:`__GNUC__` should be.
77 | 78 | 79 | Defining Implicit Options 80 | ------------------------- 81 | 82 | CBI searches for a file called ``.cbi/config``, and uses the information found 83 | in that file to determine implicit compiler options. Each compiler definition 84 | is a TOML `table`_, of the form shown below: 85 | 86 | .. _`table`: https://toml.io/en/v1.0.0#table 87 | 88 | .. code:: toml 89 | 90 | [compiler.name] 91 | options = [ 92 | "option", 93 | "option" 94 | ] 95 | 96 | In our example, we would like to define :code:`__GNUC__` for the ``c++`` 97 | compiler, so we can add the following compiler definition: 98 | 99 | .. code:: toml 100 | 101 | [compiler."c++"] 102 | options = [ 103 | "-D__GNUC__=13", 104 | ] 105 | 106 | .. important:: 107 | The quotes around "c++" are necessary because of the + symbols. The quotes 108 | would not be necessary for other compilers. 109 | 110 | With the :code:`__GNUC__` macro set, the two lines of code that were previously 111 | considered "unused" are assigned to platforms, and the output of ``codebasin`` 112 | becomes: 113 | 114 | .. code:: text 115 | 116 | ----------------------- 117 | Platform Set LOC % LOC 118 | ----------------------- 119 | {cpu} 8 29.63 120 | {gpu} 8 29.63 121 | {cpu, gpu} 11 40.74 122 | ----------------------- 123 | Code Divergence: 0.59 124 | Coverage (%): 100.00 125 | Avg. Coverage (%): 70.37 126 | Total SLOC: 27 127 | 128 | 129 | Parsing Compiler Options 130 | ------------------------ 131 | 132 | In more complex cases, emulating a compiler's implicit behavior requires CBI to 133 | parse the command-line arguments passed to the compiler. Such emulation 134 | requires CBI to understand which options are important and how they impact 135 | compilation. 136 | 137 | CBI ships with a number of compiler definitions included (see `here`_), and the 138 | same syntax can be used to define custom compiler behaviors within the 139 | ``.cbi/config`` file. 140 | 141 | .. 
log = logging.getLogger("codebasin")


def _build_parser() -> argparse.ArgumentParser:
    """
    Build the argument parser for the CBI tree tool.

    The default ``-h`` handling is disabled (``add_help=False``) and an
    explicit help option is registered instead, so that its help text is
    produced by ``_help_string`` like every other option.
    """
    parser = argparse.ArgumentParser(
        description="CBI Tree Tool " + __version__,
        formatter_class=argparse.RawTextHelpFormatter,
        add_help=False,
    )
    parser.add_argument(
        "-h",
        "--help",
        action="help",
        help=_help_string("Display help message and exit."),
    )
    parser.add_argument(
        "--version",
        action="version",
        version=f"CBI Tree Tool {__version__}",
        help=_help_string("Display version information and exit."),
    )
    # NOTE: each value-taking argument carries a descriptive, non-empty
    # metavar. An empty metavar renders as a blank placeholder in the usage
    # and help output.
    parser.add_argument(
        "-x",
        "--exclude",
        dest="excludes",
        metavar="<pattern>",
        action="append",
        default=[],
        help=_help_string(
            "Exclude files matching this pattern from the code base.",
            "May be specified multiple times.",
            is_long=True,
        ),
    )
    parser.add_argument(
        "-p",
        "--platform",
        dest="platforms",
        metavar="<platform>",
        action="append",
        default=[],
        help=_help_string(
            "Include the specified platform in the analysis.",
            "May be specified multiple times.",
            "If not specified, all platforms will be included.",
            is_long=True,
        ),
    )
    parser.add_argument(
        "--prune",
        dest="prune",
        action="store_true",
        help=_help_string(
            "Prune unused files from the tree.",
        ),
    )
    parser.add_argument(
        "-L",
        "--levels",
        dest="levels",
        metavar="<n>",
        type=int,
        help=_help_string(
            "Print only the specified number of levels.",
            is_long=True,
            is_last=True,
        ),
    )

    parser.add_argument(
        "analysis_file",
        metavar="<analysis-file>",
        help=_help_string(
            "TOML file describing the analysis to be performed, "
            + "including the codebase and platform descriptions.",
            is_last=True,
        ),
    )

    return parser


def _tree(args: argparse.Namespace) -> None:
    """
    Run the analysis described by the parsed arguments and print the tree.

    Reads ``levels``, ``analysis_file``, ``excludes``, ``platforms`` and
    ``prune`` from ``args``. ``args.excludes`` and ``args.platforms`` are
    extended in place with values taken from the analysis file. On success
    the process exits with status 0 via ``sys.exit``.

    Raises
    ------
    ValueError
        If ``levels`` is not greater than 0, or if a selected platform has no
        'commands' entry in the analysis file.
    RuntimeError
        If the analysis file exists but does not end in ``.toml``.
    KeyError
        If a platform requested on the command line is not defined in the
        analysis file.
    """
    # Refuse to print a tree with no levels, consistent with tree utility.
    if args.levels is not None and args.levels <= 0:
        raise ValueError("Number of levels must be greater than 0.")

    # TODO: Refactor this to avoid duplication in __main__
    # Determine the root directory based on where codebasin is run.
    rootdir = os.path.abspath(os.getcwd())

    # Set up a default configuration object.
    configuration = {}

    # Load the analysis file if it exists.
    # NOTE(review): a path that does not exist is skipped silently and the
    # analysis then runs with an empty configuration -- confirm this is the
    # intended behavior.
    if args.analysis_file is not None:
        path = os.path.abspath(args.analysis_file)
        if os.path.exists(path):
            if os.path.splitext(path)[1] != ".toml":
                raise RuntimeError(f"Analysis file {path} must end in .toml.")

            with open(path, "rb") as f:
                analysis_toml = util._load_toml(f, "analysis")

            # Excludes from the analysis file are added to those given on
            # the command line.
            if "codebase" in analysis_toml:
                if "exclude" in analysis_toml["codebase"]:
                    args.excludes += analysis_toml["codebase"]["exclude"]

            platforms = analysis_toml["platform"]

            for name in args.platforms:
                if name not in platforms:
                    raise KeyError(
                        f"Platform {name} requested on the command line "
                        + "does not exist in the configuration file.",
                    )

            # Snapshot the command-line selection before args.platforms is
            # extended below; an empty selection means "all platforms".
            cmd_platforms = args.platforms.copy()
            for name in platforms:
                if cmd_platforms and name not in cmd_platforms:
                    continue
                if "commands" not in platforms[name]:
                    raise ValueError(f"Missing 'commands' for platform {name}")
                p = platforms[name]["commands"]
                db = config.load_database(p, rootdir)
                args.platforms.append(name)
                configuration.update({name: db})

    # Construct a codebase object associated with the root directory.
    codebase = CodeBase(rootdir, exclude_patterns=args.excludes)

    # Parse the source tree, and determine source line associations.
    # The trees and associations are housed in state.
    state = finder.find(
        rootdir,
        codebase,
        configuration,
        show_progress=True,
    )

    # Print the file tree.
    report.files(codebase, state, prune=args.prune, levels=args.levels)
    sys.exit(0)


def cli(argv: list[str]) -> None:
    """
    Parse ``argv``, configure logging and run the tree tool.

    ``argv`` is the list of command-line arguments without the program name.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    # Configure logging such that:
    # - Only errors are written to the terminal
    log.setLevel(logging.DEBUG)

    stderr_handler = logging.StreamHandler(sys.stderr)
    stderr_handler.setLevel(logging.ERROR)
    stderr_handler.setFormatter(Formatter(colors=sys.stderr.isatty()))
    log.addHandler(stderr_handler)

    _tree(args)


def main() -> None:
    """
    Entry point: run the CLI on ``sys.argv`` and report failures.

    Any exception raised by ``cli`` is logged as an error and the process
    exits with status 1.
    """
    try:
        cli(sys.argv[1:])
    except Exception as e:
        # Top-level boundary: report the failure and signal it to the shell.
        log.error(str(e))
        sys.exit(1)


if __name__ == "__main__":
    sys.argv[0] = "codebasin.tree"
    main()