├── .gitignore ├── README.md ├── pyproject.toml └── src └── chimeracat ├── __init__.py └── chimeracat.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 162 | #.idea/ 163 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ChimeraCat: 2 | Intelligent code concatenator and summarizer 3 | 4 | ---- 5 | ## What does it do? 6 | ChimeraCat (ccat) analyzes Python codebases to generate consolidated files optimized for LLM processing, 7 | with configurable summarization to reduce token usage while preserving key information in the code. It was originally to smartly concatenate multiple files from a python lib in development into a single notebook for testing in Colab, then I realized its fitness to the sharing-code-with-an-LLM purpose. 8 | 9 | Now includes cli `ccat` with all configuration exposed as command-line arguments, so it's ready to use without any development on your part. 
Just `pip install git+https://github.com/scottvr/chimeracat` into your venv. 10 | 11 | ## Key Features: 12 | - Analyzes Python files for imports and definitions 13 | - Builds dependency graphs using NetworkX 14 | - Displays graph visually as a DAG using ASCII via [PHART](https://github.com/scottvr/PHART) 15 | - Generates both .py files and Colab notebooks 16 | - Smart handling of internal/external imports 17 | - Configurable code summarization 18 | 19 | ## General Dependency and Interface mapping 20 | 21 | If you aren't trying to save on tokens and conserve context memory when pairing with an LLM, you may still find ChimeraCat's reporting functionality useful. 22 | 23 | ### CLI report generation example: 24 | ```bash 25 | ccat --report-only --elide-disconnected --numeric-labels ..\ASSET\stemprover\src > ccat-report.stemprover.txt 26 | ``` 27 |
28 | Example output from above command 29 | 30 | ``` 31 | Dependency Analysis Report 32 | ========================= 33 | 34 | Directory Structure: 35 | stemprover\__init__.py 36 | stemprover\analysis\base.py 37 | stemprover\analysis\spectral.py 38 | stemprover\analysis\artifacts\base.py 39 | stemprover\analysis\artifacts\high_freq.py 40 | stemprover\analysis\artifacts\preprocessor.py 41 | stemprover\analysis\selection\metrics.py 42 | stemprover\analysis\selection\segment_finder.py 43 | stemprover\analysis\selection\__init__.py 44 | stemprover\common\audio_utils.py 45 | stemprover\common\math_utils.py 46 | stemprover\common\spectral_utils.py 47 | stemprover\common\types.py 48 | stemprover\common\__init__.py 49 | stemprover\core\audio.py 50 | stemprover\core\config.py 51 | stemprover\core\types.py 52 | stemprover\enhancement\base.py 53 | stemprover\enhancement\controlnet.py 54 | stemprover\enhancement\training.py 55 | stemprover\io\audio.py 56 | stemprover\preparation\base.py 57 | stemprover\preparation\segments\generator.py 58 | stemprover\preparation\segments\__init__.py 59 | stemprover\separation\base.py 60 | stemprover\separation\spleeter.py 61 | stemprover\training\dataset.py 62 | stemprover\training\pairs.py 63 | 64 | Import Summary: 65 | 66 | External Dependencies: 67 | abc, common.audio_utils, common.types, core.audio, core.types, dataclasses, datetime, enum, json, librosa, matplotlib.pyplot as plt, numpy as np, pathlib, soundfile as sf, spleeter.separator, stemprover.common.audio_utils, stemprover.common.spectral_utils, stemprover.common.types, stemprover.core.audio, stemprover.core.config, stemprover.core.types, stemprover.enhancement.controlnet, tensorflow as tf, torch, torch.nn as nn, torch.nn.functional as F, torch.utils.data, typing 68 | 69 | Internal Dependencies: 70 | ...common.audio_utils, ...common.types, ...core.audio, ...core.types, ..analysis.spectral, ..common.audio_utils, ..common.math_utils, ..common.types, ..core.audio, ..core.types, 
..io.audio, .analysis.base, .analysis.spectral, .audio, .audio_utils, .base, .core.types, .math_utils, .metrics, .preparation.segments, .separation.base, .separation.spleeter, .spectral_utils, .types 71 | 72 | 73 | Module Statistics: 74 | Total modules: 28 75 | Total dependencies: 19 76 | 77 | Module Dependencies: 78 | ------------------- 79 | 80 | PHART Module Dependency Graph Visualization (see legend below): 81 | 82 | [1] [23] [25] [7] [8] [9] 83 | | | | | | | 84 | v v v v v v 85 | [10]<---[12]--+-[13]--+-[15]--->[16]--->[17]+-->[19] 86 | 87 | 88 | 89 | 90 | 91 | 92 | Legend: 93 | 1: ..\ASSET\stemprover\src\stemprover\__init__.py 94 | 2: ..\ASSET\stemprover\src\stemprover\analysis\base.py 95 | 3: ..\ASSET\stemprover\src\stemprover\analysis\spectral.py 96 | 4: ..\ASSET\stemprover\src\stemprover\analysis\artifacts\base.py 97 | 5: ..\ASSET\stemprover\src\stemprover\analysis\artifacts\high_freq.py 98 | 6: ..\ASSET\stemprover\src\stemprover\analysis\artifacts\preprocessor.py 99 | 7: ..\ASSET\stemprover\src\stemprover\analysis\selection\metrics.py 100 | 8: ..\ASSET\stemprover\src\stemprover\analysis\selection\segment_finder.py 101 | 9: ..\ASSET\stemprover\src\stemprover\analysis\selection\__init__.py 102 | 10: ..\ASSET\stemprover\src\stemprover\common\audio_utils.py 103 | 11: ..\ASSET\stemprover\src\stemprover\common\math_utils.py 104 | 12: ..\ASSET\stemprover\src\stemprover\common\spectral_utils.py 105 | 13: ..\ASSET\stemprover\src\stemprover\common\types.py 106 | 14: ..\ASSET\stemprover\src\stemprover\common\__init__.py 107 | 15: ..\ASSET\stemprover\src\stemprover\core\audio.py 108 | 16: ..\ASSET\stemprover\src\stemprover\core\config.py 109 | 17: ..\ASSET\stemprover\src\stemprover\core\types.py 110 | 18: ..\ASSET\stemprover\src\stemprover\enhancement\base.py 111 | 19: ..\ASSET\stemprover\src\stemprover\enhancement\controlnet.py 112 | 20: ..\ASSET\stemprover\src\stemprover\enhancement\training.py 113 | 21: ..\ASSET\stemprover\src\stemprover\io\audio.py 114 | 22: 
..\ASSET\stemprover\src\stemprover\preparation\base.py 115 | 23: ..\ASSET\stemprover\src\stemprover\preparation\segments\generator.py 116 | 24: ..\ASSET\stemprover\src\stemprover\preparation\segments\__init__.py 117 | 25: ..\ASSET\stemprover\src\stemprover\separation\base.py 118 | 26: ..\ASSET\stemprover\src\stemprover\separation\spleeter.py 119 | 27: ..\ASSET\stemprover\src\stemprover\training\dataset.py 120 | 28: ..\ASSET\stemprover\src\stemprover\training\pairs.py 121 | (non-dependent modules elided from visualization) 122 | 123 | 124 | 125 | Dependency Chains: 126 | ----------------- 127 | 1. stemprover\__init__.py 128 | 2. stemprover\analysis\base.py 129 | 3. stemprover\analysis\spectral.py 130 | 4. stemprover\analysis\artifacts\base.py 131 | 5. stemprover\analysis\artifacts\high_freq.py 132 | 6. stemprover\analysis\artifacts\preprocessor.py 133 | 7. stemprover\analysis\selection\metrics.py 134 | 8. stemprover\analysis\selection\segment_finder.py 135 | 9. stemprover\analysis\selection\__init__.py 136 | 10. stemprover\common\math_utils.py 137 | 11. stemprover\common\__init__.py 138 | 12. stemprover\enhancement\base.py 139 | 13. stemprover\enhancement\training.py 140 | 14. stemprover\io\audio.py 141 | 15. stemprover\preparation\base.py 142 | 16. stemprover\preparation\segments\generator.py 143 | 17. stemprover\preparation\segments\__init__.py 144 | 18. stemprover\separation\base.py 145 | 19. stemprover\separation\spleeter.py 146 | 20. stemprover\training\dataset.py 147 | 21. stemprover\training\pairs.py 148 | 22. stemprover\enhancement\controlnet.py 149 | Depends on: stemprover\__init__.py 150 | 23. stemprover\common\spectral_utils.py 151 | Depends on: stemprover\analysis\selection\segment_finder.py 152 | 24. stemprover\core\audio.py 153 | Depends on: stemprover\__init__.py, stemprover\analysis\selection\metrics.py, stemprover\analysis\selection\segment_finder.py, stemprover\analysis\selection\__init__.py, stemprover\preparation\segments\generator.py 154 | 25. 
stemprover\common\types.py 155 | Depends on: stemprover\analysis\selection\segment_finder.py, stemprover\preparation\segments\generator.py 156 | 26. stemprover\common\audio_utils.py 157 | Depends on: stemprover\analysis\selection\segment_finder.py, stemprover\preparation\segments\generator.py 158 | 27. stemprover\core\config.py 159 | Depends on: stemprover\separation\base.py 160 | 28. stemprover\core\types.py 161 | Depends on: stemprover\__init__.py, stemprover\analysis\selection\metrics.py, stemprover\analysis\selection\segment_finder.py, stemprover\analysis\selection\__init__.py, stemprover\core\config.py, stemprover\preparation\segments\generator.py, stemprover\separation\base.py 162 | 163 | Module Details: 164 | ------------- 165 | 166 | stemprover\__init__.py: 167 | Classes: None 168 | Functions: None 169 | Imports: stemprover.core.types, .separation.base, .analysis.base, .separation.spleeter, stemprover.core.audio, .analysis.spectral, stemprover.enhancement.controlnet 170 | 171 | stemprover\analysis\base.py: 172 | Classes: VocalAnalyzer, for 173 | Functions: __init__, analyze, _create_spectrograms 174 | Imports: abc, numpy as np, pathlib, ..core.audio 175 | 176 | stemprover\analysis\spectral.py: 177 | Classes: SpectralAnalyzer 178 | Functions: __init__, _create_spectrogram, _analyze_differences, analyze, _save_comparison, _plot_spectrogram, _save_analysis 179 | Imports: ..common.types, typing, ..common.math_utils, json, ..core.audio, datetime, pathlib, ..core.types, matplotlib.pyplot as plt, ..common.audio_utils 180 | 181 | stemprover\analysis\artifacts\base.py: 182 | Classes: SignalProcessor, class, from, HybridProcessor, for, ArtifactProcessor, ControlNetProcessor 183 | Functions: __init__, validate, run_validation, as_dict, _calculate_snr, _analyze_frequency_response, _measure_phase_coherence, process, _bandpass_filter 184 | Imports: typing, torch.nn as nn, abc, torch, ...common.audio_utils, pathlib, ...common.types, numpy as np, dataclasses 185 | 186 | 
stemprover\analysis\artifacts\high_freq.py: 187 | Classes: HighFrequencyArtifactPreprocessor 188 | Functions: forward, __init__, generate_training_pair 189 | Imports: None 190 | 191 | stemprover\analysis\artifacts\preprocessor.py: 192 | Classes: HighFrequencyArtifactPreprocessor 193 | Functions: forward, __init__, generate_training_pair 194 | Imports: None 195 | 196 | stemprover\analysis\selection\metrics.py: 197 | Classes: from, class, MetricsCalculator 198 | Functions: __init__, _calculate_detailed_score, calculate_sdr, calculate_metrics, calculate_band_sdrs 199 | Imports: stemprover.core.types, typing, stemprover.core.audio, numpy as np, dataclasses 200 | 201 | stemprover\analysis\selection\segment_finder.py: 202 | Classes: from, TestSegmentFinder 203 | Functions: __init__, find_best_segments, _compute_score, analyze_segment, _calculate_transitions, _calculate_high_freq_content, _calculate_vocal_clarity 204 | Imports: stemprover.core.types, typing, stemprover.common.types, stemprover.core.audio, numpy as np, stemprover.common.audio_utils, stemprover.common.spectral_utils, .metrics, librosa, dataclasses 205 | 206 | stemprover\analysis\selection\__init__.py: 207 | Classes: None 208 | Functions: None 209 | Imports: stemprover.core.types, stemprover.core.audio 210 | 211 | stemprover\common\audio_utils.py: 212 | Classes: None 213 | Functions: get_frequency_bins, get_band_mask, calculate_phase_complexity, calculate_dynamic_range, to_mono, create_spectrogram, calculate_onset_variation 214 | Imports: .math_utils, numpy as np, .types, soundfile as sf, librosa 215 | 216 | stemprover\common\math_utils.py: 217 | Classes: None 218 | Functions: magnitude, db_scale, phase_difference, phase_coherence, rms, angle 219 | Imports: .types, numpy as np 220 | 221 | stemprover\common\spectral_utils.py: 222 | Classes: None 223 | Functions: calculate_band_energy 224 | Imports: typing, .audio_utils, numpy as np, .types, soundfile as sf, librosa 225 | 226 | stemprover\common\types.py: 227 
| Classes: None 228 | Functions: None 229 | Imports: librosa, typing, numpy as np, torch 230 | 231 | stemprover\common\__init__.py: 232 | Classes: None 233 | Functions: None 234 | Imports: .math_utils, .spectral_utils, .types, .audio_utils 235 | 236 | stemprover\core\audio.py: 237 | Classes: import, class, for 238 | Functions: is_mono, to_mono, duration_seconds, is_stereo 239 | Imports: librosa, typing, numpy as np, dataclasses 240 | 241 | stemprover\core\config.py: 242 | Classes: from, SeparatorBackend, class 243 | Functions: None 244 | Imports: stemprover.core.types, enum, typing, pathlib, dataclasses 245 | 246 | stemprover\core\types.py: 247 | Classes: from, for, class 248 | Functions: hop_samples, segment_samples 249 | Imports: typing, pathlib, matplotlib.pyplot as plt, .audio, dataclasses 250 | 251 | stemprover\enhancement\base.py: 252 | Classes: for, EnhancementProcessor 253 | Functions: __init__, enhance, validate 254 | Imports: abc, typing, ...core.types, ...core.audio 255 | 256 | stemprover\enhancement\controlnet.py: 257 | Classes: PhaseAwareControlNet, PhaseAwareZeroConv, ArtifactDetector 258 | Functions: forward, __init__ 259 | Imports: torch.nn as nn, typing, torch 260 | 261 | stemprover\enhancement\training.py: 262 | Classes: ArtifactDataset, ControlNetTrainer 263 | Functions: __init__, validate, load_checkpoint, train, train_step, __len__, save_checkpoint, frequency_loss, prepare_training, __getitem__ 264 | Imports: torch.nn.functional as F, torch.utils.data 265 | 266 | stemprover\io\audio.py: 267 | Classes: None 268 | Functions: save_audio_file, load_audio_file 269 | Imports: librosa, typing, ..core.audio, pathlib, soundfile as sf, numpy as np 270 | 271 | stemprover\preparation\base.py: 272 | Classes: None 273 | Functions: None 274 | Imports: None 275 | 276 | stemprover\preparation\segments\generator.py: 277 | Classes: from, TrainingSegmentGenerator 278 | Functions: _create_backing_combinations, __init__, generate_segments, _has_vocal_content 279 | 
Imports: typing, core.audio, common.types, torch.utils.data, common.audio_utils, core.types, pathlib, numpy as np, dataclasses 280 | 281 | stemprover\preparation\segments\__init__.py: 282 | Classes: None 283 | Functions: None 284 | Imports: None 285 | 286 | stemprover\separation\base.py: 287 | Classes: class, from, VocalSeparator, StemProcessor, for 288 | Functions: cleanup, __init__, process_stems, _separate_vocals, __enter__, _apply_controlnet_enhancement, _load_stereo_pair, __exit__, separate_and_analyze, _save_audio_files 289 | Imports: stemprover.core.types, enum, typing, stemprover.core.config, abc, ..core.audio, pathlib, ..core.types, dataclasses 290 | 291 | stemprover\separation\spleeter.py: 292 | Classes: from, class, SpleeterSeparator 293 | Functions: cleanup, __init__, capabilities, separate, _load_mono, _separate_vocals, _load_stereo_pair, _setup_tensorflow, separate_and_analyze, separate_file, _save_audio_files 294 | Imports: typing, .base, ..analysis.spectral, spleeter.separator, ..core.audio, datetime, pathlib, ..core.types, ..io.audio, numpy as np, dataclasses, tensorflow as tf 295 | 296 | stemprover\training\dataset.py: 297 | Classes: TrainingDataset 298 | Functions: __getitem__, __init__, __len__ 299 | Imports: typing, torch.utils.data, .preparation.segments, .core.types 300 | 301 | stemprover\training\pairs.py: 302 | Classes: None 303 | Functions: None 304 | Imports: None 305 | ``` 306 |
307 | 308 | ## Configuration Details: 309 | - src_dir: Source directory to analyze. Defaults to "./src" in cwd. 310 | 311 | - summary_level: Controls summarization aggressiveness: 312 | - NONE: Full code output 313 | - INTERFACE: Preserve signatures/types/docstrings only 314 | - CORE: Include core logic, skip standard patterns 315 | 316 | - exclude_patterns: Files matching these patterns are skipped. 317 | - Note: ChimeraCat always excludes itself to avoid recursion. 318 | 319 | - rules: Override default summarization rules with custom SummaryRules. 320 | - Useful for domain-specific boilerplate detection. 321 | 322 | - elide_disconnected_deps: When True, omit modules with no dependencies 323 | from visualization. 324 | - Useful for cleaner dependency graphs. 325 | 326 | - generate_report: Controls inclusion of dependency analysis. 327 | - Defaults to True for INTERFACE/CORE summaries. 328 | 329 | - report_only: Generate only dependency report without code output. 330 | 331 | - use_numeric: Use numbers instead of letters for node labels. 332 | 333 | ## API Example: 334 | ```python 335 | # Generate both notebook and summarized Python file 336 | cat = ChimeraCat( 337 | "src", 338 | summary_level=SummaryLevel.INTERFACE, 339 | exclude_patterns=["tests"], 340 | elide_disconnected_deps=True 341 | ) 342 | notebook = cat.generate_colab_notebook() 343 | py_file = cat.generate_concat_file() 344 | ``` 345 | 346 | Though for most cases, what you probably want is the CLI: 347 | 348 | ## CLI Usage 349 | ChimeraCat installs with a cli tool `ccat`. The configuration dictionary can be manipulated via command-line arguments. 
350 | 351 | ```bash 352 | usage: ccat [-h] [-s {interface,core,none}] [-e EXCLUDE [EXCLUDE ...]] [-o OUTPUT] 353 | [-t {py,ipynb,both}] [-r] [--report-only] [--numeric-labels] [--no-report] 354 | [--elide-disconnected] [-d] [--debug-prefix DEBUG_PREFIX] [--version] 355 | [src_dir] 356 | 357 | ChimeraCat (ccat) - The smart code concatenator 358 | /\___/\ 359 | ( o o ) Intelligently combines Python source files 360 | ( =^= ) while maintaining dependencies and readability 361 | (______) 362 | 363 | 364 | positional arguments: 365 | src_dir Source directory containing Python files (default: src) 366 | 367 | options: 368 | -h, --help show this help message and exit 369 | -s {interface,core,none}, --summary-level {interface,core,none} 370 | Code summarization level (for .py output only, default: none) 371 | -e EXCLUDE [EXCLUDE ...], --exclude EXCLUDE [EXCLUDE ...] 372 | Patterns to exclude from processing (e.g., "test" "temp") 373 | -o OUTPUT, --output OUTPUT 374 | Output file name (without extension, default: based on output type 375 | and summary level) 376 | -t {py,ipynb,both}, --output-type {py,ipynb,both} 377 | Output file type (default: both) 378 | -r, --report Generate dependency report and ASCII visualization. By default, 379 | reports are included for interface/core summary levels and excluded 380 | for complete code and notebooks. This flag overrides that behavior. 
381 |   --report-only         Suppress code summarization or notebook concatenation 382 |   --numeric-labels      Use numbers instead of letters for node labels 383 |   --no-report           Suppress dependency report generation even for interface/core 384 |                         summary levels 385 |   --elide-disconnected 386 |                         Remove modules with no dependencies from visualization 387 |   -d, --debug           Enable debug output 388 |   --debug-prefix DEBUG_PREFIX 389 |                         Prefix for debug messages (default: CCAT:) 390 |   --version             show program's version number and exit 391 | ``` 392 | 393 | """ 394 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "chimeracat" 7 | dynamic = ["version"] 8 | description = "ChimeraCat (ccat) - The smart code concatenator" 9 | readme = "README.md" 10 | requires-python = ">=3.10" 11 | license = {text = "MIT"} 12 | authors = [ 13 |     {name = "Scott VR", email = "scottvr@paperclipmaximizer.ai"} 14 | ] 15 | dependencies = [ 16 |     "networkx", "phart", 17 | ] 18 | 19 | [project.scripts] 20 | ccat = "chimeracat.chimeracat:cli_main" 21 | 22 | [tool.hatch.build] 23 | packages = ["src/chimeracat"] 24 | 25 | [tool.hatch.version] 26 | path = "src/chimeracat/__init__.py" 27 | -------------------------------------------------------------------------------- /src/chimeracat/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.2.5" 2 | __all__ = ["ChimeraCat"] 3 | 4 | from .chimeracat import ChimeraCat -------------------------------------------------------------------------------- /src/chimeracat/chimeracat.py: -------------------------------------------------------------------------------- 1 | """ChimeraCat: Intelligent code concatenator and summarizer for LLM analysis.
2 | 3 | Analyzes Python codebases to generate consolidated files optimized for LLM processing, 4 | with configurable summarization to reduce token usage while preserving key information. 5 | 6 | Args: 7 | src_dir (str): Source directory containing Python files (default: "src") 8 | summary_level (SummaryLevel): Summarization level (INTERFACE/CORE/NONE) 9 | exclude_patterns (List[str]): Patterns to exclude from processing 10 | rules (Optional[SummaryRules]): Custom summarization rules 11 | elide_disconnected_deps (bool): Omit disconnected modules from visualization 12 | generate_report (Optional[bool]): Include dependency analysis report 13 | report_only (bool): Generate only the dependency report 14 | use_numeric (bool): Use numeric instead of alpha labels in visualizations 15 | debug (bool): Enable debug output 16 | debug_str (str): Prefix for debug messages 17 | 18 | Key Features: 19 | - Analyzes Python files for imports and definitions 20 | - Builds dependency graphs using NetworkX 21 | - Generates both .py files and Colab notebooks 22 | - Smart handling of internal/external imports 23 | - Configurable code summarization 24 | 25 | Configuration Details: 26 | src_dir: Source directory to analyze. Defaults to "./src" in cwd. 27 | 28 | summary_level: Controls summarization aggressiveness: 29 | - NONE: Full code output 30 | - INTERFACE: Preserve signatures/types/docstrings only 31 | - CORE: Include core logic, skip standard patterns 32 | 33 | exclude_patterns: Files matching these patterns are skipped. 34 | Note: ChimeraCat always excludes itself to avoid recursion. 35 | 36 | rules: Override default summarization rules with custom SummaryRules. 37 | Useful for domain-specific boilerplate detection. 38 | 39 | elide_disconnected_deps: When True, omit modules with no dependencies 40 | from visualization. Useful for cleaner dependency graphs. 41 | 42 | generate_report: Controls inclusion of dependency analysis. 43 | Defaults to True for INTERFACE/CORE summaries. 
44 | 45 | report_only: Generate only dependency report without code output. 46 | 47 | use_numeric: Use numbers instead of letters for node labels. 48 | 49 | Example: 50 | ```python 51 | # Generate both notebook and summarized Python file 52 | cat = ChimeraCat( 53 | "src", 54 | summary_level=SummaryLevel.INTERFACE, 55 | exclude_patterns=["tests"], 56 | elide_disconnected_deps=True 57 | ) 58 | notebook = cat.generate_colab_notebook() 59 | py_file = cat.generate_concat_file() 60 | ``` 61 | """ 62 | 63 | import re 64 | from pathlib import Path 65 | import networkx as nx 66 | 67 | from enum import Enum 68 | from typing import Dict, List, Set, Optional, Pattern 69 | from dataclasses import dataclass, field 70 | from datetime import datetime 71 | from phart import ASCIIRenderer, LayoutOptions, NodeStyle 72 | from . import __version__ 73 | 74 | import argparse 75 | import sys 76 | 77 | class SummaryLevel(Enum): 78 | INTERFACE = "interface" # Just interfaces/types/docstrings 79 | CORE = "core" # + Core logic, skip standard patterns 80 | NONE = "none" # Full code 81 | 82 | @dataclass 83 | class SummaryPattern: 84 | """Pattern for code summarization with explanation""" 85 | pattern: str 86 | replacement: str 87 | explanation: str 88 | flags: re.RegexFlag = re.MULTILINE 89 | 90 | def apply(self, content: str) -> str: 91 | return re.sub(self.pattern, f"{self.replacement} # {self.explanation}\n", 92 | content, flags=self.flags) 93 | 94 | @dataclass 95 | class SummaryRules: 96 | """Collection of patterns for different summary levels""" 97 | interface: List[SummaryPattern] = field(default_factory=list) 98 | core: List[SummaryPattern] = field(default_factory=list) 99 | 100 | @classmethod 101 | def default_rules(cls) -> 'SummaryRules': 102 | return cls( 103 | interface=[ 104 | SummaryPattern( 105 | pattern=r'(class\s+\w+(?:\([^)]*\))?):(?:\s*"""[^"]*""")?[^\n]*(?:\n(?!class|def)[^\n]*)*', 106 | replacement=r'\1:\n ... 
# ', 107 | explanation="Class interface preserved", 108 | flags=re.MULTILINE 109 | ), 110 | SummaryPattern( 111 | pattern=r'(def\s+\w+\s*\([^)]*\)):(?:\s*"""[^"]*""")?[^\n]*(?:\n(?!class|def)[^\n]*)*', 112 | replacement=r'\1:\n ... # ', 113 | explanation="Function signature preserved", 114 | flags=re.MULTILINE 115 | ) 116 | ], 117 | core=[ 118 | SummaryPattern( 119 | pattern=r'(def\s+get_\w+\([^)]*\)):\s*return[^\n]*\n', 120 | replacement=r'\1:\n ... # ', 121 | explanation="Getter method summarized" 122 | ), 123 | SummaryPattern( 124 | pattern=r'(def\s*__init__\s*\([^)]*\)):[^\n]*(?:\n(?!def|class)[^\n]*)*', 125 | replacement=r'\1:\n ... # ', 126 | explanation="Standard initialization summarized" 127 | ) 128 | ] 129 | ) 130 | @dataclass 131 | class ModuleInfo: 132 | """Information about a Python module""" 133 | path: Path 134 | content: str 135 | imports: Set[str] 136 | classes: Set[str] 137 | functions: Set[str] 138 | 139 | class ChimeraCat: 140 | """Utility to concatenate modular code into Colab-friendly single files""" 141 | def __init__(self, 142 | src_dir: str = "src", 143 | summary_level: SummaryLevel = SummaryLevel.NONE, 144 | exclude_patterns: List[str] = None, 145 | rules: Optional[SummaryRules] = None, 146 | elide_disconnected_deps: bool = False, 147 | generate_report: Optional[bool] = None, 148 | report_only: bool = False, 149 | use_numeric: bool = False, 150 | debug: bool = False, 151 | debug_str = ""): 152 | 153 | self.src_dir = Path(src_dir) 154 | self.summary_level = summary_level 155 | self.report_only = report_only 156 | self.use_numeric = use_numeric 157 | self.rules = rules or SummaryRules.default_rules() 158 | self.modules: Dict[Path, ModuleInfo] = {} 159 | self.dep_graph = nx.DiGraph() 160 | self.self_path = Path(__file__).resolve() 161 | self.exclude_patterns = exclude_patterns or [] 162 | self.debug = debug 163 | self.elide_disconnected_deps = elide_disconnected_deps 164 | self.debug_str = debug_str 165 | 166 | if generate_report is None: 167 
| self.generate_report = summary_level in (SummaryLevel.INTERFACE, SummaryLevel.CORE) 168 | else: 169 | self.generate_report = generate_report 170 | 171 | def _debug_print(self, *args, **kwargs): 172 | """Helper for debug output""" 173 | if self.debug: 174 | print(f"{self.debug_str}: {args} {list(kwargs.items())}") 175 | 176 | def should_exclude(self, file_path: Path) -> bool: 177 | """Check if a file should be excluded from processing""" 178 | # Always exclude self 179 | self._debug_print(file_path.resolve(), self.self_path) 180 | if file_path.resolve() == self.self_path: 181 | if self.debug: 182 | self._debug_print(f"excluding self {self.self_path}") 183 | return True 184 | 185 | # Check against exclude patterns 186 | str_path = str(file_path) 187 | self._debug_print("str_path",str_path) 188 | for pattern in self.exclude_patterns: 189 | self._debug_print("comparing", pattern, str_path) 190 | return any(pattern in str_path for pattern in self.exclude_patterns) 191 | 192 | def analyze_file(self, file_path: Path) -> Optional[ModuleInfo]: 193 | """Analyze a Python file for imports and definitions""" 194 | if self.should_exclude(file_path): 195 | self._debug_print(f'excluding {file_path}') 196 | return None 197 | 198 | with open(file_path, 'r') as f: 199 | content = f.read() 200 | 201 | # Find imports 202 | import_pattern = r'^(?:from\s+(\S+)\s+)?import\s+([^#\n]+)' 203 | imports = set() 204 | for match in re.finditer(import_pattern, content, re.MULTILINE): 205 | if match.group(1): # from X import Y 206 | imports.add(match.group(1)) 207 | else: # import X 208 | imports.add(match.group(2).split(',')[0].strip()) 209 | 210 | # Find class definitions 211 | class_pattern = r'class\s+(\w+)' 212 | classes = set(re.findall(class_pattern, content)) 213 | 214 | # Find function definitions 215 | func_pattern = r'def\s+(\w+)' 216 | functions = set(re.findall(func_pattern, content)) 217 | 218 | return ModuleInfo( 219 | path=file_path, 220 | content=content, 221 | imports=imports, 
222 | classes=classes, 223 | functions=functions 224 | ) 225 | 226 | def _summarize_content(self, content: str) -> str: 227 | """Apply summary patterns based on current level""" 228 | if not isinstance(content, str): 229 | raise TypeError(f"Expected string content but got {type(content)}: {content}") 230 | 231 | if self.summary_level == SummaryLevel.NONE: 232 | return content 233 | 234 | result = content 235 | rules = self.rules or SummaryRules.default_rules() 236 | 237 | # Apply patterns based on level 238 | if self.summary_level == SummaryLevel.INTERFACE: 239 | for pattern in rules.interface: 240 | result = pattern.apply(result) 241 | elif self.summary_level == SummaryLevel.CORE: 242 | # Apply both interface and core patterns 243 | for pattern in rules.interface + rules.core: 244 | result = pattern.apply(result) 245 | 246 | return result 247 | 248 | def _process_imports(self, content: str, module_path: Path) -> str: 249 | """Process and adjust imports for concatenated context""" 250 | if not isinstance(content, str): 251 | raise TypeError(f"Expected string content but got {type(content)}: {content}") 252 | 253 | def replace_relative_import(match: re.Match) -> str: 254 | indent = len(match.group()) - len(match.group().lstrip()) 255 | spaces = ' ' * indent 256 | original_line = match.group() 257 | return f'{spaces}"""RELATIVE_IMPORT: \n{original_line}\n{spaces}"""' 258 | 259 | pattern = r'^\s*from\s+\..*$' 260 | return re.sub(pattern, replace_relative_import, content, flags=re.MULTILINE) 261 | 262 | def build_dependency_graph(self): 263 | """Build a dependency graph with proper relative import resolution""" 264 | self._debug_print("\nBuilding dependency graph...") 265 | 266 | # First pass: Create nodes 267 | for file_path in self.src_dir.rglob("*.py"): 268 | module_info = self.analyze_file(file_path) 269 | if module_info is not None: 270 | self.modules[file_path] = module_info 271 | self.dep_graph.add_node(file_path) 272 | self._debug_print(f"Added node: 
{file_path.relative_to(self.src_dir)}") 273 | if module_info.imports: 274 | self._debug_print(f" Found imports: {', '.join(module_info.imports)}") 275 | 276 | # Second pass: Add edges 277 | for file_path, module in self.modules.items(): 278 | current_module = str(file_path.relative_to(self.src_dir)).replace('\\', '/') 279 | module_dir = str(file_path.parent.relative_to(self.src_dir)).replace('\\', '/') 280 | 281 | for imp in module.imports: 282 | if imp.startswith('.'): 283 | # Handle relative imports 284 | dots = imp.count('.') 285 | parts = module_dir.split('/') 286 | 287 | # Go up directory tree based on dot count 288 | if dots > len(parts): 289 | continue # Invalid relative import 290 | 291 | base_path = '/'.join(parts[:-dots] if dots > 0 else parts) 292 | target_module = imp.lstrip('.') 293 | 294 | if target_module: 295 | full_target = f"{base_path}/{target_module.replace('.', '/')}.py" 296 | else: 297 | full_target = f"{base_path}/__init__.py" 298 | 299 | # Find matching module 300 | for other_path in self.modules: 301 | other_rel = str(other_path.relative_to(self.src_dir)).replace('\\', '/') 302 | if other_rel == full_target: 303 | self._debug_print(f" Adding edge: {other_rel} -> {current_module}") 304 | self.dep_graph.add_edge(file_path, other_path) 305 | else: 306 | # Handle absolute imports within our project 307 | potential_path = imp.replace('.', '/') + '.py' 308 | for other_path in self.modules: 309 | other_rel = str(other_path.relative_to(self.src_dir)).replace('\\', '/') 310 | if other_rel.endswith(potential_path): 311 | self._debug_print(f" Adding edge: {other_rel} -> {current_module}") 312 | self.dep_graph.add_edge(file_path, other_path) 313 | 314 | def generate_concat_file(self, output_file: str = "colab_combined.py") -> str: 315 | """Generate a single file combining all modules in dependency order""" 316 | self.build_dependency_graph() 317 | 318 | header = f"""{self._get_header_content()} 319 | Summary Level: {self.summary_level.value} 320 | """ 
321 | 322 | # Start with external imports 323 | output = [ 324 | header, 325 | '"""', 326 | self.generate_dependency_ascii(), 327 | "# External imports",'"""', 328 | *self._get_external_imports(), 329 | "\n# Combined module code\n" 330 | ] 331 | 332 | # Get files in dependency order 333 | sorted_files = self._get_sorted_files() 334 | 335 | # Create a map of original module paths to their contents 336 | module_contents = {} 337 | 338 | # First pass: collect and process all module contents 339 | for file_path in sorted_files: 340 | if file_path in self.modules: 341 | module = self.modules[file_path] 342 | rel_path = file_path.relative_to(self.src_dir) 343 | 344 | # Process imports and summarize content 345 | processed_content = self._process_imports( 346 | self._summarize_content(module.content), 347 | file_path 348 | ) 349 | 350 | module_contents[file_path] = { 351 | 'content': processed_content, 352 | 'rel_path': rel_path 353 | } 354 | 355 | # Second pass: output in correct order with headers 356 | for file_path in sorted_files: 357 | if file_path in module_contents: 358 | info = module_contents[file_path] 359 | output.extend([ 360 | f"\n# From {info['rel_path']}", 361 | info['content'] 362 | ]) 363 | 364 | with open(output_file, 'w') as f: 365 | f.write('\n'.join(output)) 366 | 367 | return output_file 368 | 369 | def _get_external_imports(self) -> List[str]: 370 | """Get sorted list of external imports from all modules""" 371 | external_imports = set() 372 | for module in self.modules.values(): 373 | external_imports.update( 374 | imp for imp in module.imports 375 | if not any(str(imp).startswith(str(p.relative_to(self.src_dir).parent)) 376 | for p in self.modules) 377 | and not imp.startswith('.') 378 | ) 379 | 380 | # Format and sort the import statements 381 | return sorted(f"import {imp}" for imp in external_imports) 382 | 383 | def _paths_match(self, path: Path, import_parts: List[str]) -> bool: 384 | """Check if a path matches an import statement""" 385 | 
path_parts = list(path.parts) 386 | return len(path_parts) == len(import_parts) and \ 387 | all(p == i for p, i in zip(path_parts, import_parts)) 388 | 389 | def _get_sorted_files(self) -> List[Path]: 390 | """Get files sorted by dependencies""" 391 | try: 392 | # Topological sort ensures dependencies come before dependents 393 | return list(nx.topological_sort(self.dep_graph)) 394 | 395 | except nx.NetworkXUnfeasible as e: 396 | # If we detect a cycle, identify and report it 397 | cycles = list(nx.simple_cycles(self.dep_graph)) 398 | self._debug_print("Warning: Circular dependencies detected:") 399 | for cycle in cycles: 400 | cycle_path = ' -> '.join(p.name for p in cycle) 401 | self._debug_print(f" {cycle_path}") 402 | 403 | # Fall back to simple ordering but warn user 404 | self._debug_print("Using simple ordering instead.") 405 | return list(self.modules.keys()) 406 | 407 | def visualize_dependencies(self, output_file: str = "dependencies.png"): 408 | """Optional: Visualize the dependency graph""" 409 | try: 410 | import matplotlib.pyplot as plt 411 | pos = nx.spring_layout(self.dep_graph) 412 | plt.figure(figsize=(12, 8)) 413 | nx.draw(self.dep_graph, pos, with_labels=True, 414 | labels={p: p.name for p in self.dep_graph.nodes()}, 415 | node_color='lightblue', 416 | node_size=2000, 417 | font_size=8) 418 | plt.savefig(output_file) 419 | plt.close() 420 | return output_file 421 | except ImportError: 422 | print("matplotlib not available for visualization") 423 | return None 424 | 425 | def get_dependency_report(self) -> str: 426 | """Generate a detailed dependency report organized in logical sections. 427 | 428 | Returns a string containing sections in this order: 429 | 1. Header 430 | 2. Directory Structure 431 | 3. Import Summary 432 | 4. Module Statistics 433 | 5. PHART Visualization with Legend 434 | 6. Dependency Chains 435 | 7. 
Module Details 436 | """ 437 | # Header section - introduces the report 438 | header = ["Dependency Analysis Report", "=" * 25, ""] 439 | 440 | # Directory tree section - shows file organization 441 | directory_structure = [ 442 | "Directory Structure:", 443 | self._get_tree_output(), 444 | "" 445 | ] 446 | 447 | # Import summary section - external and internal dependencies 448 | import_summary = [ 449 | "Import Summary:", 450 | self._generate_import_summary(), 451 | "" 452 | ] 453 | 454 | # Module statistics and graph header 455 | statistics = [ 456 | "Module Statistics:", 457 | f"Total modules: {len(self.modules)}", 458 | f"Total dependencies: {self.dep_graph.number_of_edges()}", 459 | "", 460 | "Module Dependencies:", 461 | "-------------------", 462 | "" 463 | ] 464 | 465 | # PHART visualization - includes the graph and its legend 466 | # Note: generate_dependency_ascii() returns a pre-formatted string 467 | visualization = [self.generate_dependency_ascii(), ""] 468 | 469 | # Dependency chains section - shows topological ordering 470 | chains = ["Dependency Chains:", "-" * 17] 471 | try: 472 | sorted_files = list(nx.topological_sort(self.dep_graph)) 473 | for idx, file in enumerate(sorted_files): 474 | deps = list(self.dep_graph.predecessors(file)) 475 | chains.append(f"{idx+1}. 
{file.relative_to(self.src_dir)}") 476 | if deps: 477 | chains.append( 478 | f" Depends on: {', '.join(str(d.relative_to(self.src_dir)) for d in deps)}" 479 | ) 480 | chains.append("") 481 | except nx.NetworkXUnfeasible: 482 | chains.extend([ 483 | "Warning: Circular dependencies detected!", 484 | "Cycles found:", 485 | *[f" {' -> '.join(str(p.relative_to(self.src_dir)) for p in cycle)}" 486 | for cycle in nx.simple_cycles(self.dep_graph)], 487 | "" 488 | ]) 489 | 490 | # Module details section - detailed information about each module 491 | details = ["Module Details:", "-" * 13] 492 | for path, module in self.modules.items(): 493 | details.extend([ 494 | f"\n{path.relative_to(self.src_dir)}:", 495 | f"Classes: {', '.join(module.classes) if module.classes else 'None'}", 496 | f"Functions: {', '.join(module.functions) if module.functions else 'None'}", 497 | f"Imports: {', '.join(module.imports) if module.imports else 'None'}" 498 | ]) 499 | 500 | # Combine all sections in the desired order using extend() 501 | # This preserves the proper formatting of each section 502 | report = [] 503 | for section in [header, directory_structure, import_summary, statistics, 504 | visualization, chains, details]: 505 | report.extend(section) 506 | 507 | # Join all lines with newlines to create the final report 508 | return '\n'.join(report) 509 | 510 | def _get_header_content(self): 511 | return f""" 512 | # Generated by ChimeraCat 513 | # /\___/\ ChimeraCat 514 | # ( o o ) smart code concatenator/summarizer 515 | # ( =^= ) ccat {__version__} https://github.com/scottvr/chimeracat 516 | # (______) Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}""" 517 | 518 | def generate_colab_notebook(self, output_file: str = "colab_combined.ipynb"): 519 | """Generate a Jupyter notebook with the combined code""" 520 | py_file = self.generate_concat_file("temp_combined.py") 521 | 522 | with open(py_file, 'r') as f: 523 | code = f.read() 524 | 525 | timestamp = 
datetime.now().strftime("%Y-%m-%d %H:%M:%S") 526 | notebook = { 527 | "cells": [ 528 | { 529 | "cell_type": "markdown", 530 | "metadata": {}, 531 | "source": [ 532 | "##Notebook Generated by ChimeraCat\n" 533 | ], 534 | }, 535 | { 536 | "cell_type": "code", 537 | "metadata": {}, 538 | "source": code.splitlines(keepends=True), 539 | "execution_count": None, 540 | "outputs": [] 541 | }, 542 | { 543 | "cell_type": "markdown", 544 | "metadata": {}, 545 | "source": [ 546 | "```\n", 547 | f"{self._get_header_content()}".splitlines(keepends=True), 548 | "```\n" 549 | ] 550 | } 551 | 552 | ], 553 | "metadata": { 554 | "kernelspec": { 555 | "display_name": "Python 3", 556 | "language": "python", 557 | "name": "python3" 558 | } 559 | }, 560 | "nbformat": 4, 561 | "nbformat_minor": 4 562 | } 563 | 564 | import json 565 | with open(output_file, 'w') as f: 566 | json.dump(notebook, f, indent=2) 567 | 568 | Path("temp_combined.py").unlink() # Clean up temporary file 569 | return output_file 570 | 571 | def generate_dependency_ascii(self) -> str: 572 | """Generate ASCII representation of dependency graph""" 573 | 574 | display_graph = nx.DiGraph() 575 | 576 | # Mapping of short labels to original node names 577 | label_mapping = {} 578 | label_index = 0 579 | 580 | def get_short_label(index): 581 | """Generate a short label (e.g., A, B, ..., AA, AB).""" 582 | return str(index + 1) if self.use_numeric else ''.join(chr(65 + i) for i in divmod(index, 26)[1::-1] or [index % 26]) 583 | 584 | # Add nodes and edges with relative path names 585 | for node in self.dep_graph.nodes(): 586 | short_label = get_short_label(label_index) 587 | label_mapping[short_label] = node 588 | display_graph.add_node(short_label) 589 | label_index += 1 590 | 591 | # Add edges using new node names 592 | for src, dst in self.dep_graph.edges(): 593 | src_label = [k for k, v in label_mapping.items() if v == src][0] 594 | dst_label = [k for k, v in label_mapping.items() if v == dst][0] 595 | 
display_graph.add_edge(src_label, dst_label) 596 | 597 | if self.elide_disconnected_deps: 598 | self._debug_print("removing disconnected imports (no dependent relationship)") 599 | self._debug_print(display_graph) 600 | display_graph.remove_nodes_from(list(nx.isolates(display_graph))) 601 | self._debug_print(display_graph) 602 | #TODO: enable phart cli options from ccat command-line 603 | options = LayoutOptions( 604 | node_style=NodeStyle.SQUARE, 605 | node_spacing=4, 606 | layer_spacing=3 607 | ) 608 | 609 | # Generate the legend 610 | legend_lines = ["Legend:"] 611 | for short_label, original_node in label_mapping.items(): 612 | legend_lines.append(f"{short_label}: {original_node}") 613 | legend = "\n".join(legend_lines) 614 | 615 | renderer = ASCIIRenderer(display_graph, options=options) 616 | ascii_art = f""" 617 | Directory Structure: 618 | -------------------- 619 | {self._get_tree_output()} 620 | 621 | Module Dependencies: 622 | -------------------- 623 | PHART Module Dependency Graph Visualization (see legend below): 624 | 625 | {renderer.render()} 626 | {legend} 627 | {"(non-dependent modules elided from visualization)" if self.elide_disconnected_deps else "node names detached from the network and printed in isolation are non-connected/likely unused."} 628 | 629 | """ 630 | return ascii_art 631 | 632 | def _get_tree_output(self) -> str: 633 | """Get tree command output""" 634 | try: 635 | import subprocess 636 | result = subprocess.run( 637 | ['tree', str(self.src_dir)], 638 | capture_output=True, 639 | text=True 640 | ) 641 | return result.stdout 642 | except FileNotFoundError: 643 | # Fallback to simple directory listing if tree not available 644 | return '\n'.join(str(p.relative_to(self.src_dir)) 645 | for p in self.src_dir.rglob('*.py')) 646 | 647 | def _generate_import_summary(self) -> str: 648 | """Generate summary of imports""" 649 | external_imports = set() 650 | internal_deps = set() 651 | 652 | for module in self.modules.values(): 653 | for imp 
in module.imports: 654 | if not imp.startswith('.'): 655 | external_imports.add(imp) 656 | else: 657 | internal_deps.add(imp) 658 | 659 | return f""" 660 | External Dependencies: 661 | {', '.join(sorted(external_imports))} 662 | 663 | Internal Dependencies: 664 | {', '.join(sorted(internal_deps))} 665 | """ 666 | 667 | def get_default_filename(summary_level: SummaryLevel, is_notebook: bool = False) -> str: 668 | """Get the default base filename based on output type and summary level""" 669 | if is_notebook: 670 | return "colab_ready" # Always full code for notebooks 671 | 672 | summary_level_names = { 673 | SummaryLevel.INTERFACE: "signatures_only", 674 | SummaryLevel.CORE: "essential_code", 675 | SummaryLevel.NONE: "complete_code" 676 | } 677 | return summary_level_names[summary_level] 678 | 679 | def process_cli_args(args: Optional[List[str]] = None) -> dict: 680 | """Process command line arguments and return a config dict for ChimeraCat""" 681 | parser = create_cli_parser() 682 | parsed_args = parser.parse_args(args) 683 | 684 | # Convert summary level string to enum 685 | summary_level = getattr(SummaryLevel, parsed_args.summary_level.upper()) 686 | 687 | # Build config dict 688 | config = { 689 | 'src_dir': parsed_args.src_dir, 690 | 'summary_level': summary_level, 691 | 'exclude_patterns': parsed_args.exclude, 692 | 'elide_disconnected_deps': parsed_args.elide_disconnected, 693 | 'generate_report': parsed_args.report, 694 | 'report_only': parsed_args.report_only, 695 | 'use_numeric': parsed_args.use_numeric, 696 | 'debug': parsed_args.debug, 697 | 'debug_str': parsed_args.debug_prefix if parsed_args.debug else "" 698 | } 699 | 700 | return config, parsed_args 701 | 702 | def cli_main(args: Optional[List[str]] = None) -> int: 703 | """Main CLI entry point for ChimeraCat""" 704 | try: 705 | config, args = process_cli_args(args) 706 | 707 | # Create ChimeraCat instance for Python output (with summarization) 708 | cat = ChimeraCat(**config) 709 | 710 | if 
args.report_only: 711 | cat.build_dependency_graph() 712 | print(cat.get_dependency_report()) 713 | 714 | else: 715 | # Get base filename from argument or generate default 716 | base_filename = args.output 717 | 718 | if args.output_type in ('py', 'both'): 719 | py_filename = f"{base_filename or get_default_filename(config['summary_level'])}.py" 720 | py_file = cat.generate_concat_file(py_filename) 721 | print(f"Generated Python file: {py_file}") 722 | 723 | if args.output_type in ('ipynb', 'both'): 724 | # Create new instance with NONE summary level for notebook 725 | notebook_cat = ChimeraCat( 726 | src_dir=config['src_dir'], 727 | exclude_patterns=config['exclude_patterns'], 728 | elide_disconnected_deps=config['elide_disconnected_deps'], 729 | debug=config['debug'], 730 | debug_str=config['debug_str'], 731 | generate_report=config['generate_report'], 732 | summary_level=SummaryLevel.NONE 733 | ) 734 | 735 | nb_filename = f"{base_filename or get_default_filename(summary_level=SummaryLevel.NONE, is_notebook=True)}.ipynb" 736 | nb_file = notebook_cat.generate_colab_notebook(nb_filename) 737 | print(f"Generated Jupyter notebook (complete code): {nb_file}") 738 | 739 | # If debug is enabled, show additional information regardless of report setting 740 | if args.debug or args.report: 741 | print(cat.get_dependency_report()) 742 | 743 | return 0 744 | 745 | except Exception as e: 746 | print(f"Error: {str(e)}", file=sys.stderr) 747 | if args.debug: 748 | import traceback 749 | traceback.print_exc() 750 | return 1 751 | 752 | def create_cli_parser() -> argparse.ArgumentParser: 753 | """Create the command-line argument parser for ChimeraCat""" 754 | parser = argparse.ArgumentParser( 755 | prog='ccat', 756 | description=""" 757 | ChimeraCat (ccat) - The smart code concatenator 758 | /\\___/\\ 759 | ( o o ) Intelligently combines Python source files 760 | ( =^= ) while maintaining dependencies and readability 761 | (______) 762 | """, 763 | 
formatter_class=argparse.RawDescriptionHelpFormatter 764 | ) 765 | 766 | parser.add_argument( 767 | 'src_dir', 768 | type=str, 769 | nargs='?', 770 | default='src', 771 | help='Source directory containing Python files (default: src)' 772 | ) 773 | 774 | parser.add_argument( 775 | '-s', '--summary-level', 776 | type=str, 777 | choices=['interface', 'core', 'none'], 778 | default='none', 779 | help='Code summarization level (for .py output only, default: none)' 780 | ) 781 | 782 | parser.add_argument( 783 | '-e', '--exclude', 784 | type=str, 785 | nargs='+', 786 | help='Patterns to exclude from processing (e.g., "test" "temp")' 787 | ) 788 | 789 | parser.add_argument( 790 | '-o', '--output', 791 | type=str, 792 | help='Output file name (without extension, default: based on output type and summary level)' 793 | ) 794 | 795 | parser.add_argument( 796 | '-t', '--output-type', 797 | type=str, 798 | choices=['py', 'ipynb', 'both'], 799 | default='both', 800 | help='Output file type (default: both)' 801 | ) 802 | 803 | parser.add_argument( 804 | '-r', '--report', 805 | action='store_true', 806 | default=None, 807 | help='Generate dependency report and ASCII visualization. By default, reports are ' 808 | 'included for interface/core summary levels and excluded for complete code ' 809 | 'and notebooks. This flag overrides that behavior.' 
810 | ) 811 | 812 | parser.add_argument( 813 | '--report-only', 814 | action='store_true', 815 | dest='report_only', 816 | help='Suppress code summarization or notebook cocatenization' 817 | ) 818 | 819 | parser.add_argument('--numeric-labels', action='store_true', dest="use_numeric", help='Use numbers instead of letters for node labels') 820 | 821 | # Add a no-report option for when you want to suppress reports in INTERFACE/CORE 822 | parser.add_argument( 823 | '--no-report', 824 | action='store_false', 825 | dest='report', 826 | help='Suppress dependency report generation even for interface/core summary levels' 827 | ) 828 | 829 | parser.add_argument( 830 | '--elide-disconnected', 831 | action='store_true', 832 | help='Remove modules with no dependencies from visualization' 833 | ) 834 | 835 | parser.add_argument( 836 | '-d', '--debug', 837 | action='store_true', 838 | help='Enable debug output' 839 | ) 840 | 841 | parser.add_argument( 842 | '--debug-prefix', 843 | type=str, 844 | default='CCAT:', 845 | help='Prefix for debug messages (default: CCAT:)' 846 | ) 847 | 848 | parser.add_argument( 849 | '--version', 850 | action='version', 851 | version=f'%(prog)s {__version__}' 852 | ) 853 | 854 | return parser 855 | 856 | # 857 | #if __name__ == "__main__": 858 | # debug = True 859 | # generate_report = True 860 | # # Example with different summary levels for Python output 861 | # examples = { 862 | # SummaryLevel.INTERFACE: "signatures_only.py", 863 | # SummaryLevel.CORE: "essential_code.py", 864 | # SummaryLevel.NONE: "complete_code.py", 865 | # } 866 | # 867 | # for level, filename in examples.items(): 868 | # cat = ChimeraCat( 869 | # "src", 870 | # exclude_patterns=["tools\\", ".ipynb", "cats\\"], 871 | # summary_level=level, 872 | # debug=debug, 873 | # debug_str="DBG: ", 874 | # elide_disconnected_deps=True, 875 | # generate_report=generate_report 876 | # ) 877 | # output_file = cat.generate_concat_file(filename) 878 | # print(f"Generated {level.value} 
version: {output_file}") 879 | # 880 | # # Generate notebook with complete code 881 | # cat = ChimeraCat( 882 | # "src", 883 | # exclude_patterns=["tools\\", ".ipynb", "cats\\"], 884 | # elide_disconnected_deps=True, 885 | # ) 886 | # output_file = cat.generate_colab_notebook("colab_ready.ipynb") 887 | # print(f"Generated notebook version: {output_file}") 888 | # 889 | # if debug: 890 | # cat.visualize_dependencies("module_deps.png") 891 | # if generate_report: 892 | # report = cat.get_dependency_report() 893 | # print(report) --------------------------------------------------------------------------------