├── .gitignore ├── README.md ├── pyproject.toml └── src └── chimeracat ├── __init__.py └── chimeracat.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 162 | #.idea/ 163 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ChimeraCat: 2 | Intelligent code concatenator and summarizer 3 | 4 | ---- 5 | ## What does it do? 6 | ChimeraCat (ccat) analyzes Python codebases to generate consolidated files optimized for LLM processing, 7 | with configurable summarization to reduce token usage while preserving key information in the code. It was originally to smartly concatenate multiple files from a python lib in development into a single notebook for testing in Colab, then I realized its fitness to the sharing-code-with-an-LLM purpose. 8 | 9 | Now includes cli `ccat` with all configuration exposed as command-line arguments, so it's ready to use without any development on your part. 
Just `pip install git+https://github.com/scottvr/chimeracat` into your venv. 10 | 11 | ## Key Features: 12 | - Analyzes Python files for imports and definitions 13 | - Builds dependency graphs using NetworkX 14 | - Displays graph visually as a DAG using ASCII via [PHART](https://github.com/scottvr/PHART) 15 | - Generates both .py files and Colab notebooks 16 | - Smart handling of internal/external imports 17 | - Configurable code summarization 18 | 19 | ## General Dependency and Interface mapping 20 | 21 | If you aren't trying to save on tokens and conserve context memory when pairing with an LLM, you may still find ChimeraCat's reporting functionality useful. 22 | 23 | ### CLI report generation example: 24 | ```bash 25 | ccat --report-only --elide-disconnected --numeric-labels ..\ASSET\stemprover\src > ccat-report.stemprover.txt 26 | ``` 27 |
28 | Example output from above command 29 | 30 | ``` 31 | Dependency Analysis Report 32 | ========================= 33 | 34 | Directory Structure: 35 | stemprover\__init__.py 36 | stemprover\analysis\base.py 37 | stemprover\analysis\spectral.py 38 | stemprover\analysis\artifacts\base.py 39 | stemprover\analysis\artifacts\high_freq.py 40 | stemprover\analysis\artifacts\preprocessor.py 41 | stemprover\analysis\selection\metrics.py 42 | stemprover\analysis\selection\segment_finder.py 43 | stemprover\analysis\selection\__init__.py 44 | stemprover\common\audio_utils.py 45 | stemprover\common\math_utils.py 46 | stemprover\common\spectral_utils.py 47 | stemprover\common\types.py 48 | stemprover\common\__init__.py 49 | stemprover\core\audio.py 50 | stemprover\core\config.py 51 | stemprover\core\types.py 52 | stemprover\enhancement\base.py 53 | stemprover\enhancement\controlnet.py 54 | stemprover\enhancement\training.py 55 | stemprover\io\audio.py 56 | stemprover\preparation\base.py 57 | stemprover\preparation\segments\generator.py 58 | stemprover\preparation\segments\__init__.py 59 | stemprover\separation\base.py 60 | stemprover\separation\spleeter.py 61 | stemprover\training\dataset.py 62 | stemprover\training\pairs.py 63 | 64 | Import Summary: 65 | 66 | External Dependencies: 67 | abc, common.audio_utils, common.types, core.audio, core.types, dataclasses, datetime, enum, json, librosa, matplotlib.pyplot as plt, numpy as np, pathlib, soundfile as sf, spleeter.separator, stemprover.common.audio_utils, stemprover.common.spectral_utils, stemprover.common.types, stemprover.core.audio, stemprover.core.config, stemprover.core.types, stemprover.enhancement.controlnet, tensorflow as tf, torch, torch.nn as nn, torch.nn.functional as F, torch.utils.data, typing 68 | 69 | Internal Dependencies: 70 | ...common.audio_utils, ...common.types, ...core.audio, ...core.types, ..analysis.spectral, ..common.audio_utils, ..common.math_utils, ..common.types, ..core.audio, ..core.types, 
..io.audio, .analysis.base, .analysis.spectral, .audio, .audio_utils, .base, .core.types, .math_utils, .metrics, .preparation.segments, .separation.base, .separation.spleeter, .spectral_utils, .types 71 | 72 | 73 | Module Statistics: 74 | Total modules: 28 75 | Total dependencies: 19 76 | 77 | Module Dependencies: 78 | ------------------- 79 | 80 | PHART Module Dependency Graph Visualization (see legend below): 81 | 82 | [1] [23] [25] [7] [8] [9] 83 | | | | | | | 84 | v v v v v v 85 | [10]<---[12]--+-[13]--+-[15]--->[16]--->[17]+-->[19] 86 | 87 | 88 | 89 | 90 | 91 | 92 | Legend: 93 | 1: ..\ASSET\stemprover\src\stemprover\__init__.py 94 | 2: ..\ASSET\stemprover\src\stemprover\analysis\base.py 95 | 3: ..\ASSET\stemprover\src\stemprover\analysis\spectral.py 96 | 4: ..\ASSET\stemprover\src\stemprover\analysis\artifacts\base.py 97 | 5: ..\ASSET\stemprover\src\stemprover\analysis\artifacts\high_freq.py 98 | 6: ..\ASSET\stemprover\src\stemprover\analysis\artifacts\preprocessor.py 99 | 7: ..\ASSET\stemprover\src\stemprover\analysis\selection\metrics.py 100 | 8: ..\ASSET\stemprover\src\stemprover\analysis\selection\segment_finder.py 101 | 9: ..\ASSET\stemprover\src\stemprover\analysis\selection\__init__.py 102 | 10: ..\ASSET\stemprover\src\stemprover\common\audio_utils.py 103 | 11: ..\ASSET\stemprover\src\stemprover\common\math_utils.py 104 | 12: ..\ASSET\stemprover\src\stemprover\common\spectral_utils.py 105 | 13: ..\ASSET\stemprover\src\stemprover\common\types.py 106 | 14: ..\ASSET\stemprover\src\stemprover\common\__init__.py 107 | 15: ..\ASSET\stemprover\src\stemprover\core\audio.py 108 | 16: ..\ASSET\stemprover\src\stemprover\core\config.py 109 | 17: ..\ASSET\stemprover\src\stemprover\core\types.py 110 | 18: ..\ASSET\stemprover\src\stemprover\enhancement\base.py 111 | 19: ..\ASSET\stemprover\src\stemprover\enhancement\controlnet.py 112 | 20: ..\ASSET\stemprover\src\stemprover\enhancement\training.py 113 | 21: ..\ASSET\stemprover\src\stemprover\io\audio.py 114 | 22: 
..\ASSET\stemprover\src\stemprover\preparation\base.py 115 | 23: ..\ASSET\stemprover\src\stemprover\preparation\segments\generator.py 116 | 24: ..\ASSET\stemprover\src\stemprover\preparation\segments\__init__.py 117 | 25: ..\ASSET\stemprover\src\stemprover\separation\base.py 118 | 26: ..\ASSET\stemprover\src\stemprover\separation\spleeter.py 119 | 27: ..\ASSET\stemprover\src\stemprover\training\dataset.py 120 | 28: ..\ASSET\stemprover\src\stemprover\training\pairs.py 121 | (non-dependent modules elided from visualization) 122 | 123 | 124 | 125 | Dependency Chains: 126 | ----------------- 127 | 1. stemprover\__init__.py 128 | 2. stemprover\analysis\base.py 129 | 3. stemprover\analysis\spectral.py 130 | 4. stemprover\analysis\artifacts\base.py 131 | 5. stemprover\analysis\artifacts\high_freq.py 132 | 6. stemprover\analysis\artifacts\preprocessor.py 133 | 7. stemprover\analysis\selection\metrics.py 134 | 8. stemprover\analysis\selection\segment_finder.py 135 | 9. stemprover\analysis\selection\__init__.py 136 | 10. stemprover\common\math_utils.py 137 | 11. stemprover\common\__init__.py 138 | 12. stemprover\enhancement\base.py 139 | 13. stemprover\enhancement\training.py 140 | 14. stemprover\io\audio.py 141 | 15. stemprover\preparation\base.py 142 | 16. stemprover\preparation\segments\generator.py 143 | 17. stemprover\preparation\segments\__init__.py 144 | 18. stemprover\separation\base.py 145 | 19. stemprover\separation\spleeter.py 146 | 20. stemprover\training\dataset.py 147 | 21. stemprover\training\pairs.py 148 | 22. stemprover\enhancement\controlnet.py 149 | Depends on: stemprover\__init__.py 150 | 23. stemprover\common\spectral_utils.py 151 | Depends on: stemprover\analysis\selection\segment_finder.py 152 | 24. stemprover\core\audio.py 153 | Depends on: stemprover\__init__.py, stemprover\analysis\selection\metrics.py, stemprover\analysis\selection\segment_finder.py, stemprover\analysis\selection\__init__.py, stemprover\preparation\segments\generator.py 154 | 25. 
stemprover\common\types.py 155 | Depends on: stemprover\analysis\selection\segment_finder.py, stemprover\preparation\segments\generator.py 156 | 26. stemprover\common\audio_utils.py 157 | Depends on: stemprover\analysis\selection\segment_finder.py, stemprover\preparation\segments\generator.py 158 | 27. stemprover\core\config.py 159 | Depends on: stemprover\separation\base.py 160 | 28. stemprover\core\types.py 161 | Depends on: stemprover\__init__.py, stemprover\analysis\selection\metrics.py, stemprover\analysis\selection\segment_finder.py, stemprover\analysis\selection\__init__.py, stemprover\core\config.py, stemprover\preparation\segments\generator.py, stemprover\separation\base.py 162 | 163 | Module Details: 164 | ------------- 165 | 166 | stemprover\__init__.py: 167 | Classes: None 168 | Functions: None 169 | Imports: stemprover.core.types, .separation.base, .analysis.base, .separation.spleeter, stemprover.core.audio, .analysis.spectral, stemprover.enhancement.controlnet 170 | 171 | stemprover\analysis\base.py: 172 | Classes: VocalAnalyzer, for 173 | Functions: __init__, analyze, _create_spectrograms 174 | Imports: abc, numpy as np, pathlib, ..core.audio 175 | 176 | stemprover\analysis\spectral.py: 177 | Classes: SpectralAnalyzer 178 | Functions: __init__, _create_spectrogram, _analyze_differences, analyze, _save_comparison, _plot_spectrogram, _save_analysis 179 | Imports: ..common.types, typing, ..common.math_utils, json, ..core.audio, datetime, pathlib, ..core.types, matplotlib.pyplot as plt, ..common.audio_utils 180 | 181 | stemprover\analysis\artifacts\base.py: 182 | Classes: SignalProcessor, class, from, HybridProcessor, for, ArtifactProcessor, ControlNetProcessor 183 | Functions: __init__, validate, run_validation, as_dict, _calculate_snr, _analyze_frequency_response, _measure_phase_coherence, process, _bandpass_filter 184 | Imports: typing, torch.nn as nn, abc, torch, ...common.audio_utils, pathlib, ...common.types, numpy as np, dataclasses 185 | 186 | 
stemprover\analysis\artifacts\high_freq.py: 187 | Classes: HighFrequencyArtifactPreprocessor 188 | Functions: forward, __init__, generate_training_pair 189 | Imports: None 190 | 191 | stemprover\analysis\artifacts\preprocessor.py: 192 | Classes: HighFrequencyArtifactPreprocessor 193 | Functions: forward, __init__, generate_training_pair 194 | Imports: None 195 | 196 | stemprover\analysis\selection\metrics.py: 197 | Classes: from, class, MetricsCalculator 198 | Functions: __init__, _calculate_detailed_score, calculate_sdr, calculate_metrics, calculate_band_sdrs 199 | Imports: stemprover.core.types, typing, stemprover.core.audio, numpy as np, dataclasses 200 | 201 | stemprover\analysis\selection\segment_finder.py: 202 | Classes: from, TestSegmentFinder 203 | Functions: __init__, find_best_segments, _compute_score, analyze_segment, _calculate_transitions, _calculate_high_freq_content, _calculate_vocal_clarity 204 | Imports: stemprover.core.types, typing, stemprover.common.types, stemprover.core.audio, numpy as np, stemprover.common.audio_utils, stemprover.common.spectral_utils, .metrics, librosa, dataclasses 205 | 206 | stemprover\analysis\selection\__init__.py: 207 | Classes: None 208 | Functions: None 209 | Imports: stemprover.core.types, stemprover.core.audio 210 | 211 | stemprover\common\audio_utils.py: 212 | Classes: None 213 | Functions: get_frequency_bins, get_band_mask, calculate_phase_complexity, calculate_dynamic_range, to_mono, create_spectrogram, calculate_onset_variation 214 | Imports: .math_utils, numpy as np, .types, soundfile as sf, librosa 215 | 216 | stemprover\common\math_utils.py: 217 | Classes: None 218 | Functions: magnitude, db_scale, phase_difference, phase_coherence, rms, angle 219 | Imports: .types, numpy as np 220 | 221 | stemprover\common\spectral_utils.py: 222 | Classes: None 223 | Functions: calculate_band_energy 224 | Imports: typing, .audio_utils, numpy as np, .types, soundfile as sf, librosa 225 | 226 | stemprover\common\types.py: 227 
| Classes: None 228 | Functions: None 229 | Imports: librosa, typing, numpy as np, torch 230 | 231 | stemprover\common\__init__.py: 232 | Classes: None 233 | Functions: None 234 | Imports: .math_utils, .spectral_utils, .types, .audio_utils 235 | 236 | stemprover\core\audio.py: 237 | Classes: import, class, for 238 | Functions: is_mono, to_mono, duration_seconds, is_stereo 239 | Imports: librosa, typing, numpy as np, dataclasses 240 | 241 | stemprover\core\config.py: 242 | Classes: from, SeparatorBackend, class 243 | Functions: None 244 | Imports: stemprover.core.types, enum, typing, pathlib, dataclasses 245 | 246 | stemprover\core\types.py: 247 | Classes: from, for, class 248 | Functions: hop_samples, segment_samples 249 | Imports: typing, pathlib, matplotlib.pyplot as plt, .audio, dataclasses 250 | 251 | stemprover\enhancement\base.py: 252 | Classes: for, EnhancementProcessor 253 | Functions: __init__, enhance, validate 254 | Imports: abc, typing, ...core.types, ...core.audio 255 | 256 | stemprover\enhancement\controlnet.py: 257 | Classes: PhaseAwareControlNet, PhaseAwareZeroConv, ArtifactDetector 258 | Functions: forward, __init__ 259 | Imports: torch.nn as nn, typing, torch 260 | 261 | stemprover\enhancement\training.py: 262 | Classes: ArtifactDataset, ControlNetTrainer 263 | Functions: __init__, validate, load_checkpoint, train, train_step, __len__, save_checkpoint, frequency_loss, prepare_training, __getitem__ 264 | Imports: torch.nn.functional as F, torch.utils.data 265 | 266 | stemprover\io\audio.py: 267 | Classes: None 268 | Functions: save_audio_file, load_audio_file 269 | Imports: librosa, typing, ..core.audio, pathlib, soundfile as sf, numpy as np 270 | 271 | stemprover\preparation\base.py: 272 | Classes: None 273 | Functions: None 274 | Imports: None 275 | 276 | stemprover\preparation\segments\generator.py: 277 | Classes: from, TrainingSegmentGenerator 278 | Functions: _create_backing_combinations, __init__, generate_segments, _has_vocal_content 279 | 
Imports: typing, core.audio, common.types, torch.utils.data, common.audio_utils, core.types, pathlib, numpy as np, dataclasses 280 | 281 | stemprover\preparation\segments\__init__.py: 282 | Classes: None 283 | Functions: None 284 | Imports: None 285 | 286 | stemprover\separation\base.py: 287 | Classes: class, from, VocalSeparator, StemProcessor, for 288 | Functions: cleanup, __init__, process_stems, _separate_vocals, __enter__, _apply_controlnet_enhancement, _load_stereo_pair, __exit__, separate_and_analyze, _save_audio_files 289 | Imports: stemprover.core.types, enum, typing, stemprover.core.config, abc, ..core.audio, pathlib, ..core.types, dataclasses 290 | 291 | stemprover\separation\spleeter.py: 292 | Classes: from, class, SpleeterSeparator 293 | Functions: cleanup, __init__, capabilities, separate, _load_mono, _separate_vocals, _load_stereo_pair, _setup_tensorflow, separate_and_analyze, separate_file, _save_audio_files 294 | Imports: typing, .base, ..analysis.spectral, spleeter.separator, ..core.audio, datetime, pathlib, ..core.types, ..io.audio, numpy as np, dataclasses, tensorflow as tf 295 | 296 | stemprover\training\dataset.py: 297 | Classes: TrainingDataset 298 | Functions: __getitem__, __init__, __len__ 299 | Imports: typing, torch.utils.data, .preparation.segments, .core.types 300 | 301 | stemprover\training\pairs.py: 302 | Classes: None 303 | Functions: None 304 | Imports: None 305 | ``` 306 |
307 | 308 | ## Configuration Details: 309 | - src_dir: Source directory to analyze. Defaults to "./src" in cwd. 310 | 311 | - summary_level: Controls summarization aggressiveness: 312 | - NONE: Full code output 313 | - INTERFACE: Preserve signatures/types/docstrings only 314 | - CORE: Include core logic, skip standard patterns 315 | 316 | - exclude_patterns: Files matching these patterns are skipped. 317 | - Note: ChimeraCat always excludes itself to avoid recursion. 318 | 319 | - rules: Override default summarization rules with custom SummaryRules. 320 | - Useful for domain-specific boilerplate detection. 321 | 322 | - elide_disconnected_deps: When True, omit modules with no dependencies 323 | from visualization. 324 | - Useful for cleaner dependency graphs. 325 | 326 | - generate_report: Controls inclusion of dependency analysis. 327 | - Defaults to True for INTERFACE/CORE summaries. 328 | 329 | - report_only: Generate only dependency report without code output. 330 | 331 | - use_numeric: Use numbers instead of letters for node labels. 332 | 333 | ## API Example: 334 | ```python 335 | # Generate both notebook and summarized Python file 336 | cat = ChimeraCat( 337 | "src", 338 | summary_level=SummaryLevel.INTERFACE, 339 | exclude_patterns=["tests"], 340 | elide_disconnected_deps=True 341 | ) 342 | notebook = cat.generate_colab_notebook() 343 | py_file = cat.generate_concat_file() 344 | ``` 345 | 346 | Though for most cases, what you probably want is the CLI: 347 | 348 | ## CLI Usage 349 | ChimeraCat installs with a cli tool `ccat`. The configuration dictionary can be manipulated via command-line arguments. 
350 | 351 | ```bash 352 | usage: ccat [-h] [-s {interface,core,none}] [-e EXCLUDE [EXCLUDE ...]] [-o OUTPUT] 353 | [-t {py,ipynb,both}] [-r] [--report-only] [--numeric-labels] [--no-report] 354 | [--elide-disconnected] [-d] [--debug-prefix DEBUG_PREFIX] [--version] 355 | [src_dir] 356 | 357 | ChimeraCat (ccat) - The smart code concatenator 358 | /\___/\ 359 | ( o o ) Intelligently combines Python source files 360 | ( =^= ) while maintaining dependencies and readability 361 | (______) 362 | 363 | 364 | positional arguments: 365 | src_dir Source directory containing Python files (default: src) 366 | 367 | options: 368 | -h, --help show this help message and exit 369 | -s {interface,core,none}, --summary-level {interface,core,none} 370 | Code summarization level (for .py output only, default: none) 371 | -e EXCLUDE [EXCLUDE ...], --exclude EXCLUDE [EXCLUDE ...] 372 | Patterns to exclude from processing (e.g., "test" "temp") 373 | -o OUTPUT, --output OUTPUT 374 | Output file name (without extension, default: based on output type 375 | and summary level) 376 | -t {py,ipynb,both}, --output-type {py,ipynb,both} 377 | Output file type (default: both) 378 | -r, --report Generate dependency report and ASCII visualization. By default, 379 | reports are included for interface/core summary levels and excluded 380 | for complete code and notebooks. This flag overrides that behavior. 
381 |   --report-only         Suppress code summarization or notebook concatenation 382 |   --numeric-labels      Use numbers instead of letters for node labels 383 |   --no-report           Suppress dependency report generation even for interface/core 384 |                         summary levels 385 |   --elide-disconnected 386 |                         Remove modules with no dependencies from visualization 387 |   -d, --debug           Enable debug output 388 |   --debug-prefix DEBUG_PREFIX 389 |                         Prefix for debug messages (default: CCAT:) 390 |   --version             show program's version number and exit 391 | ``` 392 | 393 | """ 394 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "chimeracat" 7 | dynamic = ["version"] 8 | description = "ChimeraCat (ccat) - The smart code concatenator" 9 | readme = "README.md" 10 | requires-python = ">=3.10" 11 | license = {text = "MIT"} 12 | authors = [ 13 |     {name = "Scott VR", email = "scottvr@paperclipmaximizer.ai"} 14 | ] 15 | dependencies = [ 16 |     "networkx", "phart", 17 | ] 18 | 19 | [project.scripts] 20 | ccat = "chimeracat.chimeracat:cli_main" 21 | 22 | [tool.hatch.build] 23 | packages = ["src/chimeracat"] 24 | 25 | [tool.hatch.version] 26 | path = "src/chimeracat/__init__.py" 27 | -------------------------------------------------------------------------------- /src/chimeracat/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.2.5" 2 | __all__ = ["ChimeraCat"] 3 | 4 | from .chimeracat import ChimeraCat -------------------------------------------------------------------------------- /src/chimeracat/chimeracat.py: -------------------------------------------------------------------------------- 1 | """ChimeraCat: Intelligent code concatenator and summarizer for LLM analysis.
2 | 3 | Analyzes Python codebases to generate consolidated files optimized for LLM processing, 4 | with configurable summarization to reduce token usage while preserving key information. 5 | 6 | Args: 7 | src_dir (str): Source directory containing Python files (default: "src") 8 | summary_level (SummaryLevel): Summarization level (INTERFACE/CORE/NONE) 9 | exclude_patterns (List[str]): Patterns to exclude from processing 10 | rules (Optional[SummaryRules]): Custom summarization rules 11 | elide_disconnected_deps (bool): Omit disconnected modules from visualization 12 | generate_report (Optional[bool]): Include dependency analysis report 13 | report_only (bool): Generate only the dependency report 14 | use_numeric (bool): Use numeric instead of alpha labels in visualizations 15 | debug (bool): Enable debug output 16 | debug_str (str): Prefix for debug messages 17 | 18 | Key Features: 19 | - Analyzes Python files for imports and definitions 20 | - Builds dependency graphs using NetworkX 21 | - Generates both .py files and Colab notebooks 22 | - Smart handling of internal/external imports 23 | - Configurable code summarization 24 | 25 | Configuration Details: 26 | src_dir: Source directory to analyze. Defaults to "./src" in cwd. 27 | 28 | summary_level: Controls summarization aggressiveness: 29 | - NONE: Full code output 30 | - INTERFACE: Preserve signatures/types/docstrings only 31 | - CORE: Include core logic, skip standard patterns 32 | 33 | exclude_patterns: Files matching these patterns are skipped. 34 | Note: ChimeraCat always excludes itself to avoid recursion. 35 | 36 | rules: Override default summarization rules with custom SummaryRules. 37 | Useful for domain-specific boilerplate detection. 38 | 39 | elide_disconnected_deps: When True, omit modules with no dependencies 40 | from visualization. Useful for cleaner dependency graphs. 41 | 42 | generate_report: Controls inclusion of dependency analysis. 43 | Defaults to True for INTERFACE/CORE summaries. 
44 | 45 | report_only: Generate only dependency report without code output. 46 | 47 | use_numeric: Use numbers instead of letters for node labels. 48 | 49 | Example: 50 | ```python 51 | # Generate both notebook and summarized Python file 52 | cat = ChimeraCat( 53 | "src", 54 | summary_level=SummaryLevel.INTERFACE, 55 | exclude_patterns=["tests"], 56 | elide_disconnected_deps=True 57 | ) 58 | notebook = cat.generate_colab_notebook() 59 | py_file = cat.generate_concat_file() 60 | ``` 61 | """ 62 | 63 | import re 64 | from pathlib import Path 65 | import networkx as nx 66 | 67 | from enum import Enum 68 | from typing import Dict, List, Set, Optional, Pattern 69 | from dataclasses import dataclass, field 70 | from datetime import datetime 71 | from phart import ASCIIRenderer, LayoutOptions, NodeStyle 72 | from . import __version__ 73 | 74 | import argparse 75 | import sys 76 | 77 | class SummaryLevel(Enum): 78 | INTERFACE = "interface" # Just interfaces/types/docstrings 79 | CORE = "core" # + Core logic, skip standard patterns 80 | NONE = "none" # Full code 81 | 82 | @dataclass 83 | class SummaryPattern: 84 | """Pattern for code summarization with explanation""" 85 | pattern: str 86 | replacement: str 87 | explanation: str 88 | flags: re.RegexFlag = re.MULTILINE 89 | 90 | def apply(self, content: str) -> str: 91 | return re.sub(self.pattern, f"{self.replacement} # {self.explanation}\n", 92 | content, flags=self.flags) 93 | 94 | @dataclass 95 | class SummaryRules: 96 | """Collection of patterns for different summary levels""" 97 | interface: List[SummaryPattern] = field(default_factory=list) 98 | core: List[SummaryPattern] = field(default_factory=list) 99 | 100 | @classmethod 101 | def default_rules(cls) -> 'SummaryRules': 102 | return cls( 103 | interface=[ 104 | SummaryPattern( 105 | pattern=r'(class\s+\w+(?:\([^)]*\))?):(?:\s*"""[^"]*""")?[^\n]*(?:\n(?!class|def)[^\n]*)*', 106 | replacement=r'\1:\n ... 
# ', 107 | explanation="Class interface preserved", 108 | flags=re.MULTILINE 109 | ), 110 | SummaryPattern( 111 | pattern=r'(def\s+\w+\s*\([^)]*\)):(?:\s*"""[^"]*""")?[^\n]*(?:\n(?!class|def)[^\n]*)*', 112 | replacement=r'\1:\n ... # ', 113 | explanation="Function signature preserved", 114 | flags=re.MULTILINE 115 | ) 116 | ], 117 | core=[ 118 | SummaryPattern( 119 | pattern=r'(def\s+get_\w+\([^)]*\)):\s*return[^\n]*\n', 120 | replacement=r'\1:\n ... # ', 121 | explanation="Getter method summarized" 122 | ), 123 | SummaryPattern( 124 | pattern=r'(def\s*__init__\s*\([^)]*\)):[^\n]*(?:\n(?!def|class)[^\n]*)*', 125 | replacement=r'\1:\n ... # ', 126 | explanation="Standard initialization summarized" 127 | ) 128 | ] 129 | ) 130 | @dataclass 131 | class ModuleInfo: 132 | """Information about a Python module""" 133 | path: Path 134 | content: str 135 | imports: Set[str] 136 | classes: Set[str] 137 | functions: Set[str] 138 | 139 | class ChimeraCat: 140 | """Utility to concatenate modular code into Colab-friendly single files""" 141 | def __init__(self, 142 | src_dir: str = "src", 143 | summary_level: SummaryLevel = SummaryLevel.NONE, 144 | exclude_patterns: List[str] = None, 145 | rules: Optional[SummaryRules] = None, 146 | elide_disconnected_deps: bool = False, 147 | generate_report: Optional[bool] = None, 148 | report_only: bool = False, 149 | use_numeric: bool = False, 150 | debug: bool = False, 151 | debug_str = ""): 152 | 153 | self.src_dir = Path(src_dir) 154 | self.summary_level = summary_level 155 | self.report_only = report_only 156 | self.use_numeric = use_numeric 157 | self.rules = rules or SummaryRules.default_rules() 158 | self.modules: Dict[Path, ModuleInfo] = {} 159 | self.dep_graph = nx.DiGraph() 160 | self.self_path = Path(__file__).resolve() 161 | self.exclude_patterns = exclude_patterns or [] 162 | self.debug = debug 163 | self.elide_disconnected_deps = elide_disconnected_deps 164 | self.debug_str = debug_str 165 | 166 | if generate_report is None: 167 
| self.generate_report = summary_level in (SummaryLevel.INTERFACE, SummaryLevel.CORE) 168 | else: 169 | self.generate_report = generate_report 170 | 171 | def _debug_print(self, *args, **kwargs): 172 | """Helper for debug output""" 173 | if self.debug: 174 | print(f"{self.debug_str}: {args} {list(kwargs.items())}") 175 | 176 | def should_exclude(self, file_path: Path) -> bool: 177 | """Check if a file should be excluded from processing""" 178 | # Always exclude self 179 | self._debug_print(file_path.resolve(), self.self_path) 180 | if file_path.resolve() == self.self_path: 181 | if self.debug: 182 | self._debug_print(f"excluding self {self.self_path}") 183 | return True 184 | 185 | # Check against exclude patterns 186 | str_path = str(file_path) 187 | self._debug_print("str_path",str_path) 188 | for pattern in self.exclude_patterns: 189 | self._debug_print("comparing", pattern, str_path) 190 | return any(pattern in str_path for pattern in self.exclude_patterns) 191 | 192 | def analyze_file(self, file_path: Path) -> Optional[ModuleInfo]: 193 | """Analyze a Python file for imports and definitions""" 194 | if self.should_exclude(file_path): 195 | self._debug_print(f'excluding {file_path}') 196 | return None 197 | 198 | with open(file_path, 'r') as f: 199 | content = f.read() 200 | 201 | # Find imports 202 | import_pattern = r'^(?:from\s+(\S+)\s+)?import\s+([^#\n]+)' 203 | imports = set() 204 | for match in re.finditer(import_pattern, content, re.MULTILINE): 205 | if match.group(1): # from X import Y 206 | imports.add(match.group(1)) 207 | else: # import X 208 | imports.add(match.group(2).split(',')[0].strip()) 209 | 210 | # Find class definitions 211 | class_pattern = r'class\s+(\w+)' 212 | classes = set(re.findall(class_pattern, content)) 213 | 214 | # Find function definitions 215 | func_pattern = r'def\s+(\w+)' 216 | functions = set(re.findall(func_pattern, content)) 217 | 218 | return ModuleInfo( 219 | path=file_path, 220 | content=content, 221 | imports=imports, 
222 | classes=classes, 223 | functions=functions 224 | ) 225 | 226 | def _summarize_content(self, content: str) -> str: 227 | """Apply summary patterns based on current level""" 228 | if not isinstance(content, str): 229 | raise TypeError(f"Expected string content but got {type(content)}: {content}") 230 | 231 | if self.summary_level == SummaryLevel.NONE: 232 | return content 233 | 234 | result = content 235 | rules = self.rules or SummaryRules.default_rules() 236 | 237 | # Apply patterns based on level 238 | if self.summary_level == SummaryLevel.INTERFACE: 239 | for pattern in rules.interface: 240 | result = pattern.apply(result) 241 | elif self.summary_level == SummaryLevel.CORE: 242 | # Apply both interface and core patterns 243 | for pattern in rules.interface + rules.core: 244 | result = pattern.apply(result) 245 | 246 | return result 247 | 248 | def _process_imports(self, content: str, module_path: Path) -> str: 249 | """Process and adjust imports for concatenated context""" 250 | if not isinstance(content, str): 251 | raise TypeError(f"Expected string content but got {type(content)}: {content}") 252 | 253 | def replace_relative_import(match: re.Match) -> str: 254 | indent = len(match.group()) - len(match.group().lstrip()) 255 | spaces = ' ' * indent 256 | original_line = match.group() 257 | return f'{spaces}"""RELATIVE_IMPORT: \n{original_line}\n{spaces}"""' 258 | 259 | pattern = r'^\s*from\s+\..*$' 260 | return re.sub(pattern, replace_relative_import, content, flags=re.MULTILINE) 261 | 262 | def build_dependency_graph(self): 263 | """Build a dependency graph with proper relative import resolution""" 264 | self._debug_print("\nBuilding dependency graph...") 265 | 266 | # First pass: Create nodes 267 | for file_path in self.src_dir.rglob("*.py"): 268 | module_info = self.analyze_file(file_path) 269 | if module_info is not None: 270 | self.modules[file_path] = module_info 271 | self.dep_graph.add_node(file_path) 272 | self._debug_print(f"Added node: 
{file_path.relative_to(self.src_dir)}") 273 | if module_info.imports: 274 | self._debug_print(f" Found imports: {', '.join(module_info.imports)}") 275 | 276 | # Second pass: Add edges 277 | for file_path, module in self.modules.items(): 278 | current_module = str(file_path.relative_to(self.src_dir)).replace('\\', '/') 279 | module_dir = str(file_path.parent.relative_to(self.src_dir)).replace('\\', '/') 280 | 281 | for imp in module.imports: 282 | if imp.startswith('.'): 283 | # Handle relative imports 284 | dots = imp.count('.') 285 | parts = module_dir.split('/') 286 | 287 | # Go up directory tree based on dot count 288 | if dots > len(parts): 289 | continue # Invalid relative import 290 | 291 | base_path = '/'.join(parts[:-dots] if dots > 0 else parts) 292 | target_module = imp.lstrip('.') 293 | 294 | if target_module: 295 | full_target = f"{base_path}/{target_module.replace('.', '/')}.py" 296 | else: 297 | full_target = f"{base_path}/__init__.py" 298 | 299 | # Find matching module 300 | for other_path in self.modules: 301 | other_rel = str(other_path.relative_to(self.src_dir)).replace('\\', '/') 302 | if other_rel == full_target: 303 | self._debug_print(f" Adding edge: {other_rel} -> {current_module}") 304 | self.dep_graph.add_edge(file_path, other_path) 305 | else: 306 | # Handle absolute imports within our project 307 | potential_path = imp.replace('.', '/') + '.py' 308 | for other_path in self.modules: 309 | other_rel = str(other_path.relative_to(self.src_dir)).replace('\\', '/') 310 | if other_rel.endswith(potential_path): 311 | self._debug_print(f" Adding edge: {other_rel} -> {current_module}") 312 | self.dep_graph.add_edge(file_path, other_path) 313 | 314 | def generate_concat_file(self, output_file: str = "colab_combined.py") -> str: 315 | """Generate a single file combining all modules in dependency order""" 316 | self.build_dependency_graph() 317 | 318 | header = f"""{self._get_header_content()} 319 | Summary Level: {self.summary_level.value} 320 | """ 
321 | 322 | # Start with external imports 323 | output = [ 324 | header, 325 | '"""', 326 | self.generate_dependency_ascii(), 327 | "# External imports",'"""', 328 | *self._get_external_imports(), 329 | "\n# Combined module code\n" 330 | ] 331 | 332 | # Get files in dependency order 333 | sorted_files = self._get_sorted_files() 334 | 335 | # Create a map of original module paths to their contents 336 | module_contents = {} 337 | 338 | # First pass: collect and process all module contents 339 | for file_path in sorted_files: 340 | if file_path in self.modules: 341 | module = self.modules[file_path] 342 | rel_path = file_path.relative_to(self.src_dir) 343 | 344 | # Process imports and summarize content 345 | processed_content = self._process_imports( 346 | self._summarize_content(module.content), 347 | file_path 348 | ) 349 | 350 | module_contents[file_path] = { 351 | 'content': processed_content, 352 | 'rel_path': rel_path 353 | } 354 | 355 | # Second pass: output in correct order with headers 356 | for file_path in sorted_files: 357 | if file_path in module_contents: 358 | info = module_contents[file_path] 359 | output.extend([ 360 | f"\n# From {info['rel_path']}", 361 | info['content'] 362 | ]) 363 | 364 | with open(output_file, 'w') as f: 365 | f.write('\n'.join(output)) 366 | 367 | return output_file 368 | 369 | def _get_external_imports(self) -> List[str]: 370 | """Get sorted list of external imports from all modules""" 371 | external_imports = set() 372 | for module in self.modules.values(): 373 | external_imports.update( 374 | imp for imp in module.imports 375 | if not any(str(imp).startswith(str(p.relative_to(self.src_dir).parent)) 376 | for p in self.modules) 377 | and not imp.startswith('.') 378 | ) 379 | 380 | # Format and sort the import statements 381 | return sorted(f"import {imp}" for imp in external_imports) 382 | 383 | def _paths_match(self, path: Path, import_parts: List[str]) -> bool: 384 | """Check if a path matches an import statement""" 385 | 
path_parts = list(path.parts) 386 | return len(path_parts) == len(import_parts) and \ 387 | all(p == i for p, i in zip(path_parts, import_parts)) 388 | 389 | def _get_sorted_files(self) -> List[Path]: 390 | """Get files sorted by dependencies""" 391 | try: 392 | # Topological sort ensures dependencies come before dependents 393 | return list(nx.topological_sort(self.dep_graph)) 394 | 395 | except nx.NetworkXUnfeasible as e: 396 | # If we detect a cycle, identify and report it 397 | cycles = list(nx.simple_cycles(self.dep_graph)) 398 | self._debug_print("Warning: Circular dependencies detected:") 399 | for cycle in cycles: 400 | cycle_path = ' -> '.join(p.name for p in cycle) 401 | self._debug_print(f" {cycle_path}") 402 | 403 | # Fall back to simple ordering but warn user 404 | self._debug_print("Using simple ordering instead.") 405 | return list(self.modules.keys()) 406 | 407 | def visualize_dependencies(self, output_file: str = "dependencies.png"): 408 | """Optional: Visualize the dependency graph""" 409 | try: 410 | import matplotlib.pyplot as plt 411 | pos = nx.spring_layout(self.dep_graph) 412 | plt.figure(figsize=(12, 8)) 413 | nx.draw(self.dep_graph, pos, with_labels=True, 414 | labels={p: p.name for p in self.dep_graph.nodes()}, 415 | node_color='lightblue', 416 | node_size=2000, 417 | font_size=8) 418 | plt.savefig(output_file) 419 | plt.close() 420 | return output_file 421 | except ImportError: 422 | print("matplotlib not available for visualization") 423 | return None 424 | 425 | def get_dependency_report(self) -> str: 426 | """Generate a detailed dependency report organized in logical sections. 427 | 428 | Returns a string containing sections in this order: 429 | 1. Header 430 | 2. Directory Structure 431 | 3. Import Summary 432 | 4. Module Statistics 433 | 5. PHART Visualization with Legend 434 | 6. Dependency Chains 435 | 7. 
Module Details 436 | """ 437 | # Header section - introduces the report 438 | header = ["Dependency Analysis Report", "=" * 25, ""] 439 | 440 | # Directory tree section - shows file organization 441 | directory_structure = [ 442 | "Directory Structure:", 443 | self._get_tree_output(), 444 | "" 445 | ] 446 | 447 | # Import summary section - external and internal dependencies 448 | import_summary = [ 449 | "Import Summary:", 450 | self._generate_import_summary(), 451 | "" 452 | ] 453 | 454 | # Module statistics and graph header 455 | statistics = [ 456 | "Module Statistics:", 457 | f"Total modules: {len(self.modules)}", 458 | f"Total dependencies: {self.dep_graph.number_of_edges()}", 459 | "", 460 | "Module Dependencies:", 461 | "-------------------", 462 | "" 463 | ] 464 | 465 | # PHART visualization - includes the graph and its legend 466 | # Note: generate_dependency_ascii() returns a pre-formatted string 467 | visualization = [self.generate_dependency_ascii(), ""] 468 | 469 | # Dependency chains section - shows topological ordering 470 | chains = ["Dependency Chains:", "-" * 17] 471 | try: 472 | sorted_files = list(nx.topological_sort(self.dep_graph)) 473 | for idx, file in enumerate(sorted_files): 474 | deps = list(self.dep_graph.predecessors(file)) 475 | chains.append(f"{idx+1}. 
{file.relative_to(self.src_dir)}") 476 | if deps: 477 | chains.append( 478 | f" Depends on: {', '.join(str(d.relative_to(self.src_dir)) for d in deps)}" 479 | ) 480 | chains.append("") 481 | except nx.NetworkXUnfeasible: 482 | chains.extend([ 483 | "Warning: Circular dependencies detected!", 484 | "Cycles found:", 485 | *[f" {' -> '.join(str(p.relative_to(self.src_dir)) for p in cycle)}" 486 | for cycle in nx.simple_cycles(self.dep_graph)], 487 | "" 488 | ]) 489 | 490 | # Module details section - detailed information about each module 491 | details = ["Module Details:", "-" * 13] 492 | for path, module in self.modules.items(): 493 | details.extend([ 494 | f"\n{path.relative_to(self.src_dir)}:", 495 | f"Classes: {', '.join(module.classes) if module.classes else 'None'}", 496 | f"Functions: {', '.join(module.functions) if module.functions else 'None'}", 497 | f"Imports: {', '.join(module.imports) if module.imports else 'None'}" 498 | ]) 499 | 500 | # Combine all sections in the desired order using extend() 501 | # This preserves the proper formatting of each section 502 | report = [] 503 | for section in [header, directory_structure, import_summary, statistics, 504 | visualization, chains, details]: 505 | report.extend(section) 506 | 507 | # Join all lines with newlines to create the final report 508 | return '\n'.join(report) 509 | 510 | def _get_header_content(self): 511 | return f""" 512 | # Generated by ChimeraCat 513 | # /\___/\ ChimeraCat 514 | # ( o o ) smart code concatenator/summarizer 515 | # ( =^= ) ccat {__version__} https://github.com/scottvr/chimeracat 516 | # (______) Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}""" 517 | 518 | def generate_colab_notebook(self, output_file: str = "colab_combined.ipynb"): 519 | """Generate a Jupyter notebook with the combined code""" 520 | py_file = self.generate_concat_file("temp_combined.py") 521 | 522 | with open(py_file, 'r') as f: 523 | code = f.read() 524 | 525 | timestamp = 
datetime.now().strftime("%Y-%m-%d %H:%M:%S") 526 | notebook = { 527 | "cells": [ 528 | { 529 | "cell_type": "markdown", 530 | "metadata": {}, 531 | "source": [ 532 | "##Notebook Generated by ChimeraCat\n" 533 | ], 534 | }, 535 | { 536 | "cell_type": "code", 537 | "metadata": {}, 538 | "source": code.splitlines(keepends=True), 539 | "execution_count": None, 540 | "outputs": [] 541 | }, 542 | { 543 | "cell_type": "markdown", 544 | "metadata": {}, 545 | "source": [ 546 | "```\n", 547 | f"{self._get_header_content()}".splitlines(keepends=True), 548 | "```\n" 549 | ] 550 | } 551 | 552 | ], 553 | "metadata": { 554 | "kernelspec": { 555 | "display_name": "Python 3", 556 | "language": "python", 557 | "name": "python3" 558 | } 559 | }, 560 | "nbformat": 4, 561 | "nbformat_minor": 4 562 | } 563 | 564 | import json 565 | with open(output_file, 'w') as f: 566 | json.dump(notebook, f, indent=2) 567 | 568 | Path("temp_combined.py").unlink() # Clean up temporary file 569 | return output_file 570 | 571 | def generate_dependency_ascii(self) -> str: 572 | """Generate ASCII representation of dependency graph""" 573 | 574 | display_graph = nx.DiGraph() 575 | 576 | # Mapping of short labels to original node names 577 | label_mapping = {} 578 | label_index = 0 579 | 580 | def get_short_label(index): 581 | """Generate a short label (e.g., A, B, ..., AA, AB).""" 582 | return str(index + 1) if self.use_numeric else ''.join(chr(65 + i) for i in divmod(index, 26)[1::-1] or [index % 26]) 583 | 584 | # Add nodes and edges with relative path names 585 | for node in self.dep_graph.nodes(): 586 | short_label = get_short_label(label_index) 587 | label_mapping[short_label] = node 588 | display_graph.add_node(short_label) 589 | label_index += 1 590 | 591 | # Add edges using new node names 592 | for src, dst in self.dep_graph.edges(): 593 | src_label = [k for k, v in label_mapping.items() if v == src][0] 594 | dst_label = [k for k, v in label_mapping.items() if v == dst][0] 595 | 
display_graph.add_edge(src_label, dst_label) 596 | 597 | if self.elide_disconnected_deps: 598 | self._debug_print("removing disconnected imports (no dependent relationship)") 599 | self._debug_print(display_graph) 600 | display_graph.remove_nodes_from(list(nx.isolates(display_graph))) 601 | self._debug_print(display_graph) 602 | #TODO: enable phart cli options from ccat command-line 603 | options = LayoutOptions( 604 | node_style=NodeStyle.SQUARE, 605 | node_spacing=4, 606 | layer_spacing=3 607 | ) 608 | 609 | # Generate the legend 610 | legend_lines = ["Legend:"] 611 | for short_label, original_node in label_mapping.items(): 612 | legend_lines.append(f"{short_label}: {original_node}") 613 | legend = "\n".join(legend_lines) 614 | 615 | renderer = ASCIIRenderer(display_graph, options=options) 616 | ascii_art = f""" 617 | Directory Structure: 618 | -------------------- 619 | {self._get_tree_output()} 620 | 621 | Module Dependencies: 622 | -------------------- 623 | PHART Module Dependency Graph Visualization (see legend below): 624 | 625 | {renderer.render()} 626 | {legend} 627 | {"(non-dependent modules elided from visualization)" if self.elide_disconnected_deps else "node names detached from the network and printed in isolation are non-connected/likely unused."} 628 | 629 | """ 630 | return ascii_art 631 | 632 | def _get_tree_output(self) -> str: 633 | """Get tree command output""" 634 | try: 635 | import subprocess 636 | result = subprocess.run( 637 | ['tree', str(self.src_dir)], 638 | capture_output=True, 639 | text=True 640 | ) 641 | return result.stdout 642 | except FileNotFoundError: 643 | # Fallback to simple directory listing if tree not available 644 | return '\n'.join(str(p.relative_to(self.src_dir)) 645 | for p in self.src_dir.rglob('*.py')) 646 | 647 | def _generate_import_summary(self) -> str: 648 | """Generate summary of imports""" 649 | external_imports = set() 650 | internal_deps = set() 651 | 652 | for module in self.modules.values(): 653 | for imp 
in module.imports: 654 | if not imp.startswith('.'): 655 | external_imports.add(imp) 656 | else: 657 | internal_deps.add(imp) 658 | 659 | return f""" 660 | External Dependencies: 661 | {', '.join(sorted(external_imports))} 662 | 663 | Internal Dependencies: 664 | {', '.join(sorted(internal_deps))} 665 | """ 666 | 667 | def get_default_filename(summary_level: SummaryLevel, is_notebook: bool = False) -> str: 668 | """Get the default base filename based on output type and summary level""" 669 | if is_notebook: 670 | return "colab_ready" # Always full code for notebooks 671 | 672 | summary_level_names = { 673 | SummaryLevel.INTERFACE: "signatures_only", 674 | SummaryLevel.CORE: "essential_code", 675 | SummaryLevel.NONE: "complete_code" 676 | } 677 | return summary_level_names[summary_level] 678 | 679 | def process_cli_args(args: Optional[List[str]] = None) -> dict: 680 | """Process command line arguments and return a config dict for ChimeraCat""" 681 | parser = create_cli_parser() 682 | parsed_args = parser.parse_args(args) 683 | 684 | # Convert summary level string to enum 685 | summary_level = getattr(SummaryLevel, parsed_args.summary_level.upper()) 686 | 687 | # Build config dict 688 | config = { 689 | 'src_dir': parsed_args.src_dir, 690 | 'summary_level': summary_level, 691 | 'exclude_patterns': parsed_args.exclude, 692 | 'elide_disconnected_deps': parsed_args.elide_disconnected, 693 | 'generate_report': parsed_args.report, 694 | 'report_only': parsed_args.report_only, 695 | 'use_numeric': parsed_args.use_numeric, 696 | 'debug': parsed_args.debug, 697 | 'debug_str': parsed_args.debug_prefix if parsed_args.debug else "" 698 | } 699 | 700 | return config, parsed_args 701 | 702 | def cli_main(args: Optional[List[str]] = None) -> int: 703 | """Main CLI entry point for ChimeraCat""" 704 | try: 705 | config, args = process_cli_args(args) 706 | 707 | # Create ChimeraCat instance for Python output (with summarization) 708 | cat = ChimeraCat(**config) 709 | 710 | if 
args.report_only: 711 | cat.build_dependency_graph() 712 | print(cat.get_dependency_report()) 713 | 714 | else: 715 | # Get base filename from argument or generate default 716 | base_filename = args.output 717 | 718 | if args.output_type in ('py', 'both'): 719 | py_filename = f"{base_filename or get_default_filename(config['summary_level'])}.py" 720 | py_file = cat.generate_concat_file(py_filename) 721 | print(f"Generated Python file: {py_file}") 722 | 723 | if args.output_type in ('ipynb', 'both'): 724 | # Create new instance with NONE summary level for notebook 725 | notebook_cat = ChimeraCat( 726 | src_dir=config['src_dir'], 727 | exclude_patterns=config['exclude_patterns'], 728 | elide_disconnected_deps=config['elide_disconnected_deps'], 729 | debug=config['debug'], 730 | debug_str=config['debug_str'], 731 | generate_report=config['generate_report'], 732 | summary_level=SummaryLevel.NONE 733 | ) 734 | 735 | nb_filename = f"{base_filename or get_default_filename(summary_level=SummaryLevel.NONE, is_notebook=True)}.ipynb" 736 | nb_file = notebook_cat.generate_colab_notebook(nb_filename) 737 | print(f"Generated Jupyter notebook (complete code): {nb_file}") 738 | 739 | # If debug is enabled, show additional information regardless of report setting 740 | if args.debug or args.report: 741 | print(cat.get_dependency_report()) 742 | 743 | return 0 744 | 745 | except Exception as e: 746 | print(f"Error: {str(e)}", file=sys.stderr) 747 | if args.debug: 748 | import traceback 749 | traceback.print_exc() 750 | return 1 751 | 752 | def create_cli_parser() -> argparse.ArgumentParser: 753 | """Create the command-line argument parser for ChimeraCat""" 754 | parser = argparse.ArgumentParser( 755 | prog='ccat', 756 | description=""" 757 | ChimeraCat (ccat) - The smart code concatenator 758 | /\\___/\\ 759 | ( o o ) Intelligently combines Python source files 760 | ( =^= ) while maintaining dependencies and readability 761 | (______) 762 | """, 763 | 
formatter_class=argparse.RawDescriptionHelpFormatter 764 | ) 765 | 766 | parser.add_argument( 767 | 'src_dir', 768 | type=str, 769 | nargs='?', 770 | default='src', 771 | help='Source directory containing Python files (default: src)' 772 | ) 773 | 774 | parser.add_argument( 775 | '-s', '--summary-level', 776 | type=str, 777 | choices=['interface', 'core', 'none'], 778 | default='none', 779 | help='Code summarization level (for .py output only, default: none)' 780 | ) 781 | 782 | parser.add_argument( 783 | '-e', '--exclude', 784 | type=str, 785 | nargs='+', 786 | help='Patterns to exclude from processing (e.g., "test" "temp")' 787 | ) 788 | 789 | parser.add_argument( 790 | '-o', '--output', 791 | type=str, 792 | help='Output file name (without extension, default: based on output type and summary level)' 793 | ) 794 | 795 | parser.add_argument( 796 | '-t', '--output-type', 797 | type=str, 798 | choices=['py', 'ipynb', 'both'], 799 | default='both', 800 | help='Output file type (default: both)' 801 | ) 802 | 803 | parser.add_argument( 804 | '-r', '--report', 805 | action='store_true', 806 | default=None, 807 | help='Generate dependency report and ASCII visualization. By default, reports are ' 808 | 'included for interface/core summary levels and excluded for complete code ' 809 | 'and notebooks. This flag overrides that behavior.' 
810 | ) 811 | 812 | parser.add_argument( 813 | '--report-only', 814 | action='store_true', 815 | dest='report_only', 816 | help='Suppress code summarization or notebook cocatenization' 817 | ) 818 | 819 | parser.add_argument('--numeric-labels', action='store_true', dest="use_numeric", help='Use numbers instead of letters for node labels') 820 | 821 | # Add a no-report option for when you want to suppress reports in INTERFACE/CORE 822 | parser.add_argument( 823 | '--no-report', 824 | action='store_false', 825 | dest='report', 826 | help='Suppress dependency report generation even for interface/core summary levels' 827 | ) 828 | 829 | parser.add_argument( 830 | '--elide-disconnected', 831 | action='store_true', 832 | help='Remove modules with no dependencies from visualization' 833 | ) 834 | 835 | parser.add_argument( 836 | '-d', '--debug', 837 | action='store_true', 838 | help='Enable debug output' 839 | ) 840 | 841 | parser.add_argument( 842 | '--debug-prefix', 843 | type=str, 844 | default='CCAT:', 845 | help='Prefix for debug messages (default: CCAT:)' 846 | ) 847 | 848 | parser.add_argument( 849 | '--version', 850 | action='version', 851 | version=f'%(prog)s {__version__}' 852 | ) 853 | 854 | return parser 855 | 856 | # 857 | #if __name__ == "__main__": 858 | # debug = True 859 | # generate_report = True 860 | # # Example with different summary levels for Python output 861 | # examples = { 862 | # SummaryLevel.INTERFACE: "signatures_only.py", 863 | # SummaryLevel.CORE: "essential_code.py", 864 | # SummaryLevel.NONE: "complete_code.py", 865 | # } 866 | # 867 | # for level, filename in examples.items(): 868 | # cat = ChimeraCat( 869 | # "src", 870 | # exclude_patterns=["tools\\", ".ipynb", "cats\\"], 871 | # summary_level=level, 872 | # debug=debug, 873 | # debug_str="DBG: ", 874 | # elide_disconnected_deps=True, 875 | # generate_report=generate_report 876 | # ) 877 | # output_file = cat.generate_concat_file(filename) 878 | # print(f"Generated {level.value} 
version: {output_file}") 879 | # 880 | # # Generate notebook with complete code 881 | # cat = ChimeraCat( 882 | # "src", 883 | # exclude_patterns=["tools\\", ".ipynb", "cats\\"], 884 | # elide_disconnected_deps=True, 885 | # ) 886 | # output_file = cat.generate_colab_notebook("colab_ready.ipynb") 887 | # print(f"Generated notebook version: {output_file}") 888 | # 889 | # if debug: 890 | # cat.visualize_dependencies("module_deps.png") 891 | # if generate_report: 892 | # report = cat.get_dependency_report() 893 | # print(report) --------------------------------------------------------------------------------