├── tests ├── __init__.py ├── test_strings.py ├── test_cli.py ├── test_num.py ├── test_rand.py ├── test_embeddings_openai.py ├── test_print_utils.py ├── test_env_integration.py ├── test_git_status.py └── test_indexing.py ├── src ├── gjdutils │ ├── cli │ │ ├── pypi │ │ │ ├── __init__.py │ │ │ ├── app.py │ │ │ ├── deploy.py │ │ │ └── check.py │ │ ├── __init__.py │ │ ├── install_all_dev_dependencies.py │ │ ├── main.py │ │ └── check_git_clean.py │ ├── __version__.py │ ├── __init__.py │ ├── scripts │ │ ├── __init__.py │ │ ├── export_envs.sh │ │ └── install.py │ ├── errors.py │ ├── sets.py │ ├── lists.py │ ├── misc.py │ ├── prompt_templates.py │ ├── runtime.py │ ├── todo │ │ └── convert_parquet.py │ ├── print_utils.py │ ├── colab.py │ ├── decorators.py │ ├── typ.py │ ├── hashing.py │ ├── voice_speechrecognition.py │ ├── audios.py │ ├── functions.py │ ├── collection_utils.py │ ├── shell.py │ ├── rand.py │ ├── pytest_utils.py │ ├── obsolete │ │ └── google_text_to_speech.py │ ├── stopwatch.py │ ├── web.py │ ├── jsons.py │ ├── env.py │ ├── num.py │ ├── google_translate.py │ ├── iterfunc.py │ ├── image_utils.py │ ├── html.py │ ├── regex.py │ ├── indexing.py │ ├── webserver.py │ ├── ports.py │ ├── pypi_build.py │ ├── files.py │ ├── llm_utils.py │ ├── cmd.py │ └── llms_claude.py └── ts │ └── README.md ├── docs ├── instructions │ ├── EDIT_LIGHTLY_CONVERSATION_TRANSCRIPT.md │ ├── DEBRIEF_UPDATE_COMMIT.md │ ├── SURGEON_MODE.md │ ├── RESOLVE_MERGE_CONFLICTS.md │ ├── SOUNDING_BOARD_MODE.md │ ├── HOUSEKEEPING_OLD_PLANNING_DOC.md │ ├── GIT_CREATE_BRANCH.md │ ├── DETECTIVE_SCIENTIST_MODE.md │ ├── DO_EXECUTE_PLANNING_DOC.md │ ├── THIRD_PARTY_LIBRARY_SELECTION.md │ ├── dev │ │ └── PULL_PUSH_LOCAL_GJDUTILS_REPOS.md │ ├── AUDIT_ARCHITECTURE_MODE.md │ ├── TASKS_SUBAGENTS.md │ ├── DEBRIEF_PROGRESS.md │ ├── CODING_PRINCIPLES.md │ ├── WRITE_DEEP_DIVE_AS_DOC.md │ ├── RENAME_OR_MOVE.md │ ├── GIT_COMMIT_CHANGES.md │ ├── UPDATE_DOCUMENTATION_ORGANISATION_DOC.md │ ├── CAPTURE_SOUNDING_BOARD_CONVERSATION.md │ ├── CRITIQUE_OF_PLANNING_DOC.md │ ├── GENERATE_MERMAID_DIAGRAM.md │ ├── FIX_HOUSEKEEPING_BUILD_TYPECHECK_LINT.md │ ├── UPDATE_CLAUDE_INSTRUCTIONS.md │ ├── draft │ │ └── NONINTERACTIVE.md │ └── WRITE_EVERGREEN_DOC.md ├── reference │ ├── TESTING_PYTHON.md │ ├── CODING_SHELL_SCRIPTS.md │ ├── DOCUMENTATION_ORGANISATION.md │ └── SD_STRING_DISPLACEMENT_FIND_REPLACE.md ├── README.md └── WORKFLOW.md ├── .cursor └── rules │ └── README.md ├── tsconfig.json ├── LICENSE ├── package.json ├── .gitignore ├── README.md ├── CLAUDE.md ├── planning ├── 250215_publishing_to_pypi.md └── 250215_rename_gdutils_to_gjdutils.md └── pyproject.toml /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/gjdutils/cli/pypi/__init__.py: -------------------------------------------------------------------------------- 1 | from .app import app 2 | 3 | __all__ = ["app"] 4 | -------------------------------------------------------------------------------- /src/gjdutils/cli/__init__.py: -------------------------------------------------------------------------------- 1 | from gjdutils.cli.main import app 2 | 3 | __all__ = ["app"] 4 | -------------------------------------------------------------------------------- /src/gjdutils/__version__.py: -------------------------------------------------------------------------------- 1 | """Single source of truth for package version.""" 2 | 3 | __version__ = "0.6.1" 4 | 
-------------------------------------------------------------------------------- /tests/test_strings.py: -------------------------------------------------------------------------------- 1 | # test_strings.py 2 | 3 | # test that jinja_render raises an error if missing variables 4 | -------------------------------------------------------------------------------- /src/gjdutils/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | GJDutils - A collection of useful utility functions 3 | """ 4 | 5 | from gjdutils.__version__ import __version__ 6 | -------------------------------------------------------------------------------- /src/gjdutils/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | """Scripts package for gjdutils.""" 2 | 3 | from .install import install_export_envs 4 | 5 | __all__ = ["install_export_envs"] 6 | -------------------------------------------------------------------------------- /src/gjdutils/cli/install_all_dev_dependencies.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from gjdutils.cmd import run_cmd 4 | 5 | if __name__ == "__main__": 6 | run_cmd("pip install -e '.[all_no_dev,dev]'", verbose=4) 7 | -------------------------------------------------------------------------------- /docs/instructions/EDIT_LIGHTLY_CONVERSATION_TRANSCRIPT.md: -------------------------------------------------------------------------------- 1 | This is a verbatim transcript of a conversation. Tighten it up slightly, keeping all of the detail and nuance, and preserving as much verbatim phrasing as possible, but a bit more compact. 2 | -------------------------------------------------------------------------------- /docs/instructions/DEBRIEF_UPDATE_COMMIT.md: -------------------------------------------------------------------------------- 1 | # Debrief and Commit 2 | 3 | Run @docs/instructions/DEBRIEF_PROGRESS.md . 4 | 5 | Update the planning doc with your progress. 6 | 7 | Then commit these changes, as per docs/instructions/GIT_COMMIT_CHANGES.md . -------------------------------------------------------------------------------- /docs/instructions/SURGEON_MODE.md: -------------------------------------------------------------------------------- 1 | # Surgeon mode 2 | 3 | Make minimal changes, focused on the task at hand. 4 | 5 | Do no harm if in doubt ask. 
If there are decisions to be made talk through the various options and the trade-offs so that we can decide together before making changes -------------------------------------------------------------------------------- /src/gjdutils/errors.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import sys 3 | import traceback 4 | 5 | # see also functions.func_name() 6 | 7 | 8 | def str_from_exception(name=None): 9 | return { 10 | "name": name, 11 | "msg": "".join(traceback.format_exception(*sys.exc_info())), 12 | } 13 | -------------------------------------------------------------------------------- /src/gjdutils/sets.py: -------------------------------------------------------------------------------- 1 | def assert_sets_identical(set1: set, set2: set): 2 | # assert that the sets are the same, and print the difference if not 3 | if set1 != set2: 4 | assert not set1 - set2, f"Set 1 has extra elements: {set1 - set2}" 5 | assert not set2 - set1, f"Set 2 has extra elements: {set2 - set1}" 6 | -------------------------------------------------------------------------------- /src/gjdutils/lists.py: -------------------------------------------------------------------------------- 1 | def get_list_from_str_or_list(str_or_list: str | list[str]) -> list[str]: 2 | """ 3 | e.g. 4 | - get_list_from_str_or_list("asdf") -> ["asdf"] 5 | - get_list_from_str_or_list(["asdf"]) -> ["asdf"] 6 | """ 7 | if isinstance(str_or_list, str): 8 | aliases = [str_or_list] 9 | elif isinstance(str_or_list, list): 10 | aliases = str_or_list 11 | else: 12 | raise Exception(f"Unknown typ: {type(str_or_list)}") 13 | return aliases 14 | -------------------------------------------------------------------------------- /docs/instructions/RESOLVE_MERGE_CONFLICTS.md: -------------------------------------------------------------------------------- 1 | # Resolve Merge Conflicts 2 | 3 | Review: 4 | - Git status 5 | - the recent/relevant Git history 6 | - planning docs for any relevant pieces of work 7 | - the details of the merge conflict itself 8 | - relevant code & docs, starting with relevant planning documents 9 | 10 | Look for a way to preserve the best of both worlds. 11 | 12 | Do you feel confident about how to resolve the merge conflict? Ask if you have questions. 13 | 14 | Ultrathink. 15 | 16 | Make a proposal. Don't make changes yet. 17 | -------------------------------------------------------------------------------- /.cursor/rules/README.md: -------------------------------------------------------------------------------- 1 | I've found the Cursor rules to be a bit unreliable, so I've moved all of them to straight Markdown .md docs in `docs/`. 2 | 3 | My current approach is to create lots of small rules and explicitly reference one or more of them explicitly, but I'm sure this will evolve over time. 4 | 5 | e.g. 6 | 7 | - Debug problem X, following @scientistic_detective_mode.mdc 8 | 9 | - Do X, following `coding_principles.md`. Be in `sounding_board_mode.md`. See also `testing_python.md` 10 | 11 | - Write a planning doc for X, following `WRITE_PLANNING_DOC.md`. 
-------------------------------------------------------------------------------- /src/gjdutils/cli/pypi/app.py: -------------------------------------------------------------------------------- 1 | """PyPI-related CLI commands.""" 2 | 3 | import typer 4 | from rich.console import Console 5 | 6 | from .check import app as check_app 7 | from .deploy import app as deploy_app 8 | 9 | app = typer.Typer( 10 | help="PyPI package management commands", 11 | add_completion=True, 12 | no_args_is_help=True, 13 | context_settings={"help_option_names": ["-h", "--help"]}, 14 | ) 15 | 16 | # Add subcommand groups to main app 17 | app.add_typer(check_app, name="check") 18 | app.add_typer(deploy_app, name="deploy") 19 | 20 | console = Console() 21 | -------------------------------------------------------------------------------- /docs/instructions/SOUNDING_BOARD_MODE.md: -------------------------------------------------------------------------------- 1 | Investigate the codebase, search the web if you need to, ask questions to clarify requirements if you need to, raise concerns if you have them, consider the desiderata/criteria for success, suggest alternatives, weigh up options and trade-offs, point out if you think the user is wrong or see a better way. 2 | 3 | Do everything you can to help the user to think this through, and make the best decision. 4 | 5 | When asking questions, ask at most a couple at a time, to avoid overwhelming cognitive overload for the user. 6 | 7 | Ultrathink. 8 | 9 | Don't make changes yet. -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | from typer.testing import CliRunner 2 | from gjdutils.cli.main import app 3 | from gjdutils.__version__ import __version__ 4 | 5 | runner = CliRunner() 6 | 7 | 8 | def test_version(): 9 | result = runner.invoke(app, ["version"]) 10 | assert result.exit_code == 0 11 | assert result.stdout.strip() == __version__ 12 | 13 | 14 | def test_help(): 15 | result = runner.invoke(app, ["--help"]) 16 | assert result.exit_code == 0 17 | assert "GJDutils CLI" in result.stdout 18 | assert "version" in result.stdout 19 | assert "git-clean" in result.stdout 20 | -------------------------------------------------------------------------------- /docs/instructions/HOUSEKEEPING_OLD_PLANNING_DOC.md: -------------------------------------------------------------------------------- 1 | Read the planning doc, and relevant code & docs. 2 | 3 | This is an old planning doc. We might have basically finished it. Is there anything important remaining? 4 | 5 | - If so, stop and let's discuss. 6 | 7 | - If not: 8 | - Update documentation if needed 9 | - Follow instructions in docs/instructions/RENAME_OR_MOVE.md to move the planning doc to planning/finished/ and update references to the new path with a subagent (if available) 10 | - Commit this set of changes in a single commit (otherwise following docs/instructions/GIT_COMMIT_CHANGES.md ). -------------------------------------------------------------------------------- /docs/instructions/GIT_CREATE_BRANCH.md: -------------------------------------------------------------------------------- 1 | If the user hasn't provided info about what the branch will be for, stop and ask them. 2 | 3 | Decide on a short phrase, based on the task defined by the user, as the branch name, e.g. 
`refactor_blah_for_foo` 4 | 5 | Run this in a subagent (if available): 6 | - Check that we're on the main branch (typically `main` or `master`) or another appropriate base branch - if not, double-check with the user before continuing. 7 | - Generate date prefix using `npx tsx src/ts/cli/sequential-datetime-prefix.ts .` and prepend to the short-phrase branch-name 8 | - Then create that as a new branch -------------------------------------------------------------------------------- /docs/instructions/DETECTIVE_SCIENTIST_MODE.md: -------------------------------------------------------------------------------- 1 | # Scientist-detective mode 2 | 3 | Your job is to help understand what's going on, and why things are happening the way they are, and perhaps inform a solution. 4 | 5 | Be a combination of detective (sniffing around for clues) and scientist (systematically considering and testing possible explanations). 6 | 7 | Gather information, generate hypotheses, look for ways to test/confirm/disconfirm those hypotheses (starting with the most likely and easiest to test). 8 | 9 | Ask questions of the user if that will clarify (about the problem or the goals) or help you to gather more information. 10 | 11 | Don't make changes unless instructed, or do anything risky/destructive. -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2022", 4 | "module": "commonjs", 5 | "lib": ["ES2022"], 6 | "outDir": "./dist/ts", 7 | "rootDir": "./src/ts", 8 | "strict": true, 9 | "esModuleInterop": true, 10 | "skipLibCheck": true, 11 | "forceConsistentCasingInFileNames": true, 12 | "resolveJsonModule": true, 13 | "declaration": true, 14 | "declarationMap": true, 15 | "sourceMap": true, 16 | "removeComments": false, 17 | "noUnusedLocals": true, 18 | "noUnusedParameters": true, 19 | "noImplicitReturns": true, 20 | "noFallthroughCasesInSwitch": true 21 | }, 22 | "include": ["src/ts/**/*"], 23 | "exclude": ["node_modules", "dist"] 24 | } -------------------------------------------------------------------------------- /src/gjdutils/misc.py: -------------------------------------------------------------------------------- 1 | from .dicts import print_dict 2 | from .typ import isfunction 3 | 4 | 5 | def print_locals( 6 | d: dict, ignore_functions: bool = True, ignore_underscores: bool = True 7 | ): 8 | """ 9 | e.g. print_locals(locals()) 10 | """ 11 | 12 | def del_robust(k): 13 | if k in d: 14 | del d[k] 15 | 16 | assert isinstance(d, dict) 17 | for k in d.keys(): 18 | if ignore_functions and isfunction(d[k]): 19 | del_robust(k) 20 | if ignore_underscores and k.startswith("_"): 21 | del_robust(k) 22 | return print_dict(d) 23 | 24 | 25 | def identity_func(x): 26 | return x 27 | 28 | 29 | def empty_func(*args, **kwargs): 30 | return None 31 | -------------------------------------------------------------------------------- /src/gjdutils/prompt_templates.py: -------------------------------------------------------------------------------- 1 | # each of these is a Jinja template. see gjdutils.strings.jinja_render() 2 | 3 | summarise_text = """ 4 | Summarise the following. Be as concise, concrete, and easy to understand as you can. Provide only the summary itself, without any superfluous conversation, commentary, markup, etc. 
{{ granularity }} 5 | 6 | ---- 7 | {{ txt }} 8 | """ 9 | 10 | 11 | # UNTESTED 12 | summarise_list_of_texts_as_one = """ 13 | Summarise the whole of the following list. Be as concise, concrete, and easy to understand as you can. Provide only the summary itself, without any superfluous conversation or commentary etc. {{ granularity }} 14 | 15 | ---- 16 | {% for txt in txts %} 17 | - {{txt}} 18 | {% endfor %} 19 | ---- 20 | """ 21 | -------------------------------------------------------------------------------- /docs/instructions/DO_EXECUTE_PLANNING_DOC.md: -------------------------------------------------------------------------------- 1 | Now work on the next stage of the planning doc (prioritising work suggested by the user). 2 | 3 | If the planning doc is currently stored in `planning/later/` or `planning/discarded/`, first move it to `planning/` (to signal that it's in progress). 4 | 5 | Start by reading relevant code & docs. 6 | 7 | Ask if you need to clarify anything, or if you have concerns. 8 | 9 | Use tasks and subagents (provided with lots of context), e.g. for curl/Puppeteer/Playwright MCP (preferring Puppeteer), running tests, and any other encapsulated tasks. Follow instructions in TASKS_SUBAGENTS.md 10 | 11 | Always stop to review with the user at the end of a stage. If a stage is massive, then stop partway through. Output as per DEBRIEF_PROGRESS.md -------------------------------------------------------------------------------- /src/gjdutils/runtime.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | # keep this, because it makes sense for the user to be able to import this from here 4 | from gjdutils.pytest_utils import in_pytest 5 | 6 | 7 | def in_notebook() -> bool: 8 | # from https://stackoverflow.com/q/15411967 9 | try: 10 | shell = get_ipython().__class__.__name__ # type: ignore 11 | if shell == "ZMQInteractiveShell": 12 | return True # Jupyter notebook or qtconsole 13 | elif shell == "TerminalInteractiveShell": 14 | return False # Terminal running IPython 15 | else: 16 | return False # Other type (?) 
17 | except NameError: 18 | return False # Probably standard Python interpreter 19 | 20 | 21 | def in_colab(): 22 | return "google.colab" in sys.modules 23 | -------------------------------------------------------------------------------- /tests/test_num.py: -------------------------------------------------------------------------------- 1 | from pytest import approx, raises 2 | 3 | from gjdutils.num import discretise 4 | 5 | 6 | def test_discretise(): 7 | assert discretise(0.00, increment=0.05) == approx(0.00) 8 | assert discretise(0.01, increment=0.05) == approx(0.00) 9 | assert discretise(0.06, increment=0.05) == approx(0.05) 10 | assert discretise(0.99, increment=0.05) == approx(0.95) 11 | assert discretise(1.00, increment=0.05) == approx(1.00) 12 | # check values outside the range 13 | with raises(Exception): 14 | discretise(5.00, increment=0.05) 15 | with raises(Exception): 16 | discretise(-1.00, increment=0.05) 17 | assert discretise(-1.00, increment=0.05, enforce_range=False) == approx(0.00) 18 | assert discretise(5.00, increment=0.05, enforce_range=False) == approx(1.00) 19 | -------------------------------------------------------------------------------- /src/gjdutils/todo/convert_parquet.py: -------------------------------------------------------------------------------- 1 | # USAGE: 2 | # python convert_parquet.py my_parquet_file.parquet 3 | # 4 | # based on https://chat.openai.com/c/ea3e9401-e7bb-4288-b270-83b0fb327abe 5 | # 6 | # pip install pandas openpyxl pyarrow 7 | 8 | import sys 9 | import pandas as pd 10 | 11 | # Replace 'your_file.parquet' with the path to your Parquet file 12 | # parquet_file = 'vary_amount_of_training_data__adult_sexual__aps.parquet' 13 | parquet_file = sys.argv[1] 14 | 15 | assert parquet_file.endswith(".parquet") 16 | 17 | # Read the Parquet file 18 | df = pd.read_parquet(parquet_file) 19 | 20 | # Replace 'output_file.xlsx' with the desired output file name 21 | output_file = parquet_file.replace('.parquet', '.xlsx') 22 | 23 | # Write to an Excel file 24 | df.to_excel(output_file, index=False) 25 | 26 | print(f"Wrote to {output_file}") 27 | 28 | 29 | -------------------------------------------------------------------------------- /src/gjdutils/print_utils.py: -------------------------------------------------------------------------------- 1 | from gjdutils.functions import variable_from_caller 2 | 3 | 4 | def vprint(min_verbosity: int, msg: str, /, **kwargs): 5 | """Print if the caller's verbosity level is >= min_verbosity, e.g. 6 | 7 | verbose = 2 8 | vprint(1, "Hello, world!") # prints because verbose >= 1 9 | 10 | This function looks for a `verbose` variable in the caller's scope. 
11 | 12 | Args: 13 | min_verbosity: Minimum verbosity level required to print (positional-only) 14 | **kwargs: Arguments to pass to print() 15 | 16 | Raises: 17 | ValueError: If 'verbose' variable is not found in caller's scope 18 | """ 19 | # get the `verbose` variable from the caller context 20 | verbose = variable_from_caller("verbose") 21 | 22 | if verbose >= min_verbosity: 23 | print(msg, **kwargs) 24 | -------------------------------------------------------------------------------- /tests/test_rand.py: -------------------------------------------------------------------------------- 1 | from pytest import raises 2 | from gjdutils.rand import assert_valid_readable_rand_id 3 | 4 | 5 | def test_check_valid_readable_rand_id(): 6 | # Test case: Valid ID with default parameters 7 | id_ = "abc234" 8 | assert_valid_readable_rand_id(id_) 9 | 10 | # Test case: Valid ID with specified number of characters 11 | assert_valid_readable_rand_id(id_, nchars=6) 12 | 13 | # Test case: Invalid ID with specified number of characters 14 | with raises(AssertionError): 15 | assert_valid_readable_rand_id(id_, nchars=8) 16 | 17 | assert_valid_readable_rand_id(id_, valid_chars="cba4321") 18 | 19 | # Test case: Invalid ID with default valid characters (doesn't allow '1') 20 | with raises(AssertionError): 21 | assert_valid_readable_rand_id(id_="abc123") 22 | # Test case: Invalid ID with specified valid characters 23 | with raises(AssertionError): 24 | assert_valid_readable_rand_id(id_, valid_chars="xyz123") 25 | -------------------------------------------------------------------------------- /src/gjdutils/scripts/export_envs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # https://stackoverflow.com/a/20909045/230523 4 | # e.g. source scripts/export_envs.sh .env && echo $TESTME 5 | 6 | # this can't be a Python script, because the whole point is to 7 | # add environment variables to the current shell, and Unix won't 8 | # let a script manipulate its parent environment. 9 | # 10 | # see pyproject.toml for how `install.py` automatically gets called 11 | # by `pip install` 12 | 13 | # Check if the script is being sourced 14 | if [ "$0" = "$BASH_SOURCE" ]; then 15 | echo "Error: This script needs to be sourced. Please run: source $0 " 16 | exit 1 17 | fi 18 | 19 | if [ $# -eq 0 ]; then 20 | echo "Error: Environment file path is required" 21 | echo "Usage: source $0 " 22 | return 1 23 | fi 24 | 25 | ENV_FILE="$1" 26 | 27 | if [ ! -f "$ENV_FILE" ]; then 28 | echo "Error: File '$ENV_FILE' does not exist" 29 | return 1 30 | fi 31 | 32 | export $(grep -v '^#' "$ENV_FILE" | xargs) -------------------------------------------------------------------------------- /src/gjdutils/colab.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from .runtime import in_colab 4 | 5 | # https://stackoverflow.com/a/53586419/230523 6 | IN_COLAB = in_colab() 7 | # also specified in authortools_demo.ipynb 8 | GOOGLE_DRIVE_MOUNT_PATH = "/content/drive" 9 | GOOGLE_DRIVE_OUTPUT_PATH = os.path.join( 10 | GOOGLE_DRIVE_MOUNT_PATH, 11 | "Shareddrives", 12 | "Blah", # TODO 13 | ) 14 | 15 | 16 | def colab_path_if_needed(filen: str): 17 | """ 18 | Prepend the Google Drive mount path for Colab if IN_COLAB is True. 
19 | """ 20 | if IN_COLAB: 21 | filen = os.path.join(GOOGLE_DRIVE_OUTPUT_PATH, filen) 22 | return filen 23 | 24 | 25 | def set_css_for_colab(): 26 | from IPython.display import HTML, display 27 | 28 | # from https://stackoverflow.com/a/61401455/230523 29 | display( 30 | HTML( 31 | """ 32 | 37 | """ 38 | ) 39 | ) 40 | -------------------------------------------------------------------------------- /src/gjdutils/cli/main.py: -------------------------------------------------------------------------------- 1 | import typer 2 | 3 | from gjdutils.shell import fatal_error_msg 4 | from .pypi import app as pypi_app 5 | from .check_git_clean import check_git_clean 6 | 7 | app = typer.Typer( 8 | help="GJDutils CLI - utility functions for data science, AI, and web development", 9 | add_completion=True, 10 | no_args_is_help=True, 11 | context_settings={"help_option_names": ["-h", "--help"]}, 12 | ) 13 | 14 | # Add PyPI commands 15 | app.add_typer(pypi_app, name="pypi") 16 | 17 | 18 | @app.command() 19 | def version(): 20 | """Show gjdutils version""" 21 | from gjdutils.__version__ import __version__ 22 | 23 | typer.echo(f"{__version__}") 24 | 25 | 26 | @app.command() 27 | def git_clean(): 28 | """Check if git working directory is clean""" 29 | check_git_clean() 30 | 31 | 32 | @app.command() 33 | def export_envs(): 34 | fatal_error_msg( 35 | "Exporting envs can't be run from Python - you have to run `source gjd-export-envs [ENV_FILE]`" 36 | ) 37 | 38 | 39 | if __name__ == "__main__": 40 | app() 41 | -------------------------------------------------------------------------------- /tests/test_embeddings_openai.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | 4 | from gjdutils.embeddings_openai import get_openai_embeddings, compare_embedding_query 5 | 6 | 7 | @pytest.mark.skipif( 8 | not os.environ.get("OPENAI_API_KEY"), 9 | reason="Requires OPENAI_API_KEY to call OpenAI embeddings API", 10 | ) 11 | def test_cosine_similarity_three_sentences(): 12 | texts = [ 13 | "The quick brown fox jumps over the lazy dog.", 14 | "A quick brown fox leaps over a lazy dog.", # semantically similar 15 | "The stock market closed higher today after strong earnings.", 16 | ] 17 | 18 | embs, extra = get_openai_embeddings(texts, model="text-embedding-3-small") 19 | 20 | # Compare sentence 0 vs others (list inputs) 21 | scores, _ = compare_embedding_query(embs[0], embs, metric="cosine") 22 | # scores[0] is self-similarity (~1.0). Interested in indices 1 and 2. 23 | assert scores[1] > scores[2], ( 24 | f"Expected similar sentence to have higher cosine similarity: {scores[1]=} vs {scores[2]=}" 25 | ) 26 | 27 | 28 | -------------------------------------------------------------------------------- /docs/instructions/THIRD_PARTY_LIBRARY_SELECTION.md: -------------------------------------------------------------------------------- 1 | # Third-Party Library Selection 2 | 3 | ## Selection Criteria 4 | 5 | - **IMPORTANT** Long-lasting community, lots of docs/discussion/examples (so there will be lots of pretraining data to help LLM coding models). 6 | - Well-designed API: Intuitive, composable, type-safe (for TypeScript). 7 | - Plus any other criteria from the user. 8 | 9 | 10 | ## Process 11 | - Understand requirements first. see `SOUNDING_BOARD_MODE.md`. Ask questions if you need to clarify. 12 | - Read relevant code/docs to understand the relevant technology stack & architecture for this project. 13 | - Run `date` to get today's date, so you can judge recency. 
14 | - Search the web. Evaluate options, tradeoffs. 15 | - Usually prefer the most recent stable version, but also consider whether an older, more popular version with more pretraining data would be a better fit. Discuss with the user if the answer isn't obvious. 16 | - Make a recommendation. 17 | - Discuss with user. 18 | - Write a doc describing the chosen library, as per `WRITE_DEEP_DIVE_DOC.md`. 19 | 20 | -------------------------------------------------------------------------------- /docs/reference/TESTING_PYTHON.md: -------------------------------------------------------------------------------- 1 | # Automated testing 2 | 3 | Always run tests with `pytest` (rather than `python -m blah`). Most of the time it's better to just run relevant tests, and only occasionally run all of them (e.g. after making major/wide-ranging changes). 4 | 5 | If you've run the tests recently, use the `-x` and `--lf` flags, so that we zero in on the failing tests and avoid too much output. 6 | 7 | Test functions should always be called `test_*.py`. Avoid creating test-related utility or fixture functions called `test_*.py` to avoid confusion with actual tests. 8 | 9 | Write a test before writing new code. Run relevant tests after changing code. 10 | 11 | Keep tests simple and readable. Start with testing the simple cases. Make a proposal and ask the user whether edge cases are important. 12 | 13 | Aim to reuse fixtures and sample data. 14 | 15 | When changing tests, make minimal changes that are directly relevant to the task at hand. 16 | 17 | If there is a `docs/TESTING.md`, or `docs/FRONTEND_TESTING.md`, `docs/BACKEND_TESTING.md`, treat them as more important than these instructions. -------------------------------------------------------------------------------- /src/gjdutils/decorators.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | from rich.console import Console 3 | from typing import Callable, TypeVar, Any, cast 4 | 5 | console = Console() 6 | 7 | F = TypeVar("F", bound=Callable[..., Any]) 8 | 9 | 10 | def console_print_doc(color: str = "blue") -> Callable[[F], F]: 11 | """ 12 | A decorator that prints the docstring of a function when it starts running. 13 | The entire docstring will be printed in the specified color. 14 | 15 | Args: 16 | color (str): Color for the docstring text. Defaults to "blue". 17 | 18 | Example: 19 | @console_print_doc(color="green") 20 | def my_function(): 21 | "This entire docstring will be green" 22 | pass 23 | """ 24 | 25 | def decorator(func: F) -> F: 26 | @wraps(func) 27 | def wrapper(*args: Any, **kwargs: Any) -> Any: 28 | if func.__doc__: 29 | console.print(func.__doc__.strip(), style=color) 30 | return func(*args, **kwargs) 31 | 32 | return cast(F, wrapper) 33 | 34 | return decorator 35 | -------------------------------------------------------------------------------- /src/gjdutils/typ.py: -------------------------------------------------------------------------------- 1 | # keep this, because it makes sense for the user to be able to import from here 2 | from inspect import isfunction 3 | 4 | # these are occasionally useful for if statements, 5 | # though probably better to rely on type-hinting wherever possible 6 | 7 | 8 | def isint(f, tol=0.00000001): 9 | """ 10 | Takes in a float F, and checks that it's within TOL of floor(f). 
11 | """ 12 | # we're casting to float before the comparison with TOL 13 | # so that decimal Fs work 14 | return abs(float(f) - int(f)) <= 0.00000001 15 | 16 | 17 | def isnum(n): 18 | try: 19 | float(n) 20 | return True 21 | except: 22 | return False 23 | 24 | 25 | def is_same_sign(x1, x2): 26 | if x1 > 0 and x2 > 0: 27 | return True 28 | if x1 < 0 and x2 < 0: 29 | return True 30 | 31 | 32 | def isiterable(x): 33 | """ 34 | from http://stackoverflow.com/questions/1952464/in-python-how-do-i-determine-if-a-variable-is-iterable 35 | """ 36 | import collections.abc 37 | 38 | return isinstance(x, collections.abc.Iterable) 39 | -------------------------------------------------------------------------------- /docs/instructions/dev/PULL_PUSH_LOCAL_GJDUTILS_REPOS.md: -------------------------------------------------------------------------------- 1 | # Pull/Push local `gjdutils` repos — concise prompt 2 | 3 | Paste this prompt to rerun the workflow: 4 | 5 | ``` 6 | Find all directories named "gjdutils" under $HOME/Dropbox/dev and $HOME/dev (exclude paths containing , `.venv`, `site-packages`). 7 | 8 | For each found directory that is a git repo: 9 | 10 | - First, detect uncommitted changes. Print a flat list "REPOS WITH UNCOMMITTED CHANGES" (paths with $HOME abbreviated as ~), then pause for my decision. If I reply "skip" (default), skip these repos for pull/push. 11 | 12 | - Switch ALL found repos' GitHub remotes (any remote) from HTTPS to SSH, format: git@github.com:OWNER/NAME.git. Do this even for skipped/dirty repos. 13 | 14 | - For the remaining clean (non-skipped) repos: run git pull --ff-only --no-rebase, then git push. Do not attempt interactive auth; rely on SSH keys. 15 | 16 | At the end, print a concise summary with three sections: 17 | 1) REPOS WITH UNCOMMITTED CHANGES 18 | 2) PULL/PUSH RESULTS (per repo) 19 | 3) SKIPPED REPOS 20 | 4) ANY OTHER NOTES, QUESTIONS OR PROBLEMS 21 | ``` 22 | 23 | 24 | -------------------------------------------------------------------------------- /src/gjdutils/hashing.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import hashlib 3 | 4 | 5 | def hash_readable(s, n=10): 6 | """ 7 | Returns a string hash that contains base32 characters instead of a number, 8 | to make it more readable (and still low risk of collisions if you truncate it). 9 | 10 | e.g. hash_readable('hello') => 'vl2mmho4yx' 11 | 12 | Unlike Python's default hash function, this should be deterministic 13 | across sessions (because we're using 'hashlib'). 14 | 15 | I'm using this for anonymising email addresses if I don't have a user UUID. 16 | """ 17 | if isinstance(s, str): 18 | s = bytes(s, "utf-8") 19 | hashed = hashlib.sha1(s).digest() 20 | b32 = base64.b32encode(hashed)[:n] 21 | return b32.decode("utf-8").lower() 22 | 23 | 24 | def hash_consistent(obj): 25 | """ 26 | Supposedly gives the same response every time you call it, even after restarting the kernel. 27 | 28 | N.B. This is based on output from GitHub Copilot, and I haven't tried it. 
29 | """ 30 | obj_str = str(obj) 31 | hash_obj = hashlib.sha256(obj_str.encode()).hexdigest() 32 | return hash_obj 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Greg Detre 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/gjdutils/voice_speechrecognition.py: -------------------------------------------------------------------------------- 1 | # from https://github.com/Uberi/speech_recognition/blob/master/examples/microphone_recognition.py 2 | 3 | #!/usr/bin/env python3 4 | 5 | # NOTE: this example requires PyAudio because it uses the Microphone class 6 | 7 | from typing import Optional 8 | import speech_recognition as sr 9 | from gjdutils.env import get_env_var 10 | 11 | 12 | def recognise_speech(display: Optional[str], verbose: int = 0): 13 | """ 14 | Recognises speech from the microphone and returns the transcribed text. 15 | 16 | Press ENTER when you've finished recording. 17 | 18 | Designed for command-line use. 19 | """ 20 | openai_api_key = get_env_var("OPENAI_API_KEY") 21 | if display: 22 | print(display, end="", flush=True) 23 | # obtain audio from the microphone 24 | r = sr.Recognizer() 25 | with sr.Microphone() as source: 26 | audio = r.listen(source) 27 | print("... 
PROCESSING") 28 | text = r.recognize_whisper(audio, api_key=openai_api_key) 29 | if verbose > 0: 30 | print(text) 31 | return text 32 | 33 | 34 | if __name__ == "__main__": 35 | recognise_speech("Say something!", verbose=1) 36 | -------------------------------------------------------------------------------- /tests/test_print_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from gjdutils.print_utils import vprint 3 | 4 | 5 | def test_vprint_basic(): 6 | verbose = 2 7 | # Should print 8 | vprint(1, "test1", end="") # Empty print 9 | vprint(1, "test2", sep=" ", end="\n") # Print with kwargs 10 | vprint(2, "test3", end="") # Exact match 11 | 12 | # Should not print 13 | vprint(3, "test4", end="") 14 | 15 | 16 | def test_vprint_missing_verbose(): 17 | # Should raise ValueError when verbose is not defined 18 | with pytest.raises(ValueError) as exc_info: 19 | vprint(0, "test", end="") 20 | assert "verbose" in str(exc_info.value) 21 | assert "not found in caller function" in str(exc_info.value) 22 | 23 | 24 | def test_vprint_args_validation(): 25 | verbose = 1 26 | # Should raise TypeError when passing extra positional args 27 | with pytest.raises(TypeError) as exc_info: 28 | vprint(1, "msg", "extra arg") # type: ignore 29 | assert "positional argument" in str(exc_info.value) 30 | 31 | # Should raise TypeError when required positional args are missing 32 | with pytest.raises(TypeError) as exc_info: 33 | vprint(msg="A message", min_verbosity=1) # type: ignore 34 | assert "missing 2 required positional arguments" in str(exc_info.value) 35 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "gjdutils-ts", 3 | "version": "1.0.0", 4 | "description": "TypeScript utilities for gjdutils - general-purpose scripts and CLI tools", 5 | "main": "dist/ts/index.js", 6 | "types": "dist/ts/index.d.ts", 7 | "scripts": { 8 | "build": "tsc", 9 | "watch": "tsc --watch", 10 | "clean": "rm -rf dist/ts" 11 | }, 12 | "keywords": [ 13 | "utilities", 14 | "cli", 15 | "typescript", 16 | "development-tools", 17 | "git-worktrees", 18 | "llm-tools" 19 | ], 20 | "author": "Greg Detre", 21 | "license": "MIT", 22 | "dependencies": { 23 | "clipanion": "^3.2.1" 24 | }, 25 | "devDependencies": { 26 | "@types/node": "^20.0.0", 27 | "tsx": "^4.0.0", 28 | "typescript": "^5.0.0" 29 | }, 30 | "bin": { 31 | "sequential-datetime-prefix": "./dist/ts/cli/sequential-datetime-prefix.js", 32 | "extract-llm-conversation": "./dist/ts/cli/extract-llm-conversation.js", 33 | "llm-critique-planning-docs": "./dist/ts/critique/llm-critique-planning-docs.js", 34 | "parse-llm-output": "./dist/ts/critique/parse-llm-output.js", 35 | "count-lines": "./dist/ts/scripts/count-lines.js", 36 | "git-worktree-sync": "./dist/ts/scripts/git-worktree-sync.js", 37 | "git-worktree-sync-all": "./dist/ts/scripts/git-worktree-sync-all.js" 38 | } 39 | } -------------------------------------------------------------------------------- /docs/instructions/AUDIT_ARCHITECTURE_MODE.md: -------------------------------------------------------------------------------- 1 | Perform a technical audit/review. 2 | 3 | - If a feature/area/question/file has been mentioned, use that to guide your investigation. 4 | - If a planning doc has been mentioned, check whether recent changes (e.g. recent related Git commits, and also uncommitted changes) implement the planning doc correctly. 
5 | - Look for bugs, gotchas, potential problems 6 | - Is there anything you would refactor (e.g. too-large files/functions, near-duplicated code that could be reused), or architectural best practices we should use? 7 | - Anything else you notice that could be improved? 8 | - Zoom out to consider whether the overall strategy/approach is sound. 9 | 10 | For background, read relevant docs if they exist (they may have different names), e.g.: 11 | - `README.md` 12 | - `PRODUCT_VISION_FEATURES.md` 13 | - `CODING_PRINCIPLES_GUIDELINES.md` 14 | - `docs/instructions/WRITE_EVERGREEN_DOC.md` 15 | - `docs/instructions/WRITE_PLANNING_DOC.md` 16 | - and any other relevant docs 17 | 18 | Don't make changes. Just investigate, discuss. 19 | 20 | Output: 21 | - Prioritise recommendations based on a combination of ease and value. 22 | - Indicate how important each finding is and why, whether there's an obvious fix or multiple options, and how complex/risky you expect it to be. 23 | 24 | Ultrathink. 25 | -------------------------------------------------------------------------------- /docs/instructions/TASKS_SUBAGENTS.md: -------------------------------------------------------------------------------- 1 | # Tasks and Subagents 2 | 3 | Use subagents where appropriate: 4 | - They are especially valuable as a way to avoid filling up your context window, e.g. for running a battery of tests, Playwright/curl/browser automation, MCPs with verbose output 5 | - They are also a good fit for encapsulated & well-defined tasks, i.e. tasks that don't need the full context of the conversation so far, and/or where we only need a summary of what was done in order to proceed 6 | - Use subagents in parallel where possible (because this is faster), but only if there isn't a dependency between tasks (e.g. the output of this one is useful as input for the next) 7 | - Give them lots of background/careful instructions so that they can make good decisions, e.g. about goals, point them to relevant docs/code, what we've been changing, gotchas & things to avoid, relevant environment variables like $PORT for browser automation, using your test framework, the current date/time from `date`, and anything else that will help them to be effective but correct/careful. 8 | - Tell subagents what to be cautious of, and to abort and provide feedback on what happened if there are problems or surprises (to avoid them going rogue and doing more harm than good) 9 | 10 | Use the task/todo list when you have more than a couple of things to track, or where ordering matters, when there are subagents involved, or if you think it will help. 11 | -------------------------------------------------------------------------------- /docs/reference/CODING_SHELL_SCRIPTS.md: -------------------------------------------------------------------------------- 1 | # Shell Script Guidelines 2 | 3 | ## General Principles 4 | 5 | - Keep things simple and readable 6 | - Prefer Python scripts over shell for longer scripts 7 | - Keep scripts minimal, concise, and focused on a single task 8 | - Break long main functions into sub-functions to make it easy to follow the logic 9 | - Prefer to show the full tracebacks & error messages, to give the user full information. Minimise try/except. 10 | - Fail explicitly and loudly, e.g. 
, and use `set -e` in bash scripts to exit on error 11 | 12 | ## Coding details 13 | - Scripts live in `scripts/` 14 | - Make scripts executable with `chmod +x` 15 | - Use `#!/bin/bash` or `#!/usr/bin/env python3` shebang lines 16 | - Use python `Typer` if command-line arguments are needed 17 | - Use `cmd.py` functionality, e.g. `run_cmd()` 18 | - If there is overlapping functionality, maybe move it into `src/shell.py` or somewhere else reusable 19 | - Use colors for better readability (green for success, yellow for warnings, red for errors) 20 | - Show progress for long-running operations 21 | - If it will make it easier to see what the Python is doing in a script, include a comment showing the equivalent shell commands, e.g.: 22 | ```python 23 | # i.e. rm -rf dist/ 24 | shutil.rmtree("dist", ignore_errors=True) 25 | ``` 26 | 27 | 28 | ## Examples 29 | 30 | See `scripts/check_locally.py` for an example following most of these guidelines. 31 | -------------------------------------------------------------------------------- /src/gjdutils/audios.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional 3 | 4 | 5 | def play_mp3(mp3_filen: str, prog: str = "cli", speed: Optional[float] = None): 6 | prog = prog.lower().strip() 7 | full_mp3_filen = os.path.abspath(os.path.expanduser(mp3_filen)) 8 | if prog == "vlc": 9 | # pip install python-vlc 10 | import vlc 11 | 12 | vlc_mp3_filen = os.path.join("file://", full_mp3_filen) 13 | p = vlc.MediaPlayer(vlc_mp3_filen) 14 | if speed is not None: 15 | p.set_rate(speed) # type: ignore 16 | p.play() # type: ignore 17 | elif prog == "pygame": 18 | assert speed is None, "Not implemented speed for pygame" 19 | import pygame 20 | 21 | pygame.init() 22 | pygame.mixer.init() 23 | pygame.mixer.music.load(full_mp3_filen) 24 | pygame.mixer.music.play() 25 | pygame.event.wait() 26 | elif prog == "playsound": 27 | # maybe set to 1.2.2 if you're having trouble installing 28 | from playsound import playsound 29 | 30 | assert speed is None, "Playsound doesn't support changing speed" 31 | # https://stackoverflow.com/a/63147250/230523 32 | playsound(mp3_filen) 33 | elif prog == "cli": 34 | cmd = f"afplay -r {speed} '{full_mp3_filen}'" 35 | # print(cmd) 36 | os.system(cmd) 37 | else: 38 | raise Exception(f"Unknown PROG '{prog}'") 39 | return full_mp3_filen 40 | -------------------------------------------------------------------------------- /src/gjdutils/functions.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | from typing import Any 3 | 4 | 5 | def func_name(): 6 | # https://stackoverflow.com/a/13514318/230523 7 | return inspect.currentframe().f_back.f_code.co_name # type: ignore 8 | 9 | 10 | def variable_from_caller(var_name: str, frame_depth: int = 1) -> Any: 11 | """Get a variable from the caller's frame. 
12 | 13 | Args: 14 | var_name: Name of the variable to retrieve 15 | frame_depth: How many frames to go back (default: 1 for immediate caller) 16 | 17 | Raises: 18 | ValueError: If the variable doesn't exist in the caller's scope 19 | """ 20 | frame = inspect.currentframe() 21 | try: 22 | # Go back the specified number of frames 23 | for _ in range(frame_depth + 1): 24 | if frame.f_back is None: # type: ignore 25 | raise ValueError(f"Cannot go back {frame_depth} frames") 26 | frame = frame.f_back # type: ignore 27 | 28 | if var_name not in frame.f_locals: # type: ignore 29 | caller_name = frame.f_code.co_name # type: ignore 30 | raise ValueError( 31 | f"Variable '{var_name}' not found in caller function '{caller_name}'. " 32 | f"Make sure to define a '{var_name}' parameter or variable." 33 | ) 34 | 35 | return frame.f_locals[var_name] # type: ignore 36 | finally: 37 | # Clean up circular references 38 | del frame 39 | -------------------------------------------------------------------------------- /src/gjdutils/collection_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Literal, Sequence, TypeVar 2 | 3 | T = TypeVar("T") 4 | 5 | 6 | def found_one(lst: Sequence[T]) -> T | Literal[False]: 7 | """ 8 | e.g. 9 | >>> found_one([]) 10 | False 11 | >>> found_one([10]) 12 | 10 13 | >>> found_one([10, 20]) 14 | False 15 | """ 16 | if len(lst) == 0: 17 | return False 18 | elif len(lst) == 1: 19 | found = lst[0] 20 | assert found is not False, "Too confusing - we found something, but it's False" 21 | return found 22 | else: 23 | return False 24 | 25 | 26 | def find_duplicates(lst: Sequence[T]) -> list[T]: 27 | return [item for item in set(lst) if lst.count(item) > 1] 28 | 29 | 30 | # def uniquify(items: Sequence[T], key: Callable[[T], Any] | None = None) -> list[T]: 31 | # this would be useful if you wanted to uniquify something non-hashable, but I couldn't get it to work 32 | # https://www.perplexity.ai/search/in-python-unique-version-of-a-5r0iCRlBSjm2Dv6HGLu_6g 33 | # return list(OrderedDict.fromkeys(map(key, items) if key else items)) 34 | 35 | 36 | def uniquify(items: Sequence[T]) -> list[T]: 37 | # https://www.perplexity.ai/search/unique-version-of-a-list-prese-qYpae.JBRDedvHdmEyOqfA 38 | # seen = set() 39 | # return [x for x in lst if not (x in seen or seen.add(x))] 40 | 41 | # https://www.w3resource.com/python-exercises/list-advanced/python-list-advanced-exercise-8.php 42 | return list(dict.fromkeys(items)) 43 | -------------------------------------------------------------------------------- /docs/instructions/DEBRIEF_PROGRESS.md: -------------------------------------------------------------------------------- 1 | # Debrief Progress 2 | 3 | Update the relevant planning doc for this work if there is one. (If you don't know, there probably isn't, and you can safely ignore this suggestion.) 4 | 5 | Report out loud on how this work is going. Any issues/surprises/complexity? 6 | 7 | What's left to do? How complex will it be? What do you think about the cost/benefit ratio? 8 | 9 | ## Questions to Address 10 | 11 | ### Current Status 12 | - What has been completed successfully? 13 | - What challenges or surprises have emerged? 14 | - Are we on track with the original plan? 15 | 16 | ### Technical Assessment 17 | - How is the code quality holding up? 18 | - Are there any technical debt concerns? 19 | - What patterns or approaches are working well? 20 | 21 | ### Scope & Priority 22 | - What remains to be done? 
23 | - How complex will the remaining work be? 24 | - Should priorities be adjusted based on what we've learned? 25 | 26 | ### Value Assessment 27 | - Is the cost/benefit ratio still justified? 28 | - Are there simpler approaches that would achieve similar value? 29 | - Should we continue, pivot, or pause this work? 30 | 31 | ## Documentation Updates 32 | 33 | If there's a planning document for this work: 34 | 1. Update the progress/status section 35 | 2. Note any scope changes or new understanding 36 | 3. Adjust timelines if needed 37 | 4. Document any decisions or pivots made 38 | 39 | If there's no planning document but this is significant work, consider creating one to track the remaining effort. -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # gjdutils Documentation 2 | 3 | This documentation collection provides templates and workflows for highly effective AI-assisted programming, developed June 2025. 4 | 5 | ## Quick Start 6 | 7 | - **[WORKFLOW.md](WORKFLOW.md)** - Complete workflow for AI-assisted development using Claude Code or Cursor 8 | 9 | ## Documentation Structure 10 | 11 | ### 📋 [instructions/](instructions/) 12 | Commands, modes, and processes for AI-assisted development: 13 | - **Modes**: `SOUNDING_BOARD_MODE.md`, `SCIENTIST_DETECTIVE_MODE.md`, `SURGEON_MODE.md` 14 | - **Planning**: `WRITE_PLANNING_DOC.md`, `DO_PLANNING_DOC.md`, `CRITIQUE_OF_PLANNING_DOC.md` 15 | - **Documentation**: `WRITE_EVERGREEN_DOC.md`, `WRITE_DEEP_DIVE_AS_DOC.md` 16 | - **Maintenance**: `UPDATE_HOUSEKEEPING_DOCUMENTATION.md`, `DEBRIEF_PROGRESS.md` 17 | - **Development**: `CODING_PRINCIPLES.md`, `GIT_COMMITS.md` 18 | 19 | ### 📚 [reference/](reference/) 20 | gjdutils-specific reference documentation: 21 | - **Development**: `TESTING_PYTHON.md`, `CODING_SHELL_SCRIPTS.md` 22 | - **Organization**: `DOCUMENTATION_ORGANISATION.md` 23 | 24 | ## About gjdutils 25 | 26 | This is part of the [gjdutils project](../README.md) - a collection of: 27 | 1. AI-assisted programming workflows (this documentation) 28 | 2. Python utilities and scripts ([src/gjdutils/](../src/gjdutils/)) 29 | 3. TypeScript CLI tools and development utilities ([src/ts/](../src/ts/)) 30 | 31 | These templates have been battle-tested in real development projects and are designed to work with both Claude Code and Cursor. 
-------------------------------------------------------------------------------- /docs/instructions/CODING_PRINCIPLES.md: -------------------------------------------------------------------------------- 1 | # Coding Principles 2 | 3 | ## Core Philosophy 4 | - Prioritise code that's simple, easy-to-understand, debuggable, and readable 5 | - Fix the root cause rather than putting on a band-aid 6 | - Avoid fallbacks & defaults - better to fail if input assumptions aren't being met 7 | 8 | ## Error Handling 9 | - Raise errors early, clearly & fatally 10 | - Prefer not to wrap in try/except so that tracebacks are obvious 11 | 12 | ## Development Approach 13 | - Don't try to write a full, final version immediately 14 | - Get a simple version working end-to-end first, then gradually layer in complexity in stages 15 | - Aim to keep changes minimal and focused on the task at hand 16 | - Try to keep things concise, don't over-engineer 17 | 18 | ## Best Practices 19 | - Follow software engineering best practices: 20 | - Reuse code where it makes sense 21 | - Pull out core reusable functionality into utility functions 22 | - Break long/complex functions down 23 | - Write code that's easy to test, prefer functional style 24 | - Avoid object-oriented unless it's a particularly good fit 25 | - Keep documentation up-to-date as you go 26 | 27 | ## Collaboration 28 | - If the user asks you a question, answer it directly, and stop work on other tasks until consensus has been reached 29 | - If you notice other things that should be changed/updated, ask/suggest 30 | - If things don't make sense or seem like a bad idea, ask questions or discuss rather than just going along with it 31 | - Be a good collaborator and help make good decisions, rather than just obeying blindly 32 | 33 | ## External Dependencies 34 | - When picking 3rd-party libraries, prefer ones with large communities 35 | 36 | ## Comments 37 | - Comment sparingly - reserve it for explaining surprising or confusing sections 38 | -------------------------------------------------------------------------------- /src/gjdutils/cli/check_git_clean.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from rich.console import Console 4 | from gjdutils.shell import fatal_error_msg 5 | from gjdutils.cmd import run_cmd 6 | 7 | console = Console() 8 | 9 | 10 | def check_git_clean(): 11 | """Check if git working directory is clean.""" 12 | # Check for unstaged changes 13 | retcode, stdout, _ = run_cmd("git diff --quiet", check=False) 14 | if retcode != 0: 15 | _, diff_output, _ = run_cmd("git --no-pager diff --stat") 16 | fatal_error_msg("Unstaged changes present:\n" + diff_output) 17 | 18 | # Check for staged but uncommitted changes 19 | retcode, stdout, _ = run_cmd("git diff --cached --quiet", check=False) 20 | if retcode != 0: 21 | _, diff_output, _ = run_cmd("git --no-pager diff --cached --stat") 22 | # fatal_error_msg("Uncommitted staged changes present:\n" + diff_output) 23 | if ( 24 | input( 25 | f"\n{diff_output}\n\nAre you sure you want to deploy with staged but uncommited files? (y/N): " 26 | ).lower() 27 | != "y" 28 | ): 29 | fatal_error_msg("Deployment cancelled by user because of untracked files") 30 | 31 | # Check for untracked files 32 | _, untracked, _ = run_cmd("git ls-files --others --exclude-standard") 33 | if untracked.strip(): 34 | if ( 35 | input( 36 | f"\nAre you sure you want to deploy with untracked files: {untracked}? 
(y/N): " 37 | ).lower() 38 | != "y" 39 | ): 40 | fatal_error_msg("Deployment cancelled by user because of untracked files") 41 | 42 | console.print("[green]Git: clean[/green]") 43 | 44 | 45 | def main(): 46 | console.rule("[yellow]Checking Git Status") 47 | check_git_clean() 48 | 49 | 50 | if __name__ == "__main__": 51 | main() 52 | -------------------------------------------------------------------------------- /src/gjdutils/shell.py: -------------------------------------------------------------------------------- 1 | """Shell and command-line utilities.""" 2 | 3 | from pathlib import Path 4 | import shutil 5 | import sys 6 | import venv 7 | from contextlib import contextmanager 8 | from typing import Optional, Union 9 | 10 | 11 | @contextmanager 12 | def temp_venv(path: Union[str, Path]): 13 | """Create and manage a temporary virtualenv. 14 | 15 | Args: 16 | path: Path where the virtualenv should be created 17 | 18 | Yields: 19 | Path to the Python executable in the virtualenv 20 | 21 | Example: 22 | ```python 23 | with temp_venv("/tmp/my-venv") as python_path: 24 | run_cmd([python_path, "-m", "pip", "install", "some-package"]) 25 | ``` 26 | """ 27 | path = Path(path) 28 | 29 | # Clean up any existing venv first 30 | if path.exists(): 31 | shutil.rmtree(path) 32 | 33 | venv.create(path, with_pip=True) 34 | 35 | # Get the correct python executable path for this venv 36 | if sys.platform == "win32": 37 | python_path = path / "Scripts" / "python.exe" 38 | else: 39 | python_path = path / "bin" / "python" 40 | 41 | try: 42 | yield python_path 43 | finally: 44 | if path.exists(): 45 | shutil.rmtree(path) 46 | 47 | 48 | def fatal_error_msg(msg: str, stderr: Optional[str] = None) -> None: 49 | """Print a fatal error message and exit with code 1. 50 | 51 | Args: 52 | msg: The error message to display 53 | stderr: Optional stderr output to display after the message 54 | 55 | Example: 56 | ```python 57 | if result.returncode != 0: 58 | fatal_error_msg("Failed to build package", result.stderr) 59 | ``` 60 | """ 61 | from rich.console import Console 62 | 63 | console = Console() 64 | 65 | console.print(f"[red]{msg}[/red]") 66 | if stderr: 67 | console.print(stderr) 68 | sys.exit(1) 69 | -------------------------------------------------------------------------------- /src/gjdutils/rand.py: -------------------------------------------------------------------------------- 1 | import random 2 | from typing import Optional 3 | import uuid 4 | 5 | DEFAULT_RANDOM_SEED = 42 6 | READABLE_RAND_CHARS = "23456789abcdefghjkmnprstuvwxyz" 7 | 8 | 9 | def gen_uuid(n: int = 8): 10 | # e.g. '161b58a4d8' 11 | return str(uuid.uuid4()).replace("-", "")[:n] 12 | 13 | 14 | def gen_readable_rand_id(n: int = 7, valid: Optional[list | str] = None): 15 | # build an N-digit random string with letters or numbers 16 | # excluding o,0,i,1,l,q (too similar to g) for easy reading 17 | # 18 | # this isn't officially a UUID 19 | if valid is None: 20 | valid = READABLE_RAND_CHARS 21 | return "".join([random.choice(valid) for _ in range(n)]) 22 | 23 | 24 | def assert_valid_readable_rand_id( 25 | id_: str, nchars: Optional[int] = None, valid_chars: Optional[str] = None 26 | ): 27 | """ 28 | Check if ID_ is a valid readable random id, as generated by GEN_READABLE_UUID. 
29 | """ 30 | assert id_, "id_ cannot be empty" 31 | if nchars is not None: 32 | assert len(id_) == nchars, f"len({id_}) != {nchars}" 33 | if valid_chars is None: 34 | valid_chars = READABLE_RAND_CHARS 35 | assert all([c in valid_chars for c in id_]), f"Invalid character in {id_}" 36 | 37 | 38 | def shuffle_copy(lst): 39 | lst2 = lst.copy() 40 | random.shuffle(lst2) 41 | return lst2 42 | 43 | 44 | def set_seeds(seed: int = DEFAULT_RANDOM_SEED): 45 | # TODO if SEED is None, make it dynamic 46 | random.seed(seed) 47 | 48 | try: 49 | import numpy as np 50 | 51 | np.random.seed(seed) 52 | except ImportError: 53 | pass 54 | 55 | try: 56 | import torch # type: ignore 57 | 58 | torch.manual_seed(seed) 59 | torch.cuda.manual_seed(seed) 60 | torch.cuda.manual_seed_all(seed) 61 | # If you are using cudnn (a GPU-accelerated library for deep neural networks) 62 | torch.backends.cudnn.deterministic = True 63 | 64 | except ImportError: 65 | pass 66 | -------------------------------------------------------------------------------- /src/gjdutils/pytest_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | 5 | def in_pytest(check_modules=True, check_env=True): 6 | """Detect whether code is currently running within a pytest environment. 7 | 8 | This function uses two different methods to check if we're running in pytest: 9 | 1. Checking if pytest is in sys.modules (if check_modules=True) 10 | 2. Checking if PYTEST_CURRENT_TEST is in environment variables (if check_env=True) 11 | 12 | Args: 13 | check_modules (bool, optional): Whether to check sys.modules for pytest. Defaults to True. 14 | check_env (bool, optional): Whether to check environment variables for PYTEST_CURRENT_TEST. Defaults to True. 15 | 16 | Returns: 17 | bool: True if all enabled checks confirm we're in pytest, False if none do. 18 | 19 | Raises: 20 | AssertionError: If both check_modules and check_env are False. 21 | RuntimeError: If some checks are True and others False, indicating an ambiguous state. 22 | """ 23 | assert check_modules or check_env, "At least one check must be performed" 24 | checks = [] 25 | if check_modules: 26 | # https://stackoverflow.com/a/44595269/230523 27 | # 28 | # "Of course, this solution only works if the code you're trying to test does not use pytest itself. 29 | mod_bool = "pytest" in sys.modules 30 | checks.append(mod_bool) 31 | 32 | if check_env: 33 | # from https://stackoverflow.com/a/58866220/230523 34 | # 35 | # "This method works only when an actual test is being run. 36 | # "This detection will not work when modules are imported during pytest collection. 37 | env_bool = "PYTEST_CURRENT_TEST" in os.environ 38 | checks.append(env_bool) 39 | 40 | if all(checks): 41 | return True 42 | elif not any(checks): 43 | return False 44 | else: 45 | raise RuntimeError( 46 | "It's unclear whether we're in a unit test - it might be part of the pytest setup, or you might have imported pytest as part of your main codebase." 
47 | ) 48 | -------------------------------------------------------------------------------- /tests/test_env_integration.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import tempfile 4 | from pathlib import Path 5 | import pytest 6 | from typing import Generator 7 | 8 | from gjdutils.env import get_env_var 9 | 10 | 11 | @pytest.fixture 12 | def temp_env_file(tmp_path): 13 | """Create a temporary .env file for testing.""" 14 | with tempfile.NamedTemporaryFile(mode="w", suffix=".env", delete=False) as f: 15 | f.write("GJDUTILS_TEST_STR=hello\n") 16 | f.write("GJDUTILS_TEST_INT=42\n") 17 | path = Path(f.name) 18 | yield path 19 | path.unlink() 20 | 21 | 22 | def test_export_envs_script(temp_env_file): 23 | """Test that export_envs.sh behaves correctly when executed vs sourced.""" 24 | script_path = Path("src/gjdutils/scripts/export_envs.sh") 25 | 26 | # Test direct execution fails 27 | result = subprocess.run( 28 | [str(script_path), str(temp_env_file)], capture_output=True, text=True 29 | ) 30 | assert result.returncode != 0 31 | assert "needs to be sourced" in result.stdout 32 | 33 | # Test sourcing works and sets variables 34 | result = subprocess.run( 35 | [ 36 | "bash", 37 | "-c", 38 | f"source {script_path} {temp_env_file} && echo $GJDUTILS_TEST_STR", 39 | ], 40 | capture_output=True, 41 | text=True, 42 | ) 43 | assert result.returncode == 0 44 | assert "hello" in result.stdout 45 | 46 | 47 | def test_get_env_var(): 48 | """Test get_env_var functionality with different types.""" 49 | # Set test variables directly in Python's environment (we can't use 50 | # export_envs.sh here because it doesn't work with subprocesses) 51 | os.environ["GJDUTILS_TEST_STR"] = "hello" 52 | os.environ["GJDUTILS_TEST_INT"] = "42" 53 | 54 | # Test string and int validation 55 | assert get_env_var("GJDUTILS_TEST_STR") == "hello" 56 | assert get_env_var("GJDUTILS_TEST_INT", typ=int) == 42 57 | 58 | # Test error cases 59 | with pytest.raises(ValueError, match="Missing required environment variable"): 60 | get_env_var("NONEXISTENT") 61 | -------------------------------------------------------------------------------- /src/gjdutils/obsolete/google_text_to_speech.py: -------------------------------------------------------------------------------- 1 | """ 2 | Synthesizes speech from the input string of text or ssml. 3 | Make sure to be working in a virtual environment. 4 | https://cloud.google.com/text-to-speech/docs/libraries 5 | """ 6 | 7 | from google.cloud import texttospeech 8 | 9 | 10 | def outloud(text: str, language_code: str = "en-GB", bot_gender=None): 11 | bot_gender = bot_gender.lower() if bot_gender else None 12 | # not all genders supported for all languages. 
see https://cloud.google.com/text-to-speech/docs/voices 13 | if bot_gender is None or bot_gender == "neutral": 14 | bot_gender = texttospeech.SsmlVoiceGender.NEUTRAL 15 | elif bot_gender in ["female", texttospeech.SsmlVoiceGender.FEMALE]: 16 | bot_gender = texttospeech.SsmlVoiceGender.FEMALE 17 | elif bot_gender in ["male", texttospeech.SsmlVoiceGender.MALE]: 18 | bot_gender = texttospeech.SsmlVoiceGender.MALE 19 | else: 20 | # gender = texttospeech.SsmlVoiceGender.SSML_VOICE_GENDER_UNSPECIFIED 21 | raise Exception(f"Unknown gender: {bot_gender}") 22 | # Instantiates a client 23 | client = texttospeech.TextToSpeechClient() 24 | 25 | # Set the text input to be synthesized 26 | synthesis_input = texttospeech.SynthesisInput(text=text) 27 | # synthesis_input = texttospeech.SynthesisInput(text="Bonjour, Monsieur Natterbot!") 28 | # synthesis_input = texttospeech.SynthesisInput(text="Γεια σου, Natterbot!") 29 | 30 | # Build the voice request, select the language code ("en-US") and the ssml 31 | # voice gender ("neutral") 32 | voice = texttospeech.VoiceSelectionParams( 33 | language_code=language_code, ssml_gender=bot_gender 34 | ) # e.g. 'en-GB' 35 | 36 | # Select the type of audio file you want returned 37 | audio_config = texttospeech.AudioConfig( 38 | audio_encoding=texttospeech.AudioEncoding.MP3 39 | ) 40 | 41 | # Perform the text-to-speech request on the text input with the selected 42 | # voice parameters and audio file type 43 | response = client.synthesize_speech( 44 | input=synthesis_input, voice=voice, audio_config=audio_config 45 | ) 46 | return response 47 | -------------------------------------------------------------------------------- /src/gjdutils/scripts/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import stat 4 | from pathlib import Path 5 | import sys 6 | import typer 7 | 8 | 9 | def get_script_install_path() -> Path: 10 | """Get the installation path for our scripts""" 11 | # This will typically be something like /usr/local/bin or ~/.local/bin 12 | if sys.prefix == sys.base_prefix: 13 | # System Python installation 14 | if os.access("/usr/local/bin", os.W_OK): 15 | return Path("/usr/local/bin") 16 | return Path(os.path.expanduser("~/.local/bin")) 17 | else: 18 | # Virtual environment 19 | return Path(sys.prefix) / "bin" 20 | 21 | 22 | def install_export_envs(): 23 | """Install the export_envs.sh script to the appropriate location""" 24 | try: 25 | # Get our package's installed location 26 | package_dir = Path(__file__).parent 27 | source_script = package_dir / "export_envs.sh" 28 | 29 | if not source_script.exists(): 30 | # Try to find it in the shared data location 31 | source_script = Path(sys.prefix) / "bin" / "export_envs.sh" 32 | if not source_script.exists(): 33 | typer.echo(f"Error: Could not find export_envs.sh script", err=True) 34 | raise typer.Exit(1) 35 | 36 | # Get the target installation directory 37 | install_dir = get_script_install_path() 38 | 39 | # Create the directory if it doesn't exist 40 | install_dir.mkdir(parents=True, exist_ok=True) 41 | 42 | # Copy the script 43 | target_script = install_dir / "gjdutils-export-envs" 44 | shutil.copy2(source_script, target_script) 45 | 46 | # Make it executable 47 | target_script.chmod(target_script.stat().st_mode | stat.S_IEXEC) 48 | 49 | typer.echo(f"Installed gjdutils-export-envs to {target_script}") 50 | typer.echo("\nTo use this script, you need to source it:") 51 | typer.echo(" source gjdutils-export-envs .env") 52 | 53 | except 
Exception as e: 54 | typer.echo(f"Error installing script: {e}", err=True) 55 | raise typer.Exit(1) 56 | 57 | 58 | if __name__ == "__main__": 59 | typer.run(install_export_envs) 60 | -------------------------------------------------------------------------------- /src/gjdutils/stopwatch.py: -------------------------------------------------------------------------------- 1 | ######################################################## 2 | # from http://effbot.org/librarybook/timing.htm 3 | # File: timing-example-2.py 4 | # 5 | # copied from ~/fmri/distpat/trunk/users/greg/context/time_context.py 6 | 7 | """ 8 | This is my wrapper for the time module. There's probably an 9 | easier way to time the duration of things, but when I looked 10 | into timing stuff, this was the best I could come up with... 11 | 12 | To use: 13 | 14 | t = Stopwatch() 15 | # t.start() 16 | 17 | # do something 18 | 19 | elapsed = t.finish() 20 | """ 21 | 22 | import time 23 | 24 | 25 | class Stopwatch: 26 | """ 27 | Creates stopwatch timer objects. 28 | """ 29 | 30 | # stores the time the stopwatch was started 31 | t0 = None 32 | 33 | # stores the time the stopwatch was last looked at 34 | t1 = None 35 | 36 | def __init__(self, do_start: bool = True): 37 | self.t0 = 0 38 | self.t1 = 0 39 | if do_start: 40 | self.start() 41 | 42 | def start(self): 43 | """ 44 | Stores the current time in t0. 45 | """ 46 | 47 | self.t0 = time.time() 48 | 49 | def finish(self, milliseconds: bool = True): 50 | """ 51 | Returns the elapsed duration in milliseconds. This 52 | stores the current time in t1, and calculates the 53 | difference between t0 (the stored start time) and 54 | t1, so if you call this multiple times, you'll get a 55 | larger answer each time. 56 | 57 | You have to call this in order to update t1. 58 | """ 59 | 60 | self.t1 = time.time() 61 | return self.milli() if milliseconds else self.seconds() 62 | 63 | def seconds(self): 64 | """ 65 | Returns t1 - t0 in seconds. Does not update t1. 66 | """ 67 | return int(self.t1 - self.t0) 68 | 69 | def milli(self): 70 | """ 71 | Returns t1 - t0 in milliseconds. Does not update t1. 72 | """ 73 | return int((self.t1 - self.t0) * 1000) 74 | 75 | def micro(self): 76 | """ 77 | Returns t1 - t0 in microseconds. Does not update t1. 78 | """ 79 | return int((self.t1 - self.t0) * 1000000) 80 | -------------------------------------------------------------------------------- /docs/reference/DOCUMENTATION_ORGANISATION.md: -------------------------------------------------------------------------------- 1 | # Documentation organisation 2 | 3 | ## References 4 | 5 | - `../README.md` 6 | - `../instructions/WRITE_EVERGREEN_DOC.md` 7 | 8 | 9 | ## Evergreen documentation Reference 10 | 11 | (Written based on `../instructions/WRITE_EVERGREEN_DOC.md`) 12 | 13 | Available evergreen documentation in `docs/` - here are the most useful gjdutils templates. 
14 | 
15 | Coding & infrastructure:
16 | - `../instructions/CODING_PRINCIPLES.md` - Outlines development principles prioritising simplicity, readability, debugging, and rapid prototyping for early-stage development
17 | - `CODING_SHELL_SCRIPTS.md` - Guidelines for writing shell scripts and command-line utilities
18 | - `TESTING_PYTHON.md` - Testing approach documentation covering Python test framework setup, test structure, and coverage
19 | 
20 | 
21 | Docs, modes, and admin:
22 | - `../instructions/GIT_COMMIT_CHANGES.md` - Guidelines for Git commit best practices including batching changes, message format, and handling concurrent changes
23 | - `../instructions/SOUNDING_BOARD_MODE.md` - Instructions for collaborative discussion mode emphasising asking questions and suggesting alternatives rather than immediate implementation
24 | - `../instructions/DETECTIVE_SCIENTIST_MODE.md` - Methodical investigation approach for complex debugging and analysis
25 | - `../instructions/SURGEON_MODE.md` - Precise, minimal-change approach for critical fixes
26 | - `../instructions/WRITE_EVERGREEN_DOC.md` - Guidelines for writing evergreen documentation including structure, cross-references, status indicators, and maintenance practices
27 | - `../instructions/WRITE_PLANNING_DOC.md` - Guide for writing planning/project management documents with file naming conventions, structure, and stage-based action plans
28 | - `../instructions/UPDATE_HOUSEKEEPING_DOCUMENTATION.md` - Process for keeping project documentation up-to-date including review steps, update patterns, and quality checklist
29 | - `../instructions/DEBRIEF_PROGRESS.md` - Process for reviewing and documenting project progress and decisions
30 | - `../instructions/RESEARCH_POTENTIAL_LIBRARY_CHANGES_GOTCHAS.md` - Guide for researching and evaluating new libraries and dependencies
31 | 
32 | ## Planning docs
33 | 
34 | (Written based on `../instructions/WRITE_PLANNING_DOC.md`)
35 | 
36 | Recent planning decisions & progress-tracking of major features: `planning/*.md` (project-specific location)
37 | 
-------------------------------------------------------------------------------- /src/gjdutils/web.py: --------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from urllib import parse as urlparse
3 | import webbrowser
4 | 
5 | from gjdutils.strings import PathOrStr
6 | 
7 | 
8 | def webbrowser_open(filen: PathOrStr, browser=None):
9 |     """
10 |     For some reason, the default webbrowser.open() doesn't work for me, so you may want to set browser to 'chrome'
11 |     """
12 |     # I had an issue where it refused to open a non .html file
13 |     assert str(filen).endswith(".html"), "File must end with .html"
14 |     if browser:
15 |         browser = webbrowser.get(browser)
16 |     else:
17 |         browser = webbrowser
18 |     full_filen = f"file://{Path.cwd() / filen}"
19 |     return browser.open(full_filen)
20 | 
21 | 
22 | def trunc_url(url):
23 |     """
24 |     e.g.
'http://www.guardian.co.uk/blah/ -> /blah 25 | 26 | Based on dev/guardian/data/data/greg/sharedwisdom/sharedwisdom/models.py 27 | """ 28 | # URLPARSE returns ParseResult(scheme='http', 29 | # netloc='memrise.com', 30 | # path='/blah.png', 31 | # params='', 32 | # query='q1=x&q2=y', 33 | # fragment='') 34 | scheme, netloc, path, params, query, fragment = urlparse.urlparse(url) 35 | # ditch the SCHEME and NETLOC 36 | # PARAMS are an arcane part that comes after a semi-colon 37 | return path # + params 38 | 39 | 40 | def validate_request_args(args, defaults): 41 | """ 42 | DEFAULTS = dict of allowed query parameters, with the keys 43 | being the allowed query-string-parameter-keys and values 44 | as their defaults. 45 | """ 46 | if args: 47 | unexpecteds = set(args.keys()) - set(defaults.keys()) 48 | assert not unexpecteds, "Unexpected key(s): %s" % unexpecteds 49 | # params = {key: args.get(key, default) 50 | # for key, default in defaults.items()} 51 | params = defaults | args 52 | return params 53 | 54 | 55 | def query_string_from_dict(d): 56 | return "?" + "&".join(["%s=%s" % (k, v) for k, v in d.items()]) 57 | 58 | 59 | def params_from_request(request): 60 | try: 61 | if request.json: 62 | params_in = request.json.get("params", {}) 63 | else: 64 | params_in = dict(request.values) 65 | return params_in 66 | except: 67 | print("Error in PARAMS_FROM_REQUEST") 68 | return {} 69 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .nox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | .pytest_cache/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | db.sqlite3 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # IPython 77 | profile_default/ 78 | ipython_config.py 79 | 80 | # pyenv 81 | .python-version 82 | 83 | # celery beat schedule file 84 | celerybeat-schedule 85 | 86 | # SageMath parsed files 87 | *.sage.py 88 | 89 | # Environments 90 | .env 91 | .env.* 92 | .venv 93 | env/ 94 | venv/ 95 | ENV/ 96 | env.bak/ 97 | venv.bak/ 98 | 99 | # Spyder project settings 100 | .spyderproject 101 | .spyproject 102 | 103 | # Rope project settings 104 | .ropeproject 105 | 106 | # mkdocs documentation 107 | /site 108 | 109 | # mypy 110 | .mypy_cache/ 111 | .dmypy.json 112 | dmypy.json 113 | 114 | # Pyre type checker 115 | .pyre/ 116 | 117 | # added by GD 118 | .#* 119 | .DS_Store 120 | .pypirc 121 | 122 | mcp.json 123 | 124 | # Claude Code local settings 125 | .claude/settings.local.json 126 | 127 | # Node.js / JavaScript / TypeScript 128 | node_modules/ 129 | npm-debug.log* 130 | yarn-debug.log* 131 | yarn-error.log* 132 | *.tsbuildinfo 133 | .npm 134 | .eslintcache 135 | .node_repl_history 136 | *.tgz 137 | *.tar.gz 138 | 139 | # TypeScript 140 | *.js.map 141 | *.d.ts.map 142 | 143 | # IDE files 144 | .vscode/ 145 | .idea/ 146 | *.swp 147 | *.swo 148 | -------------------------------------------------------------------------------- /src/gjdutils/jsons.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Optional 3 | 4 | 5 | def jsonify(x): 6 | def json_dumper_robust(obj): 7 | try: 8 | return obj.toJSON() 9 | except AttributeError: 10 | # Object doesn't have toJSON method, try string conversion 11 | try: 12 | return str(obj) 13 | except (ValueError, TypeError): 14 | # If string conversion fails, return None to skip this field 15 | return None 16 | 17 | return json.dumps(x, sort_keys=True, indent=4, default=json_dumper_robust) 18 | 19 | 20 | # from o-1 21 | # class RobustJSONEncoder(json.JSONEncoder): 22 | # def __init__(self, *args, **kwargs): 23 | # self.seen = set() 24 | # super().__init__(*args, **kwargs) 25 | 26 | # def default(self, obj): 27 | # if id(obj) in self.seen: 28 | # return None # Replace circular references with None or a placeholder 29 | # self.seen.add(id(obj)) 30 | # try: 31 | # return obj.toJSON() 32 | # except: 33 | # try: 34 | # return str(obj) 35 | # except: 36 | # return None 37 | 38 | 39 | # def jsonify(x): 40 | # return json.dumps(x, cls=RobustJSONEncoder, sort_keys=True, indent=4) 41 | 42 | 43 | def to_json( 44 | inps: list, 45 | fields: Optional[list] = None, 46 | skip_if_missing: bool = False, 47 | skip_empties: bool = True, 48 | max_str_len: Optional[int] = 1000, 49 | ) -> str: 50 | """ 51 | Convert a list of dicts to a JSON string, with only the fields we want, 52 | and in the same order as FIELDS. 
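
    For example (illustrative, given the defaults above):

        to_json([{"name": "ada", "note": ""}], fields=["name", "note"])
        # -> '[\n  {\n    "name": "ada"\n  }\n]'  ("note" dropped because skip_empties=True)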
53 | """ 54 | if fields is None: 55 | fields = [] 56 | outs = [] 57 | for inp in inps: 58 | if fields is None: 59 | fields = inp.keys() 60 | # we want to make sure to return a dict with only the fields we want, 61 | # and in the same order as FIELDS 62 | out = {} 63 | for k in fields: # type: ignore 64 | if skip_if_missing and (k not in inp): 65 | continue 66 | v = inp[k] # will error if missing and !SKIP_IF_MISSING 67 | if skip_empties and (v is None or v == ""): 68 | continue 69 | if max_str_len and isinstance(v, str) and len(v) > max_str_len: 70 | v = v[:max_str_len] + "..." 71 | out[k] = v 72 | outs.append(out) 73 | outs_j = json.dumps(outs, indent=2) 74 | return outs_j 75 | -------------------------------------------------------------------------------- /src/gjdutils/env.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | from typing import Any, TypeVar, cast 4 | from pydantic import StrictStr, TypeAdapter 5 | 6 | from gjdutils.print_utils import vprint 7 | 8 | T = TypeVar("T") 9 | _processed_vars = set() 10 | 11 | 12 | # You may find it useful to run `python -m gjdutils.scripts.export_envs .env` to first 13 | # export all the variables in your .env file to your environment. 14 | 15 | 16 | def get_env_var(name: str, typ: Any = StrictStr, verbose: int = 0) -> T: 17 | """Get environment variable with type validation, e.g. 18 | 19 | OPENAI_API_KEY = get_env_var("OPENAI_API_KEY") 20 | NUM_WORKERS = get_env_var("NUM_WORKERS", typ=int) 21 | 22 | Args: 23 | name: Name of environment variable 24 | type_: Pydantic type to validate against (default: StrictStr for non-empty string) 25 | 26 | Returns: 27 | The validated value with the specified type 28 | 29 | Raises: 30 | ValueError: If variable is missing or fails validation 31 | """ 32 | vprint(1, f"Attempting to get environment variable: {name}") 33 | vprint(2, f"Current environment variables: {list(os.environ.keys())}") 34 | try: 35 | value = os.environ[name] 36 | _processed_vars.add(name) 37 | 38 | # Use TypeAdapter for validation 39 | adapter = TypeAdapter(typ) 40 | validated = adapter.validate_python(value) 41 | 42 | # Return validated value directly 43 | return cast(T, validated) 44 | except KeyError: 45 | raise ValueError(f"Missing required environment variable: {name}") 46 | except Exception as e: 47 | raise ValueError(f"Invalid value for {name}: {e}") 48 | 49 | 50 | def list_env_example_vars(env_example_filen: Path) -> set[str]: 51 | """Get set of required variables from .env.example. 52 | 53 | Args: 54 | env_example_filen: Path to the .env.example file 55 | 56 | Returns: 57 | Set of environment variable names found in the file 58 | """ 59 | assert env_example_filen.exists(), f"Missing env example file: {env_example_filen}" 60 | 61 | required_vars = set() 62 | with env_example_filen.open() as f: 63 | for line in f: 64 | line = line.strip() 65 | # Skip comments and empty lines 66 | if not line or line.startswith("#"): 67 | continue 68 | # Get variable name (everything before =) 69 | var_name = line.split("=")[0].strip() 70 | required_vars.add(var_name) 71 | 72 | return required_vars 73 | -------------------------------------------------------------------------------- /src/gjdutils/num.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | # this doesn't support numpy's numeric types, but it's a good stopgap for now. 
4 | # there doesn't appear to be a perfect, agreed solution
5 | #
6 | # https://stackoverflow.com/questions/60616802/how-to-type-hint-a-generic-numeric-type-in-python
7 | Numeric = Union[int, float]
8 | 
9 | 
10 | def percent(num, denom):
11 |     return (100 * (num / float(denom))) if denom else 0
12 | 
13 | 
14 | def percent_str(num, denom):
15 |     return str(percent(num, denom)) + "%"
16 | 
17 | 
18 | def discretise(
19 |     val,
20 |     increment: Union[int, float] = 0.1,
21 |     lower: Union[int, float] = 0.0,
22 |     upper: Union[int, float] = 1.0,
23 |     enforce_range: bool = True,
24 | ):
25 |     """
26 |     You will probably want to cache this.
27 |     """
28 |     import numpy as np
29 |     import pandas as pd
30 | 
31 |     def calc_increments(increment, lower, upper):
32 |         assert (
33 |             lower <= increment <= upper
34 |         ), f"Required: {lower:.2f} <= {increment:.2f} <= {upper:.2f}"
35 |         # e.g. for lower=0, upper=1, increment_size=0.05, nincrements=21
36 |         nincrements = int((upper - lower) / increment) + 1
37 |         # e.g. for lower=0, upper=1, increment_size=0.05, increments = [0., 0.05, 0.1, ..., 0.95, 1. ]
38 |         increments = np.linspace(lower, upper, nincrements)
39 |         return increments
40 | 
41 |     if pd.isnull(val):
42 |         return upper
43 |     if enforce_range and (val < lower or val > upper):
44 |         raise ValueError(
45 |             f"Value {val:.2f} is outside the valid range [{lower:.2f}, {upper:.2f}]"
46 |         )
47 |     increments = calc_increments(increment, lower, upper)
48 |     if val < lower:
49 |         return increments[0]
50 |     if val > upper:
51 |         return increments[-1]
52 |     idx = np.digitize(val, increments)
53 |     # e.g.
54 |     # 0.00 -> 0.0
55 |     # 0.01 -> 0.0
56 |     # 0.06 -> 0.05
57 |     # 0.99 -> 0.95
58 |     # 1.00 -> 1.0
59 |     discretised = increments[idx - 1]
60 |     return discretised
61 | 
62 | 
63 | def ordinal(n: int):
64 |     """
65 |     e.g. 1 -> "1st", 103 -> "103rd"
66 |     """
67 |     # from https://claude.ai/chat/87fad336-e0fa-4074-aed4-f4e57ed20bb7
68 | 
69 |     # TESTED:
70 |     # for i in [0, 1, 2, 3, 4, 10, 11, 12, 13, 22, 78, 103, 103231, 103235]:
71 |     #     print(i, ordinal(i))
72 |     assert n >= 0
73 |     if 10 <= n % 100 <= 20:
74 |         suffix = "th"
75 |     else:
76 |         suffix = {1: "st", 2: "nd", 3: "rd"}.get(n % 10, "th")
77 |     return f"{n}{suffix}"
78 | 
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # gjdutils
2 | 
3 | A comprehensive toolkit for highly effective AI-assisted programming, containing three complementary resource sets:
4 | 
5 | ## 🤖 AI-Assisted Programming Templates
6 | 
7 | **[docs/](docs/)** - Battle-tested workflows and instructions for AI development
8 | 
9 | A complete collection of templates, modes, and processes for working effectively with AI coding assistants like Claude Code and Cursor. These have been refined through real-world usage and provide:
10 | 
11 | - **Structured workflows** for planning, implementing, and maintaining code
12 | - **Specialized modes** for different types of development work (sounding board, detective, surgeon)
13 | - **Documentation practices** that scale with your project
14 | - **Git workflows** optimized for AI collaboration
15 | 
16 | **Quick start**: See [docs/WORKFLOW.md](docs/WORKFLOW.md) for the complete development workflow.
17 | 18 | ## 🐍 Python Utilities 19 | 20 | **[src/gjdutils/](src/gjdutils/)** - Production-ready Python utilities and tools 21 | 22 | A curated collection of Python utilities for common development tasks including: 23 | 24 | - Data science helpers and collection utilities 25 | - LLM integration tools (Claude, OpenAI) 26 | - File processing and automation scripts 27 | - Development workflow tools 28 | 29 | **Quick start**: `pip install gjdutils` (see [src/gjdutils/README.md](src/gjdutils/README.md)) 30 | 31 | ## ⚒️ TypeScript Utilities 32 | 33 | **[src/ts/](src/ts/)** - Cross-platform TypeScript tools and CLI utilities 34 | 35 | A collection of battle-tested TypeScript utilities ported from real-world development workflows: 36 | 37 | - **CLI tools** for date generation, conversation extraction, and code analysis 38 | - **Git worktree sync** scripts for managing parallel development branches 39 | - **LLM critique tools** for automated planning document review 40 | - **Development utilities** like line counting and file processing 41 | 42 | **Quick start**: See [src/ts/README.md](src/ts/README.md) for installation and usage examples 43 | 44 | ## Why gjdutils? 45 | 46 | All components are designed around the principle of **making AI-assisted development more effective**: 47 | 48 | - The **templates** provide the structure and process for working with AI assistants 49 | - The **Python utilities** handle data science and LLM integration tasks 50 | - The **TypeScript utilities** provide cross-platform development workflow tools 51 | 52 | Whether you're building prototypes or production systems, gjdutils helps you work faster and more reliably with AI assistance. 53 | 54 | ## License 55 | 56 | MIT License - see [LICENSE](LICENSE) for details. -------------------------------------------------------------------------------- /tests/test_git_status.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pathlib import Path 3 | from gjdutils.cli.check_git_clean import check_git_clean 4 | from gjdutils.shell import fatal_error_msg 5 | from gjdutils.cmd import run_cmd 6 | 7 | 8 | @pytest.fixture 9 | def temp_git_repo(tmp_path): 10 | """Create a temporary git repo for testing.""" 11 | # Initialize git repo 12 | repo_path = tmp_path / "test_repo" 13 | repo_path.mkdir(parents=True, exist_ok=True) 14 | run_cmd("git init", cwd=str(repo_path)) 15 | run_cmd("git config user.email 'test@example.com'", cwd=str(repo_path)) 16 | run_cmd("git config user.name 'Test User'", cwd=str(repo_path)) 17 | 18 | # Create and commit an initial file 19 | initial_file = repo_path / "initial.txt" 20 | initial_file.write_text("initial content") 21 | run_cmd("git add initial.txt", cwd=str(repo_path)) 22 | run_cmd("git commit -m 'Initial commit'", cwd=str(repo_path)) 23 | 24 | return repo_path 25 | 26 | 27 | def test_clean_repo(temp_git_repo, monkeypatch): 28 | """Test check_git_clean with a clean repository.""" 29 | monkeypatch.chdir(temp_git_repo) 30 | check_git_clean() # Should not raise any errors 31 | 32 | 33 | def test_unstaged_changes(temp_git_repo, monkeypatch, capsys): 34 | """Test check_git_clean detects unstaged changes.""" 35 | monkeypatch.chdir(temp_git_repo) 36 | 37 | # Create an unstaged change 38 | (temp_git_repo / "initial.txt").write_text("modified content") 39 | 40 | with pytest.raises(SystemExit): 41 | check_git_clean() 42 | 43 | captured = capsys.readouterr() 44 | assert "Unstaged changes present" in captured.out 45 | 46 | 47 | def 
test_staged_changes(temp_git_repo, monkeypatch, capsys):
48 |     """Test check_git_clean detects staged but uncommitted changes."""
49 |     monkeypatch.chdir(temp_git_repo)
50 | 
51 |     # Create and stage a new file
52 |     new_file = temp_git_repo / "new.txt"
53 |     new_file.write_text("new content")
54 |     run_cmd("git add new.txt", cwd=str(temp_git_repo))
55 | 
56 |     # Decline the confirmation prompt so the check aborts fatally
57 |     monkeypatch.setattr("builtins.input", lambda _: "n")
58 | 
59 |     with pytest.raises(SystemExit):
60 |         check_git_clean()
61 | 
62 |     captured = capsys.readouterr()
63 |     assert "staged but uncommitted" in captured.out
64 | 
65 | 
66 | def test_untracked_files(temp_git_repo, monkeypatch, capsys):
67 |     """Test check_git_clean detects untracked files."""
68 |     monkeypatch.chdir(temp_git_repo)
69 | 
70 |     # Create an untracked file
71 |     untracked_file = temp_git_repo / "untracked.txt"
72 |     untracked_file.write_text("untracked content")
73 | 
74 |     # Decline the confirmation prompt so the check aborts fatally
75 |     monkeypatch.setattr("builtins.input", lambda _: "n")
76 | 
77 |     with pytest.raises(SystemExit):
78 |         check_git_clean()
79 | 
80 |     captured = capsys.readouterr()
81 |     assert "untracked files" in captured.out
82 | 
-------------------------------------------------------------------------------- /src/gjdutils/google_translate.py: --------------------------------------------------------------------------------
1 | from google.cloud import translate_v2 as translate
2 | import html
3 | from typing import Optional
4 | 
5 | 
6 | def translate_text(
7 |     text: str,
8 |     lang_src_code: Optional[str],
9 |     lang_tgt_code: str,
10 |     verbose: int = 0,
11 | ):
12 |     """Translates text into the target language.
13 | 
14 |     Target must be an ISO 639-1 language code.
15 |     See https://g.co/cloud/translate/v2/translate-reference#supported_languages
16 | 
17 |     e.g.
18 |     translated_text, result = translate_text(
19 |         text="Hello, world",
20 |         lang_src_code="en",
21 |         lang_tgt_code="el",
22 |         verbose=0,
23 |     )
24 |     """
25 |     translate_client = translate.Client()
26 | 
27 |     lang_src_code = (
28 |         lang_src_code[:2].lower() if isinstance(lang_src_code, str) else None
29 |     )
30 |     lang_tgt_code = lang_tgt_code[:2].lower()
31 |     if lang_src_code == lang_tgt_code:
32 |         return text, None
33 | 
34 |     # assert lang_src_code != lang_tgt_code, (
35 |     #     "Identical src and tgt language codes: %s" % lang_src_code
36 |     # )
37 | 
38 |     # Text can also be a sequence of strings, in which case this method
39 |     # will return a sequence of results for each text.
40 |     if lang_src_code is None:
41 |         result = translate_client.translate(text, target_language=lang_tgt_code)
42 |     else:
43 |         result = translate_client.translate(
44 |             text,
45 |             target_language=lang_tgt_code,
46 |             source_language=lang_src_code,
47 |         )
48 | 
49 |     translated_text = result["translatedText"]
50 | 
51 |     # fix escaping, e.g.
52 |     # I&#39;ve done it a week with no improvement
53 |     # ->
54 |     # I've done it a week with no improvement
55 |     translated_text = html.unescape(translated_text)
56 | 
57 |     if verbose > 0:
58 |         print(f"{lang_src_code} -> {lang_tgt_code}")
59 |         print(f"\t\"{result['input']}\" -> \"{translated_text}\"")
60 |         if lang_src_code is None:
61 |             print(f"\t\tDetected source language: {result['detectedSourceLanguage']}")
62 | 
63 |     return translated_text, result
64 | 
65 | 
66 | def detect_language(text: str, verbose: int = 0) -> tuple[str, float]:
67 |     """
68 |     Detects the text's language.
69 |     """
70 |     translate_client = translate.Client()
71 | 
72 |     # Text can also be a sequence of strings, in which case this method
73 |     # will return a sequence of results for each text.
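    # Illustrative response shape (an assumption based on the keys used below,
    # not verified against the client docs):
    #   {"language": "en", "confidence": 0.98, "input": "Hello"}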
74 | result = translate_client.detect_language(text) 75 | language, confidence = result["language"], result["confidence"] 76 | print(f"Ran detect_language for {text} -> {language} at confidence {confidence}") 77 | return language, confidence 78 | -------------------------------------------------------------------------------- /docs/instructions/WRITE_DEEP_DIVE_AS_DOC.md: -------------------------------------------------------------------------------- 1 | # Write Deep Dive as Documentation 2 | 3 | Do a deep dive on the web about the topic that the user has asked about. If you need more clarification about the requirements to focus the search fruitfully, ask questions (ideally upfront). If you need more context from files, investigate for relevant code & docs. 4 | 5 | Before you start, run `date` to get today's date, in case you need to assess how recent the search results are. 6 | 7 | Then write this up as a detailed reference doc, following the instructions in `WRITE_EVERGREEN_DOC.md`. Include URL links/references (as well as mentions of your own code/docs etc), so you can track down the original sources later if you need to. 8 | 9 | ## Process Guidelines 10 | 11 | ### 1. Clarify the Scope 12 | Before diving into research, ask questions if it will help: 13 | - What specific aspects of the topic are most important? 14 | - What's the intended use case or application? 15 | - Are there particular problems you're trying to solve? 16 | - How deep should the technical detail go? 17 | - What's the target audience for this documentation? 18 | - etc 19 | 20 | ### 2. Research Strategy 21 | - **Start broad** - Get an overview of the topic and ecosystem 22 | - **Go specific** - Focus on the aspects most relevant to your needs 23 | - **Check recency** - Note dates on articles, especially for fast-moving technologies 24 | - **Multiple sources** - Cross-reference information, taking into account authoritativeness 25 | - **Practical focus** - Prioritize actionable information over theory 26 | 27 | ### 3. Documentation Structure 28 | Loosely follow `WRITE_EVERGREEN_DOC.md` format as appropriate, e.g.: 29 | - **Overview** - What is this technology/concept? 30 | - **Resources** - Links to official docs, tutorials, tools 31 | - **Use cases** - When and why to use it 32 | - **Getting started** - Quick setup or hello world 33 | - **Key concepts** - Essential understanding 34 | - **Best practices** - Proven approaches and patterns 35 | - **Risks/gotchas** - Known issues, e.g. recent API changes, common/likely confusions & error messages, risks, etc 36 | - etc 37 | 38 | ### 4. Source Attribution 39 | - **Direct links** - Include URLs (or file paths, or whatever's appropriate) for all referenced sources 40 | - **Date notation** - Note when sources were published/accessed 41 | - **Authority assessment** - Prefer official docs, established experts, recent sources 42 | - **Code attribution** - Reference any code examples with their source 43 | 44 | Remember: The goal is to create a reference that will explain, be up-to-date, help with decision-making, save time, and/or prevent mistakes/issues/surprises. Be proactive. Focus on the information that would be most valuable given the user's intent. Highlight anything worthy of remark. 
45 | -------------------------------------------------------------------------------- /src/gjdutils/cli/pypi/deploy.py: -------------------------------------------------------------------------------- 1 | """Deployment commands for PyPI packages.""" 2 | 3 | import typer 4 | from rich.console import Console 5 | from packaging.version import Version 6 | 7 | from gjdutils import __version__ 8 | from gjdutils.cli.check_git_clean import check_git_clean 9 | from gjdutils.cli.pypi.check import check_local, check_prod, check_test 10 | from gjdutils.pypi_build import ( 11 | build_package, 12 | check_version_exists, 13 | clean_build_dirs, 14 | upload_to_pypi, 15 | ) 16 | from gjdutils.shell import fatal_error_msg 17 | 18 | # Create the deploy subcommand group 19 | app = typer.Typer( 20 | help="Deploy package to PyPI", 21 | add_completion=True, 22 | no_args_is_help=True, 23 | context_settings={"help_option_names": ["-h", "--help"]}, 24 | ) 25 | console = Console() 26 | 27 | 28 | @app.command(name="test") 29 | def deploy_test(): 30 | """Deploy package to Test PyPI.""" 31 | console.rule("[yellow]Starting Test PyPI Deployment") 32 | 33 | # Check if version already exists 34 | if check_version_exists(Version(__version__), pypi_env="test"): 35 | fatal_error_msg(f"Version {__version__} already exists on Test PyPI.\n") 36 | 37 | # Execute deployment steps 38 | clean_build_dirs() 39 | build_package() 40 | upload_to_pypi(pypi_env="test") 41 | 42 | console.print("\n[green]Deployment to Test PyPI completed![/green]") 43 | check_test() 44 | 45 | 46 | @app.command(name="prod") 47 | def deploy_prod(): 48 | """Deploy package to Production PyPI.""" 49 | console.rule("[yellow]Starting Production PyPI Deployment") 50 | 51 | # Check git status first 52 | check_git_clean() 53 | 54 | # Check if version already exists 55 | if check_version_exists(Version(__version__), pypi_env="prod"): 56 | fatal_error_msg(f"Version {__version__} already exists on PyPI.\n") 57 | 58 | # Confirm with user before proceeding 59 | version_confirm = input( 60 | f"\nAre you sure you want to deploy version {__version__} to production PyPI? (y/N): " 61 | ) 62 | if version_confirm.lower() != "y": 63 | console.print("\n[yellow]Deployment cancelled by user[/yellow]") 64 | return 65 | 66 | # Execute deployment steps 67 | clean_build_dirs() 68 | build_package() 69 | upload_to_pypi(pypi_env="prod") 70 | 71 | console.print("\n[green]Deployment to Production PyPI completed![/green]") 72 | check_prod() 73 | 74 | 75 | @app.command(name="all") 76 | def deploy_all(): 77 | """Run full deployment process (local -> test -> prod).""" 78 | console.rule("[yellow]Starting Full Deployment Process") 79 | 80 | check_local() 81 | deploy_test() 82 | deploy_prod() 83 | 84 | console.print("\n[green]Full deployment process completed successfully! 🎉[/green]") 85 | -------------------------------------------------------------------------------- /src/gjdutils/iterfunc.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | from typing import Sequence 3 | 4 | 5 | def contiguous_pairs(lst: Sequence): 6 | """ 7 | Given a list LST, return the contiguous pairs, e.g. 8 | 9 | [10, 20, 30, 40, 50] 10 | -> 11 | [(10, 20), (20, 30), (30, 40), (40, 50)] 12 | 13 | (from GitHub Copilot) 14 | """ 15 | pairs = [(lst[i], lst[i + 1]) for i in range(len(lst) - 1)] 16 | return pairs 17 | 18 | 19 | def flatten(lol): 20 | """ 21 | See http://stackoverflow.com/questions/406121/flattening-a-shallow-list-in-python 22 | 23 | e.g. 
[['image00', 'image01'], ['image10'], []] -> ['image00', 'image01', 'image10']
24 |     """
25 | 
26 |     chain = list(itertools.chain(*lol))
27 |     return chain
28 | 
29 | 
30 | # def flatten(list_of_lists):
31 | #     """
32 | #     Flatten one level of nesting
33 | 
34 | #     from https://docs.python.org/3/library/itertools.html#itertools-recipes
35 | #     """
36 | #     return list(chain.from_iterable(list_of_lists))
37 | 
38 | 
39 | def unique(items):
40 |     """
41 |     Returns KEEP, a list based on ITEMS, but with duplicates
42 |     removed (preserving order, based on first new example).
43 | 
44 |     http://stackoverflow.com/questions/89178/in-python-what-is-the-fastest-algorithm-for-removing-duplicates-from-a-list-so-t
45 | 
46 |     unique([1, 1, 2, 'a', 'a', 3]) -> [1, 2, 'a', 3]
47 |     """
48 |     found = set([])
49 |     keep = []
50 |     for item in items:
51 |         if item not in found:
52 |             found.add(item)
53 |             keep.append(item)
54 |     return keep
55 | 
56 | 
57 | def uniquify_list(lst):
58 |     """Return a list of the elements in lst, but without duplicates, preserving order.
59 | 
60 |     from comment in http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52560
61 | 
62 |     Lightweight and fast ..., Raymond Hettinger, 2002/03/17
63 |     """
64 |     seen = {}
65 |     return [seen.setdefault(e, e) for e in lst if e not in seen]
66 | 
67 | 
68 | def grouper(iterable, n):
69 |     """
70 |     Collect data into fixed-length chunks or blocks. If
71 |     the last block is too small, returns a truncated block.
72 | 
73 |     e.g. grouper('ABCDEFG', 3) --> ABC DEF G
74 | 
75 |     From http://stackoverflow.com/a/8991553/230523
76 |     """
77 |     it = iter(iterable)
78 |     while True:
79 |         chunk = tuple(itertools.islice(it, n))
80 |         if not chunk:
81 |             return
82 |         yield chunk
83 | 
84 | 
85 | def grouper_ragged(iterable, n):
86 |     """
87 |     Collect data into non-overlapping chunks - the last one might be shorter than the others
88 | 
89 |     >>> print(list(grouper_ragged('ABCDEFG', 3)))  # [('A', 'B', 'C'), ('D', 'E', 'F'), ('G',)]
90 | 
91 |     from https://stackoverflow.com/a/41333827/230523
92 |     """
93 |     it = iter(iterable)
94 |     group = tuple(itertools.islice(it, n))
95 |     while group:
96 |         yield group
97 |         group = tuple(itertools.islice(it, n))
98 | 
-------------------------------------------------------------------------------- /docs/instructions/RENAME_OR_MOVE.md: --------------------------------------------------------------------------------
1 | # Rename or Move Files
2 | 
3 | - Rename or move a file or files as per the user's explicit instructions.
4 | - If asked to propose/discuss, then don't make changes until they have been agreed with the user.
5 | - If things are confusing, or you see potential problems, or have a better idea, then you should ask questions, raise concerns, make suggestions, etc.
6 | 
7 | - If there are multiple files, use tasks and subagents (provided with rich context) to:
8 |   - Do the rename/move
9 |   - Prefer to use `git mv` rather than `mv`, where appropriate. Or if there is a special tool for doing the move (e.g. a syntactically-aware refactoring tool), use that
10 |   - Search carefully for all the places that refer to each file, and update them appropriately.
11 |   - Use **sd** for updating references across the codebase (see `docs/reference/SD_STRING_DISPLACEMENT_FIND_REPLACE.md`)
12 |   - Be careful not to break/disrupt functionality.
13 | 
14 | - IMPORTANT: If in doubt, or you notice any issues/surprises/complications, stop and ask.
15 | 
16 | - Once you have finished, commit these changes as a single commit, following `GIT_COMMIT_CHANGES.md`
17 | 
18 | ## Process Guidelines
19 | 
20 | ### Before Starting
21 | 1. **Understand the scope** - How many files are affected?
22 | 2. **Check for references** - What refers to these files?
23 | 3. **Identify risks** - What could break with this change?
24 | 4. **Plan the approach** - Git mv, refactoring tools, or simple moves?
25 | 
26 | ### During Execution
27 | 1. **Use appropriate tools**:
28 |    - `git mv` for version-controlled files
29 |    - IDE refactoring tools for code symbols
30 |    - Search and replace for documentation references
31 | 
32 | 2. **Search thoroughly for references**:
33 |    - Import/require statements
34 |    - Documentation links
35 |    - Configuration files
36 |    - Build scripts and manifests
37 |    - Test files
38 |    - Comments and README files
39 | 
40 | 3. **Test incrementally** if possible:
41 |    - Check that code still compiles
42 |    - Run relevant tests
43 |    - Verify documentation links
44 | 
45 | ### Common Reference Patterns
46 | - **Code**: `import './old-name'`, `require('../old-path')`
47 | - **Documentation**: `[link](old-path.md)`, `see old-file.js`
48 | - **Configuration**: File paths in package.json, tsconfig.json, etc.
49 | - **Build systems**: File references in build scripts, CI configs
50 | - **URLs**: Repository links, deployment paths
51 | 
52 | ### Safety Checks
53 | - **Backup important changes** before large moves
54 | - **Use git status** to review all affected files
55 | - **Test functionality** after the move
56 | - **Review commit diff** before finalizing
57 | 
58 | ### Complex Scenarios
59 | For large refactoring operations:
60 | 1. Break into smaller, atomic moves when possible
61 | 2. Use subagents to handle different aspects (code vs docs vs config)
62 | 3. Consider doing a trial run or creating a branch first
63 | 4. Coordinate with team members if this affects shared code
64 | 
65 | Remember: It's better to ask questions and move carefully than to break working functionality.
-------------------------------------------------------------------------------- /CLAUDE.md: --------------------------------------------------------------------------------
1 | # CLAUDE.md
2 | 
3 | This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4 | 5 | see also: `README.md` 6 | 7 | ## Development Commands 8 | 9 | **Setup:** 10 | ```bash 11 | # Install in editable mode with all dependencies 12 | pip install -e ".[all_no_dev, dev]" 13 | 14 | # Or use the convenience script 15 | python -m gjdutils.scripts.install_all_dev_dependencies 16 | ``` 17 | 18 | **Testing:** 19 | ```bash 20 | # Run all tests 21 | pytest 22 | 23 | # Run specific test file 24 | pytest tests/test_strings.py 25 | 26 | # Run with verbose output 27 | pytest -v 28 | ``` 29 | 30 | **Linting:** 31 | ```bash 32 | # Format code (uses black) 33 | black src/ tests/ 34 | 35 | # Lint code (uses flake8) 36 | flake8 src/ tests/ 37 | ``` 38 | 39 | **Building:** 40 | ```bash 41 | # Build package 42 | python -m build 43 | 44 | # Clean build directories 45 | rm -rf dist/ build/ *.egg-info/ 46 | ``` 47 | 48 | **PyPI Deployment:** 49 | ```bash 50 | # Full deployment workflow (includes all checks) 51 | gjdutils pypi deploy all 52 | 53 | # Individual steps 54 | gjdutils pypi check local # Test locally 55 | gjdutils pypi deploy test # Deploy to Test PyPI 56 | gjdutils pypi deploy prod # Deploy to Production PyPI 57 | ``` 58 | 59 | ## Architecture Overview 60 | 61 | **Module Organization:** 62 | - **Core utilities**: `strings`, `files`, `dicts`, `lists`, `functions` - fundamental data manipulation 63 | - **System interaction**: `cmd`, `shell`, `env`, `runtime` - process execution and environment management 64 | - **AI/LLM**: `llm_utils`, `llms_claude`, `llms_openai` - unified interface for AI services 65 | - **Data science**: `dsci`, `num`, `indexing` - analysis and numerical operations 66 | - **Audio/Voice**: `audios`, `voice_speechrecognition`, `outloud_text_to_speech` - speech processing 67 | - **Web/HTML**: `html`, `web`, `google_translate` - web scraping and content processing 68 | 69 | **CLI Structure:** 70 | Built with Typer in hierarchical command groups: 71 | - Main app in `cli/main.py` registers subcommands 72 | - PyPI management in `cli/pypi/` (check, deploy commands) 73 | - Utility commands: `check-git-clean`, version display 74 | - Special entry point: `gjdutils-export-envs` for shell environment setup 75 | 76 | **Key Patterns:** 77 | - **Optional dependencies**: Features grouped by domain (`audio_lang`, `llm`, `dsci`, etc.) 78 | - **Error handling**: Uses `gjdutils.shell.fatal_error_msg()` for CLI error reporting 79 | - **Versioning**: Dynamic version from `__version__.py`, integrated into deployment checks 80 | - **Environment management**: `.env` file support with shell script generation 81 | 82 | **Testing Strategy:** 83 | - Integration tests for CLI commands in `test_cli.py` 84 | - Environment variable testing in `test_env_integration.py` 85 | - Unit tests follow `test_{module}.py` naming convention 86 | - Uses pytest with typer testing utilities 87 | 88 | **Deployment Process:** 89 | 1. Update version in `__version__.py` 90 | 2. Ensure git working directory is clean 91 | 3. Use `gjdutils pypi deploy all` for full automated workflow 92 | 4. Each step includes verification and rollback capabilities -------------------------------------------------------------------------------- /docs/instructions/GIT_COMMIT_CHANGES.md: -------------------------------------------------------------------------------- 1 | # Git Commit Guidelines 2 | 3 | ## Initial Assessment 4 | Have a look at Git diff. Batch the changes into commits, and make them one at a time. 
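
For example, one way to survey the pending work before batching (illustrative; adapt as needed):

```bash
git --no-pager diff --stat           # unstaged changes, summarised per file
git --no-pager diff --cached --stat  # staged but uncommitted changes
git status --porcelain               # everything, including untracked files
```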
5 | 
6 | ## Commit Best Practices
7 | 
8 | ### Don't ever do anything destructive
9 | 
10 | ABOVE ALL, don't do anything that could result in lost work or mess up yet-to-be-committed changes, unless EXPLICITLY instructed to by the user after warning them.
11 | 
12 | 
13 | ### Batching changes into commits
14 | - Each commit should represent a small/medium feature, or stage, or cluster of related changes (e.g. tweaking a bunch of docs).
15 | - But strike a balance, e.g. the code and docs changes for a given feature should be in the same commit.
16 | - The codebase should (ideally) be in a working state after each commit.
17 | - Try not to mix unrelated changes.
18 | - Before making the commit, list all files that will be committed.
19 | - IMPORTANT: If this is being run in a conversation, only commit changes relevant to this conversation. (Still use reset/add/commit single-command chaining)
20 | - When choosing the order of batches, prefer batches that concern files with older modification dates, in order to make it less likely that another agent is still working on them.
21 | 
22 | 
23 | ### Commit Message Format
24 | ```
25 | <type>: <subject> (50 chars max)
26 | 
27 | <body> (optional, wrap at 72 chars)
28 | - Include a reference to current planning doc at the top of the commit body if there is one, e.g. "Planning doc: yyMMddx_feature_name.md"
29 | - More detailed explanation
30 | - Bullet points for multiple changes
31 | ```
32 | 
33 | Types: feat, fix, docs, style, refactor, test, chore
34 | 
35 | ### Handling Concurrent Changes
36 | There may be other agents changing the code while you work, and they might have added other files already.
37 | - IMPORTANT: To minimise interference, ALWAYS chain the reset/add/commit operations (to make sure we unstage first, then stage, then commit, atomically):
38 | ```bash
39 | git reset HEAD unwanted-file && git add wanted-file && git commit -m "fix: resolve auth bug"
40 | ```
41 | - This reduces the window where another agent's changes could interfere
42 | 
43 | ### Important Notes
44 | - If the code is in a partial/broken state, prioritise commits that leave the codebase working
45 | - If you encounter merge conflicts or ANY unexpected issues, stop and ask the user immediately
46 | - When in doubt, ask the user before proceeding
47 | - **ALWAYS quote file paths** when using git commands to avoid shell expansion issues:
48 |   - `git add "frontend/src/routes/language/[target_language_code]/+page.svelte"`
49 |   - This is especially important for SvelteKit routes with brackets: `[param]`
50 | 
51 | 
52 | ### Gitignore
53 | 
54 | If you notice files that almost certainly shouldn't be committed (e.g. `node_modules`, `passwords.secret`), read the `.gitignore`, and stop to ask the user whether to add them to it. (A quick way to list candidates is sketched at the end of this doc.)
55 | 
56 | 
57 | ## Parallel AI Assistance
58 | 
59 | Run this with parallel AI subagents unless there is a good reason not to. Provide it with lots of context about what we've been doing that will help it to make good decisions and write a good commit message.
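
As referenced in the Gitignore section above, a minimal illustrative way to list untracked files that may belong in `.gitignore`:

```bash
# untracked files appear with a "??" prefix in porcelain output
git status --porcelain | grep '^??'
```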
-------------------------------------------------------------------------------- /src/gjdutils/image_utils.py: --------------------------------------------------------------------------------
1 | import base64
2 | import io
3 | from typing import Optional
4 | 
5 | 
6 | def image_to_base64_resized(image_full_filen: str, resize_target_size_kb: int = 100):
7 |     # NB: despite the name, this returns the raw resized image bytes - the
8 |     # base64 encoding itself happens in image_to_base64() below
9 |     from PIL import Image
10 | 
11 |     # based on https://claude.ai/chat/d0eb1f39-3f42-4cb5-a2ec-5aa102c60ea0
12 |     assert resize_target_size_kb > 0
13 |     with Image.open(image_full_filen) as img_orig:
14 |         width_orig, height_orig = img_orig.size
15 |         # Calculate initial file size
16 |         temp_buffer = io.BytesIO()
17 |         img_orig.save(temp_buffer, format=img_orig.format)
18 |         img_resized = img_orig.copy()
19 |         current_size_kb = len(temp_buffer.getvalue()) / 1024
20 | 
21 |         # Iteratively resize until file size is below target
22 |         resize_factor = 1.0
23 |         while current_size_kb > resize_target_size_kb:
24 |             # shrink the dimensions by a further 10% each iteration
25 |             resize_factor *= 0.9
26 |             width = int(width_orig * resize_factor)
27 |             height = int(height_orig * resize_factor)
28 |             img_resized = img_orig.resize((width, height), Image.LANCZOS)  # type: ignore
29 |             # Check new file size
30 |             temp_buffer = io.BytesIO()
31 |             img_resized.save(temp_buffer, format=img_orig.format)
32 |             current_size_kb = len(temp_buffer.getvalue()) / 1024
33 | 
34 |         # Convert the final resized image to bytes in the original format
35 |         img_bytes = io.BytesIO()
36 |         img_resized.save(img_bytes, format=img_orig.format)
37 |         img_buffer = img_bytes.getvalue()
38 | 
39 |     return img_buffer
40 | 
41 | 
42 | def image_to_base64(img_full_filen: str, resize_target_size_kb: Optional[int] = None):
43 |     # from https://chat.openai.com/c/35f15af9-b947-4fa6-acbe-2a5ed26e7547
44 |     if resize_target_size_kb is None:
45 |         with open(img_full_filen, "rb") as image_file:
46 |             img_bytes = image_file.read()
47 |     else:
48 |         img_bytes = image_to_base64_resized(img_full_filen, resize_target_size_kb)
49 |     img_b64 = base64.b64encode(img_bytes).decode("utf-8")
50 |     return img_b64
51 | 
52 | 
53 | def image_to_base64_basic(image_filen: str) -> str:
54 |     with open(image_filen, "rb") as image_file:
55 |         return base64.b64encode(image_file.read()).decode("ascii")
56 | 
57 | 
58 | def contents_for_images(image_filens: list[str], resize_target_size_kb: int):
59 |     # assert (
60 |     #     1 <= len(image_filens) <= 10
61 |     # ), "You can only provide between 1 and 10 images"
62 |     base64_images = []
63 |     new_contents = []
64 |     for image_filen in image_filens:
65 |         base64_image = image_to_base64(
66 |             image_filen, resize_target_size_kb=resize_target_size_kb
67 |         )
68 |         filen_content = {
69 |             "type": "text",
70 |             "text": f"Filename: {image_filen}",
71 |         }
72 |         img_content = {
73 |             "type": "image_url",
74 |             "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
75 |         }
76 |         base64_images.append(base64_image)
77 |         new_contents.extend([filen_content, img_content])
78 |     return new_contents, base64_images
79 | 
-------------------------------------------------------------------------------- /planning/250215_publishing_to_pypi.md: --------------------------------------------------------------------------------
1 | # Publishing gjdutils to PyPI
2 | 
3 | ## Context
4 | - First release of gjdutils to PyPI
5 | - Version 0.2.0 (after rename from gdutils)
6 | - Package contains utility functions for strings, dates, data science/AI, web development
7 | 
8 | ## Prerequisites
9 | - Python >=3.10
10 | - Build tools: `pip install build twine`
11 | - PyPI account with 2FA configured
12 | - .pypirc
file configured with test and prod PyPI credentials 13 | 14 | ## Steps 15 | 16 | 1. Build and test package locally: 17 | ```bash 18 | # Option 1: Automated testing script (recommended) 19 | python -m gjdutils.scripts.check_locally 20 | 21 | # Option 2: Manual steps 22 | # Clean any existing builds 23 | rm -rf dist/ build/ 24 | 25 | # Build the package 26 | python -m build 27 | ``` 28 | 29 | 2. Test PyPI Deployment: 30 | ```bash 31 | # Option 1: Automated testing script (recommended) 32 | python -m gjdutils.scripts.check_test_pypi 33 | 34 | # Option 2: Manual steps 35 | # Upload to test.pypi.org 36 | twine upload -r testpypi dist/* 37 | 38 | # Create a fresh virtualenv for testing 39 | python -m venv /tmp/test-gjdutils 40 | source /tmp/test-gjdutils/bin/activate 41 | 42 | # Test installation from test.pypi.org (with dependencies from PyPI) 43 | pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ gjdutils 44 | 45 | # Test basic functionality 46 | python -c "import gjdutils; print(gjdutils.__version__)" 47 | ``` 48 | 49 | 3. Production PyPI Deployment: 50 | ```bash 51 | # Upload to PyPI 52 | twine upload dist/* 53 | 54 | # Create a fresh virtualenv for testing 55 | python -m venv /tmp/prod-gjdutils 56 | source /tmp/prod-gjdutils/bin/activate 57 | 58 | # Test installation 59 | pip install gjdutils 60 | 61 | # Test basic functionality 62 | python -c "import gjdutils; print(gjdutils.__version__)" 63 | ``` 64 | 65 | ## Optional Features 66 | Package has several optional feature sets that can be installed: 67 | ```bash 68 | pip install "gjdutils[dt]" # Date/time utilities 69 | pip install "gjdutils[llm]" # AI/LLM integrations 70 | pip install "gjdutils[audio_lang]" # Speech/translation 71 | pip install "gjdutils[html_web]" # Web scraping 72 | pip install "gjdutils[dev]" # Development tools 73 | ``` 74 | 75 | ## Progress Tracking 76 | 77 | ### Current State 78 | - Package renamed to gjdutils 79 | - Version 0.2.0 ready for release 80 | - All tests passing 81 | 82 | ### Next Steps 83 | 1. Build package: 84 | - Clean existing builds 85 | - Run build command 86 | - Verify dist/ contents 87 | 88 | 2. Test deployment: 89 | - Upload to test.pypi.org 90 | - Test installation in fresh virtualenv 91 | - Verify basic functionality 92 | 93 | 3. 
Production deployment: 94 | - Upload to PyPI 95 | - Test installation 96 | - Verify functionality 97 | 98 | ### Post-deployment 99 | - [ ] Update GitHub release description 100 | - [ ] Announce release (if needed) 101 | - [ ] Update documentation with PyPI installation instructions -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | # case-insensitive 3 | name = "GJDutils" 4 | dynamic = ["version"] 5 | authors = [ 6 | { name="Greg Detre", email="greg@gregdetre.com" }, 7 | ] 8 | description = "A collection of useful utility functions (basics, data science/AI, web development, etc)" 9 | readme = "README.md" 10 | # because I use the | syntax for type Union 11 | requires-python = ">=3.10" 12 | classifiers = [ 13 | "Programming Language :: Python :: 3", 14 | "License :: OSI Approved :: MIT License", 15 | "Operating System :: OS Independent", 16 | "Topic :: Utilities", 17 | "Development Status :: 4 - Beta", 18 | "Intended Audience :: Developers", 19 | ] 20 | keywords = ["utilities", "strings", "dates", "data science", "web development", "llm", "ai",] 21 | 22 | dependencies = [ 23 | "ipython", 24 | "jinja2", 25 | "python-dotenv", 26 | "pydantic", 27 | "six", 28 | "typer", # For CLI 29 | "rich", # For CLI output formatting 30 | ] 31 | 32 | [project.scripts] 33 | gjdutils = "gjdutils.cli:app" 34 | gjdutils-export-envs = "gjdutils.scripts.install:install_export_envs" 35 | 36 | # if you add a new optional dependency group, remember to add it to the 'all' group below 37 | [project.optional-dependencies] 38 | audio_lang = [ 39 | "azure-cognitiveservices-speech", 40 | "google-cloud-texttospeech", 41 | "google-cloud-translate", 42 | "elevenlabs>=2.1.0", 43 | # for playing mp3 44 | "playsound", 45 | # for voice_speechrecognition Microphone 46 | "pyaudio", 47 | # for playing mp3 48 | "pygame", 49 | # for playing mp3 50 | "python-vlc", 51 | "SpeechRecognition", 52 | "openai-whisper", # for OpenAI Whisper speech recognition 53 | ] 54 | dev = [ 55 | "black", 56 | "build", 57 | "flake8", 58 | "pytest", 59 | "rich", # for console output formatting 60 | "twine", 61 | "typer", # for CLI tools 62 | "wheel", 63 | ] 64 | dsci = [ 65 | "numpy", 66 | "pandas", 67 | ] 68 | dt = [ 69 | "humanize", 70 | "pendulum", 71 | ] 72 | html_web = [ 73 | "bs4", 74 | "lxml", 75 | ] 76 | llm = [ 77 | "anthropic", 78 | # "llm", no longer using this 79 | "openai", 80 | "pillow", # for images 81 | ] 82 | 83 | all_no_dev = [ 84 | "gjdutils[audio_lang]", 85 | "gjdutils[dsci]", 86 | "gjdutils[dt]", 87 | "gjdutils[html_web]", 88 | "gjdutils[llm]", 89 | ] 90 | 91 | [project.urls] 92 | Homepage = "https://github.com/gregdetre/gjdutils" 93 | Repository = "https://github.com/gregdetre/gjdutils" 94 | # Issues = "https://github.com/gregdetre/gjdutils/issues" 95 | # Documentation 96 | # Changelog 97 | 98 | [build-system] 99 | requires = ["hatchling"] 100 | build-backend = "hatchling.build" 101 | 102 | [tool.hatch.version] 103 | path = "src/gjdutils/__version__.py" 104 | 105 | [tool.hatch.build.targets.sdist] 106 | exclude = [ 107 | "dist/", 108 | "gjdutils/obsolete/", 109 | "gjdutils/todo/", 110 | "tests/fixme_*.py", 111 | ] 112 | 113 | [tool.hatch.build.targets.wheel] 114 | packages = ["src/gjdutils"] 115 | 116 | [tool.hatch.build.targets.wheel.shared-data] 117 | "src/gjdutils/scripts/export_envs.sh" = "bin/export_envs.sh" 118 | 
-------------------------------------------------------------------------------- /src/gjdutils/html.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | from lxml.html import tostring 3 | from lxml.etree import _Element as ElementType 4 | from typing import Optional, Union 5 | 6 | 7 | def remove_html_tags(html: str): 8 | soup = BeautifulSoup(html, "html.parser") 9 | return soup.get_text() 10 | 11 | 12 | def contents_of_body(soup): 13 | """ 14 | e.g. 15 | BeautifulSoup('

hello

world', features='lxml') 16 | => 17 |

hello

18 |

world

19 | 20 | N.B. for html.parser, you might just be able to do: str(soup) 21 | """ 22 | # it might be better to prettify with body hidden=True??? 23 | return "\n".join([str(t) for t in soup.body.contents]) 24 | 25 | 26 | def compare_html(h1, h2): 27 | h1p = BeautifulSoup(h1, features="html.parser").prettify().strip() 28 | h2p = BeautifulSoup(h2, features="html.parser").prettify().strip() 29 | assert h1p == h2p 30 | 31 | 32 | def remove_attrs_from_html(h): 33 | """ 34 | Gets rid of all the attrs in the html. 35 | """ 36 | soup = BeautifulSoup(h, features="lxml") 37 | for t in soup.recursiveChildGenerator(): 38 | t.attrs = {} # type: ignore 39 | # whitespace_from_linebreaks( 40 | # contents_of_body(soup) 41 | # ) 42 | return contents_of_body(soup) 43 | 44 | 45 | def adjust_indentation(pretty_html, indent: int): 46 | # from https://www.perplexity.ai/search/can-you-customise-the-beautifu-225tf.pISaiggsL5tNL.gA 47 | lines = pretty_html.split("\n") 48 | adjusted_lines = [] 49 | for line in lines: 50 | line_lstrip = line.lstrip(" ") 51 | leading_spaces = len(line) - len(line_lstrip) 52 | indent_level = leading_spaces // 1 # default indent is 1 space 53 | adjusted_lines.append(" " * (indent_level * indent) + line_lstrip) 54 | return "\n".join(adjusted_lines) 55 | 56 | 57 | def prettify_html( 58 | html: Union[str, ElementType, list[ElementType]], # BeautifulSoup 59 | indent: int = 2, 60 | n: Optional[int] = None, # number of chars to show 61 | ): 62 | # if isinstance(html, pq): 63 | # html = html.outer_html() # type: ignore 64 | if isinstance(html, list): 65 | # then we'll handle it as a string in a moment 66 | html = "".join( 67 | [tostring(e, method="html").decode() for e in html] # type: ignore 68 | ) #  type: ignore 69 | if isinstance(html, ElementType): 70 | # this will do some cleaning and fixing. but 71 | # you need document_fromstring() if you want to make sure 72 | # that it's a full html doc, e.g. with html, body 73 | html = tostring(html, method="html").decode() # type: ignore 74 | 75 | soup = BeautifulSoup(html, "html.parser") # type: ignore 76 | # html2 = tostring(html, pretty_print=True, method="html").decode() # type: ignore 77 | # the lxml pretty_print just isn't as good as BS4, e.g. with a list of elements 78 | # it wraps things in a div fragment, but the pretty-print of that isn't right 79 | html2 = soup.prettify() 80 | prettified = adjust_indentation(html2, indent=indent)[:n] # type: ignore 81 | return prettified 82 | 83 | 84 | def pprettify_html(*args, **kwargs) -> None: 85 | html = prettify_html(*args, **kwargs) 86 | print(html) 87 | -------------------------------------------------------------------------------- /planning/250215_rename_gdutils_to_gjdutils.md: -------------------------------------------------------------------------------- 1 | # Renaming gdutils to gjdutils 2 | 3 | ## Context 4 | - Renaming project from `gdutils` to `gjdutils` (existing `gdutils` name is taken) 5 | - Package contains utility functions for strings, dates, data science/AI, web development 6 | - Currently at version 0.1.0, moving to 0.2.0 for the rename 7 | 8 | ## Files Requiring Updates 9 | 1. Package files: 10 | - pyproject.toml: 11 | - Update name from "GDutils" to "GJDutils" 12 | - Update GitHub URLs from gdutils to gjdutils 13 | - __VERSION__.py: Update version to 0.2.0 14 | - Rename directory from gdutils/ to gjdutils/ 15 | - Update imports in all Python files: 16 | - `from gdutils import ...` 17 | - `import gdutils` 18 | - References like `gdutils.something()` 19 | 20 | 2. 
Documentation/Meta: 21 | - README.md: Update all references and examples 22 | - .gitignore: Check for any gdutils-specific entries 23 | - Any additional .md files in docs/ or root directory 24 | 25 | ## Steps 26 | 27 | 0. Backup (Important!) 28 | ```bash 29 | # Create a backup branch 30 | git checkout -b backup-before-rename 31 | git push origin backup-before-rename 32 | # Return to main 33 | git checkout main 34 | ``` 35 | 36 | 1. Rename GitHub Repository (✓ DONE) 37 | - In GitHub web UI: Settings -> rename repository from 'gdutils' to 'gjdutils' 38 | - Update local git remote: 39 | ```bash 40 | git remote set-url origin https://github.com/gregdetre/gjdutils.git 41 | ``` 42 | - Verify: `git remote -v` 43 | 44 | 2. Local Development Changes 45 | a. Create a new branch for rename changes: 46 | ```bash 47 | git checkout -b rename-to-gjdutils 48 | ``` 49 | 50 | b. Update configuration files: 51 | - ✓ Update version to 0.2.0 in __VERSION__.py 52 | - ✓ Update pyproject.toml with new name and URLs 53 | - ✓ Update README.md with new package name 54 | - ✓ Review other documentation files 55 | 56 | c. Rename the local directory: 57 | ```bash 58 | # From the parent directory containing gdutils/ 59 | mv gdutils gjdutils 60 | ``` 61 | 62 | d. Update all internal imports and references 63 | 64 | 3. Testing 65 | - ✓ Run existing tests to ensure they pass 66 | - ✓ Test local import: `pip install -e .` 67 | - ✓ Verify imports work 68 | 69 | ## Progress Tracking 70 | 71 | ### ✓ Completed Steps 72 | - ✓ Created backup branch 73 | - ✓ Renamed GitHub repository 74 | - ✓ Updated local git remote 75 | - ✓ Updated version to 0.2.0 in __VERSION__.py 76 | - ✓ Updated pyproject.toml with new name and URLs 77 | - ✓ Renamed source directory from src/gdutils to src/gjdutils 78 | - ✓ Updated imports in Python files to use gjdutils 79 | - ✓ Updated test files to use gjdutils 80 | - ✓ Fixed package __init__.py to expose version 81 | - ✓ Verified all tests are passing 82 | - ✓ Committed all changes to rename-to-gjdutils branch 83 | - ✓ Merged rename-to-gjdutils to main 84 | - ✓ Tagged v0.2.0 85 | 86 | ### Files Updated 87 | - [x] pyproject.toml 88 | - [x] __VERSION__.py 89 | - [x] tests/test_gdutils.py (imports updated) 90 | - [x] All Python files in src/gjdutils/ (imports updated) 91 | - [x] README.md 92 | 93 | ## Next Steps 94 | See @250215_publishing_to_pypi.md for next steps on publishing to PyPI. 95 | 96 | 97 | 98 | -------------------------------------------------------------------------------- /docs/instructions/UPDATE_DOCUMENTATION_ORGANISATION_DOC.md: -------------------------------------------------------------------------------- 1 | # Update Documentation Organisation Doc 2 | 3 | Creates or updates `docs/reference/DOCUMENTATION_ORGANISATION.md` - a navigational guide to all project documentation. 4 | 5 | ## See also 6 | 7 | - `../reference/DOCUMENTATION_ORGANISATION.md` - Current documentation organisation guide 8 | - `UPDATE_HOUSEKEEPING_DOCUMENTATION.md` - Run this first for content review 9 | - `WRITE_EVERGREEN_DOC.md` - Structure guidelines 10 | 11 | ## Task 12 | 13 | Create/update the documentation organisation guide with sensible categories and clear starting points for newcomers. 14 | 15 | **Run after housekeeping**: This should be done after `UPDATE_HOUSEKEEPING_DOCUMENTATION.md` to ensure structural changes reflect current content. 16 | 17 | ## Content Requirements 18 | 19 | 1. **Use your judgement** to organise docs into sensible categories (don't move files, just categorise in the guide) 20 | 2. 
**Highlight key starting points** for newcomers and different personas 21 | 3. **Cover all significant docs** in relevant directories plus project root files like `README.md`, agent instruction files, and planning structure 22 | 23 | ## Process 24 | 25 | 1. **Discover**: Use Glob to find all documentation files 26 | 2. **Categorise**: Create logical groupings based on project needs and user personas 27 | 3. **Describe**: 1-2 sentences per doc, mark important/starter docs clearly 28 | 29 | ## Common Categories 30 | 31 | ### By User Type 32 | - **New contributors** - Setup, architecture overview, coding principles 33 | - **AI agents** - Instruction files, workflow docs, debugging guides 34 | - **Maintainers** - Housekeeping processes, planning workflows 35 | 36 | ### By Content Type 37 | - **Setup & Infrastructure** - Installation, configuration, tooling 38 | - **Architecture & Design** - System overview, key decisions, patterns 39 | - **Development Workflows** - Git, testing, debugging, planning 40 | - **AI-Assisted Development** - Agent instructions, modes, processes 41 | - **Domain-Specific** - Feature documentation, API references 42 | 43 | ### By Frequency 44 | - **Daily use** - Common commands, debugging, development modes 45 | - **Occasional** - Setup, major changes, housekeeping 46 | - **Reference** - Architecture decisions, comprehensive guides 47 | 48 | ## Structure Template 49 | 50 | ```markdown 51 | # Documentation Organisation 52 | 53 | ## Quick Start 54 | - New to the project? Start here: ... 55 | - Setting up development? See: ... 56 | - Working with AI assistants? Begin with: ... 57 | 58 | ## By Category 59 | 60 | ### [Category Name] 61 | Brief description of what this category covers. 62 | 63 | - **[Important Doc]** (⭐ START HERE) - Brief description 64 | - [Regular Doc] - Brief description 65 | - [Another Doc] - Brief description 66 | 67 | ### [Another Category] 68 | ... 69 | 70 | ## By Persona 71 | - **New Developer**: [doc1], [doc2], [doc3] 72 | - **AI Agent**: [instruction1], [mode1], [process1] 73 | - **Maintainer**: [housekeeping1], [planning1] 74 | ``` 75 | 76 | ## Focus 77 | 78 | **This task**: Documentation discovery, categorisation, and navigation structure 79 | **Housekeeping**: Content accuracy, cross-references, implementation status 80 | 81 | **Sequence**: Run housekeeping first to ensure content is current, then update organisation guide to reflect any structural changes. -------------------------------------------------------------------------------- /docs/instructions/CAPTURE_SOUNDING_BOARD_CONVERSATION.md: -------------------------------------------------------------------------------- 1 | Transform conversations into structured, preservable documents that capture nuance, decisions, and context for future reference and reflection. 2 | 3 | ## Core Principles 4 | 5 | ### Preserve vs Synthesize Balance 6 | 7 | **Capture/preserve**: (always quote verbatim for user input) 8 | - Background/context, intentions, requirements, decisions, principles, preferences, rationale, terminology, framings, constraints, criteria, etc from the user, etc 9 | - Memorable insights, tradeoffs, recommendations, etc 10 | - Specific proposals, examples, and code snippets discussed 11 | - Citations/references, e.g. 
to specific files, documentation, conversations, or code, external sources mentioned, web research 12 | - Tool outputs or data that informed decisions 13 | 14 | **Synthesize and clean up:** 15 | - Rambling or repetitive exchanges 16 | - Scattered thoughts into organized themes 17 | - AI responses (focus on key insights, not verbose explanations) 18 | - Technical details that can be summarized 19 | - Back-and-forth that reaches the same conclusion 20 | - Dead-ends 21 | 22 | This approach ensures valuable conversational insights are preserved in a structured, accessible format that serves multiple audiences and supports ongoing reflection and decision-making. 23 | 24 | 25 | ## File Naming and Organization 26 | 27 | `yyMMdd[letter]_description_in_normal_case.md` 28 | 29 | Use `npx tsx scripts/sequential-datetime-prefix.ts docs/conversations/` if available, otherwise use `date +"%y%m%d"` to get the current date for the prefix, then add description in lowercase words separated by underscores (except proper names/acronyms). 30 | 31 | Example: `250616a_research_instructions_improvement.md` 32 | 33 | Save to: `docs/conversations/` (or equivalent conversation documentation directory) 34 | 35 | 36 | ### Metadata 37 | 38 | Include frontmatter metadata at top of document, e.g.: 39 | 40 | ```markdown 41 | --- 42 | Date: [Conversation date, e.g. 2025-June-16 & timestamp] 43 | Type: [Decision-making, Exploratory, Problem-solving, Research Review] 44 | --- 45 | ``` 46 | 47 | 48 | ## Common Conversation Patterns 49 | 50 | ### Decision-Making Conversations 51 | Focus on: 52 | - What options were considered and why 53 | - What criteria drove the decision 54 | - What concerns or trade-offs were discussed 55 | - The final decision and rationale 56 | - Specific proposals and examples that influenced the decision 57 | 58 | ### Exploratory Conversations 59 | Focus on: 60 | - What questions or curiosities drove the discussion 61 | - What insights or patterns emerged 62 | - What new questions arose 63 | - What areas warrant further investigation 64 | 65 | ### Problem-Solving Conversations 66 | Focus on: 67 | - How the problem was defined and understood 68 | - What root causes were identified 69 | - What solutions were brainstormed 70 | - What approach was recommended and why 71 | 72 | ### Research Review Conversations 73 | Focus on: 74 | - What research question prompted the investigation 75 | - What key findings emerged 76 | - How findings were interpreted or applied 77 | - What gaps or follow-up research were identified 78 | - Specific examples, data points, or methodological insights that stood out 79 | 80 | ## References 81 | 82 | Provide comprehensive signposting/citations/references where applicable. Link forwards and backwards. 83 | 84 | -------------------------------------------------------------------------------- /src/gjdutils/regex.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import Iterable 3 | 4 | # Based on: 5 | # https://github.com/gregdetre/emacs-freex/blob/63523bf3b9032cc75b55ee28929dcdaf7714a419/freex_sqlalchemy.py#L653 6 | # https://github.com/gregdetre/emacs-freex/blob/63523bf3b9032cc75b55ee28929dcdaf7714a419/freex_sqlalchemy.py#L713 7 | # That code has faster versions for much bigger lists of aliases, but this is fine for now. 8 | 9 | 10 | def compile_regex_for_matching_aliases(aliases: Iterable[str]): 11 | """ 12 | Builds and compiles the implicit link regular expression. 
13 | """ 14 | # add .lower() here and elsewhere if you want case-insensitive 15 | aliases = [re.escape(a) for a in aliases] 16 | 17 | # ensure that 'jubba wubba' comes before 'jubba' 18 | aliases.sort(reverse=True) 19 | 20 | # build the regexp string to be single words 21 | # 22 | # it used to look like this, which worked nicely, unless 23 | # there was a carriage return 24 | alias_regex_str = "\\b" + "\\b|\\b".join(aliases) + "\\b" 25 | 26 | # we want to deal with the possibility that there are 0 27 | # or 1 spaces after a word, followed by 0 or 1 carriage 28 | # returns, followed by zero or more spaces, which is 29 | # what might happen if an implicit link was to span two 30 | # lines in an indented paragraph. that's what ' ?\n? *' does 31 | # 32 | # the \\b that gets 33 | # added later will ensure that there's a word boundary 34 | # of *some* kind. 35 | alias_regex_str = alias_regex_str.replace("\\ ", " ?\\\n? *") 36 | 37 | # for ages, it wasn't matching things like 'Smith & 38 | # Jones (2006)', because there was some problem with the 39 | # parentheses. i eventually realized that it matched the 40 | # first, but not the second parenthesis, because (i 41 | # think) the parenthesis was screwing with the \b (bare 42 | # word) separator. if you remove all the bare word 43 | # separators that follow closing parentheses, the sun 44 | # comes back out again 45 | alias_regex_str = alias_regex_str.replace(")\\b", ")") 46 | 47 | # compile the regexp 48 | # impLinkRegexp = re.compile(aliasRegexpStr, re.IGNORECASE|re.MULTILINE) 49 | compiled_regex = re.compile(alias_regex_str, re.MULTILINE) 50 | return compiled_regex 51 | 52 | 53 | def find_matchranges_for_aliases( 54 | compiled_regex_of_aliases: re.Pattern, txt_to_search: str 55 | ): 56 | """ 57 | Return a list of (beg,end) tuples for all the matching implicit 58 | links in the provided string. 59 | """ 60 | # get the start and endpoints for the matchranges 61 | matchranges = [ 62 | list(match.span()) 63 | for match in compiled_regex_of_aliases.finditer(txt_to_search) 64 | ] 65 | 66 | # return the matchranges 67 | return matchranges 68 | 69 | 70 | # txt = """There was a young European man called Friedrich Nietzsche, who most went by just "Nietzsche" (and but never 'Friedrich'). He was a friend of Little Hans and Little Richard but he was not little or Little.""" 71 | 72 | # aliasRegexpStr, impLinkRegexp = update_implicit_link_regexp_original(all_aliases) 73 | # match_ranges = get_all_matching_implicit_links_original(impLinkRegexp, txt) 74 | 75 | # for match_range in match_ranges: 76 | # match_txt = txt[match_range[0]:match_range[1]] 77 | # matched_name = name_from_alias[match_txt] 78 | # print('MATCHED: ', match_txt, ' <- NAME: ', matched_name, sep='') 79 | -------------------------------------------------------------------------------- /docs/instructions/CRITIQUE_OF_PLANNING_DOC.md: -------------------------------------------------------------------------------- 1 | # Critique of Planning Document 2 | 3 | Read this planning document, and all relevant code & documentation. 4 | 5 | Review progress, evaluate the approach taken so far, and what's proposed for the next stages. Evaluate the proposal thoroughly. Has it adequately characterised/understood the problem? Will its solution address it? Focus especially on things that seem to be causing problems, or that you think could become a problem in the future. Are there things that haven't been considered, potential problems/concerns, anything inconsistent or unclear? Is there a better way? 
In general try to help make sure that our plan is as good as it can be. 6 | 7 | Explore, investigate, search the web, ask for clarification, discuss, critique, make proposals. 8 | 9 | If the user has asked you to, update the planning document (e.g. with an Appendix "Critique of planning doc"). But don't make any changes to the code beyond that critique. 10 | 11 | ## Evaluation Framework 12 | 13 | ### Problem Understanding 14 | - Is the problem clearly defined and well-understood? 15 | - Are the root causes identified correctly? 16 | - Are we solving the right problem, or just symptoms? 17 | - What assumptions are being made, and are they valid? 18 | 19 | ### Solution Approach 20 | - Does the proposed solution adequately address the problem? 21 | - Are there simpler approaches that would achieve the same goals? 22 | - What are the trade-offs and have they been considered? 23 | - Are there alternative approaches worth exploring? 24 | 25 | ### Technical Implementation 26 | - Is the technical approach sound and well-architected? 27 | - Are there potential scalability, maintainability, or performance issues? 28 | - What technical risks or complexity hasn't been addressed? 29 | - Are the dependencies and integrations well-understood? 30 | 31 | ### Scope and Priorities 32 | - Is the scope appropriate for the problem size? 33 | - Are the priorities aligned with business/project value? 34 | - What could be deferred or simplified without significant impact? 35 | - Are there quick wins that could be prioritized? 36 | 37 | ### Risk Assessment 38 | - What could go wrong with this approach? 39 | - What external dependencies or unknowns exist? 40 | - How will success be measured? 41 | - What's the fallback plan if this approach doesn't work? 42 | 43 | ### Resource and Timeline 44 | - Are the time estimates realistic? 45 | - What skills or resources are needed that might not be available? 46 | - Are there blockers or dependencies that could delay progress? 47 | - What's the minimum viable version of this plan? 48 | 49 | ## Research and Investigation 50 | 51 | - Search for similar problems and solutions in the industry 52 | - Investigate relevant libraries, frameworks, or tools 53 | - Look for case studies or best practices 54 | - Check for recent developments that might change the approach 55 | 56 | ## Critique Output 57 | 58 | Structure your critique to be constructive and actionable: 59 | 60 | 1. **Strengths** - What's working well in the current plan 61 | 2. **Concerns** - Specific issues or risks you've identified 62 | 3. **Alternatives** - Different approaches worth considering 63 | 4. **Recommendations** - Specific changes or improvements to make 64 | 5. **Questions** - Areas that need clarification or further investigation 65 | 66 | Remember: The goal is to strengthen the plan, not just find problems. Focus on making the plan as robust and effective as possible. -------------------------------------------------------------------------------- /src/ts/README.md: -------------------------------------------------------------------------------- 1 | # gjdutils TypeScript Utilities 2 | 3 | A collection of general-purpose TypeScript utilities and CLI tools for development workflows. 4 | 5 | ## Installation 6 | 7 | ```bash 8 | cd gjdutils 9 | npm install 10 | npm run build 11 | ``` 12 | 13 | ## Available Tools 14 | 15 | ### CLI Utilities 16 | 17 | #### sequential-datetime-prefix 18 | Generate sequential datetime prefixes in yyMMdd[x]_ format for organizing files by date. 
19 | 20 | ```bash 21 | # Generate next available prefix for a folder 22 | npx tsx src/ts/cli/sequential-datetime-prefix.ts planning/ 23 | 24 | # Output: 241225a_ (if no files exist for today) 25 | # Output: 241225b_ (if 241225a_ already exists) 26 | ``` 27 | 28 | #### extract-llm-conversation 29 | Extract and format LLM conversations from JSON exports to structured markdown. 30 | 31 | ```bash 32 | # Extract single conversation 33 | npx tsx src/ts/cli/extract-llm-conversation.ts --uuid --input conversations.json 34 | 35 | # Extract multiple conversations 36 | npx tsx src/ts/cli/extract-llm-conversation.ts --uuid id1,id2,id3 --input conversations.json --output docs/ 37 | ``` 38 | 39 | ### Script Utilities 40 | 41 | #### count-lines 42 | Count lines of code with configurable exclusions. 43 | 44 | ```bash 45 | # Count all code 46 | npx tsx src/ts/scripts/count-lines.ts 47 | 48 | # Count by file 49 | npx tsx src/ts/scripts/count-lines.ts --by-file 50 | 51 | # Exclude tests 52 | npx tsx src/ts/scripts/count-lines.ts --exclude-tests 53 | ``` 54 | 55 | #### git-worktree-sync 56 | Synchronize Git worktree branches with main branch. 57 | 58 | ```bash 59 | # From feature branch: sync main → current 60 | npx tsx src/ts/scripts/git-worktree-sync.ts 61 | 62 | # From main: sync specific branch → main 63 | npx tsx src/ts/scripts/git-worktree-sync.ts --branch feature-branch 64 | 65 | # From main: sync all worktrees → main 66 | npx tsx src/ts/scripts/git-worktree-sync.ts 67 | ``` 68 | 69 | ### Critique Tools 70 | 71 | #### llm-critique-planning-docs 72 | Generate comprehensive codebase context and send to LLMs for planning document critique. 73 | 74 | ```bash 75 | # Critique with default model 76 | npx tsx src/ts/critique/llm-critique-planning-docs.ts planning/my-plan.md 77 | 78 | # Use specific model 79 | npx tsx src/ts/critique/llm-critique-planning-docs.ts --model anthropic:claude-3-opus:latest planning/my-plan.md 80 | 81 | # Include specific files 82 | npx tsx src/ts/critique/llm-critique-planning-docs.ts --files src/api.ts --files lib/db.ts planning/my-plan.md 83 | ``` 84 | 85 | #### parse-llm-output 86 | Parse LLM critique output and format it nicely. 87 | 88 | ```bash 89 | # Parse from file 90 | npx tsx src/ts/critique/parse-llm-output.ts critique-output.json 91 | 92 | # Parse from stdin 93 | cat critique-output.json | npx tsx src/ts/critique/parse-llm-output.ts 94 | ``` 95 | 96 | ## Development 97 | 98 | ```bash 99 | # Build TypeScript 100 | npm run build 101 | 102 | # Watch mode for development 103 | npm run watch 104 | 105 | # Clean build artifacts 106 | npm run clean 107 | ``` 108 | 109 | ## Contributing 110 | 111 | When adding new utilities: 112 | 1. Follow the existing patterns for CLI tools using Clipanion 113 | 2. Make tools configurable and general-purpose 114 | 3. Add comprehensive documentation and examples 115 | 4. Include type definitions for better IDE support 116 | 117 | ## License 118 | 119 | MIT License - see root LICENSE file for details. -------------------------------------------------------------------------------- /docs/WORKFLOW.md: -------------------------------------------------------------------------------- 1 | Some of these docs were written with Claude Code in mind, e.g. they reference `tasks` and `subagents`. 2 | 3 | But for the most part they should work fairly well in other contexts (e.g. Cursor). 4 | 5 | My workflow for starting a new epic is usually something like: 6 | 7 | - **Switch to best model** - I usually use Claude Opus 4 where I want the most brains, e.g. 
upfront thinking & planning (though I'm not certain it's really that much smarter than Sonnet) 8 | - Claude Code: `/model opus` 9 | - Cursor: Switch your model to Claude Sonnet 4 in the model selector 10 | 11 | - `We want to build X. Here's some background, desired features, concerns, etc. Be in @instructions/SOUNDING_BOARD_MODE.md` 12 | 13 | - Discuss. This step takes the longest, answering the model's questions, considering various options & tradeoffs, etc. 14 | 15 | - If there's a new software library or specialist topic involved, I might say `"Follow instructions in @instructions/WRITE_DEEP_DIVE_AS_DOC.md for topic X`. That way, I'll have a new `docs/SOFTWARE_LIBRARY_X.md` that we can continually refer back to, containing up-to-date snippets and best practices from the web. 16 | 17 | - `Create a new planning doc for this, following instructions in @instructions/WRITE_PLANNING_DOC.md`. Read that, check I'm happy with it, discuss/manually edit as needed. This is the key step. Because it has all the context from the deep dive and our conversation, the planning document is usually pretty rich. 18 | 19 | - I occasionally `Run @instructions/CRITIQUE_OF_PLANNING_DOC.md` in Cursor with o3, and then feed that critique back to Claude to see if it wants to update its plan. (In practice, I mostly just rely on Claude, and only rope in o3 if we're doing something really tricky, or if we get struck.) 20 | 21 | - **Clear context** - Clear the context window, adding a nice summary of what has been discussed before 22 | - Claude Code: `/compact` 23 | - Cursor: Start a new chat (there's no equivalent to `/compact` in Cursor, but fortunately you can just reference the planning doc) 24 | 25 | - **Switch to implementation model** - I might switch over to Sonnet if I think the implementation part is straightforward. (Even with the more expensive [Anthropic Max Plan](https://www.anthropic.com/news/max-plan), I hit the rate limits for Opus sometimes). 26 | - Claude Code: `/model sonnet` 27 | - Cursor: Switch your model to Claude Sonnet 4 (or Gemini 2.5 is great too) in the model selector 28 | 29 | - `Run @instructions/DO_PLANNING_DOC.md for [planning doc]`. Make a cup of tea. I have the Claude permissions mostly in YOLO mode, but it can't commit. The model will do a single stage (with lots of sub-actions), and then stop. 30 | 31 | - It'll pause at the end of the stage, often waiting for approval on a commit message. Read the summary, do some manual testing, perhaps also `Run @instructions/DEBRIEF_PROGRESS.md`. 32 | 33 | - **Continue iteration** - Clear context as above, then: 34 | - `Do next stage of planning doc, as per @instructions/DO_PLANNING_DOC.md` 35 | 36 | 37 | - **Housekeeping** - Every so often: 38 | 39 | - Run `@instructions/UPDATE_HOUSEKEEPING_DOCUMENTATION.md`. 40 | 41 | - Run `@instructions/UPDATE_CLAUDE_INSTRUCTIONS.md`. I think it's probably important that `CLAUDE.md` (or some equivalent Cursor rules) includes important stuff, e.g. a summary of `instructions/CODING_PRINCIPLES.md`, project-specific coding guidelines, and `reference/DOCUMENTATION_ORGANISATION.md`). Then the prompts can be very short, and you can trust that the agent will find the right bit of the code reliably and without wasting too much context. 
42 | -------------------------------------------------------------------------------- /src/gjdutils/indexing.py: -------------------------------------------------------------------------------- 1 | from decimal import Decimal, getcontext 2 | from typing import Optional 3 | 4 | """ 5 | For manual ordering and reording in a database: 6 | - every item gets a Decimal location (LOC) between 0 and 1 7 | - the LOCs of all items are sorted 8 | - the LOC of a new item is calculated as the average of the LOCs of the items before and after it 9 | - the LOC won't ever be 0 or 1, so there will always be a gap for you to insert afterwards 10 | - if you insert at the beginning or end, the LOC will be half of the first or last item's LOC 11 | 12 | I think Figma used this. 13 | """ 14 | 15 | # Set precision high enough to handle many divisions 16 | getcontext().prec = 28 17 | 18 | 19 | def locs_for(n: int) -> list[Decimal]: 20 | # Assign initial idx values with buffers at both ends 21 | locs = [] 22 | for i in range(n): 23 | loc = Decimal(i + 1) / Decimal(n + 1) 24 | locs.append(loc) 25 | assert len(locs) == n 26 | return locs 27 | 28 | 29 | def loc_for_insert_at(locs: list[Decimal], position: int, do_insert: bool = True): 30 | """ 31 | TODO: rewrite in terms of LOC_BETWEEN 32 | """ 33 | assert locs == sorted( 34 | locs 35 | ), f"Input LOCS are unsorted, so things are already broken - {locs}" 36 | list_length = len(locs) 37 | if position == 0: # Insert at the beginning 38 | newloc = locs[0] / 2 if list_length > 0 else Decimal("0.5") 39 | elif position >= list_length: # Insert at the end 40 | newloc = locs[-1] + (1 - locs[-1]) / 2 if list_length > 0 else Decimal("0.5") 41 | elif position < 0: 42 | raise Exception(f"Position must be non-negative, but got {position}") 43 | else: # Insert between two items 44 | newloc = (locs[position - 1] + locs[position]) / 2 45 | if do_insert: 46 | locs.insert(position, newloc) 47 | assert locs == sorted(locs), f"Somehow we've broken the LOCS sorting: {locs}" 48 | return newloc 49 | 50 | 51 | def loc_for_insert_at2(locs: list[Decimal], position: int, do_insert: bool = True): 52 | """ 53 | Functional version of LOC_FOR_INSERT_AT that returns a new list instead of 54 | modifying the input list. 55 | 56 | Uses LOC_BETWEEN. 
57 | """ 58 | assert locs == sorted( 59 | locs 60 | ), f"Input LOCS are unsorted, so things are already broken - {locs}" 61 | list_length = len(locs) 62 | if position < 0: 63 | raise Exception(f"Position must be non-negative, but got {position}") 64 | elif position == 0: # Insert at the beginning 65 | loc1 = None 66 | loc2 = locs[0] if list_length > 0 else None 67 | elif position >= list_length: # Insert at the end 68 | loc1 = locs[-1] if list_length > 0 else None 69 | loc2 = None 70 | else: # Insert between two items 71 | loc1 = locs[position - 1] 72 | loc2 = locs[position] 73 | newloc = loc_between(loc1, loc2) 74 | if do_insert: 75 | locs.insert(position, newloc) 76 | assert locs == sorted(locs), f"Somehow we've broken the LOCS sorting: {locs}" 77 | return newloc 78 | 79 | 80 | def loc_between(loc1: Optional[Decimal], loc2: Optional[Decimal]) -> Decimal: 81 | if loc1 is not None and loc2 is not None: 82 | assert ( 83 | loc1 >= 0 and loc2 <= 1 84 | ), f"LOCs must be between 0 and 1, but got {loc1} and {loc2}" 85 | return (loc1 + loc2) / 2 86 | elif loc1 is None and loc2 is not None: 87 | return loc2 / 2 88 | elif loc1 is not None and loc2 is None: 89 | return loc1 + (1 - loc1) / 2 90 | elif loc1 is None and loc2 is None: 91 | return Decimal("0.5") 92 | else: 93 | raise Exception(f"This should never happen: {loc1}, {loc2}") 94 | 95 | 96 | def disp(locs: list[Decimal]): 97 | print(", ".join(["%.3f" % loc for loc in locs])) 98 | -------------------------------------------------------------------------------- /tests/test_indexing.py: -------------------------------------------------------------------------------- 1 | from decimal import Decimal 2 | from typing import Optional 3 | 4 | from gjdutils.indexing import ( 5 | loc_between, 6 | loc_for_insert_at, 7 | loc_for_insert_at2, 8 | ) 9 | 10 | 11 | def test_loc_for_insert_at(): 12 | # Test inserting at the beginning 13 | locs = [Decimal("0.2"), Decimal("0.4"), Decimal("0.6")] 14 | position = 0 15 | loc = loc_for_insert_at(locs, position) 16 | assert loc == Decimal("0.1") 17 | assert locs == [Decimal("0.1"), Decimal("0.2"), Decimal("0.4"), Decimal("0.6")] 18 | 19 | # Test inserting at the end 20 | locs = [Decimal("0.2"), Decimal("0.4"), Decimal("0.6")] 21 | position = 3 22 | loc = loc_for_insert_at(locs, position) 23 | assert loc == Decimal("0.8") 24 | assert locs == [Decimal("0.2"), Decimal("0.4"), Decimal("0.6"), Decimal("0.8")] 25 | 26 | # Test inserting between two items 27 | locs = [Decimal("0.2"), Decimal("0.6")] 28 | position = 1 29 | loc = loc_for_insert_at(locs, position) 30 | assert loc == Decimal("0.4") 31 | assert locs == [Decimal("0.2"), Decimal("0.4"), Decimal("0.6")] 32 | 33 | # Test inserting into an empty list 34 | locs = [] 35 | position = 0 36 | loc = loc_for_insert_at(locs, position) 37 | assert loc == Decimal("0.5") 38 | assert locs == [Decimal("0.5")] 39 | 40 | # Test inserting into a list with one item 41 | locs = [Decimal("0.2")] 42 | position = 1 43 | loc = loc_for_insert_at(locs, position) 44 | assert loc == Decimal("0.6") 45 | assert locs == [Decimal("0.2"), Decimal("0.6")] 46 | 47 | 48 | def test_loc_for_insert_at2(): 49 | # Test inserting at the beginning 50 | locs = [Decimal("0.2"), Decimal("0.4"), Decimal("0.6")] 51 | position = 0 52 | locs_copy = locs.copy() 53 | locs_copy.sort() 54 | loc = loc_for_insert_at2(locs, position) 55 | assert loc == Decimal("0.1") 56 | assert locs == sorted(locs_copy + [Decimal("0.1")]) 57 | 58 | # Test inserting at the end 59 | locs = [Decimal("0.2"), Decimal("0.4"), Decimal("0.6")] 
60 | position = 3 61 | locs_copy = locs.copy() 62 | locs_copy.sort() 63 | loc = loc_for_insert_at2(locs, position) 64 | assert loc == Decimal("0.8") 65 | assert locs == sorted(locs_copy + [Decimal("0.8")]) 66 | 67 | # Test inserting between two items 68 | locs = [Decimal("0.2"), Decimal("0.6")] 69 | position = 1 70 | locs_copy = locs.copy() 71 | locs_copy.sort() 72 | loc = loc_for_insert_at2(locs, position) 73 | assert loc == Decimal("0.4") 74 | assert locs == sorted(locs_copy + [Decimal("0.4")]) 75 | 76 | # Test inserting into an empty list 77 | locs = [] 78 | position = 0 79 | locs_copy = locs.copy() 80 | locs_copy.sort() 81 | loc = loc_for_insert_at2(locs, position) 82 | assert loc == Decimal("0.5") 83 | assert locs == sorted(locs_copy + [Decimal("0.5")]) 84 | 85 | # Test inserting into a list with one item 86 | locs = [Decimal("0.2")] 87 | position = 1 88 | locs_copy = locs.copy() 89 | locs_copy.sort() 90 | loc = loc_for_insert_at2(locs, position) 91 | assert loc == Decimal("0.6") 92 | assert locs == sorted(locs_copy + [Decimal("0.6")]) 93 | 94 | 95 | def test_loc_between(): 96 | # Test when both loc1 and loc2 are not None 97 | loc1 = Decimal("0.2") 98 | loc2 = Decimal("0.6") 99 | result = loc_between(loc1, loc2) 100 | assert result == Decimal("0.4") 101 | 102 | # Test when loc1 is None and loc2 is not None 103 | loc1 = None 104 | loc2 = Decimal("0.6") 105 | result = loc_between(loc1, loc2) 106 | assert result == Decimal("0.3") 107 | 108 | # Test when loc1 is not None and loc2 is None 109 | loc1 = Decimal("0.2") 110 | loc2 = None 111 | result = loc_between(loc1, loc2) 112 | assert result == Decimal("0.6") 113 | 114 | # Test when both loc1 and loc2 are None 115 | loc1 = None 116 | loc2 = None 117 | result = loc_between(loc1, loc2) 118 | assert result == Decimal("0.5") 119 | -------------------------------------------------------------------------------- /src/gjdutils/webserver.py: -------------------------------------------------------------------------------- 1 | """Generic HTTP server utilities for serving static files. 2 | 3 | Provides a reusable HTTP server with features like: 4 | - Extensionless URL support (serves /foo as /foo.html) 5 | - Cache control options 6 | - Request logging control 7 | - Address reuse to avoid "address already in use" errors 8 | """ 9 | 10 | import http.server 11 | import socketserver 12 | import os 13 | from functools import partial 14 | from typing import Optional, Callable, Any 15 | 16 | 17 | class ReusableTCPServer(socketserver.TCPServer): 18 | """TCP server that allows address reuse to avoid 'Address already in use' errors.""" 19 | allow_reuse_address = True 20 | 21 | 22 | class CustomHTTPRequestHandler(http.server.SimpleHTTPRequestHandler): 23 | """Custom HTTP request handler with extensionless URL support and cache control. 
24 | 25 | Features: 26 | - Serves /foo as /foo.html if the HTML file exists 27 | - Optional cache disabling for development 28 | - Optional request logging control 29 | """ 30 | 31 | def __init__(self, *args, **kwargs): 32 | self.disable_cache = kwargs.pop("disable_cache", False) 33 | self.log_requests = kwargs.pop("log_requests", True) 34 | super().__init__(*args, **kwargs) 35 | 36 | def do_GET(self): 37 | """Handle GET requests with extensionless URL support.""" 38 | # Parse the requested path 39 | path = self.path 40 | # Check if the path has no extension 41 | if not os.path.splitext(path)[1]: 42 | # Append '.html' to the path 43 | new_path = f"{path}.html" 44 | # Construct the full file path 45 | full_path = os.path.join(self.directory, new_path.lstrip("/")) 46 | # Check if the .html file exists 47 | if os.path.exists(full_path): 48 | self.path = new_path # Update the path to the .html file 49 | # Call the superclass method to handle the request 50 | return super().do_GET() 51 | 52 | def end_headers(self): 53 | """Add cache control headers if cache is disabled.""" 54 | if self.disable_cache: 55 | # Disable caching by setting appropriate headers 56 | self.send_header( 57 | "Cache-Control", "no-store, no-cache, must-revalidate, proxy-revalidate" 58 | ) 59 | self.send_header("Pragma", "no-cache") 60 | self.send_header("Expires", "0") 61 | self.send_header("Surrogate-Control", "no-store") 62 | super().end_headers() 63 | 64 | def log_message(self, format: str, *args): 65 | """Override to control request logging.""" 66 | if not self.log_requests: 67 | return 68 | return super().log_message(format, *args) 69 | 70 | 71 | def start_server( 72 | host: str, 73 | port: int, 74 | directory: str, 75 | disable_cache: bool = False, 76 | quiet_requests: bool = False, 77 | handler_class: Optional[Callable[..., Any]] = None, 78 | ) -> None: 79 | """Start an HTTP server to serve static files. 80 | 81 | Args: 82 | host: Host interface to bind to (e.g., "127.0.0.1", "0.0.0.0") 83 | port: Port number to listen on 84 | directory: Directory to serve files from 85 | disable_cache: If True, send no-cache headers 86 | quiet_requests: If True, suppress per-request log messages 87 | handler_class: Optional custom handler class (defaults to CustomHTTPRequestHandler) 88 | 89 | Raises: 90 | ValueError: If directory doesn't exist or isn't a directory 91 | OSError: If server can't bind to the specified host/port 92 | """ 93 | # Fail fast on invalid directory 94 | if not os.path.isdir(directory): 95 | raise ValueError(f"Directory is missing or not a directory: {directory}") 96 | 97 | if handler_class is None: 98 | handler_class = CustomHTTPRequestHandler 99 | 100 | handler = partial( 101 | handler_class, 102 | directory=directory, 103 | disable_cache=disable_cache, 104 | log_requests=not quiet_requests, 105 | ) 106 | 107 | with ReusableTCPServer((host, port), handler) as httpd: 108 | print(f"Serving at http://{host}:{port}") 109 | try: 110 | httpd.serve_forever() 111 | except KeyboardInterrupt: 112 | print("Server stopped.") 113 | finally: 114 | httpd.server_close() -------------------------------------------------------------------------------- /src/gjdutils/ports.py: -------------------------------------------------------------------------------- 1 | """Port management utilities for network servers. 2 | 3 | Utilities for checking port availability, freeing occupied ports, 4 | and managing network server processes. 
5 | """ 6 | 7 | import os 8 | import shutil 9 | import signal 10 | import subprocess 11 | import time 12 | from typing import List 13 | 14 | 15 | def looks_like_addr_in_use(e: OSError) -> bool: 16 | """Detect EADDRINUSE across platforms using errno or message text. 17 | 18 | Args: 19 | e: The OSError exception to check 20 | 21 | Returns: 22 | True if the error indicates "address already in use" 23 | """ 24 | try: 25 | err_no = getattr(e, "errno", None) 26 | if isinstance(err_no, int) and err_no in {48, 98, 10048}: 27 | return True 28 | except Exception: 29 | pass 30 | msg = str(e).lower() 31 | return "address already in use" in msg or "errno 48" in msg or "errno 98" in msg 32 | 33 | 34 | def pids_listening_on_port(port: int) -> List[int]: 35 | """Return a list of PIDs that appear to be listening on the given TCP port. 36 | 37 | Prefers lsof; falls back to fuser if available. 38 | 39 | Args: 40 | port: The TCP port number to check 41 | 42 | Returns: 43 | List of process IDs listening on the port 44 | """ 45 | pids: List[int] = [] 46 | try: 47 | if shutil.which("lsof"): 48 | # Use LISTEN state to avoid client connections 49 | proc = subprocess.run( 50 | ["lsof", "-ti", f"tcp:{port}", "-sTCP:LISTEN"], 51 | capture_output=True, 52 | text=True, 53 | check=False, 54 | ) 55 | for line in proc.stdout.splitlines(): 56 | try: 57 | pid = int(line.strip()) 58 | if pid not in pids: 59 | pids.append(pid) 60 | except Exception: 61 | pass 62 | if not pids and shutil.which("fuser"): 63 | # Try Linux-style fuser 64 | proc = subprocess.run( 65 | ["fuser", "-n", "tcp", str(port)], 66 | capture_output=True, 67 | text=True, 68 | check=False, 69 | ) 70 | # Output may be like: "8000/tcp: 1234 5678" 71 | tokens = (proc.stdout or "").replace("/tcp:", " ").split() 72 | for tok in tokens: 73 | try: 74 | pid = int(tok) 75 | if pid not in pids: 76 | pids.append(pid) 77 | except Exception: 78 | pass 79 | except Exception: 80 | # Best-effort: ignore detection errors 81 | pass 82 | return pids 83 | 84 | 85 | def kill_pids(pids: List[int], verbose: int = 0) -> None: 86 | """Kill a list of process IDs, trying SIGTERM first, then SIGKILL. 87 | 88 | Args: 89 | pids: List of process IDs to terminate 90 | verbose: Verbosity level (0=quiet, 1+=show warnings) 91 | """ 92 | if not pids: 93 | return 94 | for sig in (signal.SIGTERM, signal.SIGKILL): 95 | for pid in list(pids): 96 | try: 97 | os.kill(pid, sig) 98 | except ProcessLookupError: 99 | # Already gone 100 | try: 101 | pids.remove(pid) 102 | except ValueError: 103 | pass 104 | except Exception: 105 | # Ignore permission or other errors 106 | pass 107 | # Brief wait and re-check 108 | time.sleep(0.2 if sig == signal.SIGTERM else 0.05) 109 | remaining = [] 110 | for pid in pids: 111 | try: 112 | os.kill(pid, 0) 113 | remaining.append(pid) 114 | except Exception: 115 | pass 116 | pids[:] = remaining 117 | if not pids: 118 | break 119 | if verbose >= 1 and pids: 120 | print(f"Warning: some processes may still be using the port: {pids}") 121 | 122 | 123 | def free_port_if_in_use(port: int, verbose: int = 0) -> None: 124 | """Free a port by killing any processes listening on it. 
125 | 126 | Args: 127 | port: The TCP port number to free 128 | verbose: Verbosity level (0=quiet, 1+=show what's being killed) 129 | """ 130 | pids = pids_listening_on_port(port) 131 | if pids: 132 | if verbose >= 1: 133 | print(f"Killing processes on port {port}: {pids}") 134 | kill_pids(pids, verbose) -------------------------------------------------------------------------------- /src/gjdutils/cli/pypi/check.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import typer 4 | from rich.console import Console 5 | from pathlib import Path 6 | import shutil 7 | 8 | from gjdutils.decorators import console_print_doc 9 | from gjdutils.shell import temp_venv 10 | from gjdutils.cmd import run_cmd 11 | from gjdutils.pypi_build import verify_installation, check_install_optional_features 12 | 13 | # Create the check subcommand group 14 | app = typer.Typer( 15 | help="Check package installation", 16 | add_completion=True, 17 | no_args_is_help=True, 18 | context_settings={"help_option_names": ["-h", "--help"]}, 19 | ) 20 | console = Console() 21 | 22 | 23 | @console_print_doc(color="yellow") 24 | def clean_build_dirs(): 25 | """Cleaning existing builds...""" 26 | # Command: rm -rf dist/ build/ 27 | shutil.rmtree("dist", ignore_errors=True) 28 | shutil.rmtree("build", ignore_errors=True) 29 | 30 | 31 | def build_package(): 32 | return run_cmd( 33 | f"python -m build", 34 | before_msg="Building package...", 35 | fatal_msg="Failed to build package", 36 | ) 37 | 38 | 39 | def install_and_test_locally(python_path: Path, wheel_file: Path): 40 | """Installing and testing package...""" 41 | # Command: pip install dist/*.whl 42 | run_cmd( 43 | f"{python_path} -m pip install {wheel_file}", 44 | before_msg="Installing package wheel file from local build...", 45 | fatal_msg="Failed to install package", 46 | ) 47 | 48 | # Install all optional dependencies first 49 | check_install_optional_features(python_path) 50 | 51 | # Command: pip install ".[dev]" 52 | run_cmd( 53 | f"{python_path} -m pip install '.[dev]'", 54 | before_msg="Installing dev dependencies...", 55 | fatal_msg="Failed to install dev dependencies", 56 | ) 57 | 58 | 59 | def run_test_suite(python_path: Path): 60 | return run_cmd( 61 | f"{python_path} -m pytest", 62 | before_msg="Running test suite...", 63 | fatal_msg="Test suite failed", 64 | ) 65 | 66 | 67 | @app.command(name="local") 68 | def check_local(): 69 | """Test package installation and functionality locally.""" 70 | console.rule("[yellow]Starting local package testing") 71 | 72 | clean_build_dirs() 73 | build_package() 74 | 75 | venv_path = Path("/tmp/test-gjdutils") 76 | with temp_venv(venv_path) as python_path: 77 | wheel_file = next(Path("dist").glob("*.whl")) 78 | install_and_test_locally(python_path, wheel_file) 79 | verify_installation(python_path) 80 | run_test_suite(python_path) 81 | 82 | console.print("\nLocal testing completed successfully!", style="green") 83 | 84 | 85 | def install_from_test_pypi(python_path: Path): 86 | # Command: pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ gjdutils 87 | run_cmd( 88 | f"{python_path} -m pip install --index-url https://test.pypi.org/simple/ " 89 | "--extra-index-url https://pypi.org/simple/ gjdutils", 90 | before_msg="Installing package from Test PyPI...", 91 | fatal_msg="Failed to install package from Test PyPI", 92 | ) 93 | 94 | # Install all optional dependencies 95 | check_install_optional_features(python_path, 
from_test_pypi=True) 96 | 97 | 98 | @app.command(name="test") 99 | def check_test(): 100 | """Test package installation from Test PyPI.""" 101 | console.rule("[yellow]Starting Test PyPI package testing") 102 | 103 | venv_path = Path("/tmp/test-gjdutils-pypi") 104 | with temp_venv(venv_path) as python_path: 105 | install_from_test_pypi(python_path) 106 | verify_installation(python_path) 107 | 108 | console.print("\nTest PyPI testing completed successfully!", style="green") 109 | 110 | 111 | def install_from_pypiprod(python_path: Path): 112 | # Command: pip install gjdutils 113 | run_cmd( 114 | f"{python_path} -m pip install gjdutils", 115 | before_msg="Installing package from PyPI prod...", 116 | fatal_msg="Failed to install package from PyPI prod", 117 | ) 118 | 119 | # Install all optional dependencies 120 | check_install_optional_features(python_path, from_test_pypi=False) 121 | 122 | 123 | @app.command(name="prod") 124 | def check_prod(): 125 | """Test package installation from Production PyPI.""" 126 | console.rule("[yellow]Starting Production PyPI package testing") 127 | 128 | venv_path = Path("/tmp/prod-gjdutils-pypi") 129 | with temp_venv(venv_path) as python_path: 130 | install_from_pypiprod(python_path) 131 | verify_installation(python_path) 132 | 133 | console.print("\nProduction PyPI testing completed successfully!", style="green") 134 | -------------------------------------------------------------------------------- /src/gjdutils/pypi_build.py: -------------------------------------------------------------------------------- 1 | """Shared utilities for PyPI package building and testing.""" 2 | 3 | from pathlib import Path 4 | import urllib.request 5 | import urllib.error 6 | import shutil 7 | import tomllib 8 | from typing import Literal 9 | from rich.console import Console 10 | from rich.progress import track 11 | from packaging.version import Version 12 | from importlib.metadata import metadata 13 | 14 | from gjdutils.cmd import run_cmd 15 | from gjdutils import __version__ 16 | 17 | console = Console() 18 | 19 | 20 | def verify_installation(python_path: Path): 21 | # Command: python -c "import gjdutils; print(gjdutils.__version__)" 22 | retcode, installed_version, extra = run_cmd( 23 | f'{python_path} -c "import gjdutils; print(gjdutils.__version__)"', 24 | before_msg="Verify package installation by importing and checking version...", 25 | fatal_msg="Failed to import gjdutils", 26 | ) 27 | expected_version = __version__ 28 | assert ( 29 | installed_version == expected_version 30 | ), f"Installed version {installed_version} does not match expected version {expected_version}" 31 | console.print(f"gjdutils version: {installed_version}") 32 | return installed_version 33 | 34 | 35 | # Type for PyPI environment 36 | PyPIEnv = Literal["test", "prod"] 37 | 38 | 39 | def check_install_optional_features(python_path: Path, *, from_test_pypi: bool = False): 40 | """Test installation of optional feature sets.""" 41 | # Get optional dependency groups from package metadata 42 | pkg_metadata = metadata("gjdutils") 43 | # Parse the provides-extra field to get optional dependency groups 44 | # get_all() returns None if the field doesn't exist 45 | extra_features = pkg_metadata.get_all("Provides-Extra") or [] 46 | features = [group for group in extra_features if group not in ["dev", "all_no_dev"]] 47 | 48 | for feature in track(features, description="Installing features"): 49 | console.print(f"\nTesting feature set: {feature}", style="yellow") 50 | if from_test_pypi: 51 | cmd = f"{python_path} 
-m pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ gjdutils[{feature}]" 52 | else: 53 | cmd = f"{python_path} -m pip install '.[{feature}]'" 54 | run_cmd( 55 | cmd, 56 | before_msg=f"Installing feature set: {feature}...", 57 | fatal_msg=f"Failed to install {feature} feature", 58 | ) 59 | console.print(f"[green]Successfully installed {feature} feature[/green]") 60 | 61 | 62 | def check_version_exists(version: Version, pypi_env: PyPIEnv) -> bool: 63 | """Check if version already exists on specified PyPI environment. 64 | 65 | Args: 66 | version: Version string to check (must be valid semantic version) 67 | pypi_env: PyPI environment to check ("test" or "prod") 68 | 69 | Raises: 70 | TypeError: If version is not a packaging.version.Version instance 71 | """ 72 | if not isinstance(version, Version): 73 | raise TypeError( 74 | f"version must be a packaging.version.Version instance, got {type(version)}" 75 | ) 76 | 77 | base_url = { 78 | "test": "https://test.pypi.org", 79 | "prod": "https://pypi.org", 80 | }[pypi_env] 81 | try: 82 | url = f"{base_url}/pypi/gjdutils/{str(version)}/json" 83 | urllib.request.urlopen(url) 84 | return True 85 | except urllib.error.HTTPError as e: 86 | if e.code == 404: 87 | return False 88 | raise # Re-raise other HTTP errors 89 | 90 | 91 | def clean_build_dirs(): 92 | """Clean build directories (dist/ and build/).""" 93 | # Command: rm -rf dist/ build/ 94 | shutil.rmtree("dist", ignore_errors=True) 95 | shutil.rmtree("build", ignore_errors=True) 96 | 97 | 98 | def build_package(): 99 | """Build package with python -m build.""" 100 | return run_cmd( 101 | "python -m build", 102 | before_msg="Building package...", 103 | fatal_msg="Failed to build package", 104 | ) 105 | 106 | 107 | def upload_to_pypi(pypi_env: PyPIEnv): 108 | """Upload package to specified PyPI environment. 109 | 110 | Args: 111 | pypi_env: PyPI environment to upload to ("test" or "prod") 112 | """ 113 | if pypi_env == "test": 114 | cmd = "twine upload -r testpypi dist/*" 115 | elif pypi_env == "prod": 116 | cmd = "twine upload dist/*" 117 | else: 118 | raise ValueError(f"Invalid PyPI environment: {pypi_env}") 119 | 120 | return run_cmd( 121 | cmd, 122 | before_msg=f"Uploading package to {pypi_env} PyPI...", 123 | fatal_msg=f"Failed to upload to {pypi_env} PyPI", 124 | ) 125 | -------------------------------------------------------------------------------- /src/gjdutils/files.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | from typing import Sequence 4 | 5 | from .cmd import run_cmd 6 | 7 | # keep this, because it makes sense for the user to be able to import from here 8 | from .strings import is_string, PathOrStr 9 | 10 | 11 | def split_filen(filen: Path | str): 12 | """ 13 | Splits a filename into its path, stem, and extension (without dot), e.g. 14 | 15 | split_filen('data/blah.mp4') -> ('data', 'blah', 'mp4') 16 | """ 17 | filen = Path(filen) 18 | return filen.parent, filen.stem, filen.suffix[1:] if filen.suffix else "" 19 | 20 | 21 | def create_dir_if_not_exists(dirn: str): 22 | if not os.path.exists(dirn): 23 | os.makedirs(dirn) 24 | 25 | 26 | def validate_ext(ext): 27 | assert is_string(ext) 28 | assert ext.lower() == ext 29 | assert ext 30 | assert ext[0] != "." 
31 | 
32 | 
33 | def validate_dir(dirn):
34 |     dirn_path = Path(dirn)
35 |     assert dirn_path.exists() and dirn_path.is_dir()
36 |     return dirn_path
37 | 
38 | 
39 | def fulltext(
40 |     filens: Sequence[str],
41 |     patterns: list[str],
42 |     dirn: str,
43 |     file_ext: str,
44 |     case_sensitive=False,
45 | ):
46 |     """
47 |     Returns: FOUND_FILES (list of filename strings)
48 | 
49 |     Feed in a list of filenames (complete with extensions),
50 |     which will be fed to agrep for full-text
51 |     searching. Returns a list of files.
52 | 
53 |     FILENS is a list of strings. If it's non-empty, then
54 |     these will be fed in to agrep. If it's empty, then we'll
55 |     just feed in a '*.[freex_extension]'. Spaces in
56 |     filenames are escaped with backslashes, but this is the
57 |     only thing we're escaping.
58 | 
59 |     PATTERNS is a list of strings, which will be ANDed
60 |     together in the agrep regex. Currently, this doesn't
61 |     escape the pattern regex at all, though it does surround it in
62 |     quotes, so the usual agrep rules apply.
63 | 
64 |     Unless case_sensitive==True, will append a -i flag.
65 |     """
66 |     # from freex_sqlalchemy.py
67 | 
68 |     # xxx this should check that all the files have extensions
69 | 
70 |     if case_sensitive:
71 |         case_flag = ""
72 |     else:
73 |         case_flag = "-i"
74 | 
75 |     # xxx should check that all the items in the pattern
76 |     # list are strings...
77 |     #
78 |     # first strip each of the pattern strings of whitespace,
79 |     # and remove the surrounding quotes - we'll add them
80 |     # back to the whole pattern_str when we create the CMD
81 |     #
82 |     # then AND together multiple patterns with agrep,
83 |     # using semicolons
84 |     cleaned_patterns = []
85 |     for pat in patterns:
86 |         pat = pat.strip()
87 |         pat = pat.removeprefix('"').removesuffix('"')
88 |         cleaned_patterns.append(pat)
89 | 
90 |     pattern_str = ";".join(cleaned_patterns)
91 | 
92 |     if len(filens) > 0:
93 |         # escape all the spaces with back-slashes
94 |         filens = [x.replace(" ", "\\ ") for x in filens]
95 | 
96 |         # convert to a space-delimited string (with spaces
97 |         # escaped by backslashes), and each file prepended by the
98 |         # database_dir, e.g.
99 | # /blah/test0.freex /blah/hello\ world.freex 100 | fnames_str = " ".join([os.path.join(dirn, filen) for filen in filens]) 101 | 102 | # the -l says to just return filenames only (no text 103 | # context) 104 | # 105 | # put the pattern in quotes 106 | # 107 | # and then just list the files at the end 108 | cmd = 'agrep -l %s "%s" %s' % (case_flag, pattern_str, fnames_str) 109 | 110 | else: 111 | # if we're not restricting the files we're looking 112 | # through, then there could be too many files to run 113 | # agrep on directly, so we have to pipe it from a 114 | # find 115 | # 116 | # this is to avoid the '/usr/local/bin/agrep: 117 | # Argument list too long' error 118 | cmd = 'find %s -name "*.%s" -print0 | xargs -0 agrep -l %s "%s"' % ( 119 | dirn, 120 | file_ext, 121 | case_flag, 122 | pattern_str, 123 | ) 124 | 125 | # Run command with minimal output unless there's an error 126 | retcode, out_str, _ = run_cmd( 127 | cmd, 128 | verbose=0, 129 | check=False, # Don't raise exception if no matches found (agrep returns 1) 130 | ) 131 | 132 | if len(out_str) > 0: 133 | # strip away the path to yield just the filename for 134 | # each of the files in out_str 135 | found_files = [os.path.basename(x) for x in out_str.strip().split("\n")] 136 | else: 137 | # if you run the above on an empty string, you get 138 | # [''], whereas we really want to return an empty 139 | # list if we didn't find anything 140 | found_files = [] 141 | 142 | return found_files 143 | -------------------------------------------------------------------------------- /src/gjdutils/llm_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Literal, Optional, Union, TYPE_CHECKING 2 | from pathlib import Path 3 | import json 4 | 5 | from gjdutils.llms_claude import call_claude_gpt 6 | from gjdutils.strings import jinja_render 7 | 8 | if TYPE_CHECKING: # for type hints only; avoids runtime imports 9 | from anthropic import Anthropic 10 | from openai import OpenAI 11 | 12 | 13 | MODEL_TYPE = Literal["openai", "claude"] 14 | 15 | 16 | def extract_json_from_markdown(text: str, verbose: int = 0) -> str: 17 | """ 18 | Extracts JSON content from text that may be wrapped in markdown code blocks. 
19 | 
20 |     Args:
21 |         text: The text that may contain JSON, possibly within markdown code blocks
22 |         verbose: Verbosity level (>= 2 prints debug information)
23 | 
24 |     Returns:
25 |         A string containing just the JSON content (still as a string, not parsed)
26 |     """
27 |     # If it's already valid JSON, return as is
28 |     try:
29 |         json.loads(text)
30 |         if verbose >= 2:
31 |             print("Input is already valid JSON")
32 |         return text
33 |     except json.JSONDecodeError:
34 |         # Not valid JSON, may be wrapped in markdown
35 |         pass
36 | 
37 |     # Handle JSON wrapped in markdown code blocks
38 |     if text.strip().startswith("```") and "```" in text:
39 |         if verbose >= 2:
40 |             print("Detected markdown code block")
41 | 
42 |         # Extract content between backticks
43 |         parts = text.split("```", 2)
44 |         if len(parts) >= 2:
45 |             extracted = parts[1]  # Get the middle part
46 | 
47 |             # Strip whitespace first, then remove the language identifier if present
48 |             extracted = extracted.strip()
49 |             if extracted.startswith("json"):
50 |                 extracted = extracted[4:].strip()
51 | 
52 |             # If there are closing backticks, remove everything from them onwards
53 |             if "```" in extracted:
54 |                 extracted = extracted.split("```", 1)[0].strip()
55 | 
56 |             if verbose >= 2:
57 |                 print(f"Extracted content from markdown: {extracted[:50]}...")
58 | 
59 |             return extracted
60 | 
61 |     # If we got here, we couldn't extract JSON from markdown
62 |     return text
63 | 
64 | 
65 | def generate_gpt_from_template(
66 |     client: "Anthropic | OpenAI",  # type: ignore[name-defined]
67 |     prompt_template: Union[str, Path],
68 |     context_d: dict,
69 |     response_json: bool,
70 |     image_filens: list[str] | str | None = None,
71 |     model_type: MODEL_TYPE = "claude",
72 |     max_tokens: Optional[int] = None,
73 |     verbose: int = 0,
74 | ) -> tuple[str | dict[str, Any], dict[str, Any]]:
75 |     """Generate a response from GPT using a template.
77 | 
78 |     Args:
79 |         client: The Anthropic or OpenAI client
80 |         prompt_template: Either a template string or Path to a template file
81 |         context_d: Dictionary of variables to render in the template
82 |         response_json: Whether to parse the response as JSON
83 |         image_filens: Optional paths to image files to include
84 |         model_type: Which model type to use ("openai" or "claude")
85 |         max_tokens: Maximum tokens in the response
86 |         verbose: Verbosity level
87 |     """
88 |     # Load template content from Path or use string directly
89 |     if isinstance(prompt_template, Path):
90 |         with open(prompt_template, "r") as f:
91 |             template_content = f.read()
92 |         template_name = prompt_template.stem
93 |     else:
94 |         template_content = prompt_template
95 |         template_name = "template from input string"
96 | 
97 |     prompt = jinja_render(template_content, context_d)
98 |     if model_type == "openai":
99 |         # Lazy import to avoid requiring OpenAI when only using Anthropic
100 |         from gjdutils.llms_openai import call_openai_gpt
101 | 
102 |         out, _, extra = call_openai_gpt(
103 |             prompt,
104 |             client=client,
105 |             image_filens=image_filens,
106 |             response_json=response_json,
107 |             max_tokens=max_tokens,
108 |         )
109 |     else:
110 |         out, extra = call_claude_gpt(
111 |             prompt,
112 |             client=client,
113 |             image_filens=image_filens,
114 |             response_json=response_json,
115 |             max_tokens=max_tokens if max_tokens is not None else 4096,
116 |         )
117 |     if verbose >= 2:
118 |         print(f"{out=} {max_tokens=}")  # debug output, gated behind verbose
119 |     if response_json:
120 |         assert isinstance(out, dict), f"Expected dict, got {type(out)}"
121 |     else:
122 |         assert isinstance(out, str), f"Expected str, got {type(out)}"
123 |     if verbose >= 1:
124 |         print(f"Called GPT on '{template_name}', context keys {list(context_d.keys())}")
125 |     extra.update(
126 |         {
127 |             "model_type": model_type,
128 |             "prompt_template": template_name,
129 |             "prompt_context_d": context_d,
130 |         }
131 |     )
132 |     return out, extra  # type: ignore
133 | 
-------------------------------------------------------------------------------- /docs/instructions/GENERATE_MERMAID_DIAGRAM.md: --------------------------------------------------------------------------------
1 | # Generate Mermaid Diagram Images
2 | 
3 | Quick instructions for creating and updating Mermaid diagrams in your project.
4 | 
5 | ## Setup
6 | 
7 | Install Mermaid CLI if not already available:
8 | ```bash
9 | npm install -g @mermaid-js/mermaid-cli
10 | ```
11 | 
12 | For detailed configuration options, see the [Mermaid CLI documentation](https://github.com/mermaid-js/mermaid-cli).
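To confirm the install works end to end, a quick smoke test can help (an untested sketch; the file and output paths are arbitrary examples):

```bash
# Render a trivial diagram to verify mmdc is wired up correctly
printf 'flowchart LR\n  A[Start] --> B[Done]\n' > /tmp/smoke_test.mermaid
npx mmdc -i /tmp/smoke_test.mermaid -o /tmp/smoke_test.svg
open /tmp/smoke_test.svg  # or xdg-open on Linux
```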
13 | 
14 | ## File Organization
15 | 
16 | - **Source files**: Store `.mermaid` files in `docs/diagrams/` (or your preferred documentation directory)
17 | - **Generated files**: Save SVG/PNG outputs in the same directory
18 | 
19 | ## File Naming
20 | 
21 | Mermaid diagrams should follow this naming format: `yyMMdd[letter]_description_in_normal_case.mermaid`
22 | 
23 | - Generate date prefix with: `date +%y%m%d` (add letter suffix manually if multiple diagrams per day)
24 | - **Alternative**: Use `npx tsx src/ts/cli/sequential-datetime-prefix.ts` for automated sequential prefixes
25 | - Description: lowercase words separated by underscores (except proper names/acronyms)
26 | - Examples:
27 |   - `250701a_flow_iterative_heading_generation.mermaid`
28 |   - `250701b_architecture_glossary_complete.mermaid`
29 |   - `250701c_diagram_tools_flow_ToC.mermaid`
30 | 
31 | ## Generation Commands
32 | 
33 | **Prefer SVG format** (scalable, web-friendly):
34 | ```bash
35 | npx mmdc -i docs/diagrams/FILENAME.mermaid -o docs/diagrams/FILENAME.svg -w 1400 -H 1600 -s 2 -b transparent -t default
36 | ```
37 | 
38 | **PNG only when specifically requested**:
39 | ```bash
40 | npx mmdc -i docs/diagrams/FILENAME.mermaid -o docs/diagrams/FILENAME.png -w 1400 -H 1600 -s 2 -b transparent -t default
41 | ```
42 | 
43 | ## Best Practices
44 | 
45 | ### Always Regenerate
46 | **Automatically regenerate SVG whenever you update a `.mermaid` file** - keep diagrams in sync with source.
47 | 
48 | ### Simplify Linear Flows
49 | If you have a stack of sequential boxes with no branches, **collapse into a single box** with steps as bullet points:
50 | 
51 | ❌ **Avoid this**:
52 | ```mermaid
53 | A[Step 1] --> B[Step 2] --> C[Step 3] --> D[Step 4]
54 | ```
55 | 
56 | ✅ **Prefer this**:
57 | ```mermaid
58 | Process[Process Flow:<br/>• Step 1<br/>• Step 2<br/>• Step 3<br/>• Step 4]
59 | ```
60 | 
61 | ### Syntax Tips
62 | - **Avoid special characters** in node labels (quotes, parentheses)
63 | - **Use emojis** for visual clarity
64 | - **Keep labels concise** - detailed descriptions go in documentation
65 | - **Test syntax** before generating images
66 | 
67 | 
68 | ### Comments
69 | 
70 | Include a detailed comment in the `.mermaid` file with a prompt to describe/reproduce this diagram for future reference, including references to relevant files/functions and any other details/intent mentioned by the user. If asked to update the diagram, update the prompt-comment accordingly.
71 | 
72 | 
73 | ### Spacing
74 | 
75 | Reduce spacing so that more information fits on the screen.
76 | 
77 | 
78 | ### Icons
79 | 
80 | Include icons sparingly, e.g. for different systems, actions, components.
81 | 
82 | 
83 | ### Fonts
84 | 
85 | Use `courier` (or other monospaced font) for API endpoints, urls, function names, variables, etc.
86 | 
87 | Use `italic` or `bold` in other ways to aid comprehension.
88 | 
89 | 
90 | ### Use colour
91 | 
92 | Use colour appropriately to distinguish major components.
93 | 
94 | For example, you might give different colours to different systems, e.g.
95 | - purple for user
96 | - brown for browser
97 | - orange for mobile/device
98 | - blue for backend
99 | - yellow for database
100 | - purple for cloud storage (including cloud storage services, S3, etc)
101 | - green for payments
102 | 
103 | Perhaps use one colour scheme for the outer boxes that group things (e.g. for systems), and another colour scheme for inner boxes based on a different typology. Use your judgment.
104 | 
105 | 
106 | ### Shapes, lines, arrows, etc
107 | 
108 | For database relationships, use database UML-style lines/arrows/shapes.
109 | 
110 | Likewise, if there's a domain-standard visual-notational scheme that's relevant for (part of) the diagram, use it there.
111 | 
112 | 
113 | ## Quick Workflow
114 | 
115 | 1. Generate filename prefix: `npx tsx src/ts/cli/sequential-datetime-prefix.ts docs/diagrams/` if available, otherwise `date +%y%m%d` (add letter suffix if needed)
116 | 2. Create/edit `.mermaid` file with the generated prefix in `docs/diagrams/`
117 | 3. Generate SVG: `npx mmdc -i docs/diagrams/FILENAME.mermaid -o docs/diagrams/FILENAME.svg -w 1400 -H 1600 -s 2 -b transparent`
118 | 4. Open the SVG in default app: `open docs/diagrams/FILENAME.svg` (or `xdg-open` on Linux)
119 | 5. Output the generated filename
120 | 
121 | ### Example Workflow
122 | 
123 | ```bash
124 | # Get the next sequential prefix (manual approach)
125 | date +%y%m%d
126 | # Output: 250701 (add letter suffix manually: 250701a, 250701b, etc.)
127 | 128 | # OR get automated sequential prefix 129 | npx tsx src/ts/cli/sequential-datetime-prefix.ts 130 | # Output: 250701a (automatically finds next available letter) 131 | 132 | # Create the diagram file 133 | # Create: docs/diagrams/250701d_tool_execution_flow.mermaid 134 | 135 | # Generate the SVG 136 | npx mmdc -i docs/diagrams/250701d_tool_execution_flow.mermaid -o docs/diagrams/250701d_tool_execution_flow.svg -w 1400 -H 1600 -s 2 -b transparent 137 | 138 | # Open to verify 139 | open docs/diagrams/250701d_tool_execution_flow.svg 140 | ``` 141 | -------------------------------------------------------------------------------- /docs/reference/SD_STRING_DISPLACEMENT_FIND_REPLACE.md: -------------------------------------------------------------------------------- 1 | # sd Find-Replace Tool ✓ 2 | 3 | sd is a modern, intuitive find-replace CLI tool designed for safe codebase refactoring, with excellent dry-run capabilities and literal string handling perfect for LLM automation. 4 | 5 | **Important**: sd uses **regex mode by default**. Use `--string-mode` (or `-s`) for literal string matching to avoid issues with special characters in file paths. 6 | 7 | ## See Also 8 | 9 | - Official documentation: [sd GitHub](https://github.com/chmln/sd) 10 | - Installation: Available via Rust cargo, Homebrew (macOS), or package managers 11 | 12 | ## Key Benefits 13 | 14 | - **Safety-first**: True dry-run preview mode with `--preview` 15 | - **LLM-friendly**: Literal string mode eliminates regex escaping issues 16 | - **Modern design**: Clean, intuitive syntax focused on find-replace only 17 | - **Simple syntax**: Readable command structure with clear options 18 | - **Path-friendly**: Handles file paths with special characters seamlessly 19 | 20 | ## Installation 21 | 22 | ```bash 23 | # Rust (cross-platform) 24 | cargo install sd 25 | 26 | # macOS (Homebrew) 27 | brew install sd 28 | 29 | # Ubuntu/Debian 30 | sudo apt install sd 31 | ``` 32 | 33 | ## Usage Patterns 34 | 35 | ### Dry-Run Mode (Recommended for LLMs) 36 | 37 | ```bash 38 | # Preview changes without modifying files 39 | sd --preview "old-string" "new-string" file.txt 40 | 41 | # Preview with literal strings (no regex) 42 | sd --preview --string-mode "app/[slug]/page.tsx" "app/[id]/page.tsx" **/*.tsx 43 | 44 | # Short form 45 | sd -ps "old-string" "new-string" . 46 | ``` 47 | 48 | ### Apply Changes Mode 49 | 50 | ```bash 51 | # Apply changes after preview looks good 52 | sd --string-mode "old-string" "new-string" file.txt 53 | 54 | # Recursive across directories 55 | sd -s "old/path" "new/path" **/*.md 56 | ``` 57 | 58 | ## Common Use Cases 59 | 60 | ### Path Updates (File Paths with Special Characters) 61 | ```bash 62 | # Preview import path changes (dry-run) 63 | sd --preview --string-mode "from '@/old/path'" "from '@/new/path'" **/*.{ts,tsx} 64 | 65 | # Apply after preview 66 | sd -s "from '@/old/path'" "from '@/new/path'" **/*.{ts,tsx} 67 | 68 | # Update file references in documentation 69 | sd -ps "/old/location/" "/new/location/" **/*.md 70 | ``` 71 | 72 | ### String Replacements 73 | ```bash 74 | # Preview function name changes 75 | sd --preview "oldFunctionName" "newFunctionName" **/*.{ts,tsx,js,jsx} 76 | 77 | # Update configuration values (literal strings) 78 | sd -ps "OLD_CONFIG_VALUE" "NEW_CONFIG_VALUE" . 
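# Hypothetical end-to-end pairing (illustrative; the config path is invented):
# preview first, then apply the same literal replacement once it looks right
sd --preview --string-mode "OLD_CONFIG_VALUE" "NEW_CONFIG_VALUE" config/settings.json
sd --string-mode "OLD_CONFIG_VALUE" "NEW_CONFIG_VALUE" config/settings.json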
79 | ``` 80 | 81 | ### Documentation Updates 82 | ```bash 83 | # Preview cross-reference updates 84 | sd --preview --string-mode "old-doc-name.md" "new-doc-name.md" **/*.md 85 | ``` 86 | 87 | ## Safety Best Practices 88 | 89 | 1. **Always preview first**: Use `--preview` (or `-p`) to see exactly what will change 90 | 2. **Use literal strings**: Use `--string-mode` (or `-s`) for file paths and exact matches 91 | 3. **Test scope first**: Start with single files, then expand to directories 92 | 4. **Use version control**: Commit before running large refactors 93 | 5. **Verify with build**: Run build and tests after changes 94 | 95 | ## Advanced Options 96 | 97 | ```bash 98 | # Case-insensitive matching 99 | sd --ignore-case "CamelCase" "snake_case" . 100 | 101 | # Preview multiline replacements with literal strings 102 | sd --preview --string-mode --multiline "old\nline" "new\nline" file.txt 103 | 104 | # Include hidden files and directories 105 | sd --hidden ".oldValue" ".newValue" . 106 | 107 | # Use shell globbing for file selection 108 | sd -ps "oldKey" "newKey" **/*.json 109 | ``` 110 | 111 | ## Troubleshooting 112 | 113 | **No matches found**: Check file paths and use preview mode 114 | ```bash 115 | # Debug: preview what would be matched 116 | sd --preview "search-term" "replacement" **/*.md 117 | ``` 118 | 119 | **Special character issues**: Use literal string mode for paths 120 | ```bash 121 | # Problematic (regex metacharacters like [, ], ., *, etc.) 122 | sd "app/[slug]/page.tsx" "app/[id]/page.tsx" . 123 | 124 | # Safe (literal strings) 125 | sd --string-mode "app/[slug]/page.tsx" "app/[id]/page.tsx" . 126 | ``` 127 | 128 | **Permission errors**: Ensure files are writable and not locked by editors 129 | 130 | ## Integration with Development Workflow 131 | 132 | sd integrates seamlessly with codebase development: 133 | 134 | - **File moves**: Update all references after moving files (use `--string-mode`) 135 | - **Refactoring**: Rename functions, variables, and imports across the codebase 136 | - **Documentation**: Update cross-references when files are renamed (literal strings) 137 | - **Configuration**: Update environment variables and configuration keys 138 | - **LLM automation**: Perfect for programmatic find-replace with predictable behavior 139 | 140 | ## Key Options Summary 141 | 142 | - `--preview` (`-p`): Show changes without applying them (dry-run) 143 | - `--string-mode` (`-s`): Treat search/replace as literal strings (no regex) 144 | - `--ignore-case` (`-i`): Case-insensitive matching 145 | - `--multiline` (`-m`): Enable multiline matching 146 | - `--hidden`: Include hidden files and directories 147 | 148 | ## Perfect for LLM Usage 149 | 150 | - **Predictable**: Literal string mode eliminates regex escaping surprises 151 | - **Safe**: Dry-run mode shows exactly what will change 152 | - **Simple**: Clean syntax that's easy to generate programmatically 153 | - **Path-friendly**: Handles file paths with brackets, dots, and other special characters -------------------------------------------------------------------------------- /src/gjdutils/cmd.py: -------------------------------------------------------------------------------- 1 | from rich.console import Console 2 | import subprocess 3 | import sys 4 | import time 5 | from typing import Union, Optional, Dict 6 | from pathlib import Path 7 | 8 | from gjdutils.shell import fatal_error_msg 9 | 10 | 11 | def run_cmd( 12 | cmd: Union[str, list[str]], 13 | before_msg: Optional[str] = None, 14 | fatal_msg: Optional[str] = None, 
15 |     verbose: int = 2,
16 |     replace_sys_python_executable: bool = True,
17 |     dry_run: bool = False,
18 |     **subprocess_kwargs,
19 | ) -> tuple[int, str, Dict]:
20 |     """Run a shell command with enhanced output and error handling.
21 | 
22 |     Args:
23 |         cmd: Command to run as string (shell=True) or list of strings (shell=False)
24 |         before_msg: Optional message to display before running command (green)
25 |         fatal_msg: Optional message to use if command fails (calls fatal_error_msg)
26 |         verbose: Output verbosity level:
27 |             0 = silent
28 |             1 = show before_msg if provided
29 |             2 = also show command being run (default)
30 |             3 = also show working directory and duration
31 |             4 = also show command stdout output
32 |         replace_sys_python_executable: Replace 'python ' with sys.executable
33 |         dry_run: If True, only print what would be run
34 |         **subprocess_kwargs: Additional arguments passed to subprocess.run
35 | 
36 |     Returns:
37 |         Tuple of (returncode, stdout, extra) where extra is a dict containing:
38 |         - stderr: Standard error output
39 |         - duration: Time taken to run command
40 |         - cmd_str: Final command string that was run
41 |         - cwd: Working directory
42 |         - input_args: Original function arguments
43 |         - subprocess_result: Full subprocess.CompletedProcess object
44 | 
45 |     Examples:
46 |         Simple usage with string command:
47 |         >>> retcode, out, _ = run_cmd("ls -l", before_msg="Listing files...")
48 |         Listing files...
49 |         $ ls -l
50 |         >>> print(out)
51 |         total 8
52 |         -rw-r--r-- 1 user user 2048 Mar 15 10:00 example.txt
53 | 
54 |         Complex usage with list command and error handling:
55 |         >>> cmd = ["pytest", "tests/", "-v", "--cov"]
56 |         >>> retcode, out, extra = run_cmd(
57 |         ...     cmd,
58 |         ...     before_msg="Running tests with coverage...",
59 |         ...     fatal_msg="Tests failed!",
60 |         ...     verbose=2,
61 |         ...     timeout=300,
62 |         ...     check=True
63 |         ... )
64 |         Running tests with coverage...
65 |         $ pytest tests/ -v --cov
66 |         === test session starts ===
67 |         ...
68 | """ 69 | input_args = locals() 70 | 71 | console = Console() 72 | 73 | start_time = time.time() 74 | 75 | # Convert list command to string if needed 76 | cmd_str = " ".join(cmd) if isinstance(cmd, list) else cmd 77 | 78 | # Replace python executable if requested 79 | if replace_sys_python_executable and cmd_str.startswith("python "): 80 | cmd_str = f"{sys.executable} {cmd_str[7:]}" 81 | 82 | # Handle verbosity 83 | if verbose >= 1 and before_msg: 84 | console.print(f"[green]{before_msg}[/green]") 85 | if verbose >= 2: 86 | console.print(f"[white]$ {cmd_str}[/white]") 87 | 88 | # Handle dry run 89 | if dry_run: 90 | return ( 91 | 0, 92 | "", 93 | { 94 | "stderr": "", 95 | "duration": 0, 96 | "cmd_str": cmd_str, 97 | "cwd": str(Path.cwd()), 98 | "input_args": input_args, 99 | "subprocess_result": None, 100 | }, 101 | ) 102 | 103 | # Set defaults for subprocess 104 | subprocess_kwargs.setdefault("shell", isinstance(cmd, str)) 105 | subprocess_kwargs.setdefault("capture_output", True) 106 | subprocess_kwargs.setdefault("text", True) 107 | 108 | try: 109 | result = subprocess.run( 110 | cmd if isinstance(cmd, list) else cmd_str, 111 | **subprocess_kwargs, 112 | ) 113 | except subprocess.TimeoutExpired as e: 114 | if fatal_msg: 115 | fatal_error_msg( 116 | fatal_msg, 117 | f"Command timed out after {subprocess_kwargs.get('timeout', '?')}s", 118 | ) 119 | raise 120 | 121 | duration = time.time() - start_time 122 | 123 | # Show additional info at verbose level 3 124 | if verbose >= 3: 125 | console.print(f"[blue]Working directory: {Path.cwd()}[/blue]") 126 | console.print(f"[blue]Duration: {duration:.2f}s[/blue]") 127 | if verbose >= 4: 128 | console.print(f"[blue]Command output:[/blue]\n{result.stdout}") 129 | 130 | # Handle errors 131 | if result.returncode != 0: 132 | # Show both stdout and stderr for failed commands 133 | if result.stdout: 134 | console.print(f"[red]Command output:[/red]\n{result.stdout}") 135 | if result.stderr: 136 | console.print(f"[red]Command error output:[/red]\n{result.stderr}") 137 | if fatal_msg: 138 | fatal_error_msg(fatal_msg) 139 | 140 | extra = { 141 | "stderr": result.stderr, 142 | "duration": duration, 143 | "cmd_str": cmd_str, 144 | "cwd": str(Path.cwd()), 145 | "input_args": input_args, 146 | "subprocess_result": result, 147 | } 148 | 149 | return result.returncode, result.stdout.strip(), extra 150 | -------------------------------------------------------------------------------- /src/gjdutils/llms_claude.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | from anthropic import Anthropic, NOT_GIVEN 4 | from typing import Optional 5 | 6 | from gjdutils.image_utils import image_to_base64_basic 7 | from gjdutils.env import get_env_var 8 | 9 | 10 | CLAUDE_API_KEY = get_env_var("CLAUDE_API_KEY") 11 | # https://docs.anthropic.com/en/docs/about-claude/models 12 | MODEL_NAME_CLAUDE_SONNET_GOOD_LATEST = "claude-sonnet-4-0" 13 | MODEL_NAME_CLAUDE_SONNET_CHEAP_LATEST = "claude-3-5-haiku-latest" 14 | 15 | 16 | def img_as_content_dict(img_filen: str): 17 | media_type_from_extension = { 18 | ".jpg": "image/jpeg", 19 | ".jpeg": "image/jpeg", 20 | ".png": "image/png", 21 | ".webp": "image/webp", 22 | ".gif": "image/gif", 23 | ".bmp": "image/bmp", 24 | ".tiff": "image/tiff", 25 | ".ico": "image/vnd.microsoft.icon", 26 | ".svg": "image/svg+xml", 27 | ".heic": "image/heic", 28 | ".heif": "image/heif", 29 | } 30 | 31 | ext = Path(img_filen).suffix 32 | if ext not in media_type_from_extension: 
33 | raise ValueError(f"Unknown image file extension: {img_filen}") 34 | media_type = media_type_from_extension[ext] 35 | 36 | img_base64 = image_to_base64_basic(img_filen) 37 | return { 38 | "type": "image", 39 | "source": { 40 | "type": "base64", 41 | "media_type": media_type, 42 | "data": img_base64, 43 | }, 44 | } 45 | 46 | 47 | def call_claude_gpt( 48 | prompt: str, 49 | tools: Optional[list[dict]] = None, 50 | image_filens: str | list[str] | None = None, 51 | image_resize_target_size_kb: Optional[int] = 100, 52 | client: Optional[Anthropic] = None, 53 | model: str = MODEL_NAME_CLAUDE_SONNET_GOOD_LATEST, 54 | temperature: Optional[float] = 0.001, 55 | response_json: bool = False, 56 | # seed: Optional[int] = DEFAULT_RANDOM_SEED, 57 | max_tokens: int = 4096, 58 | verbose: int = 0, 59 | ): 60 | """Call Claude API with support for text, images, and function calling""" 61 | from gjdutils.llm_utils import extract_json_from_markdown 62 | 63 | extra = locals() 64 | extra.pop("client") 65 | 66 | if tools is not None: 67 | raise NotImplementedError( 68 | "I think tools are supported, but not implemented in this function" 69 | ) 70 | 71 | if client is None: 72 | client = Anthropic(api_key=CLAUDE_API_KEY) 73 | 74 | # Prepare image contents if provided 75 | contents = [] 76 | if image_filens: 77 | if isinstance(image_filens, str): 78 | image_filens = [image_filens] 79 | assert image_resize_target_size_kb is not None 80 | for i, img_filen in enumerate(image_filens): 81 | contents.extend( 82 | [ 83 | {"type": "text", "text": f"Image {i+1}:"}, 84 | img_as_content_dict(image_filens[i]), 85 | # { 86 | # "type": "image", 87 | # "source": { 88 | # "type": "base64", 89 | # "media_type": "image/jpeg", # Adjust based on actual image type 90 | # "data": b64, 91 | # }, 92 | # }, 93 | ] 94 | ) 95 | 96 | # if response_json: 97 | # # Add instruction to respond in JSON format - be very explicit 98 | # prompt = f"Please provide your response in valid JSON format without any markdown formatting or backticks. Provide ONLY the JSON object, not any explanatory text before or after the JSON. 
{prompt}" 99 | 100 | contents.append({"type": "text", "text": prompt}) 101 | 102 | # not supported 103 | # response_format = {"type": "json_object"} if response_json else None 104 | 105 | # Make API call 106 | response = client.messages.create( 107 | model=model, 108 | max_tokens=max_tokens, 109 | messages=[{"role": "user", "content": contents}], 110 | temperature=temperature if temperature is not None else NOT_GIVEN, 111 | # seed=seed, 112 | # response_format=response_format, 113 | ) 114 | 115 | msg = response.content[0].text # type: ignore 116 | if response_json: 117 | try: 118 | # Use our utility function to handle markdown-wrapped JSON 119 | clean_json_text = extract_json_from_markdown(msg, verbose=verbose) 120 | msg = json.loads(clean_json_text) 121 | except json.JSONDecodeError as e: 122 | if verbose: 123 | print(f"JSON decode error: {e}") 124 | print(f"Raw message causing error: {msg}") 125 | print(f"Cleaned: {clean_json_text}") 126 | # Return a structured error response instead of failing 127 | msg = { 128 | "error": "Failed to parse API response", 129 | "raw_response": msg[:500] if msg else "Empty response", 130 | } 131 | extra.update( 132 | { 133 | "response": response.model_dump(), 134 | "msg": msg, 135 | # "tool_calls": tool_calls, 136 | "model": model, 137 | "contents": contents, 138 | } 139 | ) 140 | 141 | if verbose >= 2: 142 | print(f"PROMPT:\n{prompt}") 143 | if verbose >= 1: 144 | print(f"LLM MESSAGE:\n{msg}") 145 | if verbose >= 2: 146 | # print(f"TOOL CALLS:\n{tool_calls}") 147 | print(f"LLM RESPONSE:\n{json.dumps(response.model_dump(), indent=2)}") 148 | return msg, extra 149 | -------------------------------------------------------------------------------- /docs/instructions/FIX_HOUSEKEEPING_BUILD_TYPECHECK_LINT.md: -------------------------------------------------------------------------------- 1 | # Fix Housekeeping Build, TypeCheck & Lint Issues 2 | 3 | This is a periodic housekeeping task to maintain code quality and catch potential issues early in the development process. 4 | 5 | ## Goal 6 | 7 | Systematically address build, TypeScript, and linting issues to prevent accumulation of technical debt and ensure the codebase remains healthy for AI-first development. 8 | 9 | ## Comprehensive Health Check Process 10 | 11 | ### Stage 1: Assessment & Prioritisation 12 | 13 | #### Run Full Health Checks 14 | ```bash 15 | # Type checking - most critical for runtime safety 16 | npm run build # Project build with some lenience 17 | tsc --noEmit # Strict TypeScript type checking 18 | 19 | # Code quality and patterns 20 | npm run lint # ESLint issues and warnings 21 | 22 | # Functionality verification 23 | npm test # Test suite 24 | npm run test:e2e # E2E tests (if available and time permits) 25 | ``` 26 | 27 | #### Assess Scope & Impact 28 | - **Count issues**: `tsc --noEmit 2>&1 | grep -c "error TS"` and `npm run lint 2>&1 | grep -c Warning` 29 | - **Categorise by severity**: 30 | - 🔴 **BLOCKING**: TypeScript errors that could cause runtime failures 31 | - 🟡 **HIGH**: ESLint errors, deprecated patterns, security issues 32 | - 🟢 **LOW**: Style warnings, minor inconsistencies 33 | - **Identify patterns**: Are errors clustered in specific files/areas? 34 | 35 | #### Prioritisation Strategy 36 | 1. **Production code over test code**: Fix core functionality first 37 | 2. **Runtime safety over style**: TypeScript errors before ESLint warnings 38 | 3. **Recently modified files**: Focus on active development areas 39 | 4. 
**Shared/core modules**: Fix widely-used utilities before isolated features 40 | 5. **Quick wins**: Simple fixes that resolve multiple issues 41 | 42 | ### Stage 2: Systematic Resolution 43 | 44 | #### Use Subagents (if available) for Investigation 45 | Deploy subagents (if available) with specific focus areas: 46 | ``` 47 | "Investigate TypeScript errors in core modules - focus on main API routes and business logic. 48 | Categorise by: type safety issues, missing types, configuration problems. 49 | Suggest fix priorities and identify any dangerous patterns." 50 | ``` 51 | 52 | #### Fix in Batches 53 | - **Batch by file/module**: Complete one area before moving to next 54 | - **Batch by error type**: Fix all `exactOptionalPropertyTypes` issues together 55 | - **Test after each batch**: Verify fixes don't break functionality 56 | 57 | #### Safety Practices 58 | - **Understand before fixing**: Don't apply mechanical fixes without understanding 59 | - **Preserve functionality**: Use tests to verify changes don't break behaviour 60 | - **Conservative approach**: If unsure about a fix, mark for discussion rather than guessing 61 | - **Document complex fixes**: Add comments explaining non-obvious corrections 62 | 63 | ### Stage 3: Verification & Prevention 64 | 65 | #### Comprehensive Re-check 66 | ```bash 67 | # Verify all issues resolved 68 | npm run build && echo "✅ Build successful" 69 | tsc --noEmit && echo "✅ TypeScript clean" 70 | npm run lint && echo "✅ Linting clean" 71 | npm test && echo "✅ Tests passing" 72 | ``` 73 | 74 | #### Update Documentation 75 | - Update any affected documentation files if patterns changed 76 | - Note any systematic issues discovered for future prevention 77 | - Update this document if new issue patterns emerge 78 | 79 | #### Prevention Measures 80 | - **Consider TypeScript config adjustments**: Should `exactOptionalPropertyTypes` be relaxed for legacy code? 81 | - **Evaluate ESLint rules**: Are any rules generating more noise than value? 82 | - **IDE integration**: Ensure development environment catches issues early 83 | - **Documentation updates**: Add patterns to coding guidelines if needed 84 | 85 | ## Decision Framework 86 | 87 | ### When to Fix vs When to Document/Defer 88 | 89 | **Fix Immediately**: 90 | - Type errors that could cause runtime crashes 91 | - Security-related linting issues 92 | - Deprecated API usage that could break in future updates 93 | - Simple mechanical fixes (unused imports, missing semicolons) 94 | 95 | **Document & Schedule**: 96 | - Complex refactoring needs that require architectural decisions 97 | - Issues in legacy code that isn't actively maintained 98 | - Style inconsistencies that don't affect functionality 99 | - Performance optimizations that need measurement 100 | 101 | **Skip/Disable**: 102 | - ESLint rules that conflict with project conventions 103 | - TypeScript strictness that creates excessive overhead 104 | - Test-only issues that don't affect production 105 | 106 | ### Stopping Criteria 107 | 108 | **Stop and discuss with user if**: 109 | - Fixes require significant architectural changes 110 | - Multiple approaches exist with unclear trade-offs 111 | - Error counts remain high after systematic fixing 112 | - Tests start failing due to corrections 113 | - Uncertainty about safety of changes 114 | 115 | ## Post-Completion Actions 116 | 117 | 1. **Commit in logical batches**: Group related fixes together 118 | 2. **Update issue tracking**: Note patterns for future housekeeping 119 | 3. 
**Review effectiveness**: Did this process catch important issues? 120 | 4. **Schedule next cycle**: Based on issue accumulation rate 121 | 5. **Share learnings**: Update team practices if systematic issues found 122 | 123 | ## Think Hard 124 | 125 | Consider the bigger picture: 126 | - Are we fixing symptoms or root causes? 127 | - What does the error pattern tell us about our development process? 128 | - Should we adjust our TypeScript/ESLint configuration for better AI-first development? 129 | - Are there preventive measures (pre-commit hooks, CI checks) worth implementing? 130 | - Is the balance right between strictness and development velocity? 131 | 132 | Remember: The goal is sustainable code quality, not perfect cleanliness. Focus on issues that matter for functionality, security, and maintainability. -------------------------------------------------------------------------------- /docs/instructions/UPDATE_CLAUDE_INSTRUCTIONS.md: -------------------------------------------------------------------------------- 1 | # Updating AI Agent Instructions 2 | 3 | Guidelines for maintaining CLAUDE.md (or equivalent Cursor rules file) to help AI agents operate effectively on your codebase. 4 | 5 | ## See also 6 | 7 | - `CLAUDE.md` - The main instructions file for AI agents (or `.cursorrules`, etc.) 8 | - `WRITE_EVERGREEN_DOC.md` - General documentation writing guidelines 9 | - `UPDATE_HOUSEKEEPING_DOCUMENTATION.md` - Documentation maintenance process 10 | 11 | ## Purpose of Agent Instructions File 12 | 13 | This file (CLAUDE.md, .cursorrules, etc.) serves as the primary orientation document for AI agents working on your codebase. It should provide essential context and signposts without duplicating information that exists elsewhere in the documentation. 14 | 15 | ## What to Include 16 | 17 | ### Essential Project Context 18 | - **Project overview** - Brief description of goals and current phase 19 | - **Architecture summary** - Key framework and storage decisions 20 | - **Build commands** - How to run, test, and debug the application 21 | - **Project structure** - Where to find different types of code/docs 22 | 23 | ### Debugging and Development Aids 24 | - **Type checking** - Commands for compilation errors 25 | - **Linting** - Code quality checking commands 26 | - **Testing** - Test commands and coverage info 27 | - **Log files** - Location of development logs 28 | - **Test locations** - Where to find existing tests 29 | - **Database info** - Migration files and schema documentation 30 | 31 | ### Navigation Signposts 32 | - **Architecture docs** - Link to main architecture documentation 33 | - **Planning docs** - Point to recent decisions and planning documents 34 | - **Specific domains** - Database, API, UI components documentation 35 | 36 | ### Operational Guidelines 37 | - **Git practices** - Reference to commit and workflow guidelines 38 | - **Code style** - Spelling preferences, existing patterns 39 | - **Environment setup** - Key variables and configuration 40 | 41 | ## What NOT to Include 42 | 43 | - **Detailed instructions** - These belong in specific domain docs 44 | - **Code examples** - Link to actual implementation files instead 45 | - **Duplicate information** - Always reference canonical source 46 | - **Step-by-step tutorials** - These belong in setup documentation 47 | 48 | ## Maintenance Principles 49 | 50 | ### Conciseness 51 | Keep the instructions file focused and scannable. Each section should be 3-5 bullet points maximum. Use signposting rather than explanation. 
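For instance, a hypothetical debugging section kept to signpost length might look like this (the commands and paths are illustrative, not prescriptive):

```markdown
## Debugging

- Type check: `npm run typecheck`
- Lint: `npm run lint`
- Tests: `tests/` - run with `npm test`
- Dev logs: `logs/dev.log`
```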
52 | 53 | ### Signposting Over Duplication 54 | Instead of explaining how something works, point to where the information lives: 55 | - "Database schema: `migrations/` directory and `../reference/DATABASE_SCHEMA.md`" 56 | - "Testing: Framework setup in test config, tests in `tests/` or `__tests__/`" 57 | 58 | ### Current State Focus 59 | Document what exists now, not what's planned. Use status indicators (✓ implemented, 📋 planned) when helpful. 60 | 61 | ### User-Driven Updates 62 | Update the instructions file based on: 63 | - **User feedback** - What agents needed but couldn't find 64 | - **Common pain points** - Debugging paths that weren't obvious 65 | - **New major features** - Changes to build process, architecture 66 | - **Structural changes** - New documentation, moved files 67 | 68 | ## Review Triggers 69 | 70 | Update your agent instructions when: 71 | - AI agents struggle to find essential information 72 | - Major architectural changes occur 73 | - New debugging tools or processes are added 74 | - Project structure changes significantly 75 | - User identifies missing signposts during development 76 | 77 | ## Quality Checklist 78 | 79 | Before updating agent instructions: 80 | - [ ] Information is essential for AI agent effectiveness 81 | - [ ] No duplication of content available elsewhere 82 | - [ ] All links and references are valid 83 | - [ ] Debugging paths are clear and actionable 84 | - [ ] Structure remains scannable and concise 85 | - [ ] Cross-references point to canonical sources 86 | 87 | ## Tool-Specific Considerations 88 | 89 | ### Claude Code (CLAUDE.md) 90 | - Include tasks and subagents guidance for context window management 91 | - Reference specific tools and permissions needed 92 | - Include parallel tool execution patterns 93 | - Document debugging workflows for command-line environments 94 | 95 | ### Cursor (.cursorrules) 96 | - Include workspace configuration hints where relevant 97 | - Reference model selection best practices if applicable 98 | - Include shortcuts and workflow patterns for IDE users 99 | - Consider IDE-specific debugging approaches 100 | 101 | ### Other AI Tools 102 | - Adapt structure to tool capabilities and limitations 103 | - Include tool-specific workflow patterns 104 | - Reference appropriate documentation formats for the platform 105 | - Consider different context window constraints 106 | 107 | ## Customization Guidelines 108 | 109 | When maintaining CLAUDE.md: 110 | 111 | 1. **Keep project-specific** - Tailor content to actual codebase needs 112 | 2. **Remove inapplicable sections** - Don't include generic content that doesn't apply 113 | 3. **Add project-specific sections** as needed (API keys, special workflows, domain knowledge) 114 | 4. **Maintain conciseness** - File should be scannable, not comprehensive 115 | 5. **Use signposting approach** - Point to detailed docs rather than duplicating content 116 | 6. 
**Update regularly** - Keep build commands and key information current 117 | 118 | ## Essential References Structure 119 | 120 | When organizing the "See also" section in CLAUDE.md, consider this structure: 121 | 122 | ```markdown 123 | see: 124 | - `README.md` for project goals and features 125 | - `docs/reference/CODING_PRINCIPLES.md` for development principles 126 | - `docs/instructions/GIT_COMMIT_CHANGES.md` for Git workflow 127 | - `docs/reference/ARCHITECTURE_OVERVIEW.md` for system architecture 128 | - `docs/reference/[DOMAIN]_*.md` for specific domain documentation 129 | ``` 130 | 131 | This provides a logical hierarchy from general (README) to specific (domain docs). -------------------------------------------------------------------------------- /docs/instructions/draft/NONINTERACTIVE.md: -------------------------------------------------------------------------------- 1 | # Non-Interactive AI Assistant Usage 2 | 3 | Non-interactive mode allows AI assistants to execute tasks without human intervention. 4 | 5 | **Note**: This document is specifically written for Claude Code (`claude -p`) but the principles apply to other AI tools. 6 | 7 | ## See Also 8 | 9 | - `../WRITE_PLANNING_DOC.md` - Creating structured task documents 10 | - `../GIT_COMMITS.md` - Git workflow practices 11 | - `../CODING_PRINCIPLES.md` - Development principles 12 | - Planning documents in your project's planning/ directory 13 | 14 | ## Tool Access Philosophy 15 | 16 | **Non-interactive AI assistants typically cannot:** 17 | - Run applications (no access to development servers, browsers, or live applications) 18 | - Execute tests interactively (no access to test runners that require interaction) 19 | - Access specialized MCP tools (browser automation, database queries) 20 | - Commit changes to git (this should be handled externally) 21 | - Access running development servers or databases 22 | 23 | **Non-interactive AI assistants can:** 24 | - Read, write, and edit files 25 | - Perform static analysis of code 26 | - Search and research via web 27 | - Use basic command line tools for file operations 28 | - Generate and modify documentation 29 | - Analyse project structure and dependencies 30 | 31 | ## Basic Usage 32 | 33 | ### Claude Code Example 34 | ```bash 35 | claude -p "your task description" \ 36 | --allowedTools "Bash Edit MultiEdit Read Write Glob Grep LS Task WebFetch WebSearch TodoRead TodoWrite" \ 37 | --output-format stream-json 38 | ``` 39 | 40 | ### Using Wrapper Scripts 41 | 42 | Consider creating a wrapper script (e.g., `scripts/ai-batch.sh`) to standardize your non-interactive AI usage patterns. 43 | 44 | ## Planning Document Integration 45 | 46 | Non-interactive mode works best with well-structured planning documents (see `WRITE_PLANNING_DOC.md`). 
Feed the AI the entire planning document content: 47 | 48 | ```bash 49 | # Claude Code example 50 | ./scripts/claude-batch.sh "$(cat planning/your_task.md)" 51 | ``` 52 | 53 | This approach: 54 | - Provides complete context upfront 55 | - Reduces need for clarifying questions 56 | - Enables autonomous task execution 57 | - Works well with parallel execution 58 | 59 | ## CI/CD Integration 60 | 61 | ### Example GitHub Actions Workflow 62 | ```yaml 63 | name: AI-Assisted Development 64 | on: 65 | workflow_dispatch: 66 | inputs: 67 | task_description: 68 | description: 'Task for AI to execute' 69 | required: true 70 | 71 | jobs: 72 | ai-task: 73 | runs-on: ubuntu-latest 74 | steps: 75 | - uses: actions/checkout@v3 76 | - name: Setup Node.js 77 | uses: actions/setup-node@v3 78 | with: 79 | node-version: '18' 80 | - name: Install AI Tools 81 | run: npm install -g @anthropic-ai/claude-code 82 | - name: Run AI Task 83 | env: 84 | ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} 85 | run: | 86 | claude -p "${{ github.event.inputs.task_description }}" \ 87 | --allowedTools "Edit MultiEdit Read Write Glob Grep LS Task WebFetch WebSearch" \ 88 | --output-format stream-json 89 | ``` 90 | 91 | ## Tool Configuration 92 | 93 | ### Recommended Tool Set 94 | - **Core file operations**: `Edit MultiEdit Read Write` 95 | - **Search and discovery**: `Glob Grep LS` 96 | - **Research and analysis**: `WebFetch WebSearch` 97 | - **Task management**: `TodoRead TodoWrite` 98 | - **Basic system operations**: `Bash` (limited to file operations) 99 | - **Subtask delegation**: `Task` 100 | 101 | ### Security Considerations 102 | - Use specific tool allowlists rather than broad permissions 103 | - Limit shell access to safe operations 104 | - Run in isolated environments for untrusted tasks 105 | - Store API keys securely in CI environments 106 | 107 | ## Error Handling 108 | 109 | Non-interactive mode requires robust error handling since AI cannot ask for clarification: 110 | 111 | ### In Task Descriptions 112 | ```markdown 113 | # Task: Refactor authentication system 114 | 115 | ## Error Handling 116 | - If compilation errors occur, document them in /tmp/issues.md 117 | - If tests would be needed, create them but note they cannot be run 118 | - If unclear about implementation details, make reasonable assumptions and document them 119 | 120 | ## Constraints 121 | - Cannot run application or tests 122 | - Cannot commit changes 123 | - Must work with existing code patterns 124 | ``` 125 | 126 | ### In Wrapper Scripts 127 | ```bash 128 | ai-batch() { 129 | # ... setup ... 130 | 131 | if ! claude -p "$prompt" --allowedTools "$tools" --output-format stream-json; then 132 | echo "AI task failed. Check output above for details." 133 | return 1 134 | fi 135 | } 136 | ``` 137 | 138 | ## Best Practices 139 | 140 | 1. **Provide complete context** in planning documents 141 | 2. **Specify constraints clearly** (no testing, no commits, etc.) 142 | 3. **Use structured output** for automation parsing 143 | 4. **Handle failures gracefully** in CI environments 144 | 5. **Limit scope** to tasks that don't require runtime verification 145 | 6. **Document assumptions** when requirements are ambiguous 146 | 147 | ## Unresolved Questions 148 | 149 | ### Git & Branch Management 150 | - Should AI be able to make its own Git commits? 151 | - Should wrapper scripts automatically create branches for each task? 152 | - How should branch naming be standardised for parallel execution? 153 | - Should cleanup of completed branches be automated? 
154 | 
155 | ### Output Format
156 | - Is `stream-json` the best format for CI integration?
157 | - Should results be structured differently for different use cases?
158 | - How should partial results be handled if AI is interrupted?
159 | 
160 | ### Task Scope
161 | - Should there be timeout limits for long-running tasks?
162 | 
163 | ### Error Recovery
164 | - How should the system handle partial completions?
165 | - Should failed tasks be automatically retried with modified parameters?
166 | - What level of rollback capability is needed?
-------------------------------------------------------------------------------- /docs/instructions/WRITE_EVERGREEN_DOC.md: --------------------------------------------------------------------------------
1 | # Writing evergreen documentation
2 | 
3 | see also:
4 | - `WRITE_PLANNING_DOC.md` - for writing ephemeral decision/planning docs
5 | - `UPDATE_HOUSEKEEPING_DOCUMENTATION.md` - for keeping documentation current every so often
6 | 
7 | 
8 | # What are evergreen docs?
9 | 
10 | This is for writing evergreen, general documentation on how the system works.
11 | 
12 | These should be a concise, clear, well-structured, complete-enough, up-to-date description of things. By "complete-enough", they should cover most of the important topics, if only to signpost to where more information can be found, or to the code itself.
13 | 
14 | They should refer to one another, and avoid too much overlap in content, so that if information changes, we ideally only need to change the documentation in one place.
15 | 
16 | 
17 | # Format
18 | 
19 | They should be written in Markdown, stored as `TOPIC_NAME.md` (e.g. in `docs/project/`, `docs/reference/`, `docs/instructions/` or similar), following project-specific documentation guidelines (for example an `AGENTS.md` or local docs README) for storage.
20 | 
21 | ## Filename Guidelines
22 | 
23 | Choose descriptive filenames that clearly indicate the document's content:
24 | 
25 | - **Be specific**: `UPLOAD_DOCUMENT_PROCESSING_PIPELINE.md` instead of just `UPLOAD.md`
26 | - **Include context**: `NAVIGATION_COMPONENT_DESIGN.md` instead of just `NAVIGATION.md`
27 | - **Keep existing names**: Where possible, include the current name as a prefix (e.g., `SETUP_DEVELOPMENT_ENVIRONMENT.md` keeps `SETUP`)
28 | - **Group related docs**: Use similar prefixes so related docs sort together (e.g., all `DATABASE_*.md` files)
29 | - **Maintain prefix conventions**: Keep category prefixes (DATABASE_, TESTING_, API_, etc.)
30 | 
31 | Good examples:
32 | - `DATABASE_INTEGRATION_REFERENCE.md`
33 | - `TESTING_AUTOMATION_OVERVIEW.md`
34 | - `API_CLIENT_INTEGRATION.md`
35 | - `AUTHENTICATION_SECURITY.md`
36 | 
37 | 
38 | ## Document structure
39 | 
40 | They might be organised into something like the following sections. Use your judgment. Probably only a few of these will be relevant for each doc, feel free to rename them, etc.
41 | 
42 | 
43 | ### Introduction
44 | 
45 | 2-sentence summary of the topic, and what the document covers.
46 | 
47 | ### See also
48 | 
49 | Bullet-point list of other relevant docs, code, urls, or other resources that provide related information, or more detail. Provide a 1-sentence summary or explanation of how each one is relevant.
50 | 
51 | Examples of good cross-references:
52 | - `WRITE_PLANNING_DOC.md` - for information about writing ephemeral decision/planning docs
53 | - `src/components/example.tsx` - implementation of features described here
54 | - `planning/YYYYMMDD_feature_planning.md` - historical decision context
55 | - External URLs when relevant (e.g., library documentation)
56 | 
57 | Add references to and from this new doc (e.g. in relevant code, planning docs in `planning/*.md`, etc) - use a subagent for this
58 | 
59 | #### Cross-Reference Best Practices
60 | 
61 | - **Update documentation organisation index** if your project has one
62 | - **Link to canonical source** (e.g. functions, files, docs, urls, etc) for detailed information rather than duplicating
63 | - **Provide 1-sentence context** with each link explaining its relevance
64 | - **Use relative paths** for internal documentation links
65 | - **Avoid content duplication** - if information exists elsewhere, link to it
66 | 
67 | 
68 | 
69 | 
70 | ### Principles, key decisions
71 | 
72 | - Include any specific principles/approaches or decisions that have been explicitly agreed with the user (over and above existing coding rules, project examples, best practices, etc).
73 | - As you get new information from the user, update this doc so it's always up-to-date.
74 | 
75 | ### [Provide a few detailed sections here, depending on the topic]
76 | 
77 | Include as appropriate:
78 | - high-level overview, architecture
79 | - common patterns, howtos
80 | - examples
81 | - gotchas
82 | - limitations
83 | - troubleshooting
84 | - planned future work
85 | 
86 | 
87 | ### Documenting Systems in Transition
88 | 
89 | When documenting systems that are changing (e.g., architectural migrations):
90 | 
91 | 1. **Clearly distinguish states**:
92 |    - **Current State**: How the system works today
93 |    - **Target State**: The intended future architecture
94 |    - **Migration Status**: Progress and timeline if known
95 | 
96 | 2. **Reference decisions**: Link to planning docs or architecture decision records for rationale
97 | 
98 | 3. **Update incrementally**: As migration progresses, update the documentation
99 | 
100 | Example:
101 | ```markdown
102 | ## Database Architecture
103 | 
104 | **Current State**: Uses legacy schema approach
105 | **Target State**: New optimized schema with performance improvements
106 | **Migration Status**: Schema designed, code updates pending
107 | 
108 | see `../reference/ARCHITECTURE_DECISIONS.md` for migration rationale
109 | ```
110 | 
111 | 
112 | ### Appendix
113 | 
114 | Add any other important context here, e.g.
115 | - example data
116 | - other information that should be captured but doesn't fit neatly in the above sections
117 | 
118 | 
119 | # Maintenance
120 | 
121 | ## Review Frequency
122 | 
123 | Regular documentation review ensures accuracy:
124 | - **After major features** - Update immediately after implementation
125 | - **During housekeeping** - Monthly review recommended
126 | - **When outdated** - Fix immediately when noticed
127 | - **Before milestones** - Ensure docs reflect current state
128 | 
129 | see `UPDATE_HOUSEKEEPING_DOCUMENTATION.md` for the complete housekeeping process
130 | 
131 | ## Common Pitfalls to Avoid
132 | 
133 | 1. **Information duplication** - Creates maintenance burden when things change
134 | 2. **Vague status descriptions** - Be specific about implementation state
135 | 3. 
**Missing cross-references** - Always link to related documentation 136 | 4. **Outdated examples** - Ensure code samples match current patterns 137 | 5. **Forgotten transitions** - Update docs as systems migrate 138 | 139 | ## Quality Checklist 140 | 141 | Before committing documentation: 142 | - [ ] Cross-references are valid and helpful 143 | - [ ] No contradictions with other documents 144 | - [ ] Examples match current code patterns 145 | - [ ] Transitional states are clearly marked 146 | - [ ] "See also" sections are comprehensive --------------------------------------------------------------------------------