├── tests ├── __init__.py ├── test_strings.py ├── test_cli.py ├── test_num.py ├── test_rand.py ├── test_embeddings_openai.py ├── test_print_utils.py ├── test_env_integration.py ├── test_git_status.py └── test_indexing.py ├── src ├── gjdutils │ ├── cli │ │ ├── pypi │ │ │ ├── __init__.py │ │ │ ├── app.py │ │ │ ├── deploy.py │ │ │ └── check.py │ │ ├── __init__.py │ │ ├── install_all_dev_dependencies.py │ │ ├── main.py │ │ └── check_git_clean.py │ ├── __version__.py │ ├── __init__.py │ ├── scripts │ │ ├── __init__.py │ │ ├── export_envs.sh │ │ └── install.py │ ├── errors.py │ ├── sets.py │ ├── lists.py │ ├── misc.py │ ├── prompt_templates.py │ ├── runtime.py │ ├── todo │ │ └── convert_parquet.py │ ├── print_utils.py │ ├── colab.py │ ├── decorators.py │ ├── typ.py │ ├── hashing.py │ ├── voice_speechrecognition.py │ ├── audios.py │ ├── functions.py │ ├── collection_utils.py │ ├── shell.py │ ├── rand.py │ ├── pytest_utils.py │ ├── obsolete │ │ └── google_text_to_speech.py │ ├── stopwatch.py │ ├── web.py │ ├── jsons.py │ ├── env.py │ ├── num.py │ ├── google_translate.py │ ├── iterfunc.py │ ├── image_utils.py │ ├── html.py │ ├── regex.py │ ├── indexing.py │ ├── webserver.py │ ├── ports.py │ ├── pypi_build.py │ ├── files.py │ ├── llm_utils.py │ ├── cmd.py │ └── llms_claude.py └── ts │ └── README.md ├── docs ├── instructions │ ├── EDIT_LIGHTLY_CONVERSATION_TRANSCRIPT.md │ ├── DEBRIEF_UPDATE_COMMIT.md │ ├── SURGEON_MODE.md │ ├── RESOLVE_MERGE_CONFLICTS.md │ ├── SOUNDING_BOARD_MODE.md │ ├── HOUSEKEEPING_OLD_PLANNING_DOC.md │ ├── GIT_CREATE_BRANCH.md │ ├── DETECTIVE_SCIENTIST_MODE.md │ ├── DO_EXECUTE_PLANNING_DOC.md │ ├── THIRD_PARTY_LIBRARY_SELECTION.md │ ├── dev │ │ └── PULL_PUSH_LOCAL_GJDUTILS_REPOS.md │ ├── AUDIT_ARCHITECTURE_MODE.md │ ├── TASKS_SUBAGENTS.md │ ├── DEBRIEF_PROGRESS.md │ ├── CODING_PRINCIPLES.md │ ├── WRITE_DEEP_DIVE_AS_DOC.md │ ├── RENAME_OR_MOVE.md │ ├── GIT_COMMIT_CHANGES.md │ ├── UPDATE_DOCUMENTATION_ORGANISATION_DOC.md │ ├── CAPTURE_SOUNDING_BOARD_CONVERSATION.md │ ├── CRITIQUE_OF_PLANNING_DOC.md │ ├── GENERATE_MERMAID_DIAGRAM.md │ ├── FIX_HOUSEKEEPING_BUILD_TYPECHECK_LINT.md │ ├── UPDATE_CLAUDE_INSTRUCTIONS.md │ ├── draft │ │ └── NONINTERACTIVE.md │ └── WRITE_EVERGREEN_DOC.md ├── reference │ ├── TESTING_PYTHON.md │ ├── CODING_SHELL_SCRIPTS.md │ ├── DOCUMENTATION_ORGANISATION.md │ └── SD_STRING_DISPLACEMENT_FIND_REPLACE.md ├── README.md └── WORKFLOW.md ├── .cursor └── rules │ └── README.md ├── tsconfig.json ├── LICENSE ├── package.json ├── .gitignore ├── README.md ├── CLAUDE.md ├── planning ├── 250215_publishing_to_pypi.md └── 250215_rename_gdutils_to_gjdutils.md └── pyproject.toml /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/gjdutils/cli/pypi/__init__.py: -------------------------------------------------------------------------------- 1 | from .app import app 2 | 3 | __all__ = ["app"] 4 | -------------------------------------------------------------------------------- /src/gjdutils/cli/__init__.py: -------------------------------------------------------------------------------- 1 | from gjdutils.cli.main import app 2 | 3 | __all__ = ["app"] 4 | -------------------------------------------------------------------------------- /src/gjdutils/__version__.py: -------------------------------------------------------------------------------- 1 | """Single source of truth for package version.""" 2 | 3 | __version__ = "0.6.1" 4 | 
-------------------------------------------------------------------------------- /tests/test_strings.py: -------------------------------------------------------------------------------- 1 | # test_strings.py 2 | 3 | # test that jinja_render raises an error if missing variables 4 | -------------------------------------------------------------------------------- /src/gjdutils/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | GJDutils - A collection of useful utility functions 3 | """ 4 | 5 | from gjdutils.__version__ import __version__ 6 | -------------------------------------------------------------------------------- /src/gjdutils/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | """Scripts package for gjdutils.""" 2 | 3 | from .install import install_export_envs 4 | 5 | __all__ = ["install_export_envs"] 6 | -------------------------------------------------------------------------------- /src/gjdutils/cli/install_all_dev_dependencies.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from gjdutils.cmd import run_cmd 4 | 5 | if __name__ == "__main__": 6 | run_cmd("pip install -e '.[all_no_dev,dev]'", verbose=4) 7 | -------------------------------------------------------------------------------- /docs/instructions/EDIT_LIGHTLY_CONVERSATION_TRANSCRIPT.md: -------------------------------------------------------------------------------- 1 | This is a verbatim transcript of a conversation. Tighten it up slightly, keeping all of the detail and nuance, and preserving as much verbatim phrasing as possible, but a bit more compact. 2 | -------------------------------------------------------------------------------- /docs/instructions/DEBRIEF_UPDATE_COMMIT.md: -------------------------------------------------------------------------------- 1 | # Debrief and Commit 2 | 3 | Run @docs/instructions/DEBRIEF_PROGRESS.md . 4 | 5 | Update the planning doc with your progress. 6 | 7 | Then commit these changes, as per docs/instructions/GIT_COMMIT_CHANGES.md . -------------------------------------------------------------------------------- /docs/instructions/SURGEON_MODE.md: -------------------------------------------------------------------------------- 1 | # Surgeon mode 2 | 3 | Make minimal changes, focused on the task at hand. 4 | 5 | Do no harm if in doubt ask. 
If there are decisions to be made talk through the various options and the trade-offs so that we can decide together before making changes -------------------------------------------------------------------------------- /src/gjdutils/errors.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import sys 3 | import traceback 4 | 5 | # see also functions.func_name() 6 | 7 | 8 | def str_from_exception(name=None): 9 | return { 10 | "name": name, 11 | "msg": "".join(traceback.format_exception(*sys.exc_info())), 12 | } 13 | -------------------------------------------------------------------------------- /src/gjdutils/sets.py: -------------------------------------------------------------------------------- 1 | def assert_sets_identical(set1: set, set2: set): 2 | # assert that the sets are the same, and print the difference if not 3 | if set1 != set2: 4 | assert not set1 - set2, f"Set 1 has extra elements: {set1 - set2}" 5 | assert not set2 - set1, f"Set 2 has extra elements: {set2 - set1}" 6 | -------------------------------------------------------------------------------- /src/gjdutils/lists.py: -------------------------------------------------------------------------------- 1 | def get_list_from_str_or_list(str_or_list: str | list[str]) -> list[str]: 2 | """ 3 | e.g. 4 | - get_list_from_str_or_list("asdf") -> ["asdf"] 5 | - get_list_from_str_or_list(["asdf"]) -> ["asdf"] 6 | """ 7 | if isinstance(str_or_list, str): 8 | aliases = [str_or_list] 9 | elif isinstance(str_or_list, list): 10 | aliases = str_or_list 11 | else: 12 | raise Exception(f"Unknown typ: {type(str_or_list)}") 13 | return aliases 14 | -------------------------------------------------------------------------------- /docs/instructions/RESOLVE_MERGE_CONFLICTS.md: -------------------------------------------------------------------------------- 1 | # Resolve Merge Conflicts 2 | 3 | Review: 4 | - Git status 5 | - the recent/relevant Git history 6 | - planning docs for any relevant pieces of work 7 | - the details of the merge conflict itself 8 | - relevant code & docs, starting with relevant planning documents 9 | 10 | Look for a way to preserve the best of both worlds. 11 | 12 | Do you feel confident about how to resolve the merge conflict? Ask if you have questions. 13 | 14 | Ultrathink. 15 | 16 | Make a proposal. Don't make changes yet. 17 | -------------------------------------------------------------------------------- /.cursor/rules/README.md: -------------------------------------------------------------------------------- 1 | I've found the Cursor rules to be a bit unreliable, so I've moved all of them to straight Markdown .md docs in `docs/`. 2 | 3 | My current approach is to create lots of small rules and explicitly reference one or more of them explicitly, but I'm sure this will evolve over time. 4 | 5 | e.g. 6 | 7 | - Debug problem X, following @scientistic_detective_mode.mdc 8 | 9 | - Do X, following `coding_principles.md`. Be in `sounding_board_mode.md`. See also `testing_python.md` 10 | 11 | - Write a planning doc for X, following `WRITE_PLANNING_DOC.md`. 
-------------------------------------------------------------------------------- /src/gjdutils/cli/pypi/app.py: -------------------------------------------------------------------------------- 1 | """PyPI-related CLI commands.""" 2 | 3 | import typer 4 | from rich.console import Console 5 | 6 | from .check import app as check_app 7 | from .deploy import app as deploy_app 8 | 9 | app = typer.Typer( 10 | help="PyPI package management commands", 11 | add_completion=True, 12 | no_args_is_help=True, 13 | context_settings={"help_option_names": ["-h", "--help"]}, 14 | ) 15 | 16 | # Add subcommand groups to main app 17 | app.add_typer(check_app, name="check") 18 | app.add_typer(deploy_app, name="deploy") 19 | 20 | console = Console() 21 | -------------------------------------------------------------------------------- /docs/instructions/SOUNDING_BOARD_MODE.md: -------------------------------------------------------------------------------- 1 | Investigate the codebase, search the web if you need to, ask questions to clarify requirements if you need to, raise concerns if you have them, consider the desiderata/criteria for success, suggest alternatives, weigh up options and trade-offs, point out if you think the user is wrong or see a better way. 2 | 3 | Do everything you can to help the user to think this through, and make the best decision. 4 | 5 | When asking questions, ask at most a couple at a time, to avoid overwhelming cognitive overload for the user. 6 | 7 | Ultrathink. 8 | 9 | Don't make changes yet. -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | from typer.testing import CliRunner 2 | from gjdutils.cli.main import app 3 | from gjdutils.__version__ import __version__ 4 | 5 | runner = CliRunner() 6 | 7 | 8 | def test_version(): 9 | result = runner.invoke(app, ["version"]) 10 | assert result.exit_code == 0 11 | assert result.stdout.strip() == __version__ 12 | 13 | 14 | def test_help(): 15 | result = runner.invoke(app, ["--help"]) 16 | assert result.exit_code == 0 17 | assert "GJDutils CLI" in result.stdout 18 | assert "version" in result.stdout 19 | assert "git-clean" in result.stdout 20 | -------------------------------------------------------------------------------- /docs/instructions/HOUSEKEEPING_OLD_PLANNING_DOC.md: -------------------------------------------------------------------------------- 1 | Read the planning doc, and relevant code & docs. 2 | 3 | This is an old planning doc. We might have basically finished it. Is there anything important remaining? 4 | 5 | - If so, stop and let's discuss. 6 | 7 | - If not: 8 | - Update documentation if needed 9 | - Follow instructions in docs/instructions/RENAME_OR_MOVE.md to move the planning doc to planning/finished/ and update references to the new path with a subagent (if available) 10 | - Commit this set of changes in a single commit (otherwise following docs/instructions/GIT_COMMIT_CHANGES.md ). -------------------------------------------------------------------------------- /docs/instructions/GIT_CREATE_BRANCH.md: -------------------------------------------------------------------------------- 1 | If the user hasn't provided info about what the branch will be for, stop and ask them. 2 | 3 | Decide on a short phrase, based on the task defined by the user, as the branch name, e.g. 
`refactor_blah_for_foo` 4 | 5 | Run this in a subagent (if available): 6 | - Check that we're on the main branch (typically `main` or `master`) or another appropriate base branch - if not, double-check with the user before continuing. 7 | - Generate date prefix using `npx tsx src/ts/cli/sequential-datetime-prefix.ts .` and prepend to the short-phrase branch-name 8 | - Then create that as a new branch -------------------------------------------------------------------------------- /docs/instructions/DETECTIVE_SCIENTIST_MODE.md: -------------------------------------------------------------------------------- 1 | # Scientist-detective mode 2 | 3 | Your job is to help understand what's going on, and why things are happening the way they are, and perhaps inform a solution. 4 | 5 | Be a combination of detective (sniffing around for clues) and scientist (systematically considering and testing possible explanations). 6 | 7 | Gather information, generate hypotheses, look for ways to test/confirm/disconfirm those hypotheses (starting with the most likely and easiest to test). 8 | 9 | Ask questions of the user if that will clarify (about the problem or the goals) or help you to gather more information. 10 | 11 | Don't make changes unless instructed, or do anything risky/destructive. -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2022", 4 | "module": "commonjs", 5 | "lib": ["ES2022"], 6 | "outDir": "./dist/ts", 7 | "rootDir": "./src/ts", 8 | "strict": true, 9 | "esModuleInterop": true, 10 | "skipLibCheck": true, 11 | "forceConsistentCasingInFileNames": true, 12 | "resolveJsonModule": true, 13 | "declaration": true, 14 | "declarationMap": true, 15 | "sourceMap": true, 16 | "removeComments": false, 17 | "noUnusedLocals": true, 18 | "noUnusedParameters": true, 19 | "noImplicitReturns": true, 20 | "noFallthroughCasesInSwitch": true 21 | }, 22 | "include": ["src/ts/**/*"], 23 | "exclude": ["node_modules", "dist"] 24 | } -------------------------------------------------------------------------------- /src/gjdutils/misc.py: -------------------------------------------------------------------------------- 1 | from .dicts import print_dict 2 | from .typ import isfunction 3 | 4 | 5 | def print_locals( 6 | d: dict, ignore_functions: bool = True, ignore_underscores: bool = True 7 | ): 8 | """ 9 | e.g. print_locals(locals()) 10 | """ 11 | 12 | def del_robust(k): 13 | if k in d: 14 | del d[k] 15 | 16 | assert isinstance(d, dict) 17 | for k in d.keys(): 18 | if ignore_functions and isfunction(d[k]): 19 | del_robust(k) 20 | if ignore_underscores and k.startswith("_"): 21 | del_robust(k) 22 | return print_dict(d) 23 | 24 | 25 | def identity_func(x): 26 | return x 27 | 28 | 29 | def empty_func(*args, **kwargs): 30 | return None 31 | -------------------------------------------------------------------------------- /src/gjdutils/prompt_templates.py: -------------------------------------------------------------------------------- 1 | # each of these is a Jinja template. see gjdutils.strings.jinja_render() 2 | 3 | summarise_text = """ 4 | Summarise the following. Be as concise, concrete, and easy to understand as you can. Provide only the summary itself, without any superfluous conversation, commentary, markup, etc. 
{{ granularity }} 5 | 6 | ---- 7 | {{ txt }} 8 | """ 9 | 10 | 11 | # UNTESTED 12 | summarise_list_of_texts_as_one = """ 13 | Summarise the whole of the following list. Be as concise, concrete, and easy to understand as you can. Provide only the summary itself, without any superfluous conversation or commentary etc. {{ granularity }} 14 | 15 | ---- 16 | {% for txt in txts %} 17 | - {{txt}} 18 | {% endfor %} 19 | ---- 20 | """ 21 | -------------------------------------------------------------------------------- /docs/instructions/DO_EXECUTE_PLANNING_DOC.md: -------------------------------------------------------------------------------- 1 | Now work on the next stage of the planning doc (prioritising work suggested by the user). 2 | 3 | If the planning doc is currently stored in `planning/later/` or `planning/discarded/`, first move it to `planning/` (to signal that it's in progress). 4 | 5 | Start by reading relevant code & docs. 6 | 7 | Ask if you need to clarify anything, or if you have concerns. 8 | 9 | Use tasks and subagents (provided with lots of context), e.g. for curl/Puppeteer/Playwright MCP (preferring Puppeteer), running tests, and any other encapsulated tasks. Follow instructions in TASKS_SUBAGENTS.md 10 | 11 | Always stop to review with the user at the end of a stage. If a stage is massive, then stop partway through. Output as per DEBRIEF_PROGRESS.md -------------------------------------------------------------------------------- /src/gjdutils/runtime.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | # keep this, because it makes sense for the user to be able to import this from here 4 | from gjdutils.pytest_utils import in_pytest 5 | 6 | 7 | def in_notebook() -> bool: 8 | # from https://stackoverflow.com/q/15411967 9 | try: 10 | shell = get_ipython().__class__.__name__ # type: ignore 11 | if shell == "ZMQInteractiveShell": 12 | return True # Jupyter notebook or qtconsole 13 | elif shell == "TerminalInteractiveShell": 14 | return False # Terminal running IPython 15 | else: 16 | return False # Other type (?) 
17 | except NameError: 18 | return False # Probably standard Python interpreter 19 | 20 | 21 | def in_colab(): 22 | return "google.colab" in sys.modules 23 | -------------------------------------------------------------------------------- /tests/test_num.py: -------------------------------------------------------------------------------- 1 | from pytest import approx, raises 2 | 3 | from gjdutils.num import discretise 4 | 5 | 6 | def test_discretise(): 7 | assert discretise(0.00, increment=0.05) == approx(0.00) 8 | assert discretise(0.01, increment=0.05) == approx(0.00) 9 | assert discretise(0.06, increment=0.05) == approx(0.05) 10 | assert discretise(0.99, increment=0.05) == approx(0.95) 11 | assert discretise(1.00, increment=0.05) == approx(1.00) 12 | # check values outside the range 13 | with raises(Exception): 14 | discretise(5.00, increment=0.05) 15 | with raises(Exception): 16 | discretise(-1.00, increment=0.05) 17 | assert discretise(-1.00, increment=0.05, enforce_range=False) == approx(0.00) 18 | assert discretise(5.00, increment=0.05, enforce_range=False) == approx(1.00) 19 | -------------------------------------------------------------------------------- /src/gjdutils/todo/convert_parquet.py: -------------------------------------------------------------------------------- 1 | # USAGE: 2 | # python convert_parquet.py my_parquet_file.parquet 3 | # 4 | # based on https://chat.openai.com/c/ea3e9401-e7bb-4288-b270-83b0fb327abe 5 | # 6 | # pip install pandas openpyxl pyarrow 7 | 8 | import sys 9 | import pandas as pd 10 | 11 | # Replace 'your_file.parquet' with the path to your Parquet file 12 | # parquet_file = 'vary_amount_of_training_data__adult_sexual__aps.parquet' 13 | parquet_file = sys.argv[1] 14 | 15 | assert parquet_file.endswith(".parquet") 16 | 17 | # Read the Parquet file 18 | df = pd.read_parquet(parquet_file) 19 | 20 | # Replace 'output_file.xlsx' with the desired output file name 21 | output_file = parquet_file.replace('.parquet', '.xlsx') 22 | 23 | # Write to an Excel file 24 | df.to_excel(output_file, index=False) 25 | 26 | print(f"Wrote to {output_file}") 27 | 28 | 29 | -------------------------------------------------------------------------------- /src/gjdutils/print_utils.py: -------------------------------------------------------------------------------- 1 | from gjdutils.functions import variable_from_caller 2 | 3 | 4 | def vprint(min_verbosity: int, msg: str, /, **kwargs): 5 | """Print if the caller's verbosity level is >= min_verbosity, e.g. 6 | 7 | verbose = 2 8 | vprint(1, "Hello, world!") # prints because verbose >= 1 9 | 10 | This function looks for a `verbose` variable in the caller's scope. 
11 | 12 | Args: 13 | min_verbosity: Minimum verbosity level required to print (positional-only) 14 | **kwargs: Arguments to pass to print() 15 | 16 | Raises: 17 | ValueError: If 'verbose' variable is not found in caller's scope 18 | """ 19 | # get the `verbose` variable from the caller context 20 | verbose = variable_from_caller("verbose") 21 | 22 | if verbose >= min_verbosity: 23 | print(msg, **kwargs) 24 | -------------------------------------------------------------------------------- /tests/test_rand.py: -------------------------------------------------------------------------------- 1 | from pytest import raises 2 | from gjdutils.rand import assert_valid_readable_rand_id 3 | 4 | 5 | def test_check_valid_readable_rand_id(): 6 | # Test case: Valid ID with default parameters 7 | id_ = "abc234" 8 | assert_valid_readable_rand_id(id_) 9 | 10 | # Test case: Valid ID with specified number of characters 11 | assert_valid_readable_rand_id(id_, nchars=6) 12 | 13 | # Test case: Invalid ID with specified number of characters 14 | with raises(AssertionError): 15 | assert_valid_readable_rand_id(id_, nchars=8) 16 | 17 | assert_valid_readable_rand_id(id_, valid_chars="cba4321") 18 | 19 | # Test case: Invalid ID with default valid characters (doesn't allow '1') 20 | with raises(AssertionError): 21 | assert_valid_readable_rand_id(id_="abc123") 22 | # Test case: Invalid ID with specified valid characters 23 | with raises(AssertionError): 24 | assert_valid_readable_rand_id(id_, valid_chars="xyz123") 25 | -------------------------------------------------------------------------------- /src/gjdutils/scripts/export_envs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # https://stackoverflow.com/a/20909045/230523 4 | # e.g. source scripts/export_envs.sh .env && echo $TESTME 5 | 6 | # this can't be a Python script, because the whole point is to 7 | # add environment variables to the current shell, and Unix won't 8 | # let a script manipulate its parent environment. 9 | # 10 | # see pyproject.toml for how `install.py` automatically gets called 11 | # by `pip install` 12 | 13 | # Check if the script is being sourced 14 | if [ "$0" = "$BASH_SOURCE" ]; then 15 | echo "Error: This script needs to be sourced. Please run: source $0 " 16 | exit 1 17 | fi 18 | 19 | if [ $# -eq 0 ]; then 20 | echo "Error: Environment file path is required" 21 | echo "Usage: source $0 " 22 | return 1 23 | fi 24 | 25 | ENV_FILE="$1" 26 | 27 | if [ ! -f "$ENV_FILE" ]; then 28 | echo "Error: File '$ENV_FILE' does not exist" 29 | return 1 30 | fi 31 | 32 | export $(grep -v '^#' "$ENV_FILE" | xargs) -------------------------------------------------------------------------------- /src/gjdutils/colab.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from .runtime import in_colab 4 | 5 | # https://stackoverflow.com/a/53586419/230523 6 | IN_COLAB = in_colab() 7 | # also specified in authortools_demo.ipynb 8 | GOOGLE_DRIVE_MOUNT_PATH = "/content/drive" 9 | GOOGLE_DRIVE_OUTPUT_PATH = os.path.join( 10 | GOOGLE_DRIVE_MOUNT_PATH, 11 | "Shareddrives", 12 | "Blah", # TODO 13 | ) 14 | 15 | 16 | def colab_path_if_needed(filen: str): 17 | """ 18 | Prepend the Google Drive mount path for Colab if IN_COLAB is True. 
19 | """ 20 | if IN_COLAB: 21 | filen = os.path.join(GOOGLE_DRIVE_OUTPUT_PATH, filen) 22 | return filen 23 | 24 | 25 | def set_css_for_colab(): 26 | from IPython.display import HTML, display 27 | 28 | # from https://stackoverflow.com/a/61401455/230523 29 | display( 30 | HTML( 31 | """ 32 | 37 | """ 38 | ) 39 | ) 40 | -------------------------------------------------------------------------------- /src/gjdutils/cli/main.py: -------------------------------------------------------------------------------- 1 | import typer 2 | 3 | from gjdutils.shell import fatal_error_msg 4 | from .pypi import app as pypi_app 5 | from .check_git_clean import check_git_clean 6 | 7 | app = typer.Typer( 8 | help="GJDutils CLI - utility functions for data science, AI, and web development", 9 | add_completion=True, 10 | no_args_is_help=True, 11 | context_settings={"help_option_names": ["-h", "--help"]}, 12 | ) 13 | 14 | # Add PyPI commands 15 | app.add_typer(pypi_app, name="pypi") 16 | 17 | 18 | @app.command() 19 | def version(): 20 | """Show gjdutils version""" 21 | from gjdutils.__version__ import __version__ 22 | 23 | typer.echo(f"{__version__}") 24 | 25 | 26 | @app.command() 27 | def git_clean(): 28 | """Check if git working directory is clean""" 29 | check_git_clean() 30 | 31 | 32 | @app.command() 33 | def export_envs(): 34 | fatal_error_msg( 35 | "Exporting envs can't be run from Python - you have to run `source gjd-export-envs [ENV_FILE]`" 36 | ) 37 | 38 | 39 | if __name__ == "__main__": 40 | app() 41 | -------------------------------------------------------------------------------- /tests/test_embeddings_openai.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | 4 | from gjdutils.embeddings_openai import get_openai_embeddings, compare_embedding_query 5 | 6 | 7 | @pytest.mark.skipif( 8 | not os.environ.get("OPENAI_API_KEY"), 9 | reason="Requires OPENAI_API_KEY to call OpenAI embeddings API", 10 | ) 11 | def test_cosine_similarity_three_sentences(): 12 | texts = [ 13 | "The quick brown fox jumps over the lazy dog.", 14 | "A quick brown fox leaps over a lazy dog.", # semantically similar 15 | "The stock market closed higher today after strong earnings.", 16 | ] 17 | 18 | embs, extra = get_openai_embeddings(texts, model="text-embedding-3-small") 19 | 20 | # Compare sentence 0 vs others (list inputs) 21 | scores, _ = compare_embedding_query(embs[0], embs, metric="cosine") 22 | # scores[0] is self-similarity (~1.0). Interested in indices 1 and 2. 23 | assert scores[1] > scores[2], ( 24 | f"Expected similar sentence to have higher cosine similarity: {scores[1]=} vs {scores[2]=}" 25 | ) 26 | 27 | 28 | -------------------------------------------------------------------------------- /docs/instructions/THIRD_PARTY_LIBRARY_SELECTION.md: -------------------------------------------------------------------------------- 1 | # Third-Party Library Selection 2 | 3 | ## Selection Criteria 4 | 5 | - **IMPORTANT** Long-lasting community, lots of docs/discussion/examples (so there will be lots of pretraining data to help LLM coding models). 6 | - Well-designed API: Intuitive, composable, type-safe (for TypeScript). 7 | - Plus any other criteria from the user. 8 | 9 | 10 | ## Process 11 | - Understand requirements first. see `SOUNDING_BOARD_MODE.md`. Ask questions if you need to clarify. 12 | - Read relevant code/docs to understand the relevant technology stack & architecture for this project. 13 | - Run `date` to get today's date, so you can judge recency. 
14 | - Search the web. Evaluate options, tradeoffs. 15 | - Usually prefer the most recent stable version, but also consider whether an older, more popular version with more pretraining data would be a better fit. Discuss with the user if the answer isn't obvious. 16 | - Make a recommendation. 17 | - Discuss with user. 18 | - Write a doc describing the chosen library, as per `WRITE_DEEP_DIVE_DOC.md`. 19 | 20 | -------------------------------------------------------------------------------- /docs/reference/TESTING_PYTHON.md: -------------------------------------------------------------------------------- 1 | # Automated testing 2 | 3 | Always run tests with `pytest` (rather than `python -m blah`). Most of the time it's better to just run relevant tests, and only occasionally run all of them (e.g. after making major/wide-ranging changes). 4 | 5 | If you've run the tests recently, use the `-x` and `--lf` flags, so that we zero in on the failing tests and avoid too much output. 6 | 7 | Test functions should always be called `test_*.py`. Avoid creating test-related utility or fixture functions called `test_*.py` to avoid confusion with actual tests. 8 | 9 | Write a test before writing new code. Run relevant tests after changing code. 10 | 11 | Keep tests simple and readable. Start with testing the simple cases. Make a proposal and ask the user whether edge cases are important. 12 | 13 | Aim to reuse fixtures and sample data. 14 | 15 | When changing tests, make minimal changes that are directly relevant to the task at hand. 16 | 17 | If there is a `docs/TESTING.md`, or `docs/FRONTEND_TESTING.md`, `docs/BACKEND_TESTING.md`, treat them as more important than these instructions. -------------------------------------------------------------------------------- /src/gjdutils/decorators.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | from rich.console import Console 3 | from typing import Callable, TypeVar, Any, cast 4 | 5 | console = Console() 6 | 7 | F = TypeVar("F", bound=Callable[..., Any]) 8 | 9 | 10 | def console_print_doc(color: str = "blue") -> Callable[[F], F]: 11 | """ 12 | A decorator that prints the docstring of a function when it starts running. 13 | The entire docstring will be printed in the specified color. 14 | 15 | Args: 16 | color (str): Color for the docstring text. Defaults to "blue". 17 | 18 | Example: 19 | @console_print_doc(color="green") 20 | def my_function(): 21 | "This entire docstring will be green" 22 | pass 23 | """ 24 | 25 | def decorator(func: F) -> F: 26 | @wraps(func) 27 | def wrapper(*args: Any, **kwargs: Any) -> Any: 28 | if func.__doc__: 29 | console.print(func.__doc__.strip(), style=color) 30 | return func(*args, **kwargs) 31 | 32 | return cast(F, wrapper) 33 | 34 | return decorator 35 | -------------------------------------------------------------------------------- /src/gjdutils/typ.py: -------------------------------------------------------------------------------- 1 | # keep this, because it makes sense for the user to be able to import from here 2 | from inspect import isfunction 3 | 4 | # these are occasionally useful for if statements, 5 | # though probably better to rely on type-hinting wherever possible 6 | 7 | 8 | def isint(f, tol=0.00000001): 9 | """ 10 | Takes in a float F, and checks that it's within TOL of floor(f). 
11 | """ 12 | # we're casting to float before the comparison with TOL 13 | # so that decimal Fs work 14 | return abs(float(f) - int(f)) <= 0.00000001 15 | 16 | 17 | def isnum(n): 18 | try: 19 | float(n) 20 | return True 21 | except: 22 | return False 23 | 24 | 25 | def is_same_sign(x1, x2): 26 | if x1 > 0 and x2 > 0: 27 | return True 28 | if x1 < 0 and x2 < 0: 29 | return True 30 | 31 | 32 | def isiterable(x): 33 | """ 34 | from http://stackoverflow.com/questions/1952464/in-python-how-do-i-determine-if-a-variable-is-iterable 35 | """ 36 | import collections.abc 37 | 38 | return isinstance(x, collections.abc.Iterable) 39 | -------------------------------------------------------------------------------- /docs/instructions/dev/PULL_PUSH_LOCAL_GJDUTILS_REPOS.md: -------------------------------------------------------------------------------- 1 | # Pull/Push local `gjdutils` repos — concise prompt 2 | 3 | Paste this prompt to rerun the workflow: 4 | 5 | ``` 6 | Find all directories named "gjdutils" under $HOME/Dropbox/dev and $HOME/dev (exclude paths containing , `.venv`, `site-packages`). 7 | 8 | For each found directory that is a git repo: 9 | 10 | - First, detect uncommitted changes. Print a flat list "REPOS WITH UNCOMMITTED CHANGES" (paths with $HOME abbreviated as ~), then pause for my decision. If I reply "skip" (default), skip these repos for pull/push. 11 | 12 | - Switch ALL found repos' GitHub remotes (any remote) from HTTPS to SSH, format: git@github.com:OWNER/NAME.git. Do this even for skipped/dirty repos. 13 | 14 | - For the remaining clean (non-skipped) repos: run git pull --ff-only --no-rebase, then git push. Do not attempt interactive auth; rely on SSH keys. 15 | 16 | At the end, print a concise summary with three sections: 17 | 1) REPOS WITH UNCOMMITTED CHANGES 18 | 2) PULL/PUSH RESULTS (per repo) 19 | 3) SKIPPED REPOS 20 | 4) ANY OTHER NOTES, QUESTIONS OR PROBLEMS 21 | ``` 22 | 23 | 24 | -------------------------------------------------------------------------------- /src/gjdutils/hashing.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import hashlib 3 | 4 | 5 | def hash_readable(s, n=10): 6 | """ 7 | Returns a string hash that contains base32 characters instead of a number, 8 | to make it more readable (and still low risk of collisions if you truncate it). 9 | 10 | e.g. hash_readable('hello') => 'vl2mmho4yx' 11 | 12 | Unlike Python's default hash function, this should be deterministic 13 | across sessions (because we're using 'hashlib'). 14 | 15 | I'm using this for anonymising email addresses if I don't have a user UUID. 16 | """ 17 | if isinstance(s, str): 18 | s = bytes(s, "utf-8") 19 | hashed = hashlib.sha1(s).digest() 20 | b32 = base64.b32encode(hashed)[:n] 21 | return b32.decode("utf-8").lower() 22 | 23 | 24 | def hash_consistent(obj): 25 | """ 26 | Supposedly gives the same response every time you call it, even after restarting the kernel. 27 | 28 | N.B. This is based on output from GitHub Copilot, and I haven't tried it. 
29 | """ 30 | obj_str = str(obj) 31 | hash_obj = hashlib.sha256(obj_str.encode()).hexdigest() 32 | return hash_obj 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Greg Detre 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/gjdutils/voice_speechrecognition.py: -------------------------------------------------------------------------------- 1 | # from https://github.com/Uberi/speech_recognition/blob/master/examples/microphone_recognition.py 2 | 3 | #!/usr/bin/env python3 4 | 5 | # NOTE: this example requires PyAudio because it uses the Microphone class 6 | 7 | from typing import Optional 8 | import speech_recognition as sr 9 | from gjdutils.env import get_env_var 10 | 11 | 12 | def recognise_speech(display: Optional[str], verbose: int = 0): 13 | """ 14 | Recognises speech from the microphone and returns the transcribed text. 15 | 16 | Press ENTER when you've finished recording. 17 | 18 | Designed for command-line use. 19 | """ 20 | openai_api_key = get_env_var("OPENAI_API_KEY") 21 | if display: 22 | print(display, end="", flush=True) 23 | # obtain audio from the microphone 24 | r = sr.Recognizer() 25 | with sr.Microphone() as source: 26 | audio = r.listen(source) 27 | print("... 
PROCESSING") 28 | text = r.recognize_whisper(audio, api_key=openai_api_key) 29 | if verbose > 0: 30 | print(text) 31 | return text 32 | 33 | 34 | if __name__ == "__main__": 35 | recognise_speech("Say something!", verbose=1) 36 | -------------------------------------------------------------------------------- /tests/test_print_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from gjdutils.print_utils import vprint 3 | 4 | 5 | def test_vprint_basic(): 6 | verbose = 2 7 | # Should print 8 | vprint(1, "test1", end="") # Empty print 9 | vprint(1, "test2", sep=" ", end="\n") # Print with kwargs 10 | vprint(2, "test3", end="") # Exact match 11 | 12 | # Should not print 13 | vprint(3, "test4", end="") 14 | 15 | 16 | def test_vprint_missing_verbose(): 17 | # Should raise ValueError when verbose is not defined 18 | with pytest.raises(ValueError) as exc_info: 19 | vprint(0, "test", end="") 20 | assert "verbose" in str(exc_info.value) 21 | assert "not found in caller function" in str(exc_info.value) 22 | 23 | 24 | def test_vprint_args_validation(): 25 | verbose = 1 26 | # Should raise TypeError when passing extra positional args 27 | with pytest.raises(TypeError) as exc_info: 28 | vprint(1, "msg", "extra arg") # type: ignore 29 | assert "positional argument" in str(exc_info.value) 30 | 31 | # Should raise TypeError when required positional args are missing 32 | with pytest.raises(TypeError) as exc_info: 33 | vprint(msg="A message", min_verbosity=1) # type: ignore 34 | assert "missing 2 required positional arguments" in str(exc_info.value) 35 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "gjdutils-ts", 3 | "version": "1.0.0", 4 | "description": "TypeScript utilities for gjdutils - general-purpose scripts and CLI tools", 5 | "main": "dist/ts/index.js", 6 | "types": "dist/ts/index.d.ts", 7 | "scripts": { 8 | "build": "tsc", 9 | "watch": "tsc --watch", 10 | "clean": "rm -rf dist/ts" 11 | }, 12 | "keywords": [ 13 | "utilities", 14 | "cli", 15 | "typescript", 16 | "development-tools", 17 | "git-worktrees", 18 | "llm-tools" 19 | ], 20 | "author": "Greg Detre", 21 | "license": "MIT", 22 | "dependencies": { 23 | "clipanion": "^3.2.1" 24 | }, 25 | "devDependencies": { 26 | "@types/node": "^20.0.0", 27 | "tsx": "^4.0.0", 28 | "typescript": "^5.0.0" 29 | }, 30 | "bin": { 31 | "sequential-datetime-prefix": "./dist/ts/cli/sequential-datetime-prefix.js", 32 | "extract-llm-conversation": "./dist/ts/cli/extract-llm-conversation.js", 33 | "llm-critique-planning-docs": "./dist/ts/critique/llm-critique-planning-docs.js", 34 | "parse-llm-output": "./dist/ts/critique/parse-llm-output.js", 35 | "count-lines": "./dist/ts/scripts/count-lines.js", 36 | "git-worktree-sync": "./dist/ts/scripts/git-worktree-sync.js", 37 | "git-worktree-sync-all": "./dist/ts/scripts/git-worktree-sync-all.js" 38 | } 39 | } -------------------------------------------------------------------------------- /docs/instructions/AUDIT_ARCHITECTURE_MODE.md: -------------------------------------------------------------------------------- 1 | Perform a technical audit/review. 2 | 3 | - If a feature/area/question/file has been mentioned, use that to guide your investigation. 4 | - If a planning doc has been mentioned, check whether recent changes (e.g. recent related Git commits, and also uncommitted changes) implement the planning doc correctly. 
5 | - Look for bugs, gotchas, potential problems 6 | - Is there anything you would refactor (e.g. too-large files/functions, near-duplicated code that could be reused), or architectural best practices we should use? 7 | - Anything else you notice that could be improved? 8 | - Zoom out to consider whether the overall strategy/approach is sound. 9 | 10 | For background, read relevant docs if they exist (they may have different names), e.g.: 11 | - `README.md` 12 | - `PRODUCT_VISION_FEATURES.md` 13 | - `CODING_PRINCIPLES_GUIDELINES.md` 14 | - `docs/instructions/WRITE_EVERGREEN_DOC.md` 15 | - `docs/instructions/WRITE_PLANNING_DOC.md` 16 | - and any other relevant docs 17 | 18 | Don't make changes. Just investigate, discuss. 19 | 20 | Output: 21 | - Prioritise recommendations based on a combination of ease and value. 22 | - Indicate how important each finding is and why, whether there's an obvious fix or multiple options, and how complex/risky you expect it to be. 23 | 24 | Ultrathink. 25 | -------------------------------------------------------------------------------- /docs/instructions/TASKS_SUBAGENTS.md: -------------------------------------------------------------------------------- 1 | # Tasks and Subagents 2 | 3 | Use subagents where appropriate: 4 | - They are especially valuable as a way to avoid filling up your context window, e.g. for running a battery of tests, Playwright/curl/browser automation, MCPs with verbose output 5 | - They are also a good fit for encapsulated & well-defined tasks, i.e. tasks that don't need the full context of the conversation so far, and/or where we only need a summary of what was done in order to proceed 6 | - Use subagents in parallel where possible (because this is faster), but only if there isn't a dependency between tasks (e.g. the output of this one is useful as input for the next) 7 | - Give them lots of background/careful instructions so that they can make good decisions, e.g. about goals, point them to relevant docs/code, what we've been changing, gotchas & things to avoid, relevant environment variables like $PORT for browser automation, using your test framework, the current date/time from `date`, and anything else that will help them to be effective but correct/careful. 8 | - Tell subagents what to be cautious of, and to abort and provide feedback on what happened if there are problems or surprises (to avoid them going rogue and doing more harm than good) 9 | 10 | Use the task/todo list when you have more than a couple of things to track, or where ordering matters, when there are subagents involved, or if you think it will help. 11 | -------------------------------------------------------------------------------- /docs/reference/CODING_SHELL_SCRIPTS.md: -------------------------------------------------------------------------------- 1 | # Shell Script Guidelines 2 | 3 | ## General Principles 4 | 5 | - Keep things simple and readable 6 | - Prefer Python scripts over shell for longer scripts 7 | - Keep scripts minimal, concise, and focused on a single task 8 | - Break long main functions into sub-functions to make it easy to follow the logic 9 | - Prefer to show the full tracebacks & error messages, to give the user full information. Minimise try/except. 10 | - Fail explicitly and loudly, e.g. 
, and use `set -e` in bash scripts to exit on error 11 | 12 | ## Coding details 13 | - Scripts live in `scripts/` 14 | - Make scripts executable with `chmod +x` 15 | - Use `#!/bin/bash` or `#!/usr/bin/env python3` shebang lines 16 | - Use python `Typer` if command-line arguments are needed 17 | - Use `cmd.py` functionality, e.g. `run_cmd()` 18 | - If there is overlapping functionality, maybe move it into `src/shell.py` or somewhere else reusable 19 | - Use colors for better readability (green for success, yellow for warnings, red for errors) 20 | - Show progress for long-running operations 21 | - If it will make it easier to see what the Python is doing in a script, include a comment showing the equivalent shell commands, e.g.: 22 | ```python 23 | # i.e. rm -rf dist/ 24 | shutil.rmtree("dist", ignore_errors=True) 25 | ``` 26 | 27 | 28 | ## Examples 29 | 30 | See `scripts/check_locally.py` for an example following most of these guidelines. 31 | -------------------------------------------------------------------------------- /src/gjdutils/audios.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional 3 | 4 | 5 | def play_mp3(mp3_filen: str, prog: str = "cli", speed: Optional[float] = None): 6 | prog = prog.lower().strip() 7 | full_mp3_filen = os.path.abspath(os.path.expanduser(mp3_filen)) 8 | if prog == "vlc": 9 | # pip install python-vlc 10 | import vlc 11 | 12 | vlc_mp3_filen = os.path.join("file://", full_mp3_filen) 13 | p = vlc.MediaPlayer(vlc_mp3_filen) 14 | if speed is not None: 15 | p.set_rate(speed) # type: ignore 16 | p.play() # type: ignore 17 | elif prog == "pygame": 18 | assert speed is None, "Not implemented speed for pygame" 19 | import pygame 20 | 21 | pygame.init() 22 | pygame.mixer.init() 23 | pygame.mixer.music.load(full_mp3_filen) 24 | pygame.mixer.music.play() 25 | pygame.event.wait() 26 | elif prog == "playsound": 27 | # maybe set to 1.2.2 if you're having trouble installing 28 | from playsound import playsound 29 | 30 | assert speed is None, "Playsound doesn't support changing speed" 31 | # https://stackoverflow.com/a/63147250/230523 32 | playsound(mp3_filen) 33 | elif prog == "cli": 34 | cmd = f"afplay -r {speed} '{full_mp3_filen}'" 35 | # print(cmd) 36 | os.system(cmd) 37 | else: 38 | raise Exception(f"Unknown PROG '{prog}'") 39 | return full_mp3_filen 40 | -------------------------------------------------------------------------------- /src/gjdutils/functions.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | from typing import Any 3 | 4 | 5 | def func_name(): 6 | # https://stackoverflow.com/a/13514318/230523 7 | return inspect.currentframe().f_back.f_code.co_name # type: ignore 8 | 9 | 10 | def variable_from_caller(var_name: str, frame_depth: int = 1) -> Any: 11 | """Get a variable from the caller's frame. 
12 | 13 | Args: 14 | var_name: Name of the variable to retrieve 15 | frame_depth: How many frames to go back (default: 1 for immediate caller) 16 | 17 | Raises: 18 | ValueError: If the variable doesn't exist in the caller's scope 19 | """ 20 | frame = inspect.currentframe() 21 | try: 22 | # Go back the specified number of frames 23 | for _ in range(frame_depth + 1): 24 | if frame.f_back is None: # type: ignore 25 | raise ValueError(f"Cannot go back {frame_depth} frames") 26 | frame = frame.f_back # type: ignore 27 | 28 | if var_name not in frame.f_locals: # type: ignore 29 | caller_name = frame.f_code.co_name # type: ignore 30 | raise ValueError( 31 | f"Variable '{var_name}' not found in caller function '{caller_name}'. " 32 | f"Make sure to define a '{var_name}' parameter or variable." 33 | ) 34 | 35 | return frame.f_locals[var_name] # type: ignore 36 | finally: 37 | # Clean up circular references 38 | del frame 39 | -------------------------------------------------------------------------------- /src/gjdutils/collection_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Literal, Sequence, TypeVar 2 | 3 | T = TypeVar("T") 4 | 5 | 6 | def found_one(lst: Sequence[T]) -> T | Literal[False]: 7 | """ 8 | e.g. 9 | >>> found_one([]) 10 | False 11 | >>> found_one([10]) 12 | 10 13 | >>> found_one([10, 20]) 14 | False 15 | """ 16 | if len(lst) == 0: 17 | return False 18 | elif len(lst) == 1: 19 | found = lst[0] 20 | assert found is not False, "Too confusing - we found something, but it's False" 21 | return found 22 | else: 23 | return False 24 | 25 | 26 | def find_duplicates(lst: Sequence[T]) -> list[T]: 27 | return [item for item in set(lst) if lst.count(item) > 1] 28 | 29 | 30 | # def uniquify(items: Sequence[T], key: Callable[[T], Any] | None = None) -> list[T]: 31 | # this would be useful if you wanted to uniquify something non-hashable, but I couldn't get it to work 32 | # https://www.perplexity.ai/search/in-python-unique-version-of-a-5r0iCRlBSjm2Dv6HGLu_6g 33 | # return list(OrderedDict.fromkeys(map(key, items) if key else items)) 34 | 35 | 36 | def uniquify(items: Sequence[T]) -> list[T]: 37 | # https://www.perplexity.ai/search/unique-version-of-a-list-prese-qYpae.JBRDedvHdmEyOqfA 38 | # seen = set() 39 | # return [x for x in lst if not (x in seen or seen.add(x))] 40 | 41 | # https://www.w3resource.com/python-exercises/list-advanced/python-list-advanced-exercise-8.php 42 | return list(dict.fromkeys(items)) 43 | -------------------------------------------------------------------------------- /docs/instructions/DEBRIEF_PROGRESS.md: -------------------------------------------------------------------------------- 1 | # Debrief Progress 2 | 3 | Update the relevant planning doc for this work if there is one. (If you don't know, there probably isn't, and you can safely ignore this suggestion.) 4 | 5 | Report out loud on how this work is going. Any issues/surprises/complexity? 6 | 7 | What's left to do? How complex will it be? What do you think about the cost/benefit ratio? 8 | 9 | ## Questions to Address 10 | 11 | ### Current Status 12 | - What has been completed successfully? 13 | - What challenges or surprises have emerged? 14 | - Are we on track with the original plan? 15 | 16 | ### Technical Assessment 17 | - How is the code quality holding up? 18 | - Are there any technical debt concerns? 19 | - What patterns or approaches are working well? 20 | 21 | ### Scope & Priority 22 | - What remains to be done? 
23 | - How complex will the remaining work be? 24 | - Should priorities be adjusted based on what we've learned? 25 | 26 | ### Value Assessment 27 | - Is the cost/benefit ratio still justified? 28 | - Are there simpler approaches that would achieve similar value? 29 | - Should we continue, pivot, or pause this work? 30 | 31 | ## Documentation Updates 32 | 33 | If there's a planning document for this work: 34 | 1. Update the progress/status section 35 | 2. Note any scope changes or new understanding 36 | 3. Adjust timelines if needed 37 | 4. Document any decisions or pivots made 38 | 39 | If there's no planning document but this is significant work, consider creating one to track the remaining effort. -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # gjdutils Documentation 2 | 3 | This documentation collection provides templates and workflows for highly effective AI-assisted programming, developed June 2025. 4 | 5 | ## Quick Start 6 | 7 | - **[WORKFLOW.md](WORKFLOW.md)** - Complete workflow for AI-assisted development using Claude Code or Cursor 8 | 9 | ## Documentation Structure 10 | 11 | ### 📋 [instructions/](instructions/) 12 | Commands, modes, and processes for AI-assisted development: 13 | - **Modes**: `SOUNDING_BOARD_MODE.md`, `SCIENTIST_DETECTIVE_MODE.md`, `SURGEON_MODE.md` 14 | - **Planning**: `WRITE_PLANNING_DOC.md`, `DO_PLANNING_DOC.md`, `CRITIQUE_OF_PLANNING_DOC.md` 15 | - **Documentation**: `WRITE_EVERGREEN_DOC.md`, `WRITE_DEEP_DIVE_AS_DOC.md` 16 | - **Maintenance**: `UPDATE_HOUSEKEEPING_DOCUMENTATION.md`, `DEBRIEF_PROGRESS.md` 17 | - **Development**: `CODING_PRINCIPLES.md`, `GIT_COMMITS.md` 18 | 19 | ### 📚 [reference/](reference/) 20 | gjdutils-specific reference documentation: 21 | - **Development**: `TESTING_PYTHON.md`, `CODING_SHELL_SCRIPTS.md` 22 | - **Organization**: `DOCUMENTATION_ORGANISATION.md` 23 | 24 | ## About gjdutils 25 | 26 | This is part of the [gjdutils project](../README.md) - a collection of: 27 | 1. AI-assisted programming workflows (this documentation) 28 | 2. Python utilities and scripts ([src/gjdutils/](../src/gjdutils/)) 29 | 3. TypeScript CLI tools and development utilities ([src/ts/](../src/ts/)) 30 | 31 | These templates have been battle-tested in real development projects and are designed to work with both Claude Code and Cursor. 
-------------------------------------------------------------------------------- /docs/instructions/CODING_PRINCIPLES.md: -------------------------------------------------------------------------------- 1 | # Coding Principles 2 | 3 | ## Core Philosophy 4 | - Prioritise code that's simple, easy-to-understand, debuggable, and readable 5 | - Fix the root cause rather than putting on a band-aid 6 | - Avoid fallbacks & defaults - better to fail if input assumptions aren't being met 7 | 8 | ## Error Handling 9 | - Raise errors early, clearly & fatally 10 | - Prefer not to wrap in try/except so that tracebacks are obvious 11 | 12 | ## Development Approach 13 | - Don't try to write a full, final version immediately 14 | - Get a simple version working end-to-end first, then gradually layer in complexity in stages 15 | - Aim to keep changes minimal and focused on the task at hand 16 | - Try to keep things concise, don't over-engineer 17 | 18 | ## Best Practices 19 | - Follow software engineering best practices: 20 | - Reuse code where it makes sense 21 | - Pull out core reusable functionality into utility functions 22 | - Break long/complex functions down 23 | - Write code that's easy to test, prefer functional style 24 | - Avoid object-oriented unless it's a particularly good fit 25 | - Keep documentation up-to-date as you go 26 | 27 | ## Collaboration 28 | - If the user asks you a question, answer it directly, and stop work on other tasks until consensus has been reached 29 | - If you notice other things that should be changed/updated, ask/suggest 30 | - If things don't make sense or seem like a bad idea, ask questions or discuss rather than just going along with it 31 | - Be a good collaborator and help make good decisions, rather than just obeying blindly 32 | 33 | ## External Dependencies 34 | - When picking 3rd-party libraries, prefer ones with large communities 35 | 36 | ## Comments 37 | - Comment sparingly - reserve it for explaining surprising or confusing sections 38 | -------------------------------------------------------------------------------- /src/gjdutils/cli/check_git_clean.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from rich.console import Console 4 | from gjdutils.shell import fatal_error_msg 5 | from gjdutils.cmd import run_cmd 6 | 7 | console = Console() 8 | 9 | 10 | def check_git_clean(): 11 | """Check if git working directory is clean.""" 12 | # Check for unstaged changes 13 | retcode, stdout, _ = run_cmd("git diff --quiet", check=False) 14 | if retcode != 0: 15 | _, diff_output, _ = run_cmd("git --no-pager diff --stat") 16 | fatal_error_msg("Unstaged changes present:\n" + diff_output) 17 | 18 | # Check for staged but uncommitted changes 19 | retcode, stdout, _ = run_cmd("git diff --cached --quiet", check=False) 20 | if retcode != 0: 21 | _, diff_output, _ = run_cmd("git --no-pager diff --cached --stat") 22 | # fatal_error_msg("Uncommitted staged changes present:\n" + diff_output) 23 | if ( 24 | input( 25 | f"\n{diff_output}\n\nAre you sure you want to deploy with staged but uncommited files? (y/N): " 26 | ).lower() 27 | != "y" 28 | ): 29 | fatal_error_msg("Deployment cancelled by user because of untracked files") 30 | 31 | # Check for untracked files 32 | _, untracked, _ = run_cmd("git ls-files --others --exclude-standard") 33 | if untracked.strip(): 34 | if ( 35 | input( 36 | f"\nAre you sure you want to deploy with untracked files: {untracked}? 
(y/N): " 37 | ).lower() 38 | != "y" 39 | ): 40 | fatal_error_msg("Deployment cancelled by user because of untracked files") 41 | 42 | console.print("[green]Git: clean[/green]") 43 | 44 | 45 | def main(): 46 | console.rule("[yellow]Checking Git Status") 47 | check_git_clean() 48 | 49 | 50 | if __name__ == "__main__": 51 | main() 52 | -------------------------------------------------------------------------------- /src/gjdutils/shell.py: -------------------------------------------------------------------------------- 1 | """Shell and command-line utilities.""" 2 | 3 | from pathlib import Path 4 | import shutil 5 | import sys 6 | import venv 7 | from contextlib import contextmanager 8 | from typing import Optional, Union 9 | 10 | 11 | @contextmanager 12 | def temp_venv(path: Union[str, Path]): 13 | """Create and manage a temporary virtualenv. 14 | 15 | Args: 16 | path: Path where the virtualenv should be created 17 | 18 | Yields: 19 | Path to the Python executable in the virtualenv 20 | 21 | Example: 22 | ```python 23 | with temp_venv("/tmp/my-venv") as python_path: 24 | run_cmd([python_path, "-m", "pip", "install", "some-package"]) 25 | ``` 26 | """ 27 | path = Path(path) 28 | 29 | # Clean up any existing venv first 30 | if path.exists(): 31 | shutil.rmtree(path) 32 | 33 | venv.create(path, with_pip=True) 34 | 35 | # Get the correct python executable path for this venv 36 | if sys.platform == "win32": 37 | python_path = path / "Scripts" / "python.exe" 38 | else: 39 | python_path = path / "bin" / "python" 40 | 41 | try: 42 | yield python_path 43 | finally: 44 | if path.exists(): 45 | shutil.rmtree(path) 46 | 47 | 48 | def fatal_error_msg(msg: str, stderr: Optional[str] = None) -> None: 49 | """Print a fatal error message and exit with code 1. 50 | 51 | Args: 52 | msg: The error message to display 53 | stderr: Optional stderr output to display after the message 54 | 55 | Example: 56 | ```python 57 | if result.returncode != 0: 58 | fatal_error_msg("Failed to build package", result.stderr) 59 | ``` 60 | """ 61 | from rich.console import Console 62 | 63 | console = Console() 64 | 65 | console.print(f"[red]{msg}[/red]") 66 | if stderr: 67 | console.print(stderr) 68 | sys.exit(1) 69 | -------------------------------------------------------------------------------- /src/gjdutils/rand.py: -------------------------------------------------------------------------------- 1 | import random 2 | from typing import Optional 3 | import uuid 4 | 5 | DEFAULT_RANDOM_SEED = 42 6 | READABLE_RAND_CHARS = "23456789abcdefghjkmnprstuvwxyz" 7 | 8 | 9 | def gen_uuid(n: int = 8): 10 | # e.g. '161b58a4d8' 11 | return str(uuid.uuid4()).replace("-", "")[:n] 12 | 13 | 14 | def gen_readable_rand_id(n: int = 7, valid: Optional[list | str] = None): 15 | # build an N-digit random string with letters or numbers 16 | # excluding o,0,i,1,l,q (too similar to g) for easy reading 17 | # 18 | # this isn't officially a UUID 19 | if valid is None: 20 | valid = READABLE_RAND_CHARS 21 | return "".join([random.choice(valid) for _ in range(n)]) 22 | 23 | 24 | def assert_valid_readable_rand_id( 25 | id_: str, nchars: Optional[int] = None, valid_chars: Optional[str] = None 26 | ): 27 | """ 28 | Check if ID_ is a valid readable random id, as generated by GEN_READABLE_UUID. 
29 | """ 30 | assert id_, "id_ cannot be empty" 31 | if nchars is not None: 32 | assert len(id_) == nchars, f"len({id_}) != {nchars}" 33 | if valid_chars is None: 34 | valid_chars = READABLE_RAND_CHARS 35 | assert all([c in valid_chars for c in id_]), f"Invalid character in {id_}" 36 | 37 | 38 | def shuffle_copy(lst): 39 | lst2 = lst.copy() 40 | random.shuffle(lst2) 41 | return lst2 42 | 43 | 44 | def set_seeds(seed: int = DEFAULT_RANDOM_SEED): 45 | # TODO if SEED is None, make it dynamic 46 | random.seed(seed) 47 | 48 | try: 49 | import numpy as np 50 | 51 | np.random.seed(seed) 52 | except ImportError: 53 | pass 54 | 55 | try: 56 | import torch # type: ignore 57 | 58 | torch.manual_seed(seed) 59 | torch.cuda.manual_seed(seed) 60 | torch.cuda.manual_seed_all(seed) 61 | # If you are using cudnn (a GPU-accelerated library for deep neural networks) 62 | torch.backends.cudnn.deterministic = True 63 | 64 | except ImportError: 65 | pass 66 | -------------------------------------------------------------------------------- /src/gjdutils/pytest_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | 5 | def in_pytest(check_modules=True, check_env=True): 6 | """Detect whether code is currently running within a pytest environment. 7 | 8 | This function uses two different methods to check if we're running in pytest: 9 | 1. Checking if pytest is in sys.modules (if check_modules=True) 10 | 2. Checking if PYTEST_CURRENT_TEST is in environment variables (if check_env=True) 11 | 12 | Args: 13 | check_modules (bool, optional): Whether to check sys.modules for pytest. Defaults to True. 14 | check_env (bool, optional): Whether to check environment variables for PYTEST_CURRENT_TEST. Defaults to True. 15 | 16 | Returns: 17 | bool: True if all enabled checks confirm we're in pytest, False if none do. 18 | 19 | Raises: 20 | AssertionError: If both check_modules and check_env are False. 21 | RuntimeError: If some checks are True and others False, indicating an ambiguous state. 22 | """ 23 | assert check_modules or check_env, "At least one check must be performed" 24 | checks = [] 25 | if check_modules: 26 | # https://stackoverflow.com/a/44595269/230523 27 | # 28 | # "Of course, this solution only works if the code you're trying to test does not use pytest itself. 29 | mod_bool = "pytest" in sys.modules 30 | checks.append(mod_bool) 31 | 32 | if check_env: 33 | # from https://stackoverflow.com/a/58866220/230523 34 | # 35 | # "This method works only when an actual test is being run. 36 | # "This detection will not work when modules are imported during pytest collection. 37 | env_bool = "PYTEST_CURRENT_TEST" in os.environ 38 | checks.append(env_bool) 39 | 40 | if all(checks): 41 | return True 42 | elif not any(checks): 43 | return False 44 | else: 45 | raise RuntimeError( 46 | "It's unclear whether we're in a unit test - it might be part of the pytest setup, or you might have imported pytest as part of your main codebase." 
47 | ) 48 | -------------------------------------------------------------------------------- /tests/test_env_integration.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import tempfile 4 | from pathlib import Path 5 | import pytest 6 | from typing import Generator 7 | 8 | from gjdutils.env import get_env_var 9 | 10 | 11 | @pytest.fixture 12 | def temp_env_file(tmp_path): 13 | """Create a temporary .env file for testing.""" 14 | with tempfile.NamedTemporaryFile(mode="w", suffix=".env", delete=False) as f: 15 | f.write("GJDUTILS_TEST_STR=hello\n") 16 | f.write("GJDUTILS_TEST_INT=42\n") 17 | path = Path(f.name) 18 | yield path 19 | path.unlink() 20 | 21 | 22 | def test_export_envs_script(temp_env_file): 23 | """Test that export_envs.sh behaves correctly when executed vs sourced.""" 24 | script_path = Path("src/gjdutils/scripts/export_envs.sh") 25 | 26 | # Test direct execution fails 27 | result = subprocess.run( 28 | [str(script_path), str(temp_env_file)], capture_output=True, text=True 29 | ) 30 | assert result.returncode != 0 31 | assert "needs to be sourced" in result.stdout 32 | 33 | # Test sourcing works and sets variables 34 | result = subprocess.run( 35 | [ 36 | "bash", 37 | "-c", 38 | f"source {script_path} {temp_env_file} && echo $GJDUTILS_TEST_STR", 39 | ], 40 | capture_output=True, 41 | text=True, 42 | ) 43 | assert result.returncode == 0 44 | assert "hello" in result.stdout 45 | 46 | 47 | def test_get_env_var(): 48 | """Test get_env_var functionality with different types.""" 49 | # Set test variables directly in Python's environment (we can't use 50 | # export_envs.sh here because it doesn't work with subprocesses) 51 | os.environ["GJDUTILS_TEST_STR"] = "hello" 52 | os.environ["GJDUTILS_TEST_INT"] = "42" 53 | 54 | # Test string and int validation 55 | assert get_env_var("GJDUTILS_TEST_STR") == "hello" 56 | assert get_env_var("GJDUTILS_TEST_INT", typ=int) == 42 57 | 58 | # Test error cases 59 | with pytest.raises(ValueError, match="Missing required environment variable"): 60 | get_env_var("NONEXISTENT") 61 | -------------------------------------------------------------------------------- /src/gjdutils/obsolete/google_text_to_speech.py: -------------------------------------------------------------------------------- 1 | """ 2 | Synthesizes speech from the input string of text or ssml. 3 | Make sure to be working in a virtual environment. 4 | https://cloud.google.com/text-to-speech/docs/libraries 5 | """ 6 | 7 | from google.cloud import texttospeech 8 | 9 | 10 | def outloud(text: str, language_code: str = "en-GB", bot_gender=None): 11 | bot_gender = bot_gender.lower() if bot_gender else None 12 | # not all genders supported for all languages. 
see https://cloud.google.com/text-to-speech/docs/voices 13 | if bot_gender is None or bot_gender == "neutral": 14 | bot_gender = texttospeech.SsmlVoiceGender.NEUTRAL 15 | elif bot_gender in ["female", texttospeech.SsmlVoiceGender.FEMALE]: 16 | bot_gender = texttospeech.SsmlVoiceGender.FEMALE 17 | elif bot_gender in ["male", texttospeech.SsmlVoiceGender.MALE]: 18 | bot_gender = texttospeech.SsmlVoiceGender.MALE 19 | else: 20 | # gender = texttospeech.SsmlVoiceGender.SSML_VOICE_GENDER_UNSPECIFIED 21 | raise Exception(f"Unknown gender: {bot_gender}") 22 | # Instantiates a client 23 | client = texttospeech.TextToSpeechClient() 24 | 25 | # Set the text input to be synthesized 26 | synthesis_input = texttospeech.SynthesisInput(text=text) 27 | # synthesis_input = texttospeech.SynthesisInput(text="Bonjour, Monsieur Natterbot!") 28 | # synthesis_input = texttospeech.SynthesisInput(text="Γεια σου, Natterbot!") 29 | 30 | # Build the voice request, select the language code ("en-US") and the ssml 31 | # voice gender ("neutral") 32 | voice = texttospeech.VoiceSelectionParams( 33 | language_code=language_code, ssml_gender=bot_gender 34 | ) # e.g. 'en-GB' 35 | 36 | # Select the type of audio file you want returned 37 | audio_config = texttospeech.AudioConfig( 38 | audio_encoding=texttospeech.AudioEncoding.MP3 39 | ) 40 | 41 | # Perform the text-to-speech request on the text input with the selected 42 | # voice parameters and audio file type 43 | response = client.synthesize_speech( 44 | input=synthesis_input, voice=voice, audio_config=audio_config 45 | ) 46 | return response 47 | -------------------------------------------------------------------------------- /src/gjdutils/scripts/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import stat 4 | from pathlib import Path 5 | import sys 6 | import typer 7 | 8 | 9 | def get_script_install_path() -> Path: 10 | """Get the installation path for our scripts""" 11 | # This will typically be something like /usr/local/bin or ~/.local/bin 12 | if sys.prefix == sys.base_prefix: 13 | # System Python installation 14 | if os.access("/usr/local/bin", os.W_OK): 15 | return Path("/usr/local/bin") 16 | return Path(os.path.expanduser("~/.local/bin")) 17 | else: 18 | # Virtual environment 19 | return Path(sys.prefix) / "bin" 20 | 21 | 22 | def install_export_envs(): 23 | """Install the export_envs.sh script to the appropriate location""" 24 | try: 25 | # Get our package's installed location 26 | package_dir = Path(__file__).parent 27 | source_script = package_dir / "export_envs.sh" 28 | 29 | if not source_script.exists(): 30 | # Try to find it in the shared data location 31 | source_script = Path(sys.prefix) / "bin" / "export_envs.sh" 32 | if not source_script.exists(): 33 | typer.echo(f"Error: Could not find export_envs.sh script", err=True) 34 | raise typer.Exit(1) 35 | 36 | # Get the target installation directory 37 | install_dir = get_script_install_path() 38 | 39 | # Create the directory if it doesn't exist 40 | install_dir.mkdir(parents=True, exist_ok=True) 41 | 42 | # Copy the script 43 | target_script = install_dir / "gjdutils-export-envs" 44 | shutil.copy2(source_script, target_script) 45 | 46 | # Make it executable 47 | target_script.chmod(target_script.stat().st_mode | stat.S_IEXEC) 48 | 49 | typer.echo(f"Installed gjdutils-export-envs to {target_script}") 50 | typer.echo("\nTo use this script, you need to source it:") 51 | typer.echo(" source gjdutils-export-envs .env") 52 | 53 | except 
Exception as e: 54 | typer.echo(f"Error installing script: {e}", err=True) 55 | raise typer.Exit(1) 56 | 57 | 58 | if __name__ == "__main__": 59 | typer.run(install_export_envs) 60 | -------------------------------------------------------------------------------- /src/gjdutils/stopwatch.py: -------------------------------------------------------------------------------- 1 | ######################################################## 2 | # from http://effbot.org/librarybook/timing.htm 3 | # File: timing-example-2.py 4 | # 5 | # copied from ~/fmri/distpat/trunk/users/greg/context/time_context.py 6 | 7 | """ 8 | This is my wrapper for the time module. There's probably an 9 | easier way to time the duration of things, but when I looked 10 | into timing stuff, this was the best I could come up with... 11 | 12 | To use: 13 | 14 | t = Stopwatch() 15 | # t.start() 16 | 17 | # do something 18 | 19 | elapsed = t.finish() 20 | """ 21 | 22 | import time 23 | 24 | 25 | class Stopwatch: 26 | """ 27 | Creates stopwatch timer objects. 28 | """ 29 | 30 | # stores the time the stopwatch was started 31 | t0 = None 32 | 33 | # stores the time the stopwatch was last looked at 34 | t1 = None 35 | 36 | def __init__(self, do_start: bool = True): 37 | self.t0 = 0 38 | self.t1 = 0 39 | if do_start: 40 | self.start() 41 | 42 | def start(self): 43 | """ 44 | Stores the current time in t0. 45 | """ 46 | 47 | self.t0 = time.time() 48 | 49 | def finish(self, milliseconds: bool = True): 50 | """ 51 | Returns the elapsed duration in milliseconds. This 52 | stores the current time in t1, and calculates the 53 | difference between t0 (the stored start time) and 54 | t1, so if you call this multiple times, you'll get a 55 | larger answer each time. 56 | 57 | You have to call this in order to update t1. 58 | """ 59 | 60 | self.t1 = time.time() 61 | return self.milli() if milliseconds else self.seconds() 62 | 63 | def seconds(self): 64 | """ 65 | Returns t1 - t0 in seconds. Does not update t1. 66 | """ 67 | return int(self.t1 - self.t0) 68 | 69 | def milli(self): 70 | """ 71 | Returns t1 - t0 in milliseconds. Does not update t1. 72 | """ 73 | return int((self.t1 - self.t0) * 1000) 74 | 75 | def micro(self): 76 | """ 77 | Returns t1 - t0 in microseconds. Does not update t1. 78 | """ 79 | return int((self.t1 - self.t0) * 1000000) 80 | -------------------------------------------------------------------------------- /docs/reference/DOCUMENTATION_ORGANISATION.md: -------------------------------------------------------------------------------- 1 | # Documentation organisation 2 | 3 | ## References 4 | 5 | - `../README.md` 6 | - `../instructions/WRITE_EVERGREEN_DOC.md` 7 | 8 | 9 | ## Evergreen documentation Reference 10 | 11 | (Written based on `../instructions/WRITE_EVERGREEN_DOC.md`) 12 | 13 | Available evergreen documentation in `docs/` - here are the most useful gjdutils templates. 
14 | 
15 | Coding & infrastructure:
16 | - `../instructions/CODING_PRINCIPLES.md` - Outlines development principles prioritising simplicity, readability, debugging, and rapid prototyping for early-stage development
17 | - `CODING_SHELL_SCRIPTS.md` - Guidelines for writing shell scripts and command-line utilities
18 | - `TESTING_PYTHON.md` - Testing approach documentation covering Python test framework setup, test structure, and coverage
19 | 
20 | 
21 | Docs, modes, and admin:
22 | - `../instructions/GIT_COMMIT_CHANGES.md` - Guidelines for Git commit best practices including batching changes, message format, and handling concurrent changes
23 | - `../instructions/SOUNDING_BOARD_MODE.md` - Instructions for collaborative discussion mode emphasising asking questions and suggesting alternatives rather than immediate implementation
24 | - `../instructions/DETECTIVE_SCIENTIST_MODE.md` - Methodical investigation approach for complex debugging and analysis
25 | - `../instructions/SURGEON_MODE.md` - Precise, minimal-change approach for critical fixes
26 | - `../instructions/WRITE_EVERGREEN_DOC.md` - Guidelines for writing evergreen documentation including structure, cross-references, status indicators, and maintenance practices
27 | - `../instructions/WRITE_PLANNING_DOC.md` - Guide for writing planning/project management documents with file naming conventions, structure, and stage-based action plans
28 | - `../instructions/UPDATE_HOUSEKEEPING_DOCUMENTATION.md` - Process for keeping project documentation up-to-date including review steps, update patterns, and quality checklist
29 | - `../instructions/DEBRIEF_PROGRESS.md` - Process for reviewing and documenting project progress and decisions
30 | - `../instructions/RESEARCH_POTENTIAL_LIBRARY_CHANGES_GOTCHAS.md` - Guide for researching and evaluating new libraries and dependencies
31 | 
32 | ## Planning docs
33 | 
34 | (Written based on `../instructions/WRITE_PLANNING_DOC.md`)
35 | 
36 | Recent planning decisions & progress-tracking of major features: `planning/*.md` (project-specific location)
37 | 
-------------------------------------------------------------------------------- /src/gjdutils/web.py: --------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from urllib import parse as urlparse
3 | import webbrowser
4 | 
5 | from gjdutils.strings import PathOrStr
6 | 
7 | 
8 | def webbrowser_open(filen: PathOrStr, browser=None):
9 |     """
10 |     For some reason, the default webbrowser.open() doesn't work for me, so you may want to set browser to 'chrome'
11 |     """
12 |     # I had an issue where it refused to open a non .html file
13 |     assert str(filen).endswith(".html"), "File must end with .html"
14 |     if browser:
15 |         browser = webbrowser.get(browser)
16 |     else:
17 |         browser = webbrowser
18 |     full_filen = f"file://{Path.cwd() / filen}"
19 |     return browser.open(full_filen)
20 | 
21 | 
22 | def trunc_url(url):
23 |     """
24 |     e.g.
'http://www.guardian.co.uk/blah/ -> /blah 25 | 26 | Based on dev/guardian/data/data/greg/sharedwisdom/sharedwisdom/models.py 27 | """ 28 | # URLPARSE returns ParseResult(scheme='http', 29 | # netloc='memrise.com', 30 | # path='/blah.png', 31 | # params='', 32 | # query='q1=x&q2=y', 33 | # fragment='') 34 | scheme, netloc, path, params, query, fragment = urlparse.urlparse(url) 35 | # ditch the SCHEME and NETLOC 36 | # PARAMS are an arcane part that comes after a semi-colon 37 | return path # + params 38 | 39 | 40 | def validate_request_args(args, defaults): 41 | """ 42 | DEFAULTS = dict of allowed query parameters, with the keys 43 | being the allowed query-string-parameter-keys and values 44 | as their defaults. 45 | """ 46 | if args: 47 | unexpecteds = set(args.keys()) - set(defaults.keys()) 48 | assert not unexpecteds, "Unexpected key(s): %s" % unexpecteds 49 | # params = {key: args.get(key, default) 50 | # for key, default in defaults.items()} 51 | params = defaults | args 52 | return params 53 | 54 | 55 | def query_string_from_dict(d): 56 | return "?" + "&".join(["%s=%s" % (k, v) for k, v in d.items()]) 57 | 58 | 59 | def params_from_request(request): 60 | try: 61 | if request.json: 62 | params_in = request.json.get("params", {}) 63 | else: 64 | params_in = dict(request.values) 65 | return params_in 66 | except: 67 | print("Error in PARAMS_FROM_REQUEST") 68 | return {} 69 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .nox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | .pytest_cache/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | db.sqlite3 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # IPython 77 | profile_default/ 78 | ipython_config.py 79 | 80 | # pyenv 81 | .python-version 82 | 83 | # celery beat schedule file 84 | celerybeat-schedule 85 | 86 | # SageMath parsed files 87 | *.sage.py 88 | 89 | # Environments 90 | .env 91 | .env.* 92 | .venv 93 | env/ 94 | venv/ 95 | ENV/ 96 | env.bak/ 97 | venv.bak/ 98 | 99 | # Spyder project settings 100 | .spyderproject 101 | .spyproject 102 | 103 | # Rope project settings 104 | .ropeproject 105 | 106 | # mkdocs documentation 107 | /site 108 | 109 | # mypy 110 | .mypy_cache/ 111 | .dmypy.json 112 | dmypy.json 113 | 114 | # Pyre type checker 115 | .pyre/ 116 | 117 | # added by GD 118 | .#* 119 | .DS_Store 120 | .pypirc 121 | 122 | mcp.json 123 | 124 | # Claude Code local settings 125 | .claude/settings.local.json 126 | 127 | # Node.js / JavaScript / TypeScript 128 | node_modules/ 129 | npm-debug.log* 130 | yarn-debug.log* 131 | yarn-error.log* 132 | *.tsbuildinfo 133 | .npm 134 | .eslintcache 135 | .node_repl_history 136 | *.tgz 137 | *.tar.gz 138 | 139 | # TypeScript 140 | *.js.map 141 | *.d.ts.map 142 | 143 | # IDE files 144 | .vscode/ 145 | .idea/ 146 | *.swp 147 | *.swo 148 | -------------------------------------------------------------------------------- /src/gjdutils/jsons.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Optional 3 | 4 | 5 | def jsonify(x): 6 | def json_dumper_robust(obj): 7 | try: 8 | return obj.toJSON() 9 | except AttributeError: 10 | # Object doesn't have toJSON method, try string conversion 11 | try: 12 | return str(obj) 13 | except (ValueError, TypeError): 14 | # If string conversion fails, return None to skip this field 15 | return None 16 | 17 | return json.dumps(x, sort_keys=True, indent=4, default=json_dumper_robust) 18 | 19 | 20 | # from o-1 21 | # class RobustJSONEncoder(json.JSONEncoder): 22 | # def __init__(self, *args, **kwargs): 23 | # self.seen = set() 24 | # super().__init__(*args, **kwargs) 25 | 26 | # def default(self, obj): 27 | # if id(obj) in self.seen: 28 | # return None # Replace circular references with None or a placeholder 29 | # self.seen.add(id(obj)) 30 | # try: 31 | # return obj.toJSON() 32 | # except: 33 | # try: 34 | # return str(obj) 35 | # except: 36 | # return None 37 | 38 | 39 | # def jsonify(x): 40 | # return json.dumps(x, cls=RobustJSONEncoder, sort_keys=True, indent=4) 41 | 42 | 43 | def to_json( 44 | inps: list, 45 | fields: Optional[list] = None, 46 | skip_if_missing: bool = False, 47 | skip_empties: bool = True, 48 | max_str_len: Optional[int] = 1000, 49 | ) -> str: 50 | """ 51 | Convert a list of dicts to a JSON string, with only the fields we want, 52 | and in the same order as FIELDS. 
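
    For example (illustrative, given the defaults above):

        to_json([{"name": "ada", "note": ""}], fields=["name", "note"])
        # -> '[\n  {\n    "name": "ada"\n  }\n]'  ("note" dropped because skip_empties=True)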
53 | """ 54 | if fields is None: 55 | fields = [] 56 | outs = [] 57 | for inp in inps: 58 | if fields is None: 59 | fields = inp.keys() 60 | # we want to make sure to return a dict with only the fields we want, 61 | # and in the same order as FIELDS 62 | out = {} 63 | for k in fields: # type: ignore 64 | if skip_if_missing and (k not in inp): 65 | continue 66 | v = inp[k] # will error if missing and !SKIP_IF_MISSING 67 | if skip_empties and (v is None or v == ""): 68 | continue 69 | if max_str_len and isinstance(v, str) and len(v) > max_str_len: 70 | v = v[:max_str_len] + "..." 71 | out[k] = v 72 | outs.append(out) 73 | outs_j = json.dumps(outs, indent=2) 74 | return outs_j 75 | -------------------------------------------------------------------------------- /src/gjdutils/env.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | from typing import Any, TypeVar, cast 4 | from pydantic import StrictStr, TypeAdapter 5 | 6 | from gjdutils.print_utils import vprint 7 | 8 | T = TypeVar("T") 9 | _processed_vars = set() 10 | 11 | 12 | # You may find it useful to run `python -m gjdutils.scripts.export_envs .env` to first 13 | # export all the variables in your .env file to your environment. 14 | 15 | 16 | def get_env_var(name: str, typ: Any = StrictStr, verbose: int = 0) -> T: 17 | """Get environment variable with type validation, e.g. 18 | 19 | OPENAI_API_KEY = get_env_var("OPENAI_API_KEY") 20 | NUM_WORKERS = get_env_var("NUM_WORKERS", typ=int) 21 | 22 | Args: 23 | name: Name of environment variable 24 | type_: Pydantic type to validate against (default: StrictStr for non-empty string) 25 | 26 | Returns: 27 | The validated value with the specified type 28 | 29 | Raises: 30 | ValueError: If variable is missing or fails validation 31 | """ 32 | vprint(1, f"Attempting to get environment variable: {name}") 33 | vprint(2, f"Current environment variables: {list(os.environ.keys())}") 34 | try: 35 | value = os.environ[name] 36 | _processed_vars.add(name) 37 | 38 | # Use TypeAdapter for validation 39 | adapter = TypeAdapter(typ) 40 | validated = adapter.validate_python(value) 41 | 42 | # Return validated value directly 43 | return cast(T, validated) 44 | except KeyError: 45 | raise ValueError(f"Missing required environment variable: {name}") 46 | except Exception as e: 47 | raise ValueError(f"Invalid value for {name}: {e}") 48 | 49 | 50 | def list_env_example_vars(env_example_filen: Path) -> set[str]: 51 | """Get set of required variables from .env.example. 52 | 53 | Args: 54 | env_example_filen: Path to the .env.example file 55 | 56 | Returns: 57 | Set of environment variable names found in the file 58 | """ 59 | assert env_example_filen.exists(), f"Missing env example file: {env_example_filen}" 60 | 61 | required_vars = set() 62 | with env_example_filen.open() as f: 63 | for line in f: 64 | line = line.strip() 65 | # Skip comments and empty lines 66 | if not line or line.startswith("#"): 67 | continue 68 | # Get variable name (everything before =) 69 | var_name = line.split("=")[0].strip() 70 | required_vars.add(var_name) 71 | 72 | return required_vars 73 | -------------------------------------------------------------------------------- /src/gjdutils/num.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | # this doesn't support numpy's numeric types, but it's a good stopgap for now. 
4 | # there doesn't appear to be a perfect, agreed solution
5 | #
6 | # https://stackoverflow.com/questions/60616802/how-to-type-hint-a-generic-numeric-type-in-python
7 | Numeric = Union[int, float]
8 | 
9 | 
10 | def percent(num, denom):
11 |     return (100 * (num / float(denom))) if denom else 0
12 | 
13 | 
14 | def percent_str(num, denom):
15 |     return str(percent(num, denom)) + "%"
16 | 
17 | 
18 | def discretise(
19 |     val,
20 |     increment: Union[int, float] = 0.1,
21 |     lower: Union[int, float] = 0.0,
22 |     upper: Union[int, float] = 1.0,
23 |     enforce_range: bool = True,
24 | ):
25 |     """
26 |     You will probably want to cache this.
27 |     """
28 |     import numpy as np
29 |     import pandas as pd
30 | 
31 |     def calc_increments(increment, lower, upper):
32 |         assert (
33 |             lower <= increment <= upper
34 |         ), f"Required: {lower:.2f} <= {increment:.2f} <= {upper:.2f}"
35 |         # e.g. for lower=0, upper=1, increment_size=0.05, nincrements=21
36 |         nincrements = int((upper - lower) / increment) + 1
37 |         # e.g. for lower=0, upper=1, increment_size=0.05, increments = [0., 0.05, 0.1, ..., 0.95, 1. ]
38 |         increments = np.linspace(lower, upper, nincrements)
39 |         return increments
40 | 
41 |     if pd.isnull(val):
42 |         return upper
43 |     if enforce_range and (val < lower or val > upper):
44 |         raise ValueError(
45 |             f"Value {val:.2f} is outside the valid range [{lower:.2f}, {upper:.2f}]"
46 |         )
47 |     increments = calc_increments(increment, lower, upper)
48 |     if val < lower:
49 |         return increments[0]
50 |     if val > upper:
51 |         return increments[-1]
52 |     idx = np.digitize(val, increments)
53 |     # e.g.
54 |     # 0.00 -> 0.0
55 |     # 0.01 -> 0.0
56 |     # 0.06 -> 0.05
57 |     # 0.99 -> 0.95
58 |     # 1.00 -> 1.0
59 |     discretised = increments[idx - 1]
60 |     return discretised
61 | 
62 | 
63 | def ordinal(n: int):
64 |     """
65 |     e.g. 1 -> "1st", 103 -> "103rd"
66 |     """
67 |     # from https://claude.ai/chat/87fad336-e0fa-4074-aed4-f4e57ed20bb7
68 | 
69 |     # TESTED:
70 |     # for i in [0, 1, 2, 3, 4, 10, 11, 12, 13, 22, 78, 103, 103231, 103235]:
71 |     #     print(i, ordinal(i))
72 |     assert n >= 0
73 |     if 10 <= n % 100 <= 20:
74 |         suffix = "th"
75 |     else:
76 |         suffix = {1: "st", 2: "nd", 3: "rd"}.get(n % 10, "th")
77 |     return f"{n}{suffix}"
78 | 
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # gjdutils
2 | 
3 | A comprehensive toolkit for highly effective AI-assisted programming, containing three complementary resource sets:
4 | 
5 | ## 🤖 AI-Assisted Programming Templates
6 | 
7 | **[docs/](docs/)** - Battle-tested workflows and instructions for AI development
8 | 
9 | A complete collection of templates, modes, and processes for working effectively with AI coding assistants like Claude Code and Cursor. These have been refined through real-world usage and provide:
10 | 
11 | - **Structured workflows** for planning, implementing, and maintaining code
12 | - **Specialized modes** for different types of development work (sounding board, detective, surgeon)
13 | - **Documentation practices** that scale with your project
14 | - **Git workflows** optimized for AI collaboration
15 | 
16 | **Quick start**: See [docs/WORKFLOW.md](docs/WORKFLOW.md) for the complete development workflow.
17 | 18 | ## 🐍 Python Utilities 19 | 20 | **[src/gjdutils/](src/gjdutils/)** - Production-ready Python utilities and tools 21 | 22 | A curated collection of Python utilities for common development tasks including: 23 | 24 | - Data science helpers and collection utilities 25 | - LLM integration tools (Claude, OpenAI) 26 | - File processing and automation scripts 27 | - Development workflow tools 28 | 29 | **Quick start**: `pip install gjdutils` (see [src/gjdutils/README.md](src/gjdutils/README.md)) 30 | 31 | ## ⚒️ TypeScript Utilities 32 | 33 | **[src/ts/](src/ts/)** - Cross-platform TypeScript tools and CLI utilities 34 | 35 | A collection of battle-tested TypeScript utilities ported from real-world development workflows: 36 | 37 | - **CLI tools** for date generation, conversation extraction, and code analysis 38 | - **Git worktree sync** scripts for managing parallel development branches 39 | - **LLM critique tools** for automated planning document review 40 | - **Development utilities** like line counting and file processing 41 | 42 | **Quick start**: See [src/ts/README.md](src/ts/README.md) for installation and usage examples 43 | 44 | ## Why gjdutils? 45 | 46 | All components are designed around the principle of **making AI-assisted development more effective**: 47 | 48 | - The **templates** provide the structure and process for working with AI assistants 49 | - The **Python utilities** handle data science and LLM integration tasks 50 | - The **TypeScript utilities** provide cross-platform development workflow tools 51 | 52 | Whether you're building prototypes or production systems, gjdutils helps you work faster and more reliably with AI assistance. 53 | 54 | ## License 55 | 56 | MIT License - see [LICENSE](LICENSE) for details. -------------------------------------------------------------------------------- /tests/test_git_status.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pathlib import Path 3 | from gjdutils.cli.check_git_clean import check_git_clean 4 | from gjdutils.shell import fatal_error_msg 5 | from gjdutils.cmd import run_cmd 6 | 7 | 8 | @pytest.fixture 9 | def temp_git_repo(tmp_path): 10 | """Create a temporary git repo for testing.""" 11 | # Initialize git repo 12 | repo_path = tmp_path / "test_repo" 13 | repo_path.mkdir(parents=True, exist_ok=True) 14 | run_cmd("git init", cwd=str(repo_path)) 15 | run_cmd("git config user.email 'test@example.com'", cwd=str(repo_path)) 16 | run_cmd("git config user.name 'Test User'", cwd=str(repo_path)) 17 | 18 | # Create and commit an initial file 19 | initial_file = repo_path / "initial.txt" 20 | initial_file.write_text("initial content") 21 | run_cmd("git add initial.txt", cwd=str(repo_path)) 22 | run_cmd("git commit -m 'Initial commit'", cwd=str(repo_path)) 23 | 24 | return repo_path 25 | 26 | 27 | def test_clean_repo(temp_git_repo, monkeypatch): 28 | """Test check_git_clean with a clean repository.""" 29 | monkeypatch.chdir(temp_git_repo) 30 | check_git_clean() # Should not raise any errors 31 | 32 | 33 | def test_unstaged_changes(temp_git_repo, monkeypatch, capsys): 34 | """Test check_git_clean detects unstaged changes.""" 35 | monkeypatch.chdir(temp_git_repo) 36 | 37 | # Create an unstaged change 38 | (temp_git_repo / "initial.txt").write_text("modified content") 39 | 40 | with pytest.raises(SystemExit): 41 | check_git_clean() 42 | 43 | captured = capsys.readouterr() 44 | assert "Unstaged changes present" in captured.out 45 | 46 | 47 | def 
test_staged_changes(temp_git_repo, monkeypatch, capsys):
48 |     """Test check_git_clean detects staged but uncommitted changes."""
49 |     monkeypatch.chdir(temp_git_repo)
50 | 
51 |     # Create and stage a new file
52 |     new_file = temp_git_repo / "new.txt"
53 |     new_file.write_text("new content")
54 |     run_cmd("git add new.txt", cwd=str(temp_git_repo))
55 | 
56 |     # Decline the confirmation prompt so the check aborts fatally
57 |     monkeypatch.setattr("builtins.input", lambda _: "n")
58 | 
59 |     with pytest.raises(SystemExit):
60 |         check_git_clean()
61 | 
62 |     captured = capsys.readouterr()
63 |     assert "staged but uncommitted" in captured.out
64 | 
65 | 
66 | def test_untracked_files(temp_git_repo, monkeypatch, capsys):
67 |     """Test check_git_clean detects untracked files."""
68 |     monkeypatch.chdir(temp_git_repo)
69 | 
70 |     # Create an untracked file
71 |     untracked_file = temp_git_repo / "untracked.txt"
72 |     untracked_file.write_text("untracked content")
73 | 
74 |     # Decline the confirmation prompt so the check aborts fatally
75 |     monkeypatch.setattr("builtins.input", lambda _: "n")
76 | 
77 |     with pytest.raises(SystemExit):
78 |         check_git_clean()
79 | 
80 |     captured = capsys.readouterr()
81 |     assert "untracked files" in captured.out
82 | 
-------------------------------------------------------------------------------- /src/gjdutils/google_translate.py: --------------------------------------------------------------------------------
1 | from google.cloud import translate_v2 as translate
2 | import html
3 | from typing import Optional
4 | 
5 | 
6 | def translate_text(
7 |     text: str,
8 |     lang_src_code: Optional[str],
9 |     lang_tgt_code: str,
10 |     verbose: int = 0,
11 | ):
12 |     """Translates text into the target language.
13 | 
14 |     Target must be an ISO 639-1 language code.
15 |     See https://g.co/cloud/translate/v2/translate-reference#supported_languages
16 | 
17 |     e.g.
18 |     translated_text, result = translate_text(
19 |         text="Hello, world",
20 |         lang_src_code="en",
21 |         lang_tgt_code="el",
22 |         verbose=0,
23 |     )
24 |     """
25 |     translate_client = translate.Client()
26 | 
27 |     lang_src_code = (
28 |         lang_src_code[:2].lower() if isinstance(lang_src_code, str) else None
29 |     )
30 |     lang_tgt_code = lang_tgt_code[:2].lower()
31 |     if lang_src_code == lang_tgt_code:
32 |         return text, None
33 | 
34 |     # assert lang_src_code != lang_tgt_code, (
35 |     #     "Identical src and tgt language codes: %s" % lang_src_code
36 |     # )
37 | 
38 |     # Text can also be a sequence of strings, in which case this method
39 |     # will return a sequence of results for each text.
40 |     if lang_src_code is None:
41 |         result = translate_client.translate(text, target_language=lang_tgt_code)
42 |     else:
43 |         result = translate_client.translate(
44 |             text,
45 |             target_language=lang_tgt_code,
46 |             source_language=lang_src_code,
47 |         )
48 | 
49 |     translated_text = result["translatedText"]
50 | 
51 |     # fix escaping, e.g.
52 |     # I&#39;ve done it a week with no improvement
53 |     # ->
54 |     # I've done it a week with no improvement
55 |     translated_text = html.unescape(translated_text)
56 | 
57 |     if verbose > 0:
58 |         print(f"{lang_src_code} -> {lang_tgt_code}")
59 |         print(f"\t\"{result['input']}\" -> \"{translated_text}\"")
60 |         if lang_src_code is None:
61 |             print(f"\t\tDetected source language: {result['detectedSourceLanguage']}")
62 | 
63 |     return translated_text, result
64 | 
65 | 
66 | def detect_language(text: str, verbose: int = 0) -> tuple[str, float]:
67 |     """
68 |     Detects the text's language.
69 |     """
70 |     translate_client = translate.Client()
71 | 
72 |     # Text can also be a sequence of strings, in which case this method
73 |     # will return a sequence of results for each text.
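    # Illustrative response shape (an assumption based on the keys used below,
    # not verified against the client docs):
    #   {"language": "en", "confidence": 0.98, "input": "Hello"}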
74 | result = translate_client.detect_language(text) 75 | language, confidence = result["language"], result["confidence"] 76 | print(f"Ran detect_language for {text} -> {language} at confidence {confidence}") 77 | return language, confidence 78 | -------------------------------------------------------------------------------- /docs/instructions/WRITE_DEEP_DIVE_AS_DOC.md: -------------------------------------------------------------------------------- 1 | # Write Deep Dive as Documentation 2 | 3 | Do a deep dive on the web about the topic that the user has asked about. If you need more clarification about the requirements to focus the search fruitfully, ask questions (ideally upfront). If you need more context from files, investigate for relevant code & docs. 4 | 5 | Before you start, run `date` to get today's date, in case you need to assess how recent the search results are. 6 | 7 | Then write this up as a detailed reference doc, following the instructions in `WRITE_EVERGREEN_DOC.md`. Include URL links/references (as well as mentions of your own code/docs etc), so you can track down the original sources later if you need to. 8 | 9 | ## Process Guidelines 10 | 11 | ### 1. Clarify the Scope 12 | Before diving into research, ask questions if it will help: 13 | - What specific aspects of the topic are most important? 14 | - What's the intended use case or application? 15 | - Are there particular problems you're trying to solve? 16 | - How deep should the technical detail go? 17 | - What's the target audience for this documentation? 18 | - etc 19 | 20 | ### 2. Research Strategy 21 | - **Start broad** - Get an overview of the topic and ecosystem 22 | - **Go specific** - Focus on the aspects most relevant to your needs 23 | - **Check recency** - Note dates on articles, especially for fast-moving technologies 24 | - **Multiple sources** - Cross-reference information, taking into account authoritativeness 25 | - **Practical focus** - Prioritize actionable information over theory 26 | 27 | ### 3. Documentation Structure 28 | Loosely follow `WRITE_EVERGREEN_DOC.md` format as appropriate, e.g.: 29 | - **Overview** - What is this technology/concept? 30 | - **Resources** - Links to official docs, tutorials, tools 31 | - **Use cases** - When and why to use it 32 | - **Getting started** - Quick setup or hello world 33 | - **Key concepts** - Essential understanding 34 | - **Best practices** - Proven approaches and patterns 35 | - **Risks/gotchas** - Known issues, e.g. recent API changes, common/likely confusions & error messages, risks, etc 36 | - etc 37 | 38 | ### 4. Source Attribution 39 | - **Direct links** - Include URLs (or file paths, or whatever's appropriate) for all referenced sources 40 | - **Date notation** - Note when sources were published/accessed 41 | - **Authority assessment** - Prefer official docs, established experts, recent sources 42 | - **Code attribution** - Reference any code examples with their source 43 | 44 | Remember: The goal is to create a reference that will explain, be up-to-date, help with decision-making, save time, and/or prevent mistakes/issues/surprises. Be proactive. Focus on the information that would be most valuable given the user's intent. Highlight anything worthy of remark. 
45 | -------------------------------------------------------------------------------- /src/gjdutils/cli/pypi/deploy.py: -------------------------------------------------------------------------------- 1 | """Deployment commands for PyPI packages.""" 2 | 3 | import typer 4 | from rich.console import Console 5 | from packaging.version import Version 6 | 7 | from gjdutils import __version__ 8 | from gjdutils.cli.check_git_clean import check_git_clean 9 | from gjdutils.cli.pypi.check import check_local, check_prod, check_test 10 | from gjdutils.pypi_build import ( 11 | build_package, 12 | check_version_exists, 13 | clean_build_dirs, 14 | upload_to_pypi, 15 | ) 16 | from gjdutils.shell import fatal_error_msg 17 | 18 | # Create the deploy subcommand group 19 | app = typer.Typer( 20 | help="Deploy package to PyPI", 21 | add_completion=True, 22 | no_args_is_help=True, 23 | context_settings={"help_option_names": ["-h", "--help"]}, 24 | ) 25 | console = Console() 26 | 27 | 28 | @app.command(name="test") 29 | def deploy_test(): 30 | """Deploy package to Test PyPI.""" 31 | console.rule("[yellow]Starting Test PyPI Deployment") 32 | 33 | # Check if version already exists 34 | if check_version_exists(Version(__version__), pypi_env="test"): 35 | fatal_error_msg(f"Version {__version__} already exists on Test PyPI.\n") 36 | 37 | # Execute deployment steps 38 | clean_build_dirs() 39 | build_package() 40 | upload_to_pypi(pypi_env="test") 41 | 42 | console.print("\n[green]Deployment to Test PyPI completed![/green]") 43 | check_test() 44 | 45 | 46 | @app.command(name="prod") 47 | def deploy_prod(): 48 | """Deploy package to Production PyPI.""" 49 | console.rule("[yellow]Starting Production PyPI Deployment") 50 | 51 | # Check git status first 52 | check_git_clean() 53 | 54 | # Check if version already exists 55 | if check_version_exists(Version(__version__), pypi_env="prod"): 56 | fatal_error_msg(f"Version {__version__} already exists on PyPI.\n") 57 | 58 | # Confirm with user before proceeding 59 | version_confirm = input( 60 | f"\nAre you sure you want to deploy version {__version__} to production PyPI? (y/N): " 61 | ) 62 | if version_confirm.lower() != "y": 63 | console.print("\n[yellow]Deployment cancelled by user[/yellow]") 64 | return 65 | 66 | # Execute deployment steps 67 | clean_build_dirs() 68 | build_package() 69 | upload_to_pypi(pypi_env="prod") 70 | 71 | console.print("\n[green]Deployment to Production PyPI completed![/green]") 72 | check_prod() 73 | 74 | 75 | @app.command(name="all") 76 | def deploy_all(): 77 | """Run full deployment process (local -> test -> prod).""" 78 | console.rule("[yellow]Starting Full Deployment Process") 79 | 80 | check_local() 81 | deploy_test() 82 | deploy_prod() 83 | 84 | console.print("\n[green]Full deployment process completed successfully! 🎉[/green]") 85 | -------------------------------------------------------------------------------- /src/gjdutils/iterfunc.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | from typing import Sequence 3 | 4 | 5 | def contiguous_pairs(lst: Sequence): 6 | """ 7 | Given a list LST, return the contiguous pairs, e.g. 8 | 9 | [10, 20, 30, 40, 50] 10 | -> 11 | [(10, 20), (20, 30), (30, 40), (40, 50)] 12 | 13 | (from GitHub Copilot) 14 | """ 15 | pairs = [(lst[i], lst[i + 1]) for i in range(len(lst) - 1)] 16 | return pairs 17 | 18 | 19 | def flatten(lol): 20 | """ 21 | See http://stackoverflow.com/questions/406121/flattening-a-shallow-list-in-python 22 | 23 | e.g. 
[['image00', 'image01'], ['image10'], []] -> ['image00', 'image01', 'image10']
24 |     """
25 | 
26 |     chain = list(itertools.chain(*lol))
27 |     return chain
28 | 
29 | 
30 | # def flatten(list_of_lists):
31 | #     """
32 | #     Flatten one level of nesting
33 | 
34 | #     from https://docs.python.org/3/library/itertools.html#itertools-recipes
35 | #     """
36 | #     return list(chain.from_iterable(list_of_lists))
37 | 
38 | 
39 | def unique(items):
40 |     """
41 |     Returns KEEP, a list based on ITEMS, but with duplicates
42 |     removed (preserving order, based on first new example).
43 | 
44 |     http://stackoverflow.com/questions/89178/in-python-what-is-the-fastest-algorithm-for-removing-duplicates-from-a-list-so-t
45 | 
46 |     unique([1, 1, 2, 'a', 'a', 3]) -> [1, 2, 'a', 3]
47 |     """
48 |     found = set([])
49 |     keep = []
50 |     for item in items:
51 |         if item not in found:
52 |             found.add(item)
53 |             keep.append(item)
54 |     return keep
55 | 
56 | 
57 | def uniquify_list(lst):
58 |     """Return a list of the elements in lst, but without duplicates, preserving order.
59 | 
60 |     from comment in http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52560
61 | 
62 |     Lightweight and fast ..., Raymond Hettinger, 2002/03/17
63 |     """
64 |     seen = {}
65 |     return [seen.setdefault(e, e) for e in lst if e not in seen]
66 | 
67 | 
68 | def grouper(iterable, n):
69 |     """
70 |     Collect data into fixed-length chunks or blocks. If
71 |     the last block is too small, returns a truncated block.
72 | 
73 |     e.g. grouper('ABCDEFG', 3) --> ABC DEF G
74 | 
75 |     From http://stackoverflow.com/a/8991553/230523
76 |     """
77 |     it = iter(iterable)
78 |     while True:
79 |         chunk = tuple(itertools.islice(it, n))
80 |         if not chunk:
81 |             return
82 |         yield chunk
83 | 
84 | 
85 | def grouper_ragged(iterable, n):
86 |     """
87 |     Collect data into non-overlapping chunks - the last one might be shorter than the others
88 | 
89 |     >>> print(list(grouper_ragged('ABCDEFG', 3)))  # [('A', 'B', 'C'), ('D', 'E', 'F'), ('G',)]
90 | 
91 |     from https://stackoverflow.com/a/41333827/230523
92 |     """
93 |     it = iter(iterable)
94 |     group = tuple(itertools.islice(it, n))
95 |     while group:
96 |         yield group
97 |         group = tuple(itertools.islice(it, n))
98 | 
-------------------------------------------------------------------------------- /docs/instructions/RENAME_OR_MOVE.md: --------------------------------------------------------------------------------
1 | # Rename or Move Files
2 | 
3 | - Rename or move a file or files as per the user's explicit instructions.
4 | - If asked to propose/discuss, then don't make changes until they have been agreed with the user.
5 | - If things are confusing, or you see potential problems, or have a better idea, then you should ask questions, raise concerns, make suggestions, etc.
6 | 
7 | - If there are multiple files, use tasks and subagents (provided with rich context) to:
8 |   - Do the rename/move
9 |   - Prefer to use `git mv` rather than `mv`, where appropriate. Or if there is a special tool for doing the move (e.g. a syntactically-aware refactoring tool), use that
10 |   - Search carefully for all the places that refer to each file, and update them appropriately.
11 |   - Use **sd** for updating references across the codebase (see `docs/reference/SD_STRING_DISPLACEMENT_FIND_REPLACE.md`)
12 |   - Be careful not to break/disrupt functionality.
13 | 
14 | - IMPORTANT: If in doubt, or you notice any issues/surprises/complications, stop and ask.
15 | 
16 | - Once you have finished, commit these changes as a single commit, following `GIT_COMMIT_CHANGES.md`
17 | 
18 | ## Process Guidelines
19 | 
20 | ### Before Starting
21 | 1. **Understand the scope** - How many files are affected?
22 | 2. **Check for references** - What refers to these files?
23 | 3. **Identify risks** - What could break with this change?
24 | 4. **Plan the approach** - Git mv, refactoring tools, or simple moves?
25 | 
26 | ### During Execution
27 | 1. **Use appropriate tools**:
28 |    - `git mv` for version-controlled files
29 |    - IDE refactoring tools for code symbols
30 |    - Search and replace for documentation references
31 | 
32 | 2. **Search thoroughly for references**:
33 |    - Import/require statements
34 |    - Documentation links
35 |    - Configuration files
36 |    - Build scripts and manifests
37 |    - Test files
38 |    - Comments and README files
39 | 
40 | 3. **Test incrementally** if possible:
41 |    - Check that code still compiles
42 |    - Run relevant tests
43 |    - Verify documentation links
44 | 
45 | ### Common Reference Patterns
46 | - **Code**: `import './old-name'`, `require('../old-path')`
47 | - **Documentation**: `[link](old-path.md)`, `see old-file.js`
48 | - **Configuration**: File paths in package.json, tsconfig.json, etc.
49 | - **Build systems**: File references in build scripts, CI configs
50 | - **URLs**: Repository links, deployment paths
51 | 
52 | ### Safety Checks
53 | - **Backup important changes** before large moves
54 | - **Use git status** to review all affected files
55 | - **Test functionality** after the move
56 | - **Review commit diff** before finalizing
57 | 
58 | ### Complex Scenarios
59 | For large refactoring operations:
60 | 1. Break into smaller, atomic moves when possible
61 | 2. Use subagents to handle different aspects (code vs docs vs config)
62 | 3. Consider doing a trial run or creating a branch first
63 | 4. Coordinate with team members if this affects shared code
64 | 
65 | Remember: It's better to ask questions and move carefully than to break working functionality.
-------------------------------------------------------------------------------- /CLAUDE.md: --------------------------------------------------------------------------------
1 | # CLAUDE.md
2 | 
3 | This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4 | 5 | see also: `README.md` 6 | 7 | ## Development Commands 8 | 9 | **Setup:** 10 | ```bash 11 | # Install in editable mode with all dependencies 12 | pip install -e ".[all_no_dev, dev]" 13 | 14 | # Or use the convenience script 15 | python -m gjdutils.scripts.install_all_dev_dependencies 16 | ``` 17 | 18 | **Testing:** 19 | ```bash 20 | # Run all tests 21 | pytest 22 | 23 | # Run specific test file 24 | pytest tests/test_strings.py 25 | 26 | # Run with verbose output 27 | pytest -v 28 | ``` 29 | 30 | **Linting:** 31 | ```bash 32 | # Format code (uses black) 33 | black src/ tests/ 34 | 35 | # Lint code (uses flake8) 36 | flake8 src/ tests/ 37 | ``` 38 | 39 | **Building:** 40 | ```bash 41 | # Build package 42 | python -m build 43 | 44 | # Clean build directories 45 | rm -rf dist/ build/ *.egg-info/ 46 | ``` 47 | 48 | **PyPI Deployment:** 49 | ```bash 50 | # Full deployment workflow (includes all checks) 51 | gjdutils pypi deploy all 52 | 53 | # Individual steps 54 | gjdutils pypi check local # Test locally 55 | gjdutils pypi deploy test # Deploy to Test PyPI 56 | gjdutils pypi deploy prod # Deploy to Production PyPI 57 | ``` 58 | 59 | ## Architecture Overview 60 | 61 | **Module Organization:** 62 | - **Core utilities**: `strings`, `files`, `dicts`, `lists`, `functions` - fundamental data manipulation 63 | - **System interaction**: `cmd`, `shell`, `env`, `runtime` - process execution and environment management 64 | - **AI/LLM**: `llm_utils`, `llms_claude`, `llms_openai` - unified interface for AI services 65 | - **Data science**: `dsci`, `num`, `indexing` - analysis and numerical operations 66 | - **Audio/Voice**: `audios`, `voice_speechrecognition`, `outloud_text_to_speech` - speech processing 67 | - **Web/HTML**: `html`, `web`, `google_translate` - web scraping and content processing 68 | 69 | **CLI Structure:** 70 | Built with Typer in hierarchical command groups: 71 | - Main app in `cli/main.py` registers subcommands 72 | - PyPI management in `cli/pypi/` (check, deploy commands) 73 | - Utility commands: `check-git-clean`, version display 74 | - Special entry point: `gjdutils-export-envs` for shell environment setup 75 | 76 | **Key Patterns:** 77 | - **Optional dependencies**: Features grouped by domain (`audio_lang`, `llm`, `dsci`, etc.) 78 | - **Error handling**: Uses `gjdutils.shell.fatal_error_msg()` for CLI error reporting 79 | - **Versioning**: Dynamic version from `__version__.py`, integrated into deployment checks 80 | - **Environment management**: `.env` file support with shell script generation 81 | 82 | **Testing Strategy:** 83 | - Integration tests for CLI commands in `test_cli.py` 84 | - Environment variable testing in `test_env_integration.py` 85 | - Unit tests follow `test_{module}.py` naming convention 86 | - Uses pytest with typer testing utilities 87 | 88 | **Deployment Process:** 89 | 1. Update version in `__version__.py` 90 | 2. Ensure git working directory is clean 91 | 3. Use `gjdutils pypi deploy all` for full automated workflow 92 | 4. Each step includes verification and rollback capabilities -------------------------------------------------------------------------------- /docs/instructions/GIT_COMMIT_CHANGES.md: -------------------------------------------------------------------------------- 1 | # Git Commit Guidelines 2 | 3 | ## Initial Assessment 4 | Have a look at Git diff. Batch the changes into commits, and make them one at a time. 
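
For example, one way to survey the pending work before batching (illustrative; adapt as needed):

```bash
git --no-pager diff --stat           # unstaged changes, summarised per file
git --no-pager diff --cached --stat  # staged but uncommitted changes
git status --porcelain               # everything, including untracked files
```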
5 | 
6 | ## Commit Best Practices
7 | 
8 | ### Don't ever do anything destructive
9 | 
10 | ABOVE ALL, don't do anything that could result in lost work or mess up yet-to-be-committed changes, unless EXPLICITLY instructed to by the user after warning them.
11 | 
12 | 
13 | ### Batching changes into commits
14 | - Each commit should represent a small/medium feature, or stage, or cluster of related changes (e.g. tweaking a bunch of docs).
15 | - But strike a balance, e.g. the code and docs changes for a given feature should be in the same commit.
16 | - The codebase should (ideally) be in a working state after each commit.
17 | - Try not to mix unrelated changes.
18 | - Before making the commit, list all files that will be committed.
19 | - IMPORTANT: If this is being run in a conversation, only commit changes relevant to this conversation. (Still use reset/add/commit single-command chaining)
20 | - When choosing the order of batches, prefer batches that concern files with older modification dates, in order to make it less likely that another agent is still working on them.
21 | 
22 | 
23 | ### Commit Message Format
24 | ```
25 | <type>: <subject> (50 chars max)
26 | 
27 | <body> (optional, wrap at 72 chars)
28 | - Include a reference to current planning doc at the top of the commit body if there is one, e.g. "Planning doc: yyMMddx_feature_name.md"
29 | - More detailed explanation
30 | - Bullet points for multiple changes
31 | ```
32 | 
33 | Types: feat, fix, docs, style, refactor, test, chore
34 | 
35 | ### Handling Concurrent Changes
36 | There may be other agents changing the code while you work, and they might have added other files already.
37 | - IMPORTANT: To minimise interference, ALWAYS chain the reset/add/commit operations (to make sure we unstage first, then stage, then commit, atomically):
38 | ```bash
39 | git reset HEAD unwanted-file && git add wanted-file && git commit -m "fix: resolve auth bug"
40 | ```
41 | - This reduces the window where another agent's changes could interfere
42 | 
43 | ### Important Notes
44 | - If the code is in a partial/broken state, prioritise commits that leave the codebase working
45 | - If you encounter merge conflicts or ANY unexpected issues, stop and ask the user immediately
46 | - When in doubt, ask the user before proceeding
47 | - **ALWAYS quote file paths** when using git commands to avoid shell expansion issues:
48 |   - `git add "frontend/src/routes/language/[target_language_code]/+page.svelte"`
49 |   - This is especially important for SvelteKit routes with brackets: `[param]`
50 | 
51 | 
52 | ### Gitignore
53 | 
54 | If you notice files that almost certainly shouldn't be committed (e.g. `node_modules`, `passwords.secret`), read the `.gitignore`, and stop to ask the user whether to add them to it. (A quick way to list candidates is sketched at the end of this doc.)
55 | 
56 | 
57 | ## Parallel AI Assistance
58 | 
59 | Run this with parallel AI subagents unless there is a good reason not to. Provide it with lots of context about what we've been doing that will help it to make good decisions and write a good commit message.
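
As referenced in the Gitignore section above, a minimal illustrative way to list untracked files that may belong in `.gitignore`:

```bash
# untracked files appear with a "??" prefix in porcelain output
git status --porcelain | grep '^??'
```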
-------------------------------------------------------------------------------- /src/gjdutils/image_utils.py: --------------------------------------------------------------------------------
1 | import base64
2 | import io
3 | from typing import Optional
4 | 
5 | 
6 | def image_to_base64_resized(image_full_filen: str, resize_target_size_kb: int = 100):
7 |     # NB: despite the name, this returns the raw resized image bytes - the
8 |     # base64 encoding itself happens in image_to_base64() below
9 |     from PIL import Image
10 | 
11 |     # based on https://claude.ai/chat/d0eb1f39-3f42-4cb5-a2ec-5aa102c60ea0
12 |     assert resize_target_size_kb > 0
13 |     with Image.open(image_full_filen) as img_orig:
14 |         width_orig, height_orig = img_orig.size
15 |         # Calculate initial file size
16 |         temp_buffer = io.BytesIO()
17 |         img_orig.save(temp_buffer, format=img_orig.format)
18 |         img_resized = img_orig.copy()
19 |         current_size_kb = len(temp_buffer.getvalue()) / 1024
20 | 
21 |         # Iteratively resize until file size is below target
22 |         resize_factor = 1.0
23 |         while current_size_kb > resize_target_size_kb:
24 |             # shrink the dimensions by a further 10% each iteration
25 |             resize_factor *= 0.9
26 |             width = int(width_orig * resize_factor)
27 |             height = int(height_orig * resize_factor)
28 |             img_resized = img_orig.resize((width, height), Image.LANCZOS)  # type: ignore
29 |             # Check new file size
30 |             temp_buffer = io.BytesIO()
31 |             img_resized.save(temp_buffer, format=img_orig.format)
32 |             current_size_kb = len(temp_buffer.getvalue()) / 1024
33 | 
34 |         # Convert the final resized image to bytes in the original format
35 |         img_bytes = io.BytesIO()
36 |         img_resized.save(img_bytes, format=img_orig.format)
37 |         img_buffer = img_bytes.getvalue()
38 | 
39 |     return img_buffer
40 | 
41 | 
42 | def image_to_base64(img_full_filen: str, resize_target_size_kb: Optional[int] = None):
43 |     # from https://chat.openai.com/c/35f15af9-b947-4fa6-acbe-2a5ed26e7547
44 |     if resize_target_size_kb is None:
45 |         with open(img_full_filen, "rb") as image_file:
46 |             img_bytes = image_file.read()
47 |     else:
48 |         img_bytes = image_to_base64_resized(img_full_filen, resize_target_size_kb)
49 |     img_b64 = base64.b64encode(img_bytes).decode("utf-8")
50 |     return img_b64
51 | 
52 | 
53 | def image_to_base64_basic(image_filen: str) -> str:
54 |     with open(image_filen, "rb") as image_file:
55 |         return base64.b64encode(image_file.read()).decode("ascii")
56 | 
57 | 
58 | def contents_for_images(image_filens: list[str], resize_target_size_kb: int):
59 |     # assert (
60 |     #     1 <= len(image_filens) <= 10
61 |     # ), "You can only provide between 1 and 10 images"
62 |     base64_images = []
63 |     new_contents = []
64 |     for image_filen in image_filens:
65 |         base64_image = image_to_base64(
66 |             image_filen, resize_target_size_kb=resize_target_size_kb
67 |         )
68 |         filen_content = {
69 |             "type": "text",
70 |             "text": f"Filename: {image_filen}",
71 |         }
72 |         img_content = {
73 |             "type": "image_url",
74 |             "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
75 |         }
76 |         base64_images.append(base64_image)
77 |         new_contents.extend([filen_content, img_content])
78 |     return new_contents, base64_images
79 | 
-------------------------------------------------------------------------------- /planning/250215_publishing_to_pypi.md: --------------------------------------------------------------------------------
1 | # Publishing gjdutils to PyPI
2 | 
3 | ## Context
4 | - First release of gjdutils to PyPI
5 | - Version 0.2.0 (after rename from gdutils)
6 | - Package contains utility functions for strings, dates, data science/AI, web development
7 | 
8 | ## Prerequisites
9 | - Python >=3.10
10 | - Build tools: `pip install build twine`
11 | - PyPI account with 2FA configured
12 | - .pypirc
file configured with test and prod PyPI credentials 13 | 14 | ## Steps 15 | 16 | 1. Build and test package locally: 17 | ```bash 18 | # Option 1: Automated testing script (recommended) 19 | python -m gjdutils.scripts.check_locally 20 | 21 | # Option 2: Manual steps 22 | # Clean any existing builds 23 | rm -rf dist/ build/ 24 | 25 | # Build the package 26 | python -m build 27 | ``` 28 | 29 | 2. Test PyPI Deployment: 30 | ```bash 31 | # Option 1: Automated testing script (recommended) 32 | python -m gjdutils.scripts.check_test_pypi 33 | 34 | # Option 2: Manual steps 35 | # Upload to test.pypi.org 36 | twine upload -r testpypi dist/* 37 | 38 | # Create a fresh virtualenv for testing 39 | python -m venv /tmp/test-gjdutils 40 | source /tmp/test-gjdutils/bin/activate 41 | 42 | # Test installation from test.pypi.org (with dependencies from PyPI) 43 | pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ gjdutils 44 | 45 | # Test basic functionality 46 | python -c "import gjdutils; print(gjdutils.__version__)" 47 | ``` 48 | 49 | 3. Production PyPI Deployment: 50 | ```bash 51 | # Upload to PyPI 52 | twine upload dist/* 53 | 54 | # Create a fresh virtualenv for testing 55 | python -m venv /tmp/prod-gjdutils 56 | source /tmp/prod-gjdutils/bin/activate 57 | 58 | # Test installation 59 | pip install gjdutils 60 | 61 | # Test basic functionality 62 | python -c "import gjdutils; print(gjdutils.__version__)" 63 | ``` 64 | 65 | ## Optional Features 66 | Package has several optional feature sets that can be installed: 67 | ```bash 68 | pip install "gjdutils[dt]" # Date/time utilities 69 | pip install "gjdutils[llm]" # AI/LLM integrations 70 | pip install "gjdutils[audio_lang]" # Speech/translation 71 | pip install "gjdutils[html_web]" # Web scraping 72 | pip install "gjdutils[dev]" # Development tools 73 | ``` 74 | 75 | ## Progress Tracking 76 | 77 | ### Current State 78 | - Package renamed to gjdutils 79 | - Version 0.2.0 ready for release 80 | - All tests passing 81 | 82 | ### Next Steps 83 | 1. Build package: 84 | - Clean existing builds 85 | - Run build command 86 | - Verify dist/ contents 87 | 88 | 2. Test deployment: 89 | - Upload to test.pypi.org 90 | - Test installation in fresh virtualenv 91 | - Verify basic functionality 92 | 93 | 3. 
Production deployment: 94 | - Upload to PyPI 95 | - Test installation 96 | - Verify functionality 97 | 98 | ### Post-deployment 99 | - [ ] Update GitHub release description 100 | - [ ] Announce release (if needed) 101 | - [ ] Update documentation with PyPI installation instructions -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | # case-insensitive 3 | name = "GJDutils" 4 | dynamic = ["version"] 5 | authors = [ 6 | { name="Greg Detre", email="greg@gregdetre.com" }, 7 | ] 8 | description = "A collection of useful utility functions (basics, data science/AI, web development, etc)" 9 | readme = "README.md" 10 | # because I use the | syntax for type Union 11 | requires-python = ">=3.10" 12 | classifiers = [ 13 | "Programming Language :: Python :: 3", 14 | "License :: OSI Approved :: MIT License", 15 | "Operating System :: OS Independent", 16 | "Topic :: Utilities", 17 | "Development Status :: 4 - Beta", 18 | "Intended Audience :: Developers", 19 | ] 20 | keywords = ["utilities", "strings", "dates", "data science", "web development", "llm", "ai",] 21 | 22 | dependencies = [ 23 | "ipython", 24 | "jinja2", 25 | "python-dotenv", 26 | "pydantic", 27 | "six", 28 | "typer", # For CLI 29 | "rich", # For CLI output formatting 30 | ] 31 | 32 | [project.scripts] 33 | gjdutils = "gjdutils.cli:app" 34 | gjdutils-export-envs = "gjdutils.scripts.install:install_export_envs" 35 | 36 | # if you add a new optional dependency group, remember to add it to the 'all' group below 37 | [project.optional-dependencies] 38 | audio_lang = [ 39 | "azure-cognitiveservices-speech", 40 | "google-cloud-texttospeech", 41 | "google-cloud-translate", 42 | "elevenlabs>=2.1.0", 43 | # for playing mp3 44 | "playsound", 45 | # for voice_speechrecognition Microphone 46 | "pyaudio", 47 | # for playing mp3 48 | "pygame", 49 | # for playing mp3 50 | "python-vlc", 51 | "SpeechRecognition", 52 | "openai-whisper", # for OpenAI Whisper speech recognition 53 | ] 54 | dev = [ 55 | "black", 56 | "build", 57 | "flake8", 58 | "pytest", 59 | "rich", # for console output formatting 60 | "twine", 61 | "typer", # for CLI tools 62 | "wheel", 63 | ] 64 | dsci = [ 65 | "numpy", 66 | "pandas", 67 | ] 68 | dt = [ 69 | "humanize", 70 | "pendulum", 71 | ] 72 | html_web = [ 73 | "bs4", 74 | "lxml", 75 | ] 76 | llm = [ 77 | "anthropic", 78 | # "llm", no longer using this 79 | "openai", 80 | "pillow", # for images 81 | ] 82 | 83 | all_no_dev = [ 84 | "gjdutils[audio_lang]", 85 | "gjdutils[dsci]", 86 | "gjdutils[dt]", 87 | "gjdutils[html_web]", 88 | "gjdutils[llm]", 89 | ] 90 | 91 | [project.urls] 92 | Homepage = "https://github.com/gregdetre/gjdutils" 93 | Repository = "https://github.com/gregdetre/gjdutils" 94 | # Issues = "https://github.com/gregdetre/gjdutils/issues" 95 | # Documentation 96 | # Changelog 97 | 98 | [build-system] 99 | requires = ["hatchling"] 100 | build-backend = "hatchling.build" 101 | 102 | [tool.hatch.version] 103 | path = "src/gjdutils/__version__.py" 104 | 105 | [tool.hatch.build.targets.sdist] 106 | exclude = [ 107 | "dist/", 108 | "gjdutils/obsolete/", 109 | "gjdutils/todo/", 110 | "tests/fixme_*.py", 111 | ] 112 | 113 | [tool.hatch.build.targets.wheel] 114 | packages = ["src/gjdutils"] 115 | 116 | [tool.hatch.build.targets.wheel.shared-data] 117 | "src/gjdutils/scripts/export_envs.sh" = "bin/export_envs.sh" 118 | 
-------------------------------------------------------------------------------- /src/gjdutils/html.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | from lxml.html import tostring 3 | from lxml.etree import _Element as ElementType 4 | from typing import Optional, Union 5 | 6 | 7 | def remove_html_tags(html: str): 8 | soup = BeautifulSoup(html, "html.parser") 9 | return soup.get_text() 10 | 11 | 12 | def contents_of_body(soup): 13 | """ 14 | e.g. 15 | BeautifulSoup('

hello

world', features='lxml') 16 | => 17 |

hello

18 |

world

19 | 20 | N.B. for html.parser, you might just be able to do: str(soup) 21 | """ 22 | # it might be better to prettify with body hidden=True??? 23 | return "\n".join([str(t) for t in soup.body.contents]) 24 | 25 | 26 | def compare_html(h1, h2): 27 | h1p = BeautifulSoup(h1, features="html.parser").prettify().strip() 28 | h2p = BeautifulSoup(h2, features="html.parser").prettify().strip() 29 | assert h1p == h2p 30 | 31 | 32 | def remove_attrs_from_html(h): 33 | """ 34 | Gets rid of all the attrs in the html. 35 | """ 36 | soup = BeautifulSoup(h, features="lxml") 37 | for t in soup.recursiveChildGenerator(): 38 | t.attrs = {} # type: ignore 39 | # whitespace_from_linebreaks( 40 | # contents_of_body(soup) 41 | # ) 42 | return contents_of_body(soup) 43 | 44 | 45 | def adjust_indentation(pretty_html, indent: int): 46 | # from https://www.perplexity.ai/search/can-you-customise-the-beautifu-225tf.pISaiggsL5tNL.gA 47 | lines = pretty_html.split("\n") 48 | adjusted_lines = [] 49 | for line in lines: 50 | line_lstrip = line.lstrip(" ") 51 | leading_spaces = len(line) - len(line_lstrip) 52 | indent_level = leading_spaces // 1 # default indent is 1 space 53 | adjusted_lines.append(" " * (indent_level * indent) + line_lstrip) 54 | return "\n".join(adjusted_lines) 55 | 56 | 57 | def prettify_html( 58 | html: Union[str, ElementType, list[ElementType]], # BeautifulSoup 59 | indent: int = 2, 60 | n: Optional[int] = None, # number of chars to show 61 | ): 62 | # if isinstance(html, pq): 63 | # html = html.outer_html() # type: ignore 64 | if isinstance(html, list): 65 | # then we'll handle it as a string in a moment 66 | html = "".join( 67 | [tostring(e, method="html").decode() for e in html] # type: ignore 68 | ) #  type: ignore 69 | if isinstance(html, ElementType): 70 | # this will do some cleaning and fixing. but 71 | # you need document_fromstring() if you want to make sure 72 | # that it's a full html doc, e.g. with html, body 73 | html = tostring(html, method="html").decode() # type: ignore 74 | 75 | soup = BeautifulSoup(html, "html.parser") # type: ignore 76 | # html2 = tostring(html, pretty_print=True, method="html").decode() # type: ignore 77 | # the lxml pretty_print just isn't as good as BS4, e.g. with a list of elements 78 | # it wraps things in a div fragment, but the pretty-print of that isn't right 79 | html2 = soup.prettify() 80 | prettified = adjust_indentation(html2, indent=indent)[:n] # type: ignore 81 | return prettified 82 | 83 | 84 | def pprettify_html(*args, **kwargs) -> None: 85 | html = prettify_html(*args, **kwargs) 86 | print(html) 87 | -------------------------------------------------------------------------------- /planning/250215_rename_gdutils_to_gjdutils.md: -------------------------------------------------------------------------------- 1 | # Renaming gdutils to gjdutils 2 | 3 | ## Context 4 | - Renaming project from `gdutils` to `gjdutils` (existing `gdutils` name is taken) 5 | - Package contains utility functions for strings, dates, data science/AI, web development 6 | - Currently at version 0.1.0, moving to 0.2.0 for the rename 7 | 8 | ## Files Requiring Updates 9 | 1. Package files: 10 | - pyproject.toml: 11 | - Update name from "GDutils" to "GJDutils" 12 | - Update GitHub URLs from gdutils to gjdutils 13 | - __VERSION__.py: Update version to 0.2.0 14 | - Rename directory from gdutils/ to gjdutils/ 15 | - Update imports in all Python files: 16 | - `from gdutils import ...` 17 | - `import gdutils` 18 | - References like `gdutils.something()` 19 | 20 | 2. 
Documentation/Meta: 21 | - README.md: Update all references and examples 22 | - .gitignore: Check for any gdutils-specific entries 23 | - Any additional .md files in docs/ or root directory 24 | 25 | ## Steps 26 | 27 | 0. Backup (Important!) 28 | ```bash 29 | # Create a backup branch 30 | git checkout -b backup-before-rename 31 | git push origin backup-before-rename 32 | # Return to main 33 | git checkout main 34 | ``` 35 | 36 | 1. Rename GitHub Repository (✓ DONE) 37 | - In GitHub web UI: Settings -> rename repository from 'gdutils' to 'gjdutils' 38 | - Update local git remote: 39 | ```bash 40 | git remote set-url origin https://github.com/gregdetre/gjdutils.git 41 | ``` 42 | - Verify: `git remote -v` 43 | 44 | 2. Local Development Changes 45 | a. Create a new branch for rename changes: 46 | ```bash 47 | git checkout -b rename-to-gjdutils 48 | ``` 49 | 50 | b. Update configuration files: 51 | - ✓ Update version to 0.2.0 in __VERSION__.py 52 | - ✓ Update pyproject.toml with new name and URLs 53 | - ✓ Update README.md with new package name 54 | - ✓ Review other documentation files 55 | 56 | c. Rename the local directory: 57 | ```bash 58 | # From the parent directory containing gdutils/ 59 | mv gdutils gjdutils 60 | ``` 61 | 62 | d. Update all internal imports and references 63 | 64 | 3. Testing 65 | - ✓ Run existing tests to ensure they pass 66 | - ✓ Test local import: `pip install -e .` 67 | - ✓ Verify imports work 68 | 69 | ## Progress Tracking 70 | 71 | ### ✓ Completed Steps 72 | - ✓ Created backup branch 73 | - ✓ Renamed GitHub repository 74 | - ✓ Updated local git remote 75 | - ✓ Updated version to 0.2.0 in __VERSION__.py 76 | - ✓ Updated pyproject.toml with new name and URLs 77 | - ✓ Renamed source directory from src/gdutils to src/gjdutils 78 | - ✓ Updated imports in Python files to use gjdutils 79 | - ✓ Updated test files to use gjdutils 80 | - ✓ Fixed package __init__.py to expose version 81 | - ✓ Verified all tests are passing 82 | - ✓ Committed all changes to rename-to-gjdutils branch 83 | - ✓ Merged rename-to-gjdutils to main 84 | - ✓ Tagged v0.2.0 85 | 86 | ### Files Updated 87 | - [x] pyproject.toml 88 | - [x] __VERSION__.py 89 | - [x] tests/test_gdutils.py (imports updated) 90 | - [x] All Python files in src/gjdutils/ (imports updated) 91 | - [x] README.md 92 | 93 | ## Next Steps 94 | See @250215_publishing_to_pypi.md for next steps on publishing to PyPI. 95 | 96 | 97 | 98 | -------------------------------------------------------------------------------- /docs/instructions/UPDATE_DOCUMENTATION_ORGANISATION_DOC.md: -------------------------------------------------------------------------------- 1 | # Update Documentation Organisation Doc 2 | 3 | Creates or updates `docs/reference/DOCUMENTATION_ORGANISATION.md` - a navigational guide to all project documentation. 4 | 5 | ## See also 6 | 7 | - `../reference/DOCUMENTATION_ORGANISATION.md` - Current documentation organisation guide 8 | - `UPDATE_HOUSEKEEPING_DOCUMENTATION.md` - Run this first for content review 9 | - `WRITE_EVERGREEN_DOC.md` - Structure guidelines 10 | 11 | ## Task 12 | 13 | Create/update the documentation organisation guide with sensible categories and clear starting points for newcomers. 14 | 15 | **Run after housekeeping**: This should be done after `UPDATE_HOUSEKEEPING_DOCUMENTATION.md` to ensure structural changes reflect current content. 16 | 17 | ## Content Requirements 18 | 19 | 1. **Use your judgement** to organise docs into sensible categories (don't move files, just categorise in the guide) 20 | 2. 
**Highlight key starting points** for newcomers and different personas 21 | 3. **Cover all significant docs** in relevant directories plus project root files like `README.md`, agent instruction files, and planning structure 22 | 23 | ## Process 24 | 25 | 1. **Discover**: Use Glob to find all documentation files 26 | 2. **Categorise**: Create logical groupings based on project needs and user personas 27 | 3. **Describe**: 1-2 sentences per doc, mark important/starter docs clearly 28 | 29 | ## Common Categories 30 | 31 | ### By User Type 32 | - **New contributors** - Setup, architecture overview, coding principles 33 | - **AI agents** - Instruction files, workflow docs, debugging guides 34 | - **Maintainers** - Housekeeping processes, planning workflows 35 | 36 | ### By Content Type 37 | - **Setup & Infrastructure** - Installation, configuration, tooling 38 | - **Architecture & Design** - System overview, key decisions, patterns 39 | - **Development Workflows** - Git, testing, debugging, planning 40 | - **AI-Assisted Development** - Agent instructions, modes, processes 41 | - **Domain-Specific** - Feature documentation, API references 42 | 43 | ### By Frequency 44 | - **Daily use** - Common commands, debugging, development modes 45 | - **Occasional** - Setup, major changes, housekeeping 46 | - **Reference** - Architecture decisions, comprehensive guides 47 | 48 | ## Structure Template 49 | 50 | ```markdown 51 | # Documentation Organisation 52 | 53 | ## Quick Start 54 | - New to the project? Start here: ... 55 | - Setting up development? See: ... 56 | - Working with AI assistants? Begin with: ... 57 | 58 | ## By Category 59 | 60 | ### [Category Name] 61 | Brief description of what this category covers. 62 | 63 | - **[Important Doc]** (⭐ START HERE) - Brief description 64 | - [Regular Doc] - Brief description 65 | - [Another Doc] - Brief description 66 | 67 | ### [Another Category] 68 | ... 69 | 70 | ## By Persona 71 | - **New Developer**: [doc1], [doc2], [doc3] 72 | - **AI Agent**: [instruction1], [mode1], [process1] 73 | - **Maintainer**: [housekeeping1], [planning1] 74 | ``` 75 | 76 | ## Focus 77 | 78 | **This task**: Documentation discovery, categorisation, and navigation structure 79 | **Housekeeping**: Content accuracy, cross-references, implementation status 80 | 81 | **Sequence**: Run housekeeping first to ensure content is current, then update organisation guide to reflect any structural changes. -------------------------------------------------------------------------------- /docs/instructions/CAPTURE_SOUNDING_BOARD_CONVERSATION.md: -------------------------------------------------------------------------------- 1 | Transform conversations into structured, preservable documents that capture nuance, decisions, and context for future reference and reflection. 2 | 3 | ## Core Principles 4 | 5 | ### Preserve vs Synthesize Balance 6 | 7 | **Capture/preserve**: (always quote verbatim for user input) 8 | - Background/context, intentions, requirements, decisions, principles, preferences, rationale, terminology, framings, constraints, criteria, etc from the user, etc 9 | - Memorable insights, tradeoffs, recommendations, etc 10 | - Specific proposals, examples, and code snippets discussed 11 | - Citations/references, e.g. 
to specific files, documentation, conversations, or code, external sources mentioned, web research 12 | - Tool outputs or data that informed decisions 13 | 14 | **Synthesize and clean up:** 15 | - Rambling or repetitive exchanges 16 | - Scattered thoughts into organized themes 17 | - AI responses (focus on key insights, not verbose explanations) 18 | - Technical details that can be summarized 19 | - Back-and-forth that reaches the same conclusion 20 | - Dead-ends 21 | 22 | This approach ensures valuable conversational insights are preserved in a structured, accessible format that serves multiple audiences and supports ongoing reflection and decision-making. 23 | 24 | 25 | ## File Naming and Organization 26 | 27 | `yyMMdd[letter]_description_in_normal_case.md` 28 | 29 | Use `npx tsx scripts/sequential-datetime-prefix.ts docs/conversations/` if available, otherwise use `date +"%y%m%d"` to get the current date for the prefix, then add description in lowercase words separated by underscores (except proper names/acronyms). 30 | 31 | Example: `250616a_research_instructions_improvement.md` 32 | 33 | Save to: `docs/conversations/` (or equivalent conversation documentation directory) 34 | 35 | 36 | ### Metadata 37 | 38 | Include frontmatter metadata at top of document, e.g.: 39 | 40 | ```markdown 41 | --- 42 | Date: [Conversation date, e.g. 2025-June-16 & timestamp] 43 | Type: [Decision-making, Exploratory, Problem-solving, Research Review] 44 | --- 45 | ``` 46 | 47 | 48 | ## Common Conversation Patterns 49 | 50 | ### Decision-Making Conversations 51 | Focus on: 52 | - What options were considered and why 53 | - What criteria drove the decision 54 | - What concerns or trade-offs were discussed 55 | - The final decision and rationale 56 | - Specific proposals and examples that influenced the decision 57 | 58 | ### Exploratory Conversations 59 | Focus on: 60 | - What questions or curiosities drove the discussion 61 | - What insights or patterns emerged 62 | - What new questions arose 63 | - What areas warrant further investigation 64 | 65 | ### Problem-Solving Conversations 66 | Focus on: 67 | - How the problem was defined and understood 68 | - What root causes were identified 69 | - What solutions were brainstormed 70 | - What approach was recommended and why 71 | 72 | ### Research Review Conversations 73 | Focus on: 74 | - What research question prompted the investigation 75 | - What key findings emerged 76 | - How findings were interpreted or applied 77 | - What gaps or follow-up research were identified 78 | - Specific examples, data points, or methodological insights that stood out 79 | 80 | ## References 81 | 82 | Provide comprehensive signposting/citations/references where applicable. Link forwards and backwards. 83 | 84 | -------------------------------------------------------------------------------- /src/gjdutils/regex.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import Iterable 3 | 4 | # Based on: 5 | # https://github.com/gregdetre/emacs-freex/blob/63523bf3b9032cc75b55ee28929dcdaf7714a419/freex_sqlalchemy.py#L653 6 | # https://github.com/gregdetre/emacs-freex/blob/63523bf3b9032cc75b55ee28929dcdaf7714a419/freex_sqlalchemy.py#L713 7 | # That code has faster versions for much bigger lists of aliases, but this is fine for now. 8 | 9 | 10 | def compile_regex_for_matching_aliases(aliases: Iterable[str]): 11 | """ 12 | Builds and compiles the implicit link regular expression. 
13 | """ 14 | # add .lower() here and elsewhere if you want case-insensitive 15 | aliases = [re.escape(a) for a in aliases] 16 | 17 | # ensure that 'jubba wubba' comes before 'jubba' 18 | aliases.sort(reverse=True) 19 | 20 | # build the regexp string to be single words 21 | # 22 | # it used to look like this, which worked nicely, unless 23 | # there was a carriage return 24 | alias_regex_str = "\\b" + "\\b|\\b".join(aliases) + "\\b" 25 | 26 | # we want to deal with the possibility that there are 0 27 | # or 1 spaces after a word, followed by 0 or 1 carriage 28 | # returns, followed by zero or more spaces, which is 29 | # what might happen if an implicit link was to span two 30 | # lines in an indented paragraph. that's what ' ?\n? *' does 31 | # 32 | # the \\b that gets 33 | # added later will ensure that there's a word boundary 34 | # of *some* kind. 35 | alias_regex_str = alias_regex_str.replace("\\ ", " ?\\\n? *") 36 | 37 | # for ages, it wasn't matching things like 'Smith & 38 | # Jones (2006)', because there was some problem with the 39 | # parentheses. i eventually realized that it matched the 40 | # first, but not the second parenthesis, because (i 41 | # think) the parenthesis was screwing with the \b (bare 42 | # word) separator. if you remove all the bare word 43 | # separators that follow closing parentheses, the sun 44 | # comes back out again 45 | alias_regex_str = alias_regex_str.replace(")\\b", ")") 46 | 47 | # compile the regexp 48 | # impLinkRegexp = re.compile(aliasRegexpStr, re.IGNORECASE|re.MULTILINE) 49 | compiled_regex = re.compile(alias_regex_str, re.MULTILINE) 50 | return compiled_regex 51 | 52 | 53 | def find_matchranges_for_aliases( 54 | compiled_regex_of_aliases: re.Pattern, txt_to_search: str 55 | ): 56 | """ 57 | Return a list of (beg,end) tuples for all the matching implicit 58 | links in the provided string. 59 | """ 60 | # get the start and endpoints for the matchranges 61 | matchranges = [ 62 | list(match.span()) 63 | for match in compiled_regex_of_aliases.finditer(txt_to_search) 64 | ] 65 | 66 | # return the matchranges 67 | return matchranges 68 | 69 | 70 | # txt = """There was a young European man called Friedrich Nietzsche, who most went by just "Nietzsche" (and but never 'Friedrich'). He was a friend of Little Hans and Little Richard but he was not little or Little.""" 71 | 72 | # aliasRegexpStr, impLinkRegexp = update_implicit_link_regexp_original(all_aliases) 73 | # match_ranges = get_all_matching_implicit_links_original(impLinkRegexp, txt) 74 | 75 | # for match_range in match_ranges: 76 | # match_txt = txt[match_range[0]:match_range[1]] 77 | # matched_name = name_from_alias[match_txt] 78 | # print('MATCHED: ', match_txt, ' <- NAME: ', matched_name, sep='') 79 | -------------------------------------------------------------------------------- /docs/instructions/CRITIQUE_OF_PLANNING_DOC.md: -------------------------------------------------------------------------------- 1 | # Critique of Planning Document 2 | 3 | Read this planning document, and all relevant code & documentation. 4 | 5 | Review progress, evaluate the approach taken so far, and what's proposed for the next stages. Evaluate the proposal thoroughly. Has it adequately characterised/understood the problem? Will its solution address it? Focus especially on things that seem to be causing problems, or that you think could become a problem in the future. Are there things that haven't been considered, potential problems/concerns, anything inconsistent or unclear? Is there a better way? 
In general try to help make sure that our plan is as good as it can be. 6 | 7 | Explore, investigate, search the web, ask for clarification, discuss, critique, make proposals. 8 | 9 | If the user has asked you to, update the planning document (e.g. with an Appendix "Critique of planning doc"). But don't make any changes to the code beyond that critique. 10 | 11 | ## Evaluation Framework 12 | 13 | ### Problem Understanding 14 | - Is the problem clearly defined and well-understood? 15 | - Are the root causes identified correctly? 16 | - Are we solving the right problem, or just symptoms? 17 | - What assumptions are being made, and are they valid? 18 | 19 | ### Solution Approach 20 | - Does the proposed solution adequately address the problem? 21 | - Are there simpler approaches that would achieve the same goals? 22 | - What are the trade-offs and have they been considered? 23 | - Are there alternative approaches worth exploring? 24 | 25 | ### Technical Implementation 26 | - Is the technical approach sound and well-architected? 27 | - Are there potential scalability, maintainability, or performance issues? 28 | - What technical risks or complexity hasn't been addressed? 29 | - Are the dependencies and integrations well-understood? 30 | 31 | ### Scope and Priorities 32 | - Is the scope appropriate for the problem size? 33 | - Are the priorities aligned with business/project value? 34 | - What could be deferred or simplified without significant impact? 35 | - Are there quick wins that could be prioritized? 36 | 37 | ### Risk Assessment 38 | - What could go wrong with this approach? 39 | - What external dependencies or unknowns exist? 40 | - How will success be measured? 41 | - What's the fallback plan if this approach doesn't work? 42 | 43 | ### Resource and Timeline 44 | - Are the time estimates realistic? 45 | - What skills or resources are needed that might not be available? 46 | - Are there blockers or dependencies that could delay progress? 47 | - What's the minimum viable version of this plan? 48 | 49 | ## Research and Investigation 50 | 51 | - Search for similar problems and solutions in the industry 52 | - Investigate relevant libraries, frameworks, or tools 53 | - Look for case studies or best practices 54 | - Check for recent developments that might change the approach 55 | 56 | ## Critique Output 57 | 58 | Structure your critique to be constructive and actionable: 59 | 60 | 1. **Strengths** - What's working well in the current plan 61 | 2. **Concerns** - Specific issues or risks you've identified 62 | 3. **Alternatives** - Different approaches worth considering 63 | 4. **Recommendations** - Specific changes or improvements to make 64 | 5. **Questions** - Areas that need clarification or further investigation 65 | 66 | Remember: The goal is to strengthen the plan, not just find problems. Focus on making the plan as robust and effective as possible. -------------------------------------------------------------------------------- /src/ts/README.md: -------------------------------------------------------------------------------- 1 | # gjdutils TypeScript Utilities 2 | 3 | A collection of general-purpose TypeScript utilities and CLI tools for development workflows. 4 | 5 | ## Installation 6 | 7 | ```bash 8 | cd gjdutils 9 | npm install 10 | npm run build 11 | ``` 12 | 13 | ## Available Tools 14 | 15 | ### CLI Utilities 16 | 17 | #### sequential-datetime-prefix 18 | Generate sequential datetime prefixes in yyMMdd[x]_ format for organizing files by date. 
19 | 20 | ```bash 21 | # Generate next available prefix for a folder 22 | npx tsx src/ts/cli/sequential-datetime-prefix.ts planning/ 23 | 24 | # Output: 241225a_ (if no files exist for today) 25 | # Output: 241225b_ (if 241225a_ already exists) 26 | ``` 27 | 28 | #### extract-llm-conversation 29 | Extract and format LLM conversations from JSON exports to structured markdown. 30 | 31 | ```bash 32 | # Extract single conversation 33 | npx tsx src/ts/cli/extract-llm-conversation.ts --uuid --input conversations.json 34 | 35 | # Extract multiple conversations 36 | npx tsx src/ts/cli/extract-llm-conversation.ts --uuid id1,id2,id3 --input conversations.json --output docs/ 37 | ``` 38 | 39 | ### Script Utilities 40 | 41 | #### count-lines 42 | Count lines of code with configurable exclusions. 43 | 44 | ```bash 45 | # Count all code 46 | npx tsx src/ts/scripts/count-lines.ts 47 | 48 | # Count by file 49 | npx tsx src/ts/scripts/count-lines.ts --by-file 50 | 51 | # Exclude tests 52 | npx tsx src/ts/scripts/count-lines.ts --exclude-tests 53 | ``` 54 | 55 | #### git-worktree-sync 56 | Synchronize Git worktree branches with main branch. 57 | 58 | ```bash 59 | # From feature branch: sync main → current 60 | npx tsx src/ts/scripts/git-worktree-sync.ts 61 | 62 | # From main: sync specific branch → main 63 | npx tsx src/ts/scripts/git-worktree-sync.ts --branch feature-branch 64 | 65 | # From main: sync all worktrees → main 66 | npx tsx src/ts/scripts/git-worktree-sync.ts 67 | ``` 68 | 69 | ### Critique Tools 70 | 71 | #### llm-critique-planning-docs 72 | Generate comprehensive codebase context and send to LLMs for planning document critique. 73 | 74 | ```bash 75 | # Critique with default model 76 | npx tsx src/ts/critique/llm-critique-planning-docs.ts planning/my-plan.md 77 | 78 | # Use specific model 79 | npx tsx src/ts/critique/llm-critique-planning-docs.ts --model anthropic:claude-3-opus:latest planning/my-plan.md 80 | 81 | # Include specific files 82 | npx tsx src/ts/critique/llm-critique-planning-docs.ts --files src/api.ts --files lib/db.ts planning/my-plan.md 83 | ``` 84 | 85 | #### parse-llm-output 86 | Parse LLM critique output and format it nicely. 87 | 88 | ```bash 89 | # Parse from file 90 | npx tsx src/ts/critique/parse-llm-output.ts critique-output.json 91 | 92 | # Parse from stdin 93 | cat critique-output.json | npx tsx src/ts/critique/parse-llm-output.ts 94 | ``` 95 | 96 | ## Development 97 | 98 | ```bash 99 | # Build TypeScript 100 | npm run build 101 | 102 | # Watch mode for development 103 | npm run watch 104 | 105 | # Clean build artifacts 106 | npm run clean 107 | ``` 108 | 109 | ## Contributing 110 | 111 | When adding new utilities: 112 | 1. Follow the existing patterns for CLI tools using Clipanion 113 | 2. Make tools configurable and general-purpose 114 | 3. Add comprehensive documentation and examples 115 | 4. Include type definitions for better IDE support 116 | 117 | ## License 118 | 119 | MIT License - see root LICENSE file for details. -------------------------------------------------------------------------------- /docs/WORKFLOW.md: -------------------------------------------------------------------------------- 1 | Some of these docs were written with Claude Code in mind, e.g. they reference `tasks` and `subagents`. 2 | 3 | But for the most part they should work fairly well in other contexts (e.g. Cursor). 4 | 5 | My workflow for starting a new epic is usually something like: 6 | 7 | - **Switch to best model** - I usually use Claude Opus 4 where I want the most brains, e.g. 
upfront thinking & planning (though I'm not certain it's really that much smarter than Sonnet) 8 | - Claude Code: `/model opus` 9 | - Cursor: Switch your model to Claude Sonnet 4 in the model selector 10 | 11 | - `We want to build X. Here's some background, desired features, concerns, etc. Be in @instructions/SOUNDING_BOARD_MODE.md` 12 | 13 | - Discuss. This step takes the longest, answering the model's questions, considering various options & tradeoffs, etc. 14 | 15 | - If there's a new software library or specialist topic involved, I might say `"Follow instructions in @instructions/WRITE_DEEP_DIVE_AS_DOC.md for topic X`. That way, I'll have a new `docs/SOFTWARE_LIBRARY_X.md` that we can continually refer back to, containing up-to-date snippets and best practices from the web. 16 | 17 | - `Create a new planning doc for this, following instructions in @instructions/WRITE_PLANNING_DOC.md`. Read that, check I'm happy with it, discuss/manually edit as needed. This is the key step. Because it has all the context from the deep dive and our conversation, the planning document is usually pretty rich. 18 | 19 | - I occasionally `Run @instructions/CRITIQUE_OF_PLANNING_DOC.md` in Cursor with o3, and then feed that critique back to Claude to see if it wants to update its plan. (In practice, I mostly just rely on Claude, and only rope in o3 if we're doing something really tricky, or if we get struck.) 20 | 21 | - **Clear context** - Clear the context window, adding a nice summary of what has been discussed before 22 | - Claude Code: `/compact` 23 | - Cursor: Start a new chat (there's no equivalent to `/compact` in Cursor, but fortunately you can just reference the planning doc) 24 | 25 | - **Switch to implementation model** - I might switch over to Sonnet if I think the implementation part is straightforward. (Even with the more expensive [Anthropic Max Plan](https://www.anthropic.com/news/max-plan), I hit the rate limits for Opus sometimes). 26 | - Claude Code: `/model sonnet` 27 | - Cursor: Switch your model to Claude Sonnet 4 (or Gemini 2.5 is great too) in the model selector 28 | 29 | - `Run @instructions/DO_PLANNING_DOC.md for [planning doc]`. Make a cup of tea. I have the Claude permissions mostly in YOLO mode, but it can't commit. The model will do a single stage (with lots of sub-actions), and then stop. 30 | 31 | - It'll pause at the end of the stage, often waiting for approval on a commit message. Read the summary, do some manual testing, perhaps also `Run @instructions/DEBRIEF_PROGRESS.md`. 32 | 33 | - **Continue iteration** - Clear context as above, then: 34 | - `Do next stage of planning doc, as per @instructions/DO_PLANNING_DOC.md` 35 | 36 | 37 | - **Housekeeping** - Every so often: 38 | 39 | - Run `@instructions/UPDATE_HOUSEKEEPING_DOCUMENTATION.md`. 40 | 41 | - Run `@instructions/UPDATE_CLAUDE_INSTRUCTIONS.md`. I think it's probably important that `CLAUDE.md` (or some equivalent Cursor rules) includes important stuff, e.g. a summary of `instructions/CODING_PRINCIPLES.md`, project-specific coding guidelines, and `reference/DOCUMENTATION_ORGANISATION.md`). Then the prompts can be very short, and you can trust that the agent will find the right bit of the code reliably and without wasting too much context. 
42 | -------------------------------------------------------------------------------- /src/gjdutils/indexing.py: -------------------------------------------------------------------------------- 1 | from decimal import Decimal, getcontext 2 | from typing import Optional 3 | 4 | """ 5 | For manual ordering and reording in a database: 6 | - every item gets a Decimal location (LOC) between 0 and 1 7 | - the LOCs of all items are sorted 8 | - the LOC of a new item is calculated as the average of the LOCs of the items before and after it 9 | - the LOC won't ever be 0 or 1, so there will always be a gap for you to insert afterwards 10 | - if you insert at the beginning or end, the LOC will be half of the first or last item's LOC 11 | 12 | I think Figma used this. 13 | """ 14 | 15 | # Set precision high enough to handle many divisions 16 | getcontext().prec = 28 17 | 18 | 19 | def locs_for(n: int) -> list[Decimal]: 20 | # Assign initial idx values with buffers at both ends 21 | locs = [] 22 | for i in range(n): 23 | loc = Decimal(i + 1) / Decimal(n + 1) 24 | locs.append(loc) 25 | assert len(locs) == n 26 | return locs 27 | 28 | 29 | def loc_for_insert_at(locs: list[Decimal], position: int, do_insert: bool = True): 30 | """ 31 | TODO: rewrite in terms of LOC_BETWEEN 32 | """ 33 | assert locs == sorted( 34 | locs 35 | ), f"Input LOCS are unsorted, so things are already broken - {locs}" 36 | list_length = len(locs) 37 | if position == 0: # Insert at the beginning 38 | newloc = locs[0] / 2 if list_length > 0 else Decimal("0.5") 39 | elif position >= list_length: # Insert at the end 40 | newloc = locs[-1] + (1 - locs[-1]) / 2 if list_length > 0 else Decimal("0.5") 41 | elif position < 0: 42 | raise Exception(f"Position must be non-negative, but got {position}") 43 | else: # Insert between two items 44 | newloc = (locs[position - 1] + locs[position]) / 2 45 | if do_insert: 46 | locs.insert(position, newloc) 47 | assert locs == sorted(locs), f"Somehow we've broken the LOCS sorting: {locs}" 48 | return newloc 49 | 50 | 51 | def loc_for_insert_at2(locs: list[Decimal], position: int, do_insert: bool = True): 52 | """ 53 | Functional version of LOC_FOR_INSERT_AT that returns a new list instead of 54 | modifying the input list. 55 | 56 | Uses LOC_BETWEEN. 
57 | """ 58 | assert locs == sorted( 59 | locs 60 | ), f"Input LOCS are unsorted, so things are already broken - {locs}" 61 | list_length = len(locs) 62 | if position < 0: 63 | raise Exception(f"Position must be non-negative, but got {position}") 64 | elif position == 0: # Insert at the beginning 65 | loc1 = None 66 | loc2 = locs[0] if list_length > 0 else None 67 | elif position >= list_length: # Insert at the end 68 | loc1 = locs[-1] if list_length > 0 else None 69 | loc2 = None 70 | else: # Insert between two items 71 | loc1 = locs[position - 1] 72 | loc2 = locs[position] 73 | newloc = loc_between(loc1, loc2) 74 | if do_insert: 75 | locs.insert(position, newloc) 76 | assert locs == sorted(locs), f"Somehow we've broken the LOCS sorting: {locs}" 77 | return newloc 78 | 79 | 80 | def loc_between(loc1: Optional[Decimal], loc2: Optional[Decimal]) -> Decimal: 81 | if loc1 is not None and loc2 is not None: 82 | assert ( 83 | loc1 >= 0 and loc2 <= 1 84 | ), f"LOCs must be between 0 and 1, but got {loc1} and {loc2}" 85 | return (loc1 + loc2) / 2 86 | elif loc1 is None and loc2 is not None: 87 | return loc2 / 2 88 | elif loc1 is not None and loc2 is None: 89 | return loc1 + (1 - loc1) / 2 90 | elif loc1 is None and loc2 is None: 91 | return Decimal("0.5") 92 | else: 93 | raise Exception(f"This should never happen: {loc1}, {loc2}") 94 | 95 | 96 | def disp(locs: list[Decimal]): 97 | print(", ".join(["%.3f" % loc for loc in locs])) 98 | -------------------------------------------------------------------------------- /tests/test_indexing.py: -------------------------------------------------------------------------------- 1 | from decimal import Decimal 2 | from typing import Optional 3 | 4 | from gjdutils.indexing import ( 5 | loc_between, 6 | loc_for_insert_at, 7 | loc_for_insert_at2, 8 | ) 9 | 10 | 11 | def test_loc_for_insert_at(): 12 | # Test inserting at the beginning 13 | locs = [Decimal("0.2"), Decimal("0.4"), Decimal("0.6")] 14 | position = 0 15 | loc = loc_for_insert_at(locs, position) 16 | assert loc == Decimal("0.1") 17 | assert locs == [Decimal("0.1"), Decimal("0.2"), Decimal("0.4"), Decimal("0.6")] 18 | 19 | # Test inserting at the end 20 | locs = [Decimal("0.2"), Decimal("0.4"), Decimal("0.6")] 21 | position = 3 22 | loc = loc_for_insert_at(locs, position) 23 | assert loc == Decimal("0.8") 24 | assert locs == [Decimal("0.2"), Decimal("0.4"), Decimal("0.6"), Decimal("0.8")] 25 | 26 | # Test inserting between two items 27 | locs = [Decimal("0.2"), Decimal("0.6")] 28 | position = 1 29 | loc = loc_for_insert_at(locs, position) 30 | assert loc == Decimal("0.4") 31 | assert locs == [Decimal("0.2"), Decimal("0.4"), Decimal("0.6")] 32 | 33 | # Test inserting into an empty list 34 | locs = [] 35 | position = 0 36 | loc = loc_for_insert_at(locs, position) 37 | assert loc == Decimal("0.5") 38 | assert locs == [Decimal("0.5")] 39 | 40 | # Test inserting into a list with one item 41 | locs = [Decimal("0.2")] 42 | position = 1 43 | loc = loc_for_insert_at(locs, position) 44 | assert loc == Decimal("0.6") 45 | assert locs == [Decimal("0.2"), Decimal("0.6")] 46 | 47 | 48 | def test_loc_for_insert_at2(): 49 | # Test inserting at the beginning 50 | locs = [Decimal("0.2"), Decimal("0.4"), Decimal("0.6")] 51 | position = 0 52 | locs_copy = locs.copy() 53 | locs_copy.sort() 54 | loc = loc_for_insert_at2(locs, position) 55 | assert loc == Decimal("0.1") 56 | assert locs == sorted(locs_copy + [Decimal("0.1")]) 57 | 58 | # Test inserting at the end 59 | locs = [Decimal("0.2"), Decimal("0.4"), Decimal("0.6")] 
60 | position = 3 61 | locs_copy = locs.copy() 62 | locs_copy.sort() 63 | loc = loc_for_insert_at2(locs, position) 64 | assert loc == Decimal("0.8") 65 | assert locs == sorted(locs_copy + [Decimal("0.8")]) 66 | 67 | # Test inserting between two items 68 | locs = [Decimal("0.2"), Decimal("0.6")] 69 | position = 1 70 | locs_copy = locs.copy() 71 | locs_copy.sort() 72 | loc = loc_for_insert_at2(locs, position) 73 | assert loc == Decimal("0.4") 74 | assert locs == sorted(locs_copy + [Decimal("0.4")]) 75 | 76 | # Test inserting into an empty list 77 | locs = [] 78 | position = 0 79 | locs_copy = locs.copy() 80 | locs_copy.sort() 81 | loc = loc_for_insert_at2(locs, position) 82 | assert loc == Decimal("0.5") 83 | assert locs == sorted(locs_copy + [Decimal("0.5")]) 84 | 85 | # Test inserting into a list with one item 86 | locs = [Decimal("0.2")] 87 | position = 1 88 | locs_copy = locs.copy() 89 | locs_copy.sort() 90 | loc = loc_for_insert_at2(locs, position) 91 | assert loc == Decimal("0.6") 92 | assert locs == sorted(locs_copy + [Decimal("0.6")]) 93 | 94 | 95 | def test_loc_between(): 96 | # Test when both loc1 and loc2 are not None 97 | loc1 = Decimal("0.2") 98 | loc2 = Decimal("0.6") 99 | result = loc_between(loc1, loc2) 100 | assert result == Decimal("0.4") 101 | 102 | # Test when loc1 is None and loc2 is not None 103 | loc1 = None 104 | loc2 = Decimal("0.6") 105 | result = loc_between(loc1, loc2) 106 | assert result == Decimal("0.3") 107 | 108 | # Test when loc1 is not None and loc2 is None 109 | loc1 = Decimal("0.2") 110 | loc2 = None 111 | result = loc_between(loc1, loc2) 112 | assert result == Decimal("0.6") 113 | 114 | # Test when both loc1 and loc2 are None 115 | loc1 = None 116 | loc2 = None 117 | result = loc_between(loc1, loc2) 118 | assert result == Decimal("0.5") 119 | -------------------------------------------------------------------------------- /src/gjdutils/webserver.py: -------------------------------------------------------------------------------- 1 | """Generic HTTP server utilities for serving static files. 2 | 3 | Provides a reusable HTTP server with features like: 4 | - Extensionless URL support (serves /foo as /foo.html) 5 | - Cache control options 6 | - Request logging control 7 | - Address reuse to avoid "address already in use" errors 8 | """ 9 | 10 | import http.server 11 | import socketserver 12 | import os 13 | from functools import partial 14 | from typing import Optional, Callable, Any 15 | 16 | 17 | class ReusableTCPServer(socketserver.TCPServer): 18 | """TCP server that allows address reuse to avoid 'Address already in use' errors.""" 19 | allow_reuse_address = True 20 | 21 | 22 | class CustomHTTPRequestHandler(http.server.SimpleHTTPRequestHandler): 23 | """Custom HTTP request handler with extensionless URL support and cache control. 
24 | 25 | Features: 26 | - Serves /foo as /foo.html if the HTML file exists 27 | - Optional cache disabling for development 28 | - Optional request logging control 29 | """ 30 | 31 | def __init__(self, *args, **kwargs): 32 | self.disable_cache = kwargs.pop("disable_cache", False) 33 | self.log_requests = kwargs.pop("log_requests", True) 34 | super().__init__(*args, **kwargs) 35 | 36 | def do_GET(self): 37 | """Handle GET requests with extensionless URL support.""" 38 | # Parse the requested path 39 | path = self.path 40 | # Check if the path has no extension 41 | if not os.path.splitext(path)[1]: 42 | # Append '.html' to the path 43 | new_path = f"{path}.html" 44 | # Construct the full file path 45 | full_path = os.path.join(self.directory, new_path.lstrip("/")) 46 | # Check if the .html file exists 47 | if os.path.exists(full_path): 48 | self.path = new_path # Update the path to the .html file 49 | # Call the superclass method to handle the request 50 | return super().do_GET() 51 | 52 | def end_headers(self): 53 | """Add cache control headers if cache is disabled.""" 54 | if self.disable_cache: 55 | # Disable caching by setting appropriate headers 56 | self.send_header( 57 | "Cache-Control", "no-store, no-cache, must-revalidate, proxy-revalidate" 58 | ) 59 | self.send_header("Pragma", "no-cache") 60 | self.send_header("Expires", "0") 61 | self.send_header("Surrogate-Control", "no-store") 62 | super().end_headers() 63 | 64 | def log_message(self, format: str, *args): 65 | """Override to control request logging.""" 66 | if not self.log_requests: 67 | return 68 | return super().log_message(format, *args) 69 | 70 | 71 | def start_server( 72 | host: str, 73 | port: int, 74 | directory: str, 75 | disable_cache: bool = False, 76 | quiet_requests: bool = False, 77 | handler_class: Optional[Callable[..., Any]] = None, 78 | ) -> None: 79 | """Start an HTTP server to serve static files. 80 | 81 | Args: 82 | host: Host interface to bind to (e.g., "127.0.0.1", "0.0.0.0") 83 | port: Port number to listen on 84 | directory: Directory to serve files from 85 | disable_cache: If True, send no-cache headers 86 | quiet_requests: If True, suppress per-request log messages 87 | handler_class: Optional custom handler class (defaults to CustomHTTPRequestHandler) 88 | 89 | Raises: 90 | ValueError: If directory doesn't exist or isn't a directory 91 | OSError: If server can't bind to the specified host/port 92 | """ 93 | # Fail fast on invalid directory 94 | if not os.path.isdir(directory): 95 | raise ValueError(f"Directory is missing or not a directory: {directory}") 96 | 97 | if handler_class is None: 98 | handler_class = CustomHTTPRequestHandler 99 | 100 | handler = partial( 101 | handler_class, 102 | directory=directory, 103 | disable_cache=disable_cache, 104 | log_requests=not quiet_requests, 105 | ) 106 | 107 | with ReusableTCPServer((host, port), handler) as httpd: 108 | print(f"Serving at http://{host}:{port}") 109 | try: 110 | httpd.serve_forever() 111 | except KeyboardInterrupt: 112 | print("Server stopped.") 113 | finally: 114 | httpd.server_close() -------------------------------------------------------------------------------- /src/gjdutils/ports.py: -------------------------------------------------------------------------------- 1 | """Port management utilities for network servers. 2 | 3 | Utilities for checking port availability, freeing occupied ports, 4 | and managing network server processes. 
5 | """ 6 | 7 | import os 8 | import shutil 9 | import signal 10 | import subprocess 11 | import time 12 | from typing import List 13 | 14 | 15 | def looks_like_addr_in_use(e: OSError) -> bool: 16 | """Detect EADDRINUSE across platforms using errno or message text. 17 | 18 | Args: 19 | e: The OSError exception to check 20 | 21 | Returns: 22 | True if the error indicates "address already in use" 23 | """ 24 | try: 25 | err_no = getattr(e, "errno", None) 26 | if isinstance(err_no, int) and err_no in {48, 98, 10048}: 27 | return True 28 | except Exception: 29 | pass 30 | msg = str(e).lower() 31 | return "address already in use" in msg or "errno 48" in msg or "errno 98" in msg 32 | 33 | 34 | def pids_listening_on_port(port: int) -> List[int]: 35 | """Return a list of PIDs that appear to be listening on the given TCP port. 36 | 37 | Prefers lsof; falls back to fuser if available. 38 | 39 | Args: 40 | port: The TCP port number to check 41 | 42 | Returns: 43 | List of process IDs listening on the port 44 | """ 45 | pids: List[int] = [] 46 | try: 47 | if shutil.which("lsof"): 48 | # Use LISTEN state to avoid client connections 49 | proc = subprocess.run( 50 | ["lsof", "-ti", f"tcp:{port}", "-sTCP:LISTEN"], 51 | capture_output=True, 52 | text=True, 53 | check=False, 54 | ) 55 | for line in proc.stdout.splitlines(): 56 | try: 57 | pid = int(line.strip()) 58 | if pid not in pids: 59 | pids.append(pid) 60 | except Exception: 61 | pass 62 | if not pids and shutil.which("fuser"): 63 | # Try Linux-style fuser 64 | proc = subprocess.run( 65 | ["fuser", "-n", "tcp", str(port)], 66 | capture_output=True, 67 | text=True, 68 | check=False, 69 | ) 70 | # Output may be like: "8000/tcp: 1234 5678" 71 | tokens = (proc.stdout or "").replace("/tcp:", " ").split() 72 | for tok in tokens: 73 | try: 74 | pid = int(tok) 75 | if pid not in pids: 76 | pids.append(pid) 77 | except Exception: 78 | pass 79 | except Exception: 80 | # Best-effort: ignore detection errors 81 | pass 82 | return pids 83 | 84 | 85 | def kill_pids(pids: List[int], verbose: int = 0) -> None: 86 | """Kill a list of process IDs, trying SIGTERM first, then SIGKILL. 87 | 88 | Args: 89 | pids: List of process IDs to terminate 90 | verbose: Verbosity level (0=quiet, 1+=show warnings) 91 | """ 92 | if not pids: 93 | return 94 | for sig in (signal.SIGTERM, signal.SIGKILL): 95 | for pid in list(pids): 96 | try: 97 | os.kill(pid, sig) 98 | except ProcessLookupError: 99 | # Already gone 100 | try: 101 | pids.remove(pid) 102 | except ValueError: 103 | pass 104 | except Exception: 105 | # Ignore permission or other errors 106 | pass 107 | # Brief wait and re-check 108 | time.sleep(0.2 if sig == signal.SIGTERM else 0.05) 109 | remaining = [] 110 | for pid in pids: 111 | try: 112 | os.kill(pid, 0) 113 | remaining.append(pid) 114 | except Exception: 115 | pass 116 | pids[:] = remaining 117 | if not pids: 118 | break 119 | if verbose >= 1 and pids: 120 | print(f"Warning: some processes may still be using the port: {pids}") 121 | 122 | 123 | def free_port_if_in_use(port: int, verbose: int = 0) -> None: 124 | """Free a port by killing any processes listening on it. 
125 | 126 | Args: 127 | port: The TCP port number to free 128 | verbose: Verbosity level (0=quiet, 1+=show what's being killed) 129 | """ 130 | pids = pids_listening_on_port(port) 131 | if pids: 132 | if verbose >= 1: 133 | print(f"Killing processes on port {port}: {pids}") 134 | kill_pids(pids, verbose) -------------------------------------------------------------------------------- /src/gjdutils/cli/pypi/check.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import typer 4 | from rich.console import Console 5 | from pathlib import Path 6 | import shutil 7 | 8 | from gjdutils.decorators import console_print_doc 9 | from gjdutils.shell import temp_venv 10 | from gjdutils.cmd import run_cmd 11 | from gjdutils.pypi_build import verify_installation, check_install_optional_features 12 | 13 | # Create the check subcommand group 14 | app = typer.Typer( 15 | help="Check package installation", 16 | add_completion=True, 17 | no_args_is_help=True, 18 | context_settings={"help_option_names": ["-h", "--help"]}, 19 | ) 20 | console = Console() 21 | 22 | 23 | @console_print_doc(color="yellow") 24 | def clean_build_dirs(): 25 | """Cleaning existing builds...""" 26 | # Command: rm -rf dist/ build/ 27 | shutil.rmtree("dist", ignore_errors=True) 28 | shutil.rmtree("build", ignore_errors=True) 29 | 30 | 31 | def build_package(): 32 | return run_cmd( 33 | f"python -m build", 34 | before_msg="Building package...", 35 | fatal_msg="Failed to build package", 36 | ) 37 | 38 | 39 | def install_and_test_locally(python_path: Path, wheel_file: Path): 40 | """Installing and testing package...""" 41 | # Command: pip install dist/*.whl 42 | run_cmd( 43 | f"{python_path} -m pip install {wheel_file}", 44 | before_msg="Installing package wheel file from local build...", 45 | fatal_msg="Failed to install package", 46 | ) 47 | 48 | # Install all optional dependencies first 49 | check_install_optional_features(python_path) 50 | 51 | # Command: pip install ".[dev]" 52 | run_cmd( 53 | f"{python_path} -m pip install '.[dev]'", 54 | before_msg="Installing dev dependencies...", 55 | fatal_msg="Failed to install dev dependencies", 56 | ) 57 | 58 | 59 | def run_test_suite(python_path: Path): 60 | return run_cmd( 61 | f"{python_path} -m pytest", 62 | before_msg="Running test suite...", 63 | fatal_msg="Test suite failed", 64 | ) 65 | 66 | 67 | @app.command(name="local") 68 | def check_local(): 69 | """Test package installation and functionality locally.""" 70 | console.rule("[yellow]Starting local package testing") 71 | 72 | clean_build_dirs() 73 | build_package() 74 | 75 | venv_path = Path("/tmp/test-gjdutils") 76 | with temp_venv(venv_path) as python_path: 77 | wheel_file = next(Path("dist").glob("*.whl")) 78 | install_and_test_locally(python_path, wheel_file) 79 | verify_installation(python_path) 80 | run_test_suite(python_path) 81 | 82 | console.print("\nLocal testing completed successfully!", style="green") 83 | 84 | 85 | def install_from_test_pypi(python_path: Path): 86 | # Command: pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ gjdutils 87 | run_cmd( 88 | f"{python_path} -m pip install --index-url https://test.pypi.org/simple/ " 89 | "--extra-index-url https://pypi.org/simple/ gjdutils", 90 | before_msg="Installing package from Test PyPI...", 91 | fatal_msg="Failed to install package from Test PyPI", 92 | ) 93 | 94 | # Install all optional dependencies 95 | check_install_optional_features(python_path, 
from_test_pypi=True) 96 | 97 | 98 | @app.command(name="test") 99 | def check_test(): 100 | """Test package installation from Test PyPI.""" 101 | console.rule("[yellow]Starting Test PyPI package testing") 102 | 103 | venv_path = Path("/tmp/test-gjdutils-pypi") 104 | with temp_venv(venv_path) as python_path: 105 | install_from_test_pypi(python_path) 106 | verify_installation(python_path) 107 | 108 | console.print("\nTest PyPI testing completed successfully!", style="green") 109 | 110 | 111 | def install_from_pypiprod(python_path: Path): 112 | # Command: pip install gjdutils 113 | run_cmd( 114 | f"{python_path} -m pip install gjdutils", 115 | before_msg="Installing package from PyPI prod...", 116 | fatal_msg="Failed to install package from PyPI prod", 117 | ) 118 | 119 | # Install all optional dependencies 120 | check_install_optional_features(python_path, from_test_pypi=False) 121 | 122 | 123 | @app.command(name="prod") 124 | def check_prod(): 125 | """Test package installation from Production PyPI.""" 126 | console.rule("[yellow]Starting Production PyPI package testing") 127 | 128 | venv_path = Path("/tmp/prod-gjdutils-pypi") 129 | with temp_venv(venv_path) as python_path: 130 | install_from_pypiprod(python_path) 131 | verify_installation(python_path) 132 | 133 | console.print("\nProduction PyPI testing completed successfully!", style="green") 134 | -------------------------------------------------------------------------------- /src/gjdutils/pypi_build.py: -------------------------------------------------------------------------------- 1 | """Shared utilities for PyPI package building and testing.""" 2 | 3 | from pathlib import Path 4 | import urllib.request 5 | import urllib.error 6 | import shutil 7 | import tomllib 8 | from typing import Literal 9 | from rich.console import Console 10 | from rich.progress import track 11 | from packaging.version import Version 12 | from importlib.metadata import metadata 13 | 14 | from gjdutils.cmd import run_cmd 15 | from gjdutils import __version__ 16 | 17 | console = Console() 18 | 19 | 20 | def verify_installation(python_path: Path): 21 | # Command: python -c "import gjdutils; print(gjdutils.__version__)" 22 | retcode, installed_version, extra = run_cmd( 23 | f'{python_path} -c "import gjdutils; print(gjdutils.__version__)"', 24 | before_msg="Verify package installation by importing and checking version...", 25 | fatal_msg="Failed to import gjdutils", 26 | ) 27 | expected_version = __version__ 28 | assert ( 29 | installed_version == expected_version 30 | ), f"Installed version {installed_version} does not match expected version {expected_version}" 31 | console.print(f"gjdutils version: {installed_version}") 32 | return installed_version 33 | 34 | 35 | # Type for PyPI environment 36 | PyPIEnv = Literal["test", "prod"] 37 | 38 | 39 | def check_install_optional_features(python_path: Path, *, from_test_pypi: bool = False): 40 | """Test installation of optional feature sets.""" 41 | # Get optional dependency groups from package metadata 42 | pkg_metadata = metadata("gjdutils") 43 | # Parse the provides-extra field to get optional dependency groups 44 | # get_all() returns None if the field doesn't exist 45 | extra_features = pkg_metadata.get_all("Provides-Extra") or [] 46 | features = [group for group in extra_features if group not in ["dev", "all_no_dev"]] 47 | 48 | for feature in track(features, description="Installing features"): 49 | console.print(f"\nTesting feature set: {feature}", style="yellow") 50 | if from_test_pypi: 51 | cmd = f"{python_path} 
-m pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ gjdutils[{feature}]" 52 | else: 53 | cmd = f"{python_path} -m pip install '.[{feature}]'" 54 | run_cmd( 55 | cmd, 56 | before_msg=f"Installing feature set: {feature}...", 57 | fatal_msg=f"Failed to install {feature} feature", 58 | ) 59 | console.print(f"[green]Successfully installed {feature} feature[/green]") 60 | 61 | 62 | def check_version_exists(version: Version, pypi_env: PyPIEnv) -> bool: 63 | """Check if version already exists on specified PyPI environment. 64 | 65 | Args: 66 | version: Version string to check (must be valid semantic version) 67 | pypi_env: PyPI environment to check ("test" or "prod") 68 | 69 | Raises: 70 | TypeError: If version is not a packaging.version.Version instance 71 | """ 72 | if not isinstance(version, Version): 73 | raise TypeError( 74 | f"version must be a packaging.version.Version instance, got {type(version)}" 75 | ) 76 | 77 | base_url = { 78 | "test": "https://test.pypi.org", 79 | "prod": "https://pypi.org", 80 | }[pypi_env] 81 | try: 82 | url = f"{base_url}/pypi/gjdutils/{str(version)}/json" 83 | urllib.request.urlopen(url) 84 | return True 85 | except urllib.error.HTTPError as e: 86 | if e.code == 404: 87 | return False 88 | raise # Re-raise other HTTP errors 89 | 90 | 91 | def clean_build_dirs(): 92 | """Clean build directories (dist/ and build/).""" 93 | # Command: rm -rf dist/ build/ 94 | shutil.rmtree("dist", ignore_errors=True) 95 | shutil.rmtree("build", ignore_errors=True) 96 | 97 | 98 | def build_package(): 99 | """Build package with python -m build.""" 100 | return run_cmd( 101 | "python -m build", 102 | before_msg="Building package...", 103 | fatal_msg="Failed to build package", 104 | ) 105 | 106 | 107 | def upload_to_pypi(pypi_env: PyPIEnv): 108 | """Upload package to specified PyPI environment. 109 | 110 | Args: 111 | pypi_env: PyPI environment to upload to ("test" or "prod") 112 | """ 113 | if pypi_env == "test": 114 | cmd = "twine upload -r testpypi dist/*" 115 | elif pypi_env == "prod": 116 | cmd = "twine upload dist/*" 117 | else: 118 | raise ValueError(f"Invalid PyPI environment: {pypi_env}") 119 | 120 | return run_cmd( 121 | cmd, 122 | before_msg=f"Uploading package to {pypi_env} PyPI...", 123 | fatal_msg=f"Failed to upload to {pypi_env} PyPI", 124 | ) 125 | -------------------------------------------------------------------------------- /src/gjdutils/files.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | from typing import Sequence 4 | 5 | from .cmd import run_cmd 6 | 7 | # keep this, because it makes sense for the user to be able to import from here 8 | from .strings import is_string, PathOrStr 9 | 10 | 11 | def split_filen(filen: Path | str): 12 | """ 13 | Splits a filename into its path, stem, and extension (without dot), e.g. 14 | 15 | split_filen('data/blah.mp4') -> ('data', 'blah', 'mp4') 16 | """ 17 | filen = Path(filen) 18 | return filen.parent, filen.stem, filen.suffix[1:] if filen.suffix else "" 19 | 20 | 21 | def create_dir_if_not_exists(dirn: str): 22 | if not os.path.exists(dirn): 23 | os.makedirs(dirn) 24 | 25 | 26 | def validate_ext(ext): 27 | assert is_string(ext) 28 | assert ext.lower() == ext 29 | assert ext 30 | assert ext[0] != "." 
31 | 
32 | 
33 | def validate_dir(dirn):
34 |     dirn_path = Path(dirn)
35 |     assert dirn_path.exists() and dirn_path.is_dir()
36 |     return dirn_path
37 | 
38 | 
39 | def fulltext(
40 |     filens: Sequence[str],
41 |     patterns: list[str],
42 |     dirn: str,
43 |     file_ext: str,
44 |     case_sensitive=False,
45 | ):
46 |     """
47 |     Returns: FOUND_FILES (list of filename strings)
48 | 
49 |     Feed in a list of filenames (complete with extensions),
50 |     which will be fed to agrep for full-text
51 |     searching. Returns a list of files.
52 | 
53 |     FILENS is a list of strings. If it's non-empty, then
54 |     these will be fed in to agrep. If it's empty, then we'll
55 |     just feed in a '*.[freex_extension]'. Spaces in
56 |     filenames are escaped with backslashes, but this is the
57 |     only thing we're escaping.
58 | 
59 |     PATTERNS is a list of strings, which will be ANDed
60 |     together in the agrep regex. Currently, this doesn't
61 |     escape the pattern regex at all, though it does surround it in
62 |     quotes, so the usual agrep rules apply.
63 | 
64 |     Unless case_sensitive==True, will append a -i flag.
65 |     """
66 |     # from freex_sqlalchemy.py
67 | 
68 |     # xxx this should check that all the files have extensions
69 | 
70 |     if case_sensitive:
71 |         case_flag = ""
72 |     else:
73 |         case_flag = "-i"
74 | 
75 |     # xxx should check that all the items in the pattern
76 |     # list are strings...
77 |     #
78 |     # first strip each of the pattern strings of whitespace,
79 |     # and remove the surrounding quotes - we'll add them
80 |     # back to the whole pattern_str when we create the CMD
81 |     #
82 |     # then AND together multiple patterns with agrep,
83 |     # using semicolons
84 |     cleaned_patterns = []
85 |     for pat in patterns:
86 |         pat = pat.strip()
87 |         pat = pat.removeprefix('"').removesuffix('"')
88 |         cleaned_patterns.append(pat)
89 | 
90 |     pattern_str = ";".join(cleaned_patterns)
91 | 
92 |     if len(filens) > 0:
93 |         # escape all the spaces with back-slashes
94 |         filens = [x.replace(" ", "\\ ") for x in filens]
95 | 
96 |         # convert to a space-delimited string (with spaces
97 |         # escaped by backslashes), and each file prepended by the
98 |         # database_dir, e.g.
99 | # /blah/test0.freex /blah/hello\ world.freex 100 | fnames_str = " ".join([os.path.join(dirn, filen) for filen in filens]) 101 | 102 | # the -l says to just return filenames only (no text 103 | # context) 104 | # 105 | # put the pattern in quotes 106 | # 107 | # and then just list the files at the end 108 | cmd = 'agrep -l %s "%s" %s' % (case_flag, pattern_str, fnames_str) 109 | 110 | else: 111 | # if we're not restricting the files we're looking 112 | # through, then there could be too many files to run 113 | # agrep on directly, so we have to pipe it from a 114 | # find 115 | # 116 | # this is to avoid the '/usr/local/bin/agrep: 117 | # Argument list too long' error 118 | cmd = 'find %s -name "*.%s" -print0 | xargs -0 agrep -l %s "%s"' % ( 119 | dirn, 120 | file_ext, 121 | case_flag, 122 | pattern_str, 123 | ) 124 | 125 | # Run command with minimal output unless there's an error 126 | retcode, out_str, _ = run_cmd( 127 | cmd, 128 | verbose=0, 129 | check=False, # Don't raise exception if no matches found (agrep returns 1) 130 | ) 131 | 132 | if len(out_str) > 0: 133 | # strip away the path to yield just the filename for 134 | # each of the files in out_str 135 | found_files = [os.path.basename(x) for x in out_str.strip().split("\n")] 136 | else: 137 | # if you run the above on an empty string, you get 138 | # [''], whereas we really want to return an empty 139 | # list if we didn't find anything 140 | found_files = [] 141 | 142 | return found_files 143 | -------------------------------------------------------------------------------- /src/gjdutils/llm_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Literal, Optional, Union, TYPE_CHECKING 2 | from pathlib import Path 3 | import json 4 | 5 | from gjdutils.llms_claude import call_claude_gpt 6 | from gjdutils.strings import jinja_render 7 | 8 | if TYPE_CHECKING: # for type hints only; avoids runtime imports 9 | from anthropic import Anthropic 10 | from openai import OpenAI 11 | 12 | 13 | MODEL_TYPE = Literal["openai", "claude"] 14 | 15 | 16 | def extract_json_from_markdown(text: str, verbose: int = 0) -> str: 17 | """ 18 | Extracts JSON content from text that may be wrapped in markdown code blocks. 
19 | 
20 |     Args:
21 |         text: The text that may contain JSON, possibly within markdown code blocks
22 |         verbose: Verbosity level (>= 2 prints debug information)
23 | 
24 |     Returns:
25 |         A string containing just the JSON content (still as a string, not parsed)
26 |     """
27 |     # If it's already valid JSON, return as is
28 |     try:
29 |         json.loads(text)
30 |         if verbose >= 2:
31 |             print("Input is already valid JSON")
32 |         return text
33 |     except json.JSONDecodeError:
34 |         # Not valid JSON, may be wrapped in markdown
35 |         pass
36 | 
37 |     # Handle JSON wrapped in markdown code blocks
38 |     if text.strip().startswith("```") and "```" in text:
39 |         if verbose >= 2:
40 |             print("Detected markdown code block")
41 | 
42 |         # Extract content between backticks
43 |         parts = text.split("```", 2)
44 |         if len(parts) >= 2:
45 |             extracted = parts[1]  # Get the middle part
46 | 
47 |             # Strip whitespace first, then remove the language identifier if present
48 |             extracted = extracted.strip()
49 |             if extracted.startswith("json"):
50 |                 extracted = extracted[4:].strip()
51 | 
52 |             # If there are closing backticks, remove everything from them onwards
53 |             if "```" in extracted:
54 |                 extracted = extracted.split("```", 1)[0].strip()
55 | 
56 |             if verbose >= 2:
57 |                 print(f"Extracted content from markdown: {extracted[:50]}...")
58 | 
59 |             return extracted
60 | 
61 |     # If we got here, we couldn't extract JSON from markdown
62 |     return text
63 | 
64 | 
65 | def generate_gpt_from_template(
66 |     client: "Anthropic | OpenAI",  # type: ignore[name-defined]
67 |     prompt_template: Union[str, Path],
68 |     context_d: dict,
69 |     response_json: bool,
70 |     image_filens: list[str] | str | None = None,
71 |     model_type: MODEL_TYPE = "claude",
72 |     max_tokens: Optional[int] = None,
73 |     verbose: int = 0,
74 | ) -> tuple[str | dict[str, Any], dict[str, Any]]:
75 |     """Generate a response from GPT using a template.
77 | 
78 |     Args:
79 |         client: The Anthropic or OpenAI client
80 |         prompt_template: Either a template string or Path to a template file
81 |         context_d: Dictionary of variables to render in the template
82 |         response_json: Whether to parse the response as JSON
83 |         image_filens: Optional paths to image files to include
84 |         model_type: Which model type to use ("openai" or "claude")
85 |         max_tokens: Maximum tokens in the response
86 |         verbose: Verbosity level
87 |     """
88 |     # Load template content from Path or use string directly
89 |     if isinstance(prompt_template, Path):
90 |         with open(prompt_template, "r") as f:
91 |             template_content = f.read()
92 |         template_name = prompt_template.stem
93 |     else:
94 |         template_content = prompt_template
95 |         template_name = "template from input string"
96 | 
97 |     prompt = jinja_render(template_content, context_d)
98 |     if model_type == "openai":
99 |         # Lazy import to avoid requiring OpenAI when only using Anthropic
100 |         from gjdutils.llms_openai import call_openai_gpt
101 | 
102 |         out, _, extra = call_openai_gpt(
103 |             prompt,
104 |             client=client,
105 |             image_filens=image_filens,
106 |             response_json=response_json,
107 |             max_tokens=max_tokens,
108 |         )
109 |     else:
110 |         out, extra = call_claude_gpt(
111 |             prompt,
112 |             client=client,
113 |             image_filens=image_filens,
114 |             response_json=response_json,
115 |             max_tokens=max_tokens if max_tokens is not None else 4096,
116 |         )
117 |     if verbose >= 2:
118 |         print(f"{out=} {max_tokens=}")  # debug output, gated behind verbose
119 |     if response_json:
120 |         assert isinstance(out, dict), f"Expected dict, got {type(out)}"
121 |     else:
122 |         assert isinstance(out, str), f"Expected str, got {type(out)}"
123 |     if verbose >= 1:
124 |         print(f"Called GPT on '{template_name}', context keys {list(context_d.keys())}")
125 |     extra.update(
126 |         {
127 |             "model_type": model_type,
128 |             "prompt_template": template_name,
129 |             "prompt_context_d": context_d,
130 |         }
131 |     )
132 |     return out, extra  # type: ignore
133 | 
-------------------------------------------------------------------------------- /docs/instructions/GENERATE_MERMAID_DIAGRAM.md: --------------------------------------------------------------------------------
1 | # Generate Mermaid Diagram Images
2 | 
3 | Quick instructions for creating and updating Mermaid diagrams in your project.
4 | 
5 | ## Setup
6 | 
7 | Install Mermaid CLI if not already available:
8 | ```bash
9 | npm install -g @mermaid-js/mermaid-cli
10 | ```
11 | 
12 | For detailed configuration options, see the [Mermaid CLI documentation](https://github.com/mermaid-js/mermaid-cli).
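To confirm the install works end to end, a quick smoke test can help (an untested sketch; the file and output paths are arbitrary examples):

```bash
# Render a trivial diagram to verify mmdc is wired up correctly
printf 'flowchart LR\n  A[Start] --> B[Done]\n' > /tmp/smoke_test.mermaid
npx mmdc -i /tmp/smoke_test.mermaid -o /tmp/smoke_test.svg
open /tmp/smoke_test.svg  # or xdg-open on Linux
```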
13 | 
14 | ## File Organization
15 | 
16 | - **Source files**: Store `.mermaid` files in `docs/diagrams/` (or your preferred documentation directory)
17 | - **Generated files**: Save SVG/PNG outputs in the same directory
18 | 
19 | ## File Naming
20 | 
21 | Mermaid diagrams should follow this naming format: `yyMMdd[letter]_description_in_normal_case.mermaid`
22 | 
23 | - Generate date prefix with: `date +%y%m%d` (add letter suffix manually if multiple diagrams per day)
24 | - **Alternative**: Use `npx tsx src/ts/cli/sequential-datetime-prefix.ts` for automated sequential prefixes
25 | - Description: lowercase words separated by underscores (except proper names/acronyms)
26 | - Examples:
27 |   - `250701a_flow_iterative_heading_generation.mermaid`
28 |   - `250701b_architecture_glossary_complete.mermaid`
29 |   - `250701c_diagram_tools_flow_ToC.mermaid`
30 | 
31 | ## Generation Commands
32 | 
33 | **Prefer SVG format** (scalable, web-friendly):
34 | ```bash
35 | npx mmdc -i docs/diagrams/FILENAME.mermaid -o docs/diagrams/FILENAME.svg -w 1400 -H 1600 -s 2 -b transparent -t default
36 | ```
37 | 
38 | **PNG only when specifically requested**:
39 | ```bash
40 | npx mmdc -i docs/diagrams/FILENAME.mermaid -o docs/diagrams/FILENAME.png -w 1400 -H 1600 -s 2 -b transparent -t default
41 | ```
42 | 
43 | ## Best Practices
44 | 
45 | ### Always Regenerate
46 | **Automatically regenerate SVG whenever you update a `.mermaid` file** - keep diagrams in sync with source.
47 | 
48 | ### Simplify Linear Flows
49 | If you have a stack of sequential boxes with no branches, **collapse into a single box** with steps as bullet points:
50 | 
51 | ❌ **Avoid this**:
52 | ```mermaid
53 | A[Step 1] --> B[Step 2] --> C[Step 3] --> D[Step 4]
54 | ```
55 | 
56 | ✅ **Prefer this**:
57 | ```mermaid
58 | Process[Process Flow:<br/>• Step 1<br/>• Step 2<br/>• Step 3<br/>• Step 4]
59 | ```
60 | 
61 | ### Syntax Tips
62 | - **Avoid special characters** in node labels (quotes, parentheses)
63 | - **Use emojis** for visual clarity
64 | - **Keep labels concise** - detailed descriptions go in documentation
65 | - **Test syntax** before generating images
66 | 
67 | 
68 | ### Comments
69 | 
70 | Include a detailed comment in the `.mermaid` file with a prompt to describe/reproduce this diagram for future reference, including references to relevant files/functions and any other details/intent mentioned by the user. If asked to update the diagram, update the prompt-comment accordingly.
71 | 
72 | 
73 | ### Spacing
74 | 
75 | Reduce spacing so that more information fits on the screen.
76 | 
77 | 
78 | ### Icons
79 | 
80 | Include icons sparingly, e.g. for different systems, actions, components.
81 | 
82 | 
83 | ### Fonts
84 | 
85 | Use `courier` (or other monospaced font) for API endpoints, urls, function names, variables, etc.
86 | 
87 | Use `italic` or `bold` in other ways to aid comprehension.
88 | 
89 | 
90 | ### Use colour
91 | 
92 | Use colour appropriately to distinguish major components.
93 | 
94 | For example, you might give different colours to different systems, e.g.
95 | - purple for user
96 | - brown for browser
97 | - orange for mobile/device
98 | - blue for backend
99 | - yellow for database
100 | - purple for cloud storage (including cloud storage services, S3, etc)
101 | - green for payments
102 | 
103 | Perhaps use one colour scheme for the outer boxes that group things (e.g. for systems), and another colour scheme for inner boxes based on a different typology. Use your judgment.
104 | 
105 | 
106 | ### Shapes, lines, arrows, etc
107 | 
108 | For database relationships, use database UML-style lines/arrows/shapes.
109 | 
110 | Likewise, if there's a domain-standard visual-notational scheme that's relevant for (part of) the diagram, use it there.
111 | 
112 | 
113 | ## Quick Workflow
114 | 
115 | 1. Generate filename prefix: `npx tsx src/ts/cli/sequential-datetime-prefix.ts docs/diagrams/` if available, otherwise `date +%y%m%d` (add letter suffix if needed)
116 | 2. Create/edit `.mermaid` file with the generated prefix in `docs/diagrams/`
117 | 3. Generate SVG: `npx mmdc -i docs/diagrams/FILENAME.mermaid -o docs/diagrams/FILENAME.svg -w 1400 -H 1600 -s 2 -b transparent`
118 | 4. Open the SVG in default app: `open docs/diagrams/FILENAME.svg` (or `xdg-open` on Linux)
119 | 5. Output the generated filename
120 | 
121 | ### Example Workflow
122 | 
123 | ```bash
124 | # Get the next sequential prefix (manual approach)
125 | date +%y%m%d
126 | # Output: 250701 (add letter suffix manually: 250701a, 250701b, etc.)
127 | 128 | # OR get automated sequential prefix 129 | npx tsx src/ts/cli/sequential-datetime-prefix.ts 130 | # Output: 250701a (automatically finds next available letter) 131 | 132 | # Create the diagram file 133 | # Create: docs/diagrams/250701d_tool_execution_flow.mermaid 134 | 135 | # Generate the SVG 136 | npx mmdc -i docs/diagrams/250701d_tool_execution_flow.mermaid -o docs/diagrams/250701d_tool_execution_flow.svg -w 1400 -H 1600 -s 2 -b transparent 137 | 138 | # Open to verify 139 | open docs/diagrams/250701d_tool_execution_flow.svg 140 | ``` 141 | -------------------------------------------------------------------------------- /docs/reference/SD_STRING_DISPLACEMENT_FIND_REPLACE.md: -------------------------------------------------------------------------------- 1 | # sd Find-Replace Tool ✓ 2 | 3 | sd is a modern, intuitive find-replace CLI tool designed for safe codebase refactoring, with excellent dry-run capabilities and literal string handling perfect for LLM automation. 4 | 5 | **Important**: sd uses **regex mode by default**. Use `--string-mode` (or `-s`) for literal string matching to avoid issues with special characters in file paths. 6 | 7 | ## See Also 8 | 9 | - Official documentation: [sd GitHub](https://github.com/chmln/sd) 10 | - Installation: Available via Rust cargo, Homebrew (macOS), or package managers 11 | 12 | ## Key Benefits 13 | 14 | - **Safety-first**: True dry-run preview mode with `--preview` 15 | - **LLM-friendly**: Literal string mode eliminates regex escaping issues 16 | - **Modern design**: Clean, intuitive syntax focused on find-replace only 17 | - **Simple syntax**: Readable command structure with clear options 18 | - **Path-friendly**: Handles file paths with special characters seamlessly 19 | 20 | ## Installation 21 | 22 | ```bash 23 | # Rust (cross-platform) 24 | cargo install sd 25 | 26 | # macOS (Homebrew) 27 | brew install sd 28 | 29 | # Ubuntu/Debian 30 | sudo apt install sd 31 | ``` 32 | 33 | ## Usage Patterns 34 | 35 | ### Dry-Run Mode (Recommended for LLMs) 36 | 37 | ```bash 38 | # Preview changes without modifying files 39 | sd --preview "old-string" "new-string" file.txt 40 | 41 | # Preview with literal strings (no regex) 42 | sd --preview --string-mode "app/[slug]/page.tsx" "app/[id]/page.tsx" **/*.tsx 43 | 44 | # Short form 45 | sd -ps "old-string" "new-string" . 46 | ``` 47 | 48 | ### Apply Changes Mode 49 | 50 | ```bash 51 | # Apply changes after preview looks good 52 | sd --string-mode "old-string" "new-string" file.txt 53 | 54 | # Recursive across directories 55 | sd -s "old/path" "new/path" **/*.md 56 | ``` 57 | 58 | ## Common Use Cases 59 | 60 | ### Path Updates (File Paths with Special Characters) 61 | ```bash 62 | # Preview import path changes (dry-run) 63 | sd --preview --string-mode "from '@/old/path'" "from '@/new/path'" **/*.{ts,tsx} 64 | 65 | # Apply after preview 66 | sd -s "from '@/old/path'" "from '@/new/path'" **/*.{ts,tsx} 67 | 68 | # Update file references in documentation 69 | sd -ps "/old/location/" "/new/location/" **/*.md 70 | ``` 71 | 72 | ### String Replacements 73 | ```bash 74 | # Preview function name changes 75 | sd --preview "oldFunctionName" "newFunctionName" **/*.{ts,tsx,js,jsx} 76 | 77 | # Update configuration values (literal strings) 78 | sd -ps "OLD_CONFIG_VALUE" "NEW_CONFIG_VALUE" . 
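# Hypothetical end-to-end pairing (illustrative; the config path is invented):
# preview first, then apply the same literal replacement once it looks right
sd --preview --string-mode "OLD_CONFIG_VALUE" "NEW_CONFIG_VALUE" config/settings.json
sd --string-mode "OLD_CONFIG_VALUE" "NEW_CONFIG_VALUE" config/settings.json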
79 | ``` 80 | 81 | ### Documentation Updates 82 | ```bash 83 | # Preview cross-reference updates 84 | sd --preview --string-mode "old-doc-name.md" "new-doc-name.md" **/*.md 85 | ``` 86 | 87 | ## Safety Best Practices 88 | 89 | 1. **Always preview first**: Use `--preview` (or `-p`) to see exactly what will change 90 | 2. **Use literal strings**: Use `--string-mode` (or `-s`) for file paths and exact matches 91 | 3. **Test scope first**: Start with single files, then expand to directories 92 | 4. **Use version control**: Commit before running large refactors 93 | 5. **Verify with build**: Run build and tests after changes 94 | 95 | ## Advanced Options 96 | 97 | ```bash 98 | # Case-insensitive matching 99 | sd --ignore-case "CamelCase" "snake_case" . 100 | 101 | # Preview multiline replacements with literal strings 102 | sd --preview --string-mode --multiline "old\nline" "new\nline" file.txt 103 | 104 | # Include hidden files and directories 105 | sd --hidden ".oldValue" ".newValue" . 106 | 107 | # Use shell globbing for file selection 108 | sd -ps "oldKey" "newKey" **/*.json 109 | ``` 110 | 111 | ## Troubleshooting 112 | 113 | **No matches found**: Check file paths and use preview mode 114 | ```bash 115 | # Debug: preview what would be matched 116 | sd --preview "search-term" "replacement" **/*.md 117 | ``` 118 | 119 | **Special character issues**: Use literal string mode for paths 120 | ```bash 121 | # Problematic (regex metacharacters like [, ], ., *, etc.) 122 | sd "app/[slug]/page.tsx" "app/[id]/page.tsx" . 123 | 124 | # Safe (literal strings) 125 | sd --string-mode "app/[slug]/page.tsx" "app/[id]/page.tsx" . 126 | ``` 127 | 128 | **Permission errors**: Ensure files are writable and not locked by editors 129 | 130 | ## Integration with Development Workflow 131 | 132 | sd integrates seamlessly with codebase development: 133 | 134 | - **File moves**: Update all references after moving files (use `--string-mode`) 135 | - **Refactoring**: Rename functions, variables, and imports across the codebase 136 | - **Documentation**: Update cross-references when files are renamed (literal strings) 137 | - **Configuration**: Update environment variables and configuration keys 138 | - **LLM automation**: Perfect for programmatic find-replace with predictable behavior 139 | 140 | ## Key Options Summary 141 | 142 | - `--preview` (`-p`): Show changes without applying them (dry-run) 143 | - `--string-mode` (`-s`): Treat search/replace as literal strings (no regex) 144 | - `--ignore-case` (`-i`): Case-insensitive matching 145 | - `--multiline` (`-m`): Enable multiline matching 146 | - `--hidden`: Include hidden files and directories 147 | 148 | ## Perfect for LLM Usage 149 | 150 | - **Predictable**: Literal string mode eliminates regex escaping surprises 151 | - **Safe**: Dry-run mode shows exactly what will change 152 | - **Simple**: Clean syntax that's easy to generate programmatically 153 | - **Path-friendly**: Handles file paths with brackets, dots, and other special characters -------------------------------------------------------------------------------- /src/gjdutils/cmd.py: -------------------------------------------------------------------------------- 1 | from rich.console import Console 2 | import subprocess 3 | import sys 4 | import time 5 | from typing import Union, Optional, Dict 6 | from pathlib import Path 7 | 8 | from gjdutils.shell import fatal_error_msg 9 | 10 | 11 | def run_cmd( 12 | cmd: Union[str, list[str]], 13 | before_msg: Optional[str] = None, 14 | fatal_msg: Optional[str] = None, 
15 |     verbose: int = 2,
16 |     replace_sys_python_executable: bool = True,
17 |     dry_run: bool = False,
18 |     **subprocess_kwargs,
19 | ) -> tuple[int, str, Dict]:
20 |     """Run a shell command with enhanced output and error handling.
21 | 
22 |     Args:
23 |         cmd: Command to run as string (shell=True) or list of strings (shell=False)
24 |         before_msg: Optional message to display before running command (green)
25 |         fatal_msg: Optional message to use if command fails (calls fatal_error_msg)
26 |         verbose: Output verbosity level:
27 |             0 = silent
28 |             1 = show before_msg if provided
29 |             2 = also show command being run (default)
30 |             3 = also show working directory and duration
31 |             4 = also show command stdout output
32 |         replace_sys_python_executable: Replace 'python ' with sys.executable
33 |         dry_run: If True, only print what would be run
34 |         **subprocess_kwargs: Additional arguments passed to subprocess.run
35 | 
36 |     Returns:
37 |         Tuple of (returncode, stdout, extra) where extra is a dict containing:
38 |         - stderr: Standard error output
39 |         - duration: Time taken to run command
40 |         - cmd_str: Final command string that was run
41 |         - cwd: Working directory
42 |         - input_args: Original function arguments
43 |         - subprocess_result: Full subprocess.CompletedProcess object
44 | 
45 |     Examples:
46 |         Simple usage with string command:
47 |         >>> retcode, out, _ = run_cmd("ls -l", before_msg="Listing files...")
48 |         Listing files...
49 |         $ ls -l
50 |         >>> print(out)
51 |         total 8
52 |         -rw-r--r-- 1 user user 2048 Mar 15 10:00 example.txt
53 | 
54 |         Complex usage with list command and error handling:
55 |         >>> cmd = ["pytest", "tests/", "-v", "--cov"]
56 |         >>> retcode, out, extra = run_cmd(
57 |         ...     cmd,
58 |         ...     before_msg="Running tests with coverage...",
59 |         ...     fatal_msg="Tests failed!",
60 |         ...     verbose=2,
61 |         ...     timeout=300,
62 |         ...     check=True
63 |         ... )
64 |         Running tests with coverage...
65 |         $ pytest tests/ -v --cov
66 |         === test session starts ===
67 |         ...
68 | """ 69 | input_args = locals() 70 | 71 | console = Console() 72 | 73 | start_time = time.time() 74 | 75 | # Convert list command to string if needed 76 | cmd_str = " ".join(cmd) if isinstance(cmd, list) else cmd 77 | 78 | # Replace python executable if requested 79 | if replace_sys_python_executable and cmd_str.startswith("python "): 80 | cmd_str = f"{sys.executable} {cmd_str[7:]}" 81 | 82 | # Handle verbosity 83 | if verbose >= 1 and before_msg: 84 | console.print(f"[green]{before_msg}[/green]") 85 | if verbose >= 2: 86 | console.print(f"[white]$ {cmd_str}[/white]") 87 | 88 | # Handle dry run 89 | if dry_run: 90 | return ( 91 | 0, 92 | "", 93 | { 94 | "stderr": "", 95 | "duration": 0, 96 | "cmd_str": cmd_str, 97 | "cwd": str(Path.cwd()), 98 | "input_args": input_args, 99 | "subprocess_result": None, 100 | }, 101 | ) 102 | 103 | # Set defaults for subprocess 104 | subprocess_kwargs.setdefault("shell", isinstance(cmd, str)) 105 | subprocess_kwargs.setdefault("capture_output", True) 106 | subprocess_kwargs.setdefault("text", True) 107 | 108 | try: 109 | result = subprocess.run( 110 | cmd if isinstance(cmd, list) else cmd_str, 111 | **subprocess_kwargs, 112 | ) 113 | except subprocess.TimeoutExpired as e: 114 | if fatal_msg: 115 | fatal_error_msg( 116 | fatal_msg, 117 | f"Command timed out after {subprocess_kwargs.get('timeout', '?')}s", 118 | ) 119 | raise 120 | 121 | duration = time.time() - start_time 122 | 123 | # Show additional info at verbose level 3 124 | if verbose >= 3: 125 | console.print(f"[blue]Working directory: {Path.cwd()}[/blue]") 126 | console.print(f"[blue]Duration: {duration:.2f}s[/blue]") 127 | if verbose >= 4: 128 | console.print(f"[blue]Command output:[/blue]\n{result.stdout}") 129 | 130 | # Handle errors 131 | if result.returncode != 0: 132 | # Show both stdout and stderr for failed commands 133 | if result.stdout: 134 | console.print(f"[red]Command output:[/red]\n{result.stdout}") 135 | if result.stderr: 136 | console.print(f"[red]Command error output:[/red]\n{result.stderr}") 137 | if fatal_msg: 138 | fatal_error_msg(fatal_msg) 139 | 140 | extra = { 141 | "stderr": result.stderr, 142 | "duration": duration, 143 | "cmd_str": cmd_str, 144 | "cwd": str(Path.cwd()), 145 | "input_args": input_args, 146 | "subprocess_result": result, 147 | } 148 | 149 | return result.returncode, result.stdout.strip(), extra 150 | -------------------------------------------------------------------------------- /src/gjdutils/llms_claude.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | from anthropic import Anthropic, NOT_GIVEN 4 | from typing import Optional 5 | 6 | from gjdutils.image_utils import image_to_base64_basic 7 | from gjdutils.env import get_env_var 8 | 9 | 10 | CLAUDE_API_KEY = get_env_var("CLAUDE_API_KEY") 11 | # https://docs.anthropic.com/en/docs/about-claude/models 12 | MODEL_NAME_CLAUDE_SONNET_GOOD_LATEST = "claude-sonnet-4-0" 13 | MODEL_NAME_CLAUDE_SONNET_CHEAP_LATEST = "claude-3-5-haiku-latest" 14 | 15 | 16 | def img_as_content_dict(img_filen: str): 17 | media_type_from_extension = { 18 | ".jpg": "image/jpeg", 19 | ".jpeg": "image/jpeg", 20 | ".png": "image/png", 21 | ".webp": "image/webp", 22 | ".gif": "image/gif", 23 | ".bmp": "image/bmp", 24 | ".tiff": "image/tiff", 25 | ".ico": "image/vnd.microsoft.icon", 26 | ".svg": "image/svg+xml", 27 | ".heic": "image/heic", 28 | ".heif": "image/heif", 29 | } 30 | 31 | ext = Path(img_filen).suffix 32 | if ext not in media_type_from_extension: 
33 | raise ValueError(f"Unknown image file extension: {img_filen}") 34 | media_type = media_type_from_extension[ext] 35 | 36 | img_base64 = image_to_base64_basic(img_filen) 37 | return { 38 | "type": "image", 39 | "source": { 40 | "type": "base64", 41 | "media_type": media_type, 42 | "data": img_base64, 43 | }, 44 | } 45 | 46 | 47 | def call_claude_gpt( 48 | prompt: str, 49 | tools: Optional[list[dict]] = None, 50 | image_filens: str | list[str] | None = None, 51 | image_resize_target_size_kb: Optional[int] = 100, 52 | client: Optional[Anthropic] = None, 53 | model: str = MODEL_NAME_CLAUDE_SONNET_GOOD_LATEST, 54 | temperature: Optional[float] = 0.001, 55 | response_json: bool = False, 56 | # seed: Optional[int] = DEFAULT_RANDOM_SEED, 57 | max_tokens: int = 4096, 58 | verbose: int = 0, 59 | ): 60 | """Call Claude API with support for text, images, and function calling""" 61 | from gjdutils.llm_utils import extract_json_from_markdown 62 | 63 | extra = locals() 64 | extra.pop("client") 65 | 66 | if tools is not None: 67 | raise NotImplementedError( 68 | "I think tools are supported, but not implemented in this function" 69 | ) 70 | 71 | if client is None: 72 | client = Anthropic(api_key=CLAUDE_API_KEY) 73 | 74 | # Prepare image contents if provided 75 | contents = [] 76 | if image_filens: 77 | if isinstance(image_filens, str): 78 | image_filens = [image_filens] 79 | assert image_resize_target_size_kb is not None 80 | for i, img_filen in enumerate(image_filens): 81 | contents.extend( 82 | [ 83 | {"type": "text", "text": f"Image {i+1}:"}, 84 | img_as_content_dict(image_filens[i]), 85 | # { 86 | # "type": "image", 87 | # "source": { 88 | # "type": "base64", 89 | # "media_type": "image/jpeg", # Adjust based on actual image type 90 | # "data": b64, 91 | # }, 92 | # }, 93 | ] 94 | ) 95 | 96 | # if response_json: 97 | # # Add instruction to respond in JSON format - be very explicit 98 | # prompt = f"Please provide your response in valid JSON format without any markdown formatting or backticks. Provide ONLY the JSON object, not any explanatory text before or after the JSON. 
{prompt}" 99 | 100 | contents.append({"type": "text", "text": prompt}) 101 | 102 | # not supported 103 | # response_format = {"type": "json_object"} if response_json else None 104 | 105 | # Make API call 106 | response = client.messages.create( 107 | model=model, 108 | max_tokens=max_tokens, 109 | messages=[{"role": "user", "content": contents}], 110 | temperature=temperature if temperature is not None else NOT_GIVEN, 111 | # seed=seed, 112 | # response_format=response_format, 113 | ) 114 | 115 | msg = response.content[0].text # type: ignore 116 | if response_json: 117 | try: 118 | # Use our utility function to handle markdown-wrapped JSON 119 | clean_json_text = extract_json_from_markdown(msg, verbose=verbose) 120 | msg = json.loads(clean_json_text) 121 | except json.JSONDecodeError as e: 122 | if verbose: 123 | print(f"JSON decode error: {e}") 124 | print(f"Raw message causing error: {msg}") 125 | print(f"Cleaned: {clean_json_text}") 126 | # Return a structured error response instead of failing 127 | msg = { 128 | "error": "Failed to parse API response", 129 | "raw_response": msg[:500] if msg else "Empty response", 130 | } 131 | extra.update( 132 | { 133 | "response": response.model_dump(), 134 | "msg": msg, 135 | # "tool_calls": tool_calls, 136 | "model": model, 137 | "contents": contents, 138 | } 139 | ) 140 | 141 | if verbose >= 2: 142 | print(f"PROMPT:\n{prompt}") 143 | if verbose >= 1: 144 | print(f"LLM MESSAGE:\n{msg}") 145 | if verbose >= 2: 146 | # print(f"TOOL CALLS:\n{tool_calls}") 147 | print(f"LLM RESPONSE:\n{json.dumps(response.model_dump(), indent=2)}") 148 | return msg, extra 149 | -------------------------------------------------------------------------------- /docs/instructions/FIX_HOUSEKEEPING_BUILD_TYPECHECK_LINT.md: -------------------------------------------------------------------------------- 1 | # Fix Housekeeping Build, TypeCheck & Lint Issues 2 | 3 | This is a periodic housekeeping task to maintain code quality and catch potential issues early in the development process. 4 | 5 | ## Goal 6 | 7 | Systematically address build, TypeScript, and linting issues to prevent accumulation of technical debt and ensure the codebase remains healthy for AI-first development. 8 | 9 | ## Comprehensive Health Check Process 10 | 11 | ### Stage 1: Assessment & Prioritisation 12 | 13 | #### Run Full Health Checks 14 | ```bash 15 | # Type checking - most critical for runtime safety 16 | npm run build # Project build with some lenience 17 | tsc --noEmit # Strict TypeScript type checking 18 | 19 | # Code quality and patterns 20 | npm run lint # ESLint issues and warnings 21 | 22 | # Functionality verification 23 | npm test # Test suite 24 | npm run test:e2e # E2E tests (if available and time permits) 25 | ``` 26 | 27 | #### Assess Scope & Impact 28 | - **Count issues**: `tsc --noEmit 2>&1 | grep -c "error TS"` and `npm run lint 2>&1 | grep -c Warning` 29 | - **Categorise by severity**: 30 | - 🔴 **BLOCKING**: TypeScript errors that could cause runtime failures 31 | - 🟡 **HIGH**: ESLint errors, deprecated patterns, security issues 32 | - 🟢 **LOW**: Style warnings, minor inconsistencies 33 | - **Identify patterns**: Are errors clustered in specific files/areas? 34 | 35 | #### Prioritisation Strategy 36 | 1. **Production code over test code**: Fix core functionality first 37 | 2. **Runtime safety over style**: TypeScript errors before ESLint warnings 38 | 3. **Recently modified files**: Focus on active development areas 39 | 4. 
**Shared/core modules**: Fix widely-used utilities before isolated features 40 | 5. **Quick wins**: Simple fixes that resolve multiple issues 41 | 42 | ### Stage 2: Systematic Resolution 43 | 44 | #### Use Subagents (if available) for Investigation 45 | Deploy subagents (if available) with specific focus areas: 46 | ``` 47 | "Investigate TypeScript errors in core modules - focus on main API routes and business logic. 48 | Categorise by: type safety issues, missing types, configuration problems. 49 | Suggest fix priorities and identify any dangerous patterns." 50 | ``` 51 | 52 | #### Fix in Batches 53 | - **Batch by file/module**: Complete one area before moving to next 54 | - **Batch by error type**: Fix all `exactOptionalPropertyTypes` issues together 55 | - **Test after each batch**: Verify fixes don't break functionality 56 | 57 | #### Safety Practices 58 | - **Understand before fixing**: Don't apply mechanical fixes without understanding 59 | - **Preserve functionality**: Use tests to verify changes don't break behaviour 60 | - **Conservative approach**: If unsure about a fix, mark for discussion rather than guessing 61 | - **Document complex fixes**: Add comments explaining non-obvious corrections 62 | 63 | ### Stage 3: Verification & Prevention 64 | 65 | #### Comprehensive Re-check 66 | ```bash 67 | # Verify all issues resolved 68 | npm run build && echo "✅ Build successful" 69 | tsc --noEmit && echo "✅ TypeScript clean" 70 | npm run lint && echo "✅ Linting clean" 71 | npm test && echo "✅ Tests passing" 72 | ``` 73 | 74 | #### Update Documentation 75 | - Update any affected documentation files if patterns changed 76 | - Note any systematic issues discovered for future prevention 77 | - Update this document if new issue patterns emerge 78 | 79 | #### Prevention Measures 80 | - **Consider TypeScript config adjustments**: Should `exactOptionalPropertyTypes` be relaxed for legacy code? 81 | - **Evaluate ESLint rules**: Are any rules generating more noise than value? 82 | - **IDE integration**: Ensure development environment catches issues early 83 | - **Documentation updates**: Add patterns to coding guidelines if needed 84 | 85 | ## Decision Framework 86 | 87 | ### When to Fix vs When to Document/Defer 88 | 89 | **Fix Immediately**: 90 | - Type errors that could cause runtime crashes 91 | - Security-related linting issues 92 | - Deprecated API usage that could break in future updates 93 | - Simple mechanical fixes (unused imports, missing semicolons) 94 | 95 | **Document & Schedule**: 96 | - Complex refactoring needs that require architectural decisions 97 | - Issues in legacy code that isn't actively maintained 98 | - Style inconsistencies that don't affect functionality 99 | - Performance optimizations that need measurement 100 | 101 | **Skip/Disable**: 102 | - ESLint rules that conflict with project conventions 103 | - TypeScript strictness that creates excessive overhead 104 | - Test-only issues that don't affect production 105 | 106 | ### Stopping Criteria 107 | 108 | **Stop and discuss with user if**: 109 | - Fixes require significant architectural changes 110 | - Multiple approaches exist with unclear trade-offs 111 | - Error counts remain high after systematic fixing 112 | - Tests start failing due to corrections 113 | - Uncertainty about safety of changes 114 | 115 | ## Post-Completion Actions 116 | 117 | 1. **Commit in logical batches**: Group related fixes together 118 | 2. **Update issue tracking**: Note patterns for future housekeeping 119 | 3. 
**Review effectiveness**: Did this process catch important issues? 120 | 4. **Schedule next cycle**: Based on issue accumulation rate 121 | 5. **Share learnings**: Update team practices if systematic issues found 122 | 123 | ## Think Hard 124 | 125 | Consider the bigger picture: 126 | - Are we fixing symptoms or root causes? 127 | - What does the error pattern tell us about our development process? 128 | - Should we adjust our TypeScript/ESLint configuration for better AI-first development? 129 | - Are there preventive measures (pre-commit hooks, CI checks) worth implementing? 130 | - Is the balance right between strictness and development velocity? 131 | 132 | Remember: The goal is sustainable code quality, not perfect cleanliness. Focus on issues that matter for functionality, security, and maintainability. -------------------------------------------------------------------------------- /docs/instructions/UPDATE_CLAUDE_INSTRUCTIONS.md: -------------------------------------------------------------------------------- 1 | # Updating AI Agent Instructions 2 | 3 | Guidelines for maintaining CLAUDE.md (or equivalent Cursor rules file) to help AI agents operate effectively on your codebase. 4 | 5 | ## See also 6 | 7 | - `CLAUDE.md` - The main instructions file for AI agents (or `.cursorrules`, etc.) 8 | - `WRITE_EVERGREEN_DOC.md` - General documentation writing guidelines 9 | - `UPDATE_HOUSEKEEPING_DOCUMENTATION.md` - Documentation maintenance process 10 | 11 | ## Purpose of Agent Instructions File 12 | 13 | This file (CLAUDE.md, .cursorrules, etc.) serves as the primary orientation document for AI agents working on your codebase. It should provide essential context and signposts without duplicating information that exists elsewhere in the documentation. 14 | 15 | ## What to Include 16 | 17 | ### Essential Project Context 18 | - **Project overview** - Brief description of goals and current phase 19 | - **Architecture summary** - Key framework and storage decisions 20 | - **Build commands** - How to run, test, and debug the application 21 | - **Project structure** - Where to find different types of code/docs 22 | 23 | ### Debugging and Development Aids 24 | - **Type checking** - Commands for compilation errors 25 | - **Linting** - Code quality checking commands 26 | - **Testing** - Test commands and coverage info 27 | - **Log files** - Location of development logs 28 | - **Test locations** - Where to find existing tests 29 | - **Database info** - Migration files and schema documentation 30 | 31 | ### Navigation Signposts 32 | - **Architecture docs** - Link to main architecture documentation 33 | - **Planning docs** - Point to recent decisions and planning documents 34 | - **Specific domains** - Database, API, UI components documentation 35 | 36 | ### Operational Guidelines 37 | - **Git practices** - Reference to commit and workflow guidelines 38 | - **Code style** - Spelling preferences, existing patterns 39 | - **Environment setup** - Key variables and configuration 40 | 41 | ## What NOT to Include 42 | 43 | - **Detailed instructions** - These belong in specific domain docs 44 | - **Code examples** - Link to actual implementation files instead 45 | - **Duplicate information** - Always reference canonical source 46 | - **Step-by-step tutorials** - These belong in setup documentation 47 | 48 | ## Maintenance Principles 49 | 50 | ### Conciseness 51 | Keep the instructions file focused and scannable. Each section should be 3-5 bullet points maximum. Use signposting rather than explanation. 
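For instance, a hypothetical debugging section kept to signpost length might look like this (the commands and paths are illustrative, not prescriptive):

```markdown
## Debugging

- Type check: `npm run typecheck`
- Lint: `npm run lint`
- Tests: `tests/` - run with `npm test`
- Dev logs: `logs/dev.log`
```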
52 | 53 | ### Signposting Over Duplication 54 | Instead of explaining how something works, point to where the information lives: 55 | - "Database schema: `migrations/` directory and `../reference/DATABASE_SCHEMA.md`" 56 | - "Testing: Framework setup in test config, tests in `tests/` or `__tests__/`" 57 | 58 | ### Current State Focus 59 | Document what exists now, not what's planned. Use status indicators (✓ implemented, 📋 planned) when helpful. 60 | 61 | ### User-Driven Updates 62 | Update the instructions file based on: 63 | - **User feedback** - What agents needed but couldn't find 64 | - **Common pain points** - Debugging paths that weren't obvious 65 | - **New major features** - Changes to build process, architecture 66 | - **Structural changes** - New documentation, moved files 67 | 68 | ## Review Triggers 69 | 70 | Update your agent instructions when: 71 | - AI agents struggle to find essential information 72 | - Major architectural changes occur 73 | - New debugging tools or processes are added 74 | - Project structure changes significantly 75 | - User identifies missing signposts during development 76 | 77 | ## Quality Checklist 78 | 79 | Before updating agent instructions: 80 | - [ ] Information is essential for AI agent effectiveness 81 | - [ ] No duplication of content available elsewhere 82 | - [ ] All links and references are valid 83 | - [ ] Debugging paths are clear and actionable 84 | - [ ] Structure remains scannable and concise 85 | - [ ] Cross-references point to canonical sources 86 | 87 | ## Tool-Specific Considerations 88 | 89 | ### Claude Code (CLAUDE.md) 90 | - Include tasks and subagents guidance for context window management 91 | - Reference specific tools and permissions needed 92 | - Include parallel tool execution patterns 93 | - Document debugging workflows for command-line environments 94 | 95 | ### Cursor (.cursorrules) 96 | - Include workspace configuration hints where relevant 97 | - Reference model selection best practices if applicable 98 | - Include shortcuts and workflow patterns for IDE users 99 | - Consider IDE-specific debugging approaches 100 | 101 | ### Other AI Tools 102 | - Adapt structure to tool capabilities and limitations 103 | - Include tool-specific workflow patterns 104 | - Reference appropriate documentation formats for the platform 105 | - Consider different context window constraints 106 | 107 | ## Customization Guidelines 108 | 109 | When maintaining CLAUDE.md: 110 | 111 | 1. **Keep project-specific** - Tailor content to actual codebase needs 112 | 2. **Remove inapplicable sections** - Don't include generic content that doesn't apply 113 | 3. **Add project-specific sections** as needed (API keys, special workflows, domain knowledge) 114 | 4. **Maintain conciseness** - File should be scannable, not comprehensive 115 | 5. **Use signposting approach** - Point to detailed docs rather than duplicating content 116 | 6. 
**Update regularly** - Keep build commands and key information current 117 | 118 | ## Essential References Structure 119 | 120 | When organizing the "See also" section in CLAUDE.md, consider this structure: 121 | 122 | ```markdown 123 | see: 124 | - `README.md` for project goals and features 125 | - `docs/reference/CODING_PRINCIPLES.md` for development principles 126 | - `docs/instructions/GIT_COMMIT_CHANGES.md` for Git workflow 127 | - `docs/reference/ARCHITECTURE_OVERVIEW.md` for system architecture 128 | - `docs/reference/[DOMAIN]_*.md` for specific domain documentation 129 | ``` 130 | 131 | This provides a logical hierarchy from general (README) to specific (domain docs). -------------------------------------------------------------------------------- /docs/instructions/draft/NONINTERACTIVE.md: -------------------------------------------------------------------------------- 1 | # Non-Interactive AI Assistant Usage 2 | 3 | Non-interactive mode allows AI assistants to execute tasks without human intervention. 4 | 5 | **Note**: This document is specifically written for Claude Code (`claude -p`) but the principles apply to other AI tools. 6 | 7 | ## See Also 8 | 9 | - `../WRITE_PLANNING_DOC.md` - Creating structured task documents 10 | - `../GIT_COMMITS.md` - Git workflow practices 11 | - `../CODING_PRINCIPLES.md` - Development principles 12 | - Planning documents in your project's planning/ directory 13 | 14 | ## Tool Access Philosophy 15 | 16 | **Non-interactive AI assistants typically cannot:** 17 | - Run applications (no access to development servers, browsers, or live applications) 18 | - Execute tests interactively (no access to test runners that require interaction) 19 | - Access specialized MCP tools (browser automation, database queries) 20 | - Commit changes to git (this should be handled externally) 21 | - Access running development servers or databases 22 | 23 | **Non-interactive AI assistants can:** 24 | - Read, write, and edit files 25 | - Perform static analysis of code 26 | - Search and research via web 27 | - Use basic command line tools for file operations 28 | - Generate and modify documentation 29 | - Analyse project structure and dependencies 30 | 31 | ## Basic Usage 32 | 33 | ### Claude Code Example 34 | ```bash 35 | claude -p "your task description" \ 36 | --allowedTools "Bash Edit MultiEdit Read Write Glob Grep LS Task WebFetch WebSearch TodoRead TodoWrite" \ 37 | --output-format stream-json 38 | ``` 39 | 40 | ### Using Wrapper Scripts 41 | 42 | Consider creating a wrapper script (e.g., `scripts/ai-batch.sh`) to standardize your non-interactive AI usage patterns. 43 | 44 | ## Planning Document Integration 45 | 46 | Non-interactive mode works best with well-structured planning documents (see `WRITE_PLANNING_DOC.md`). 
Feed the AI the entire planning document content: 47 | 48 | ```bash 49 | # Claude Code example 50 | ./scripts/claude-batch.sh "$(cat planning/your_task.md)" 51 | ``` 52 | 53 | This approach: 54 | - Provides complete context upfront 55 | - Reduces need for clarifying questions 56 | - Enables autonomous task execution 57 | - Works well with parallel execution 58 | 59 | ## CI/CD Integration 60 | 61 | ### Example GitHub Actions Workflow 62 | ```yaml 63 | name: AI-Assisted Development 64 | on: 65 | workflow_dispatch: 66 | inputs: 67 | task_description: 68 | description: 'Task for AI to execute' 69 | required: true 70 | 71 | jobs: 72 | ai-task: 73 | runs-on: ubuntu-latest 74 | steps: 75 | - uses: actions/checkout@v3 76 | - name: Setup Node.js 77 | uses: actions/setup-node@v3 78 | with: 79 | node-version: '18' 80 | - name: Install AI Tools 81 | run: npm install -g @anthropic-ai/claude-code 82 | - name: Run AI Task 83 | env: 84 | ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} 85 | run: | 86 | claude -p "${{ github.event.inputs.task_description }}" \ 87 | --allowedTools "Edit MultiEdit Read Write Glob Grep LS Task WebFetch WebSearch" \ 88 | --output-format stream-json 89 | ``` 90 | 91 | ## Tool Configuration 92 | 93 | ### Recommended Tool Set 94 | - **Core file operations**: `Edit MultiEdit Read Write` 95 | - **Search and discovery**: `Glob Grep LS` 96 | - **Research and analysis**: `WebFetch WebSearch` 97 | - **Task management**: `TodoRead TodoWrite` 98 | - **Basic system operations**: `Bash` (limited to file operations) 99 | - **Subtask delegation**: `Task` 100 | 101 | ### Security Considerations 102 | - Use specific tool allowlists rather than broad permissions 103 | - Limit shell access to safe operations 104 | - Run in isolated environments for untrusted tasks 105 | - Store API keys securely in CI environments 106 | 107 | ## Error Handling 108 | 109 | Non-interactive mode requires robust error handling since AI cannot ask for clarification: 110 | 111 | ### In Task Descriptions 112 | ```markdown 113 | # Task: Refactor authentication system 114 | 115 | ## Error Handling 116 | - If compilation errors occur, document them in /tmp/issues.md 117 | - If tests would be needed, create them but note they cannot be run 118 | - If unclear about implementation details, make reasonable assumptions and document them 119 | 120 | ## Constraints 121 | - Cannot run application or tests 122 | - Cannot commit changes 123 | - Must work with existing code patterns 124 | ``` 125 | 126 | ### In Wrapper Scripts 127 | ```bash 128 | ai-batch() { 129 | # ... setup ... 130 | 131 | if ! claude -p "$prompt" --allowedTools "$tools" --output-format stream-json; then 132 | echo "AI task failed. Check output above for details." 133 | return 1 134 | fi 135 | } 136 | ``` 137 | 138 | ## Best Practices 139 | 140 | 1. **Provide complete context** in planning documents 141 | 2. **Specify constraints clearly** (no testing, no commits, etc.) 142 | 3. **Use structured output** for automation parsing 143 | 4. **Handle failures gracefully** in CI environments 144 | 5. **Limit scope** to tasks that don't require runtime verification 145 | 6. **Document assumptions** when requirements are ambiguous 146 | 147 | ## Unresolved Questions 148 | 149 | ### Git & Branch Management 150 | - Should AI be able to make its own Git commits? 151 | - Should wrapper scripts automatically create branches for each task? 152 | - How should branch naming be standardised for parallel execution? 153 | - Should cleanup of completed branches be automated? 
154 | 
155 | ### Output Format
156 | - Is `stream-json` the best format for CI integration?
157 | - Should results be structured differently for different use cases?
158 | - How should partial results be handled if AI is interrupted?
159 | 
160 | ### Task Scope
161 | - Should there be timeout limits for long-running tasks?
162 | 
163 | ### Error Recovery
164 | - How should the system handle partial completions?
165 | - Should failed tasks be automatically retried with modified parameters?
166 | - What level of rollback capability is needed?
-------------------------------------------------------------------------------- /docs/instructions/WRITE_EVERGREEN_DOC.md: --------------------------------------------------------------------------------
1 | # Writing evergreen documentation
2 | 
3 | see also:
4 | - `WRITE_PLANNING_DOC.md` - for writing ephemeral decision/planning docs
5 | - `UPDATE_HOUSEKEEPING_DOCUMENTATION.md` - for keeping documentation current every so often
6 | 
7 | 
8 | # What are evergreen docs?
9 | 
10 | This is for writing evergreen, general documentation on how the system works.
11 | 
12 | These should be a concise, clear, well-structured, complete-enough, up-to-date description of things. By "complete-enough", they should cover most of the important topics, if only to signpost to where more information can be found, or to the code itself.
13 | 
14 | They should refer to one another, and avoid too much overlap in content, so that if information changes, we ideally only need to change the documentation in one place.
15 | 
16 | 
17 | # Format
18 | 
19 | They should be written in Markdown, stored as `TOPIC_NAME.md` (e.g. in `docs/project/`, `docs/reference/`, `docs/instructions/` or similar), following project-specific documentation guidelines (for example an `AGENTS.md` or local docs README) for storage.
20 | 
21 | ## Filename Guidelines
22 | 
23 | Choose descriptive filenames that clearly indicate the document's content:
24 | 
25 | - **Be specific**: `UPLOAD_DOCUMENT_PROCESSING_PIPELINE.md` instead of just `UPLOAD.md`
26 | - **Include context**: `NAVIGATION_COMPONENT_DESIGN.md` instead of just `NAVIGATION.md`
27 | - **Keep existing names**: Where possible, include the current name as a prefix (e.g., `SETUP_DEVELOPMENT_ENVIRONMENT.md` keeps `SETUP`)
28 | - **Group related docs**: Use similar prefixes so related docs sort together (e.g., all `DATABASE_*.md` files)
29 | - **Maintain prefix conventions**: Keep category prefixes (DATABASE_, TESTING_, API_, etc.)
30 | 
31 | Good examples:
32 | - `DATABASE_INTEGRATION_REFERENCE.md`
33 | - `TESTING_AUTOMATION_OVERVIEW.md`
34 | - `API_CLIENT_INTEGRATION.md`
35 | - `AUTHENTICATION_SECURITY.md`
36 | 
37 | 
38 | ## Document structure
39 | 
40 | They might be organised into something like the following sections. Use your judgment. Probably only a few of these will be relevant for each doc, feel free to rename them, etc.
41 | 
42 | 
43 | ### Introduction
44 | 
45 | 2-sentence summary of the topic, and what the document covers.
46 | 
47 | ### See also
48 | 
49 | Bullet-point list of other relevant docs, code, urls, or other resources that provide related information, or more detail. Provide a 1-sentence summary or explanation of how each one is relevant.
50 | 
51 | Examples of good cross-references:
52 | - `WRITE_PLANNING_DOC.md` - for information about writing ephemeral decision/planning docs
53 | - `src/components/example.tsx` - implementation of features described here
54 | - `planning/YYYYMMDD_feature_planning.md` - historical decision context
55 | - External URLs when relevant (e.g., library documentation)
56 | 
57 | Add references to and from this new doc (e.g. in relevant code, planning docs in `planning/*.md`, etc) - use a subagent for this
58 | 
59 | #### Cross-Reference Best Practices
60 | 
61 | - **Update documentation organisation index** if your project has one
62 | - **Link to canonical source** (e.g. functions, files, docs, urls, etc) for detailed information rather than duplicating
63 | - **Provide 1-sentence context** with each link explaining its relevance
64 | - **Use relative paths** for internal documentation links
65 | - **Avoid content duplication** - if information exists elsewhere, link to it
66 | 
67 | 
68 | 
69 | 
70 | ### Principles, key decisions
71 | 
72 | - Include any specific principles/approaches or decisions that have been explicitly agreed with the user (over and above existing coding rules, project examples, best practices, etc).
73 | - As you get new information from the user, update this doc so it's always up-to-date.
74 | 
75 | ### [Provide a few detailed sections here, depending on the topic]
76 | 
77 | Include as appropriate:
78 | - high-level overview, architecture
79 | - common patterns, howtos
80 | - examples
81 | - gotchas
82 | - limitations
83 | - troubleshooting
84 | - planned future work
85 | 
86 | 
87 | ### Documenting Systems in Transition
88 | 
89 | When documenting systems that are changing (e.g., architectural migrations):
90 | 
91 | 1. **Clearly distinguish states**:
92 |    - **Current State**: How the system works today
93 |    - **Target State**: The intended future architecture
94 |    - **Migration Status**: Progress and timeline if known
95 | 
96 | 2. **Reference decisions**: Link to planning docs or architecture decision records for rationale
97 | 
98 | 3. **Update incrementally**: As migration progresses, update the documentation
99 | 
100 | Example:
101 | ```markdown
102 | ## Database Architecture
103 | 
104 | **Current State**: Uses legacy schema approach
105 | **Target State**: New optimized schema with performance improvements
106 | **Migration Status**: Schema designed, code updates pending
107 | 
108 | see `../reference/ARCHITECTURE_DECISIONS.md` for migration rationale
109 | ```
110 | 
111 | 
112 | ### Appendix
113 | 
114 | Add any other important context here, e.g.
115 | - example data
116 | - other information that should be captured but doesn't fit neatly in the above sections
117 | 
118 | 
119 | # Maintenance
120 | 
121 | ## Review Frequency
122 | 
123 | Regular documentation review ensures accuracy:
124 | - **After major features** - Update immediately after implementation
125 | - **During housekeeping** - Monthly review recommended
126 | - **When outdated** - Fix immediately when noticed
127 | - **Before milestones** - Ensure docs reflect current state
128 | 
129 | see `UPDATE_HOUSEKEEPING_DOCUMENTATION.md` for the complete housekeeping process
130 | 
131 | ## Common Pitfalls to Avoid
132 | 
133 | 1. **Information duplication** - Creates maintenance burden when things change
134 | 2. **Vague status descriptions** - Be specific about implementation state
135 | 3. 
**Missing cross-references** - Always link to related documentation 136 | 4. **Outdated examples** - Ensure code samples match current patterns 137 | 5. **Forgotten transitions** - Update docs as systems migrate 138 | 139 | ## Quality Checklist 140 | 141 | Before committing documentation: 142 | - [ ] Cross-references are valid and helpful 143 | - [ ] No contradictions with other documents 144 | - [ ] Examples match current code patterns 145 | - [ ] Transitional states are clearly marked 146 | - [ ] "See also" sections are comprehensive --------------------------------------------------------------------------------