├── .github ├── dependabot.yml └── workflows │ └── python-package.yml ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── mark ├── __main__.py ├── cli.py ├── config.py ├── llm.py ├── llm_request.py ├── llm_response.py ├── markdown_file.py └── scraper.py ├── poetry.lock ├── pyproject.toml ├── templates └── default_system_prompt.md └── tests ├── conftest.py ├── test_cli.py └── test_scraper.py /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "pip" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "weekly" 12 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python 3 | 4 | name: Python package 5 | 6 | on: 7 | push: 8 | branches: [ "main" ] 9 | pull_request: 10 | branches: [ "main" ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | python-version: ["3.9", "3.10", "3.11"] 20 | 21 | steps: 22 | - uses: actions/checkout@v4 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v3 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install 
dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | python -m pip install poetry flake8 pytest 31 | poetry install --with dev 32 | - name: Lint with flake8 33 | run: | 34 | # stop the build if there are Python syntax errors or undefined names 35 | poetry run flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 36 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 37 | poetry run flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 38 | - name: Test with pytest 39 | run: | 40 | export OPENAI_API_KEY=test_key && poetry run python -m pytest 41 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | 162 | thread.md -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## Changelog 2 | All notable changes to this project will be documented in this file. 3 | 4 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 5 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
6 | 7 | ### [0.10.2] - 2025-03-10 8 | #### Changed 9 | - Bump llm to 0.25 10 | - Bump pytest from 8.3.4 to 8.3.5 11 | 12 | ### [0.10.1] - 2025-03-05 13 | #### Fixed 14 | - Fixed issue where the OPENAI_BASE_URL was no longer being set correctly in the 0.10.0 update. 15 | - Streaming is disabled for all completion requests to prevent issues 16 | 17 | ### [0.10.0] - 2025-02-18 18 | #### Added 19 | - `mark models` subcommand will list all available LLM models available 20 | 21 | ### [0.9.0] - 2025-02-18 22 | #### Fixed 23 | - OpenAI reasoning models now work with Mark 24 | 25 | #### Added 26 | - LLM is now a dependency to expand model availability to Mark and delegate low-level model interaction. 27 | - Added model specification test 28 | 29 | #### Removed 30 | - Import direct import of `openai` (referencing LLMs dependencies directly) 31 | 32 | ### [0.8.3] - 2025-01-20 33 | #### Removed 34 | - Import of `image_to_data_url` has been removed from LangChain. 35 | 36 | ### [0.8.2] - 2024-08-13 37 | #### Fixed 38 | - Slightly improved errors raise when scraping pages on low memory hardware 39 | 40 | ### [0.8.1] - 2024-08-13 41 | #### Changed 42 | - Bump langchain from 0.2.14 to 0.2.15 43 | - Bump httpx from 0.27.0 to 0.27.2 44 | - Bump openai from 1.42.0 to 1.43.0 45 | - Bump ipython from 8.26.0 to 8.27.0 46 | - Bump langchain-community from 0.2.4 to 0.2.12 47 | - Bump openai from 1.41.1 to 1.42.0 48 | - Bump openai from 1.14.2 to 1.41.1 49 | - Bump langchain from 0.2.1 to 0.2.14 50 | - Bump markdownify from 0.12.1 to 0.13.1 51 | - Bump pyyaml from 6.0.1 to 6.0.2 52 | - Bump langchain-community from 0.2.1 to 0.2.4 53 | - Bump ipython from 8.21.0 to 8.26.0 54 | - Bump flake8 from 7.1.0 to 7.1.1 55 | - Bump pytest from 6.2.5 to 8.3. 
56 | 57 | ### [0.8.0] - 2024-08-13 58 | #### Added 59 | - Support for `--model` option to allow for selecting a specific OpenAI model 60 | 61 | ### [0.7.3] - 2024-07-24 62 | #### Added 63 | - Support for `--version` option in the CLI 64 | 65 | ### [0.7.2] - 2024-07-24 66 | #### Added 67 | - Aliases for cli options `--system` (`-s`) and `--generate-images` (`-i`) 68 | 69 | ### [0.7.1] - 2024-06-28 70 | #### Fixed 71 | - Gracefully handle timeouts when fetching urls 72 | 73 | ### [0.7.0] - 2024-06-27 74 | #### Changed 75 | - Updated the scraping logic to render pages as clean markdown which exposes the LLM to urls on the page. 76 | 77 | ### [0.6.3] - 2024-06-20 78 | #### Fixed 79 | - Gracefully handle broken links in markdown files. 80 | 81 | ### [0.6.2] - 2024-06-20 82 | #### Added 83 | - Cleaner OpenAI error handling for common issues 84 | 85 | ### [0.6.1] - 2024-06-20 86 | #### Removed 87 | - Response log for image generation 88 | 89 | ### [0.6.0] - 2024-06-20 90 | #### Added 91 | - Ability to override the OpenAI endpoint with OPENAI_API_BASE_URL env var 92 | 93 | ### [0.5.0] - 2024-06-18 94 | #### Added 95 | - Adding experimental support for DALL-E image generation 96 | 97 | ### [0.4.0] - 2024-06-06 98 | #### Added 99 | - Requests are now logged to `~/.mark/logs/` 100 | 101 | ### [0.3.6] - 2024-06-06 102 | #### Fixed 103 | - USER_AGENT warning 104 | 105 | ### [0.3.5] - 2024-06-06 106 | #### Added 107 | - Included additional files in the project for `templates/default_system_prompt.md`. 108 | 109 | #### Changed 110 | - Updated default system prompt and refactored into the templates directory. 111 | 112 | ### [0.3.4] - 2024-06-04 113 | #### Added 114 | - Returned a pretty error if no `OPENAI_API_KEY` is found. 115 | 116 | ### [0.3.3] - 2024-06-04 117 | #### Fixed 118 | - Fixed stdin use case. 119 | 120 | ### [0.3.2] - 2024-06-04 121 | #### Added 122 | - Added local file references to page links. 
123 | 124 | ### [0.3.1] - 2024-06-04 125 | #### Added 126 | - Utilized LangChain image utilities for local image encoding. 127 | 128 | ### [0.3.0] - 2024-06-04 129 | #### Added 130 | - Bumped version. 131 | - Added new dependencies: `langchain ^0.2.1` and `langchain-community ^0.2.1`. 132 | 133 | ### [0.2.3] - 2024-05-29 134 | #### Changed 135 | - Updated model. 136 | 137 | ### [0.2.2] - 2024-05-03 138 | #### Fixed 139 | - Fixed issue handling malformed image tags. 140 | 141 | ### [0.2.1] - 2024-04-29 142 | #### Fixed 143 | - Fixed pathing issue with images. 144 | 145 | ### [0.2.0] - 2024-04-23 146 | #### Added 147 | - Added parsing support for images in markdown text. 148 | - Added new dependencies: `beautifulsoup4 ^4.12.3` and `markdown ^3.6`. 149 | 150 | ### [0.1.0] - 2024-03-25 151 | #### Added 152 | - Initial setup with dependencies: `python ^3.8`, `PyYAML 5.4.1`, `ipython 8.21.0`, `openai 1.14.2`. 153 | - Replace `typer` with `click` for CLI tool. 154 | - Setup CLI interface with entry point `agent_gpt.__main__:cli`. 155 | - Added development dependencies: `pytest ^6.2.5`. 156 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Ryan Elston 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Build Status](https://img.shields.io/github/actions/workflow/status/relston/mark/.github/workflows/python-package.yml) 2 | ![License](https://img.shields.io/github/license/relston/mark) 3 | 4 | # Mark 5 | **Mark lets you seamlessly use markdown, images and links to interact with LLMs** 6 | 7 | > "Markdown is still the best format for feeding into language models"
- Simon Willison (co-creator of Django) 8 | 9 | Why Markdown + LLMs? For a full rundown of the benefits and use cases, check out [Introducing 'Mark', a Markdown CLI tool for LLMs](https://relston.github.io/markdown/gpt4o/cli/2024/06/07/introducing-mark.html) 10 | 11 | ## Key Features 12 | - Interact with LLMs using Markdown 13 | - Visual recognition of markdown image references via multi-modal LLMs 14 | - Local and remote links are scraped for context 15 | - LLM responses appended directly into Markdown files 16 | - `stdin` and `stdout` support for piping 17 | - Support for Image Generation 18 | 19 | ## Example 20 | ```markdown 21 | Describe ![this image](https://www.animal.photos/mamm1/cat-tig1_files/bengal12.jpg) to me in one short sentence. 22 | 23 | # GPT Response (model: gpt-4o-2024-05-13, system: default) 24 | A Bengal tiger is walking on a sandy path with greenery in the background. 25 | 26 | # User Response 27 | List each section of content on [this page](https://en.wikipedia.org/wiki/Bengal_tiger) 28 | 29 | # GPT Response (model: gpt-4o-2024-05-13, system: default) 30 | ## Sections of Content on the Bengal Tiger Wikipedia Page 31 | 32 | 1. **Taxonomy** 33 | - Genetic ancestry 34 | 35 | 2. **Characteristics** 36 | - Body weight and size 37 | 38 | 3. **Distribution and habitat** 39 | - India 40 | - Bangladesh 41 | ... 42 | ``` 43 | 44 | This allows you to carry on a conversation directly in the markdown file - or simply integrate the response back into the document you're working on. This is a powerful, flexible and natural way to interact with LLMs that unlocks countless use cases. `mark`'s focus on explicitly using Markdown as both a prompt and response medium for LLMs presents a unique approach. By blending interactive, version-controlled Markdown documents with LLM responses in a CLI tool offers an innovative workflow for those looking to integrate LLM interactions into their development and documentation processes. 
45 | 46 | # Install the Mark CLI 47 | ```bash 48 | pip install mark 49 | ``` 50 | 51 | # Usage 52 | By default, `mark` will read a markdown file, extract any context references, and send them to the LLM. The responses are then appended to the markdown file. 53 | ```bash 54 | mark path/to/markdown.md 55 | ``` 56 | *Requires an OpenAI API key in the `OPENAI_API_KEY` environment variable* 57 | 58 | Also supports `stdin` with `stdout` for piping LLM responses into other tools 59 | ```bash 60 | cat path/to/markdown.md | mark 61 | # LLM response.... 62 | ``` 63 | 64 | ## Use a specific LLM model 65 | You can specify a different LLM model to use with the `--model` (or `-m`) flag 66 | ```bash 67 | mark path/to/markdown.md --model gpt-4o-2024-05-13 68 | ``` 69 | 70 | ## Custom system prompts 71 | The system prompts folder is located at `~/.mark/system_prompts` and it includes a `default.md` prompt. You can add any additional system prompts you'd like to use in this folder and use them with the `--system` (or `-s`) flag. 72 | ```bash 73 | # ~/.mark/system_prompts/custom.md 74 | mark path/to/markdown.md --system custom 75 | ``` 76 | 77 | ## Override the OpenAI API endpoint 78 | If you want to use a different LLM API endpoint that is fully compatible with the OpenAI API, set the `OPENAI_API_BASE_URL` environment variable to that endpoint value. This should enable you to use OpenAI proxy services like [credal.ai](https://www.credal.ai/), or other LLMs that are compatible with the OpenAI SDK. 79 | 80 | ## Image Generation 81 | To generate an image based on the input just add the `--generate-image` (or `-i`) flag to the command 82 | ```bash 83 | mark path/to/markdown.md --generate-image 84 | ``` 85 | This will generate an image using the 'dall-e-3' model and append it to the markdown file. 
86 | 87 | # Development 88 | ## Local Setup 89 | ```bash 90 | poetry install 91 | ``` 92 | *[Requires poetry](https://python-poetry.org/docs/)* 93 | 94 | ## Run the CLI Tool locally 95 | ```bash 96 | poetry run mark path/to/markdown.md 97 | ``` 98 | 99 | ## Run the tests 100 | ```bash 101 | poetry run python -m pytest 102 | ``` 103 | 104 | ## Auto-fix lint errors 105 | ```bash 106 | poetry run autopep8 --in-place --aggressive --aggressive --recursive . 107 | ``` 108 | -------------------------------------------------------------------------------- /mark/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/relston/mark/149eca99d91275a1bf05f273cb77e46d87003f9a/mark/__main__.py -------------------------------------------------------------------------------- /mark/cli.py: -------------------------------------------------------------------------------- 1 | import click 2 | from click_default_group import DefaultGroup 3 | from click.testing import CliRunner 4 | from mark import llm 5 | from llm.cli import cli as llm_cli 6 | from mark.llm_request import LLMRequest 7 | from mark.markdown_file import MarkdownFile 8 | from mark.config import get_config 9 | from importlib.metadata import version, PackageNotFoundError 10 | 11 | try: 12 | package_version = version("mark") 13 | except PackageNotFoundError: 14 | package_version = "unknown" 15 | 16 | DEFAULT_MODEL = "gpt-4o" 17 | DALL_E_MODEL = "dall-e-3" 18 | 19 | 20 | @click.group( 21 | cls=DefaultGroup, 22 | default="down", 23 | default_if_no_args=True, 24 | ) 25 | @click.version_option(version=package_version) 26 | def mark_cli(): 27 | """Markdown powered LLM CLI - Multi-modal AI text generation tool""" 28 | 29 | 30 | @mark_cli.command(name="down") 31 | @click.argument('file', type=click.File()) 32 | @click.option('--system', '-s', type=click.STRING, 33 | default='default', help='The system prompt to use') 34 | @click.option('--model', '-m', 
type=click.STRING, help='The llm model') 35 | @click.option('--generate-image', '-i', is_flag=True, default=False, 36 | help='EXPERIMENTAL: Generate an image using DALL-E.') 37 | def down(file, system, model, generate_image): 38 | """ 39 | Default: Process markdown file or stdin 40 | 41 | In-document Thread Example: 42 | mark path/to/markdown.md 43 | 44 | stdin Example: 45 | echo "Hello, World!" | mark - 46 | """ 47 | system_prompt = get_config().system_prompts().get(system, 'default') 48 | markdown_file = MarkdownFile(file) 49 | 50 | if not model: 51 | model = DALL_E_MODEL if generate_image else DEFAULT_MODEL 52 | 53 | request = LLMRequest(model) \ 54 | .with_prompt(markdown_file.content) \ 55 | .with_system_message(system_prompt) 56 | 57 | [request.with_image(image) for image in markdown_file.images] 58 | [request.with_link(link) for link in markdown_file.links] 59 | 60 | if generate_image: 61 | response = llm.generate_image(request) 62 | else: 63 | response = llm.get_completion(request) 64 | 65 | response.with_system(system) 66 | 67 | if markdown_file.file_path: 68 | with open(markdown_file.file_path, "a") as file: 69 | file.write(response.to_markdown()) 70 | else: 71 | click.echo(response.content) 72 | 73 | 74 | @mark_cli.command("models") 75 | def models_command(): 76 | """List available llm models""" 77 | runner = CliRunner() 78 | result = runner.invoke(llm_cli, ["models"]) 79 | if result.exception: 80 | raise click.ClickException(str(result.exception)) 81 | click.echo(result.output) 82 | 83 | 84 | if __name__ == "__main__": 85 | mark_cli() 86 | -------------------------------------------------------------------------------- /mark/config.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import os 3 | from importlib.resources import read_text 4 | 5 | 6 | class Config: 7 | DEFAULT_SYSTEM_PROMPT_TEMPLATE_PATH = 'templates/default_system_prompt.md' 8 | 9 | def __init__(self): 10 | 
self.config_dir = os.getenv( 11 | 'MARK_CONFIG_PATH', 12 | os.path.expanduser("~/.mark")) 13 | self.system_prompts_dir = f"/{self.config_dir}/system_prompts" 14 | self.default_system_prompt = f"{self.system_prompts_dir}/default.md" 15 | self.log_folder = f"{self.config_dir}/logs" 16 | 17 | if not os.path.exists(self.system_prompts_dir): 18 | os.makedirs(self.system_prompts_dir) 19 | 20 | if not os.path.exists(self.default_system_prompt): 21 | default_config = read_text('templates', 'default_system_prompt.md') 22 | 23 | with open(os.path.expanduser(self.default_system_prompt), "w") as file: 24 | file.write(default_config) 25 | 26 | if not os.path.exists(self.log_folder): 27 | os.makedirs(self.log_folder) 28 | 29 | def system_prompts(self): 30 | system_prompts = {} 31 | for filename in os.listdir(self.system_prompts_dir): 32 | filepath = os.path.join(self.system_prompts_dir, filename) 33 | with open(filepath, "r") as file: 34 | system_prompt_name = os.path.splitext(filename)[0] 35 | system_prompts[system_prompt_name] = file.read() 36 | return system_prompts 37 | 38 | def log(self, content): 39 | # Get current date and time as string 40 | dt_string = datetime.now().strftime("%Y-%m-%d-%H-%M-%S") 41 | log_file = f"{self.log_folder}/{dt_string}.md" 42 | with open(log_file, "w") as file: 43 | file.write(content) 44 | 45 | 46 | _config = None 47 | 48 | 49 | def reset(): 50 | """ 51 | Reset the config object. 52 | """ 53 | global _config 54 | _config = None 55 | 56 | 57 | def get_config(): 58 | """ 59 | Return memoized config object. 
60 | """ 61 | global _config 62 | if not _config: 63 | _config = Config() 64 | return _config 65 | -------------------------------------------------------------------------------- /mark/llm.py: -------------------------------------------------------------------------------- 1 | import os 2 | import click 3 | import llm 4 | from llm.default_plugins.openai_models import openai, Chat, AsyncChat 5 | from mark.config import get_config 6 | from mark.llm_request import LLMRequest 7 | from mark.llm_response import LLMResponse, LLMImageResponse 8 | 9 | # TODO: Remove this. Only needed to support image generation. 10 | # Should differ to llm model registration 11 | OPENAI_BASE_URL = os.getenv('OPENAI_API_BASE_URL', openai.base_url) 12 | OPENAI_API_KEY = os.getenv('OPENAI_API_KEY') 13 | if not OPENAI_API_KEY: 14 | click.echo( 15 | "OpenAI API key not found. Please set the OPENAI_API_KEY environment variable.") 16 | exit(1) 17 | 18 | client = openai.OpenAI( 19 | api_key=OPENAI_API_KEY, 20 | base_url=OPENAI_BASE_URL 21 | ) 22 | 23 | 24 | def handle_openai_errors(func): 25 | def error_handler(*args, **kwargs): 26 | try: 27 | return func(*args, **kwargs) 28 | except openai.APIConnectionError as e: 29 | click.echo(f"{OPENAI_BASE_URL} could not be reached") 30 | click.echo(e.__cause__) 31 | exit(1) 32 | except openai.RateLimitError: 33 | click.echo("RateLimitError was received; we should back off a bit.") 34 | exit(1) 35 | except openai.BadRequestError as e: 36 | click.echo("BadRequestError was received") 37 | click.echo(e.message) 38 | exit(1) 39 | except openai.APIStatusError as e: 40 | click.echo("Another non-200-range status code was received") 41 | click.echo(e.status_code) 42 | click.echo(e.response) 43 | exit(1) 44 | 45 | return error_handler 46 | 47 | 48 | def get_completion(llm_request): 49 | """ 50 | Get completion from the OpenAI model for the given prompt and agent. 
51 | """ 52 | get_config().log(llm_request.to_log()) 53 | 54 | response_text = _llm_call_completion(llm_request) 55 | 56 | return LLMResponse(response_text, llm_request.model) 57 | 58 | 59 | def generate_image(llm_request): 60 | get_config().log(llm_request.to_log()) 61 | 62 | response = _call_generate_image( 63 | llm_request.to_flat_prompt(), 64 | llm_request.model) 65 | 66 | return LLMImageResponse( 67 | response.url, 68 | llm_request.model, 69 | response.revised_prompt) 70 | 71 | 72 | @handle_openai_errors 73 | def _call_generate_image(prompt, model): 74 | # TODO: Can I manually register the dall-e-3 using the llm api? 75 | response = client.images.generate( 76 | prompt=prompt, 77 | model=model, 78 | size="1024x1024", 79 | n=1 80 | ) 81 | 82 | return response.data[0] 83 | 84 | 85 | @handle_openai_errors 86 | def _llm_call_completion(llm_request: LLMRequest) -> str: 87 | model = llm.get_model(llm_request.model) 88 | if isinstance(model, (Chat, AsyncChat)) and model.api_base == None: 89 | # Backwards compatible with the older override 90 | model.api_base = OPENAI_BASE_URL 91 | 92 | attachment = [] 93 | for image in llm_request.images: 94 | if image.is_web_reference(): 95 | attachment.append(llm.Attachment(url=image.src)) 96 | else: 97 | attachment.append(llm.Attachment(path=image.src)) 98 | 99 | # llm.Attachment(path="pelican.jpg"), 100 | # llm.Attachment(url="https://static.simonwillison.net/static/2024/pelicans.jpg"), 101 | # llm.Attachment(content=b"binary image content here") 102 | return model.prompt( 103 | llm_request.prompt, 104 | system=llm_request.system_content(), 105 | attachments=attachment, 106 | stream=False # we do not support streaming 107 | ) 108 | -------------------------------------------------------------------------------- /mark/llm_request.py: -------------------------------------------------------------------------------- 1 | from textwrap import dedent 2 | from .markdown_file import Image, Link 3 | from typing import ( 4 | List, 5 | 
Optional 6 | ) 7 | 8 | 9 | class LLMRequest: 10 | model: str 11 | prompt: Optional[str] 12 | system_message: Optional[str] 13 | images: List[Image] 14 | links: List[Link] 15 | 16 | def __init__(self, model): 17 | """ 18 | Can serialize itself into a payload that can be sent to the OpenAI API (potentially others in the future) 19 | """ 20 | self.system_message = None 21 | self.prompt = None 22 | self.model = model 23 | self.images = [] 24 | self.links = [] 25 | 26 | def with_system_message(self, system_message): 27 | self.system_message = system_message 28 | return self 29 | 30 | def with_prompt(self, prompt): 31 | self.prompt = prompt 32 | return self 33 | 34 | def with_image(self, image): 35 | self.images.append(image) 36 | return self 37 | 38 | def with_link(self, document): 39 | self.links.append(document) 40 | return self 41 | 42 | def system_content(self) -> str: 43 | system_content = "" 44 | 45 | if self.links: 46 | link_content_block = "---".join([str(link) for link in self.links]) 47 | system_content += link_content_block 48 | 49 | if self.system_message: 50 | system_content += "\n" + self.system_message 51 | 52 | return system_content 53 | 54 | def to_payload(self): 55 | system_message = {"role": "system", "content": self.system_content()} 56 | 57 | if self.images: 58 | user_content = [{'type': 'text', 'text': self.prompt}] 59 | for image in self.images: 60 | if image.url: 61 | user_content.append( 62 | {'type': 'image_url', 'image_url': {'url': image.url}}) 63 | else: 64 | user_content = self.prompt 65 | 66 | user_message = {"role": "user", "content": user_content} 67 | return [system_message, user_message] 68 | 69 | def to_flat_prompt(self) -> str: 70 | return self.system_content() + "\n" + self.prompt 71 | 72 | def to_log(self) -> str: 73 | return dedent(""" 74 | # System message 75 | --- 76 | """) \ 77 | + self.system_content() \ 78 | + dedent(""" 79 | --- 80 | # User Message 81 | --- 82 | """) \ 83 | + self.prompt \ 84 | + dedent(""" 85 | --- 86 | # 
Images 87 | --- 88 | """) \ 89 | + "\n".join([image.url for image in self.images]) 90 | -------------------------------------------------------------------------------- /mark/llm_response.py: -------------------------------------------------------------------------------- 1 | from textwrap import dedent 2 | 3 | 4 | class LLMResponse(object): 5 | RESPONSE_TEMPLATE = dedent( 6 | """ 7 | # GPT Response (model: {model}, system: {system}) 8 | {content} 9 | 10 | # User Response 11 | """ 12 | ) 13 | 14 | def __init__(self, content, model): 15 | self.model = model 16 | self.content = content 17 | self.system = 'default' 18 | 19 | def with_system(self, system): 20 | self.system = system 21 | return self 22 | 23 | def to_markdown(self): 24 | content = self.content 25 | return self.RESPONSE_TEMPLATE.format( 26 | model=self.model, system=self.system, content=content) 27 | 28 | 29 | class LLMImageResponse(LLMResponse): 30 | def __init__(self, image_url, model, revised_prompt=None): 31 | super().__init__(image_url, model) 32 | self.revised_prompt = revised_prompt 33 | 34 | def to_markdown(self): 35 | content = f"![Generated Image]({self.content})" 36 | 37 | if self.revised_prompt: 38 | content = f"{self.revised_prompt}\n\n{content}" 39 | 40 | return self.RESPONSE_TEMPLATE.format( 41 | model=self.model, system=self.system, content=content) 42 | -------------------------------------------------------------------------------- /mark/markdown_file.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from langchain_core.documents import Document 4 | from io import TextIOWrapper 5 | from textwrap import dedent 6 | import click 7 | from mark import scraper 8 | import base64 9 | import mimetypes 10 | 11 | """ 12 | MarkdownFile 13 | Parses the markdown and extracts image elements from the file, resolving the paths of local images. 
class MarkdownFile:
    """Parses a markdown document and exposes its image and link
    references, resolving local paths relative to the file's directory."""

    def __init__(self, file_wrapper: TextIOWrapper):
        """
        Args:
            file_wrapper: An open text stream, usually obtained from
                ``open()`` or stdin. When the stream carries a file name,
                relative references are resolved against that file's
                directory; otherwise against the current working directory.
        """
        self.file_path = None
        self.file_dir = None
        if hasattr(file_wrapper, 'name') and file_wrapper.name != '':
            self.file_path = file_wrapper.name
            self.file_dir = os.path.dirname(file_wrapper.name)
        else:
            self.file_dir = os.getcwd()
        self.file_content = file_wrapper.read()
        # Lazy caches; None means "not parsed yet" (see properties below).
        self._images = None
        self._links = None

    @property
    def content(self) -> str:
        """The raw markdown text."""
        return self.file_content

    @property
    def images(self):
        """All image references (``![alt](src)``) in the document."""
        # Fixed: `if not self._images` treated an empty result list as
        # "not parsed yet" and re-scanned the document on every access.
        if self._images is None:
            self._images = self._parse_elements(Image)
        return self._images

    @property
    def links(self):
        """All link references (``[text](src)``) in the document."""
        if self._links is None:
            self._links = self._parse_elements(Link)
        return self._links

    def _parse_elements(self, cls):
        # cls must expose REGX_PATTERN with two groups: (text, src).
        matches = re.findall(cls.REGX_PATTERN, self.file_content)
        return [
            cls.from_reference_folder(self.file_dir)
            .with_src(src)
            .with_text(text)
            for text, src in matches
        ]


class PageReference:
    """A single ``[text](src)``-style reference found in a markdown file."""

    @classmethod
    def from_reference_folder(cls, folder):
        """Alternate constructor: reference resolved relative to *folder*."""
        return cls(folder)

    def __init__(self, reference_folder, src=None):
        self.reference_folder = reference_folder
        self.src = src
        self.uri = None
        # Fixed: link_text was only ever set by with_text(), so accessing
        # it on a reference built without text raised AttributeError.
        self.link_text = None
        if src:
            self._resolve_uri()

    def with_src(self, src):
        """Set the raw source string and resolve it to a URI (fluent)."""
        self.src = src
        self._resolve_uri()
        return self

    def with_text(self, text):
        """Set the link/alt text (fluent)."""
        self.link_text = text
        return self

    def is_web_reference(self):
        """True when the source is an http(s) URL rather than a local path."""
        # Fixed: a bare startswith("http") also matched local paths such
        # as "http-assets/logo.png"; require an explicit scheme.
        return self.src.startswith(("http://", "https://"))

    def _resolve_uri(self):
        # Web URLs pass through untouched; local paths are resolved
        # relative to the folder containing the markdown file.
        if self.is_web_reference():
            self.uri = self.src
        else:
            self.uri = os.path.normpath(
                os.path.join(
                    self.reference_folder,
                    self.src))


class Image(PageReference):
    """An image reference; local files are inlined as base64 data URLs."""

    # Regular expression to find Markdown image syntax with alt text
    REGX_PATTERN = r'!\[(.*?)\]\((.*?)\)'

    @property
    def url(self):
        """A URL usable by an LLM API: the original web URL, or a data URL
        for local files. Emits a warning and returns '' when the local
        file is missing or is a directory."""
        if self.is_web_reference():
            return self.uri
        else:
            try:
                return Image.image_to_data_url(self.uri)
            except (FileNotFoundError, IsADirectoryError):
                click.echo(f"Image Reference {self.src} not found. Skipping")
                return ''

    @classmethod
    def encode_image(cls, image_path: str) -> str:
        """Get base64 string from image URI.

        Args:
            image_path: The path to the image.

        Returns:
            The base64 string of the image.
        """
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")

    @classmethod
    def image_to_data_url(cls, image_path: str) -> str:
        """Get data URL from image URI.

        Args:
            image_path: The path to the image.

        Returns:
            The data URL of the image.
        """
        encoding = cls.encode_image(image_path)
        # Fixed: guess_type() returns None for unknown extensions, which
        # previously produced the literal string "data:None;base64,...".
        mime_type = mimetypes.guess_type(image_path)[0] or "application/octet-stream"
        return f"data:{mime_type};base64,{encoding}"
def get(url: str) -> Page:
    """Fetch *url* with a headless browser and return it as a Page.

    The rendered HTML is stripped of scripts/styles, converted to
    markdown, and wrapped in a Page; the <title> text is attached when
    the document has one.
    """
    raw_html = get_rendered_html(url)
    clean_soup = _clean_soup_from_html(raw_html)
    markdown = _markdown_from_soup(clean_soup)
    page = Page(url, body=markdown, soup=clean_soup)

    if title := clean_soup.find('title'):
        page.with_title(title.text)

    return page


def get_rendered_html(url: str) -> str:
    """Render *url* in headless Chromium and return the final HTML.

    Browser failures are reported to the user and degrade to a
    placeholder string so the caller still receives a page body.
    """
    try:
        return asyncio.run(_render_page(url))
    except pyppeteer.errors.BrowserError:
        click.echo(f"BrowserError while fetching {url}")
        # Fixed: previously returned the truncated "BrowserError while
        # fetching", inconsistent with the parallel Timeout branch below.
        return "BrowserError while fetching page"
    except pyppeteer.errors.TimeoutError:
        click.echo(f"Timeout while fetching {url}")
        return "Timeout while fetching page"


async def _render_page(url: str) -> str:
    """Drive pyppeteer: open a page, navigate, and return its content.

    The browser is always closed, even when navigation raises.
    """
    browser = None
    try:
        browser = await pyppeteer.launch()
        page = await browser.newPage()
        await page.setUserAgent(DEFAULT_USER_AGENT)
        await page.goto(url)
        rendered_html = await page.content()
    finally:
        if browser:
            await browser.close()
    return rendered_html


def _clean_soup_from_html(html: str) -> BeautifulSoup:
    """Parse *html* and strip noise that is useless as LLM context:
    script/meta/link/style tags and class/style attributes."""
    soup = BeautifulSoup(html, 'html.parser')

    # Tags removed entirely (content included)
    tags_to_decompose = ['script', 'meta', 'link', 'style']

    for tag in soup.find_all(True):
        # Remove class attributes
        if 'class' in tag.attrs:
            del tag['class']

        # Remove style attributes
        if 'style' in tag.attrs:
            del tag['style']

        # Decompose unwanted tags
        if tag.name in tags_to_decompose:
            tag.decompose()

    return soup


def _markdown_from_soup(soup: BeautifulSoup) -> str:
    """Convert cleaned HTML to markdown, collapsing runs of 3+ blank
    lines left behind by decomposed tags down to a single blank line."""
    raw_markdown_text = MarkdownConverter().convert_soup(soup)
    return re.sub(r'\n{3,}', '\n\n', raw_markdown_text)
-------------------------------------------------------------------------------- 1 | [project] 2 | name = "mark" 3 | version = "0.10.2" 4 | description = "Mark lets you seamlessly use markdown, images and links to interact with LLMs" 5 | readme = "README.md" 6 | requires-python = ">=3.10" 7 | classifiers = [ 8 | "Programming Language :: Python :: 3", 9 | "Operating System :: OS Independent", 10 | ] 11 | license = "MIT" 12 | license-files = ["LICEN[CS]E*"] 13 | 14 | [project.urls] 15 | Homepage = "https://github.com/relston/mark" 16 | Issues = "https://github.com/relston/mark/issues" 17 | 18 | [tool.poetry] 19 | name = "mark" 20 | version = "0.10.2" 21 | description = "Mark lets you seamlessly use markdown, images and links to interact with LLMs" 22 | authors = ["Ryan Elston "] 23 | 24 | # Include additional files 25 | include = [ 26 | "templates/default_system_prompt.md" 27 | ] 28 | 29 | [tool.poetry.dependencies] 30 | python = "^3.10" 31 | PyYAML = "6.0.2" 32 | ipython = "8.32.0" 33 | click = "^8.1.8" 34 | beautifulsoup4 = "^4.12.3" 35 | langchain = "^0.2.16" 36 | langchain-community = "^0.2.15" 37 | pyppeteer = "^2.0.0" 38 | markdownify = "^0.14.1" 39 | llm = "^0.25" 40 | click-default-group = "^1.2.4" 41 | 42 | [tool.poetry.scripts] 43 | mark = "mark.cli:mark_cli" 44 | 45 | [tool.poetry.group.dev.dependencies] 46 | pytest = "^8.3.4" 47 | respx = "^0.22.0" 48 | httpx = "^0.28.1" 49 | flake8 = "^7.1.1" 50 | autopep8 = "^2.3.1" 51 | 52 | [build-system] 53 | requires = ["poetry-core>=1.0.0"] 54 | build-backend = "poetry.core.masonry.api" -------------------------------------------------------------------------------- /templates/default_system_prompt.md: -------------------------------------------------------------------------------- 1 | You are a helpful LLM agent that will receive user input in the form of a markdown file. 2 | The contents of the file will be used as context and the specific prompt from the use will be located at the end of the file. 
import pytest
import os
from unittest.mock import patch
import llm


@pytest.fixture(autouse=True)
def mock_openai_key():
    """Provide a dummy OPENAI_API_KEY for every test."""
    os.environ['OPENAI_API_KEY'] = 'test_key'


@pytest.fixture(autouse=True)
def mock_cwd(tmp_path):
    """Make os.getcwd() return a per-test temporary directory."""
    with patch('os.getcwd') as mock:
        mock.return_value = tmp_path
        yield mock


@pytest.fixture
def mock_stdout():
    """Capture CLI output by patching click.echo."""
    with patch('click.echo') as mock:
        yield mock


@pytest.fixture
def mock_llm_response():
    """Patch the llm model prompt call so no real request is made."""
    with patch('llm.models._Model.prompt') as mock:
        yield mock


@pytest.fixture
def mock_llm_get_model():
    """Spy on llm.get_model while delegating to the real implementation."""
    get_model_method = llm.get_model

    with patch('llm.get_model') as mock:
        mock.side_effect = get_model_method
        yield mock


@pytest.fixture
def mock_image_generation():
    """Patch mark.llm's image-generation entry point."""
    with patch('mark.llm._call_generate_image') as mock:
        yield mock


@pytest.fixture
def create_file(tmp_path):
    """Factory fixture: write a text or binary file under tmp_path,
    creating parent directories as needed, and return its path."""
    def _create_file(file_path, content, binary=False):
        file = tmp_path / file_path
        file.parent.mkdir(parents=True, exist_ok=True)
        if binary:
            file.write_bytes(content)
        else:
            file.write_text(content, encoding="utf-8")
        return file
    return _create_file


@pytest.fixture
def mock_web_page():
    """Register canned HTML per URL and patch the scraper's renderer.

    Yields a registrar ``_mock(url, page_content)``; any URL fetched via
    mark.scraper.get_rendered_html then returns the registered content.
    """
    url_to_content = {}

    def _mock(url, page_content):
        url_to_content[url] = page_content

    with patch('mark.scraper.get_rendered_html') as mock:
        def side_effect(url):
            return url_to_content[url]
        mock.side_effect = side_effect
        yield _mock
url_to_content[url] 69 | mock.side_effect = side_effect 70 | yield _mock 71 | -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | from mark.cli import mark_cli 2 | from textwrap import dedent 3 | from mark import config 4 | import pytest 5 | import os 6 | import sys 7 | import io 8 | import llm 9 | from unittest.mock import Mock 10 | 11 | """ 12 | These tests are meant to act as 'functional-lite'. Maximizing code coverage for 13 | each of the main use cases of the CLI command and minimizing the number of 14 | basic unit tests needed for each individual module. 15 | 16 | We just mock out the files and the OpenAI API calls, and then test the CLI 17 | """ 18 | 19 | 20 | class TestCLI: 21 | @pytest.fixture(autouse=True) 22 | def use_tmp_config_path(self, tmp_path): 23 | # MARK_CONFIG_PATH defaults to ~/.mark 24 | # for all tests we use a temporary directory 25 | self.config_path = tmp_path / 'config' 26 | os.environ['MARK_CONFIG_PATH'] = str(self.config_path) 27 | 28 | @pytest.fixture(autouse=True) 29 | def define_files( 30 | self, 31 | create_file, 32 | mock_llm_response, 33 | mock_web_page, 34 | mock_image_generation): 35 | config.reset() 36 | 37 | # Given a markdown file with the following content 38 | self.mock_markdown_file_content = dedent(""" 39 | A Markdown file with various images and links 40 | 41 | Local image: 42 | ![Local Image](./images/sample.png) 43 | 44 | Remote image: 45 | ![Remote Image](https://example.com/image.png) 46 | 47 | Relative image outside directory: 48 | ![Outside Image](../images/outside.png) 49 | 50 | External url link: 51 | [External URL](https://example.com/some-article) 52 | 53 | Local file link: 54 | [Anther Reference](./docs/another-reference.md) 55 | """) 56 | 57 | # and the files exists in the file system 58 | self.markdown_file = create_file( 59 | "test.md", self.mock_markdown_file_content) 60 | 
create_file("./images/sample.png", b"sample image data", binary=True) 61 | create_file( 62 | "../images/outside.png", 63 | b"outside image data", 64 | binary=True) 65 | create_file("./docs/another-reference.md", "Another reference content") 66 | 67 | # and the external url link returns this response 68 | html_content = """ 69 | 70 | 71 | 72 | Basic HTML Page 73 | 74 | 75 |

Welcome to My Page

76 | Visit Example.com 77 | 78 | 79 | """ 80 | mock_web_page('https://example.com/some-article', html_content) 81 | 82 | # and llm returning this response 83 | mock_llm_response.return_value = "Test completion" 84 | mock_image_generation.return_value = Mock( 85 | url='https://generated.image.url/image.png', 86 | revised_prompt='A revised mock image prompt' 87 | ) 88 | 89 | self.default_system_prompt = dedent( 90 | """ 91 | You are a helpful LLM agent that will receive user input in the form of a markdown file. 92 | The contents of the file will be used as context and the specific prompt from the use will be located at the end of the file. 93 | Your response to the users request should also be written in markdown format. 94 | 95 | RULES: 96 | - Do not echo back any of the input into your response to the user. 97 | - If using a heading in your response, start with a level 2 heading 98 | """ 99 | ) 100 | 101 | self.default_expected_context = dedent( 102 | """ 103 | Link Text: External URL 104 | SRC: https://example.com/some-article 105 | Page Title: Basic HTML Page 106 | Page Content: 107 | 108 | 109 | Basic HTML Page 110 | 111 | Welcome to My Page 112 | ================== 113 | 114 | [Visit Example.com](https://www.example.com) 115 | 116 | 117 | --- 118 | Link Text: Anther Reference 119 | SRC: ./docs/another-reference.md 120 | Page Title: another-reference.md 121 | Page Content: 122 | Another reference content 123 | """ 124 | ) 125 | 126 | self.default_expected_system_message = self.default_expected_context + \ 127 | self.default_system_prompt 128 | 129 | self.default_expected_llm_request = [ 130 | {'role': 'system', 'content': self.default_expected_system_message}, 131 | {'role': 'user', 'content': [ 132 | {'type': 'text', 'text': self.mock_markdown_file_content}, 133 | {'type': 'image_url', 'image_url': {'url': 'data:image/png;base64,c2FtcGxlIGltYWdlIGRhdGE='}}, 134 | {'type': 'image_url', 'image_url': {'url': 'https://example.com/image.png'}}, 135 | {'type': 
'image_url', 'image_url': {'url': 'data:image/png;base64,b3V0c2lkZSBpbWFnZSBkYXRh'}}, 136 | ] 137 | } 138 | ] 139 | self.default_expected_attachements = [ 140 | llm.Attachment(path='./images/sample.png'), 141 | llm.Attachment(url='https://example.com/image.png'), 142 | llm.Attachment(path='../images/outside.png') 143 | ] 144 | 145 | def test_command_default(self, mock_llm_response): 146 | """Test CLI command without specifying an agent (default agent should be used).""" 147 | 148 | # Run the CLI command with only the markdown file 149 | mark_cli([str(self.markdown_file)], None, None, False) 150 | 151 | mock_llm_response.assert_called_once_with( 152 | self.mock_markdown_file_content, 153 | system=self.default_expected_system_message, 154 | attachments=self.default_expected_attachements, 155 | stream=False 156 | ) 157 | 158 | # The markdown file will be updated with the response 159 | expected_markdown_file_content = self.mock_markdown_file_content + \ 160 | dedent(""" 161 | # GPT Response (model: gpt-4o, system: default) 162 | Test completion 163 | 164 | # User Response 165 | """) 166 | 167 | assert self.markdown_file.read_text() == expected_markdown_file_content 168 | 169 | def test_command_with_stdin(self, mock_llm_response, mock_stdout): 170 | byte_string = self.mock_markdown_file_content.encode('utf-8') 171 | input = io.TextIOWrapper(io.BytesIO(byte_string), encoding='utf-8') 172 | sys.stdin = input 173 | 174 | mark_cli(['-'], None, None, False) 175 | 176 | mock_llm_response.assert_called_once_with( 177 | self.mock_markdown_file_content, 178 | system=self.default_expected_system_message, 179 | attachments=self.default_expected_attachements, 180 | stream=False 181 | ) 182 | 183 | mock_stdout.assert_called_once_with("Test completion") 184 | 185 | def test_command_custom_model(self, mock_llm_get_model, mock_llm_response): 186 | """ 187 | mark --model o1 path/to/markdown.md 188 | """ 189 | 190 | mark_cli(['--model', 'o1', str(self.markdown_file)], None, None, False) 
191 | 192 | mock_llm_get_model.assert_called_once_with('o1') 193 | 194 | mock_llm_response.assert_called_once_with( 195 | self.mock_markdown_file_content, 196 | system=self.default_expected_system_message, 197 | attachments=self.default_expected_attachements, 198 | stream=False 199 | ) 200 | 201 | def test_command_custom_agent(self, create_file, mock_llm_response): 202 | # Define a custom agent 203 | create_file( 204 | self.config_path / 'system_prompts/custom.md', 205 | """You're a custom agent that .....""" 206 | ) 207 | 208 | # Run the CLI command with the custom agent 209 | mark_cli([str(self.markdown_file), '--system=custom'], 210 | None, None, False) 211 | 212 | expected_system_message = self.default_expected_context + \ 213 | "\nYou're a custom agent that ....." 214 | 215 | mock_llm_response.assert_called_once_with( 216 | self.mock_markdown_file_content, 217 | system=expected_system_message, 218 | attachments=self.default_expected_attachements, 219 | stream=False 220 | ) 221 | 222 | # The markdown file will be updated indicating the custom agent 223 | expected_markdown_file_content = self.mock_markdown_file_content + \ 224 | dedent(""" 225 | # GPT Response (model: gpt-4o, system: custom) 226 | Test completion 227 | 228 | # User Response 229 | """) 230 | assert self.markdown_file.read_text() == expected_markdown_file_content 231 | 232 | def test_command_generate_image(self, mock_image_generation): 233 | """ 234 | Test CLI command with the --generate-image option. 
235 | """ 236 | 237 | mark_cli([str(self.markdown_file), '--generate-image'], 238 | None, None, False) 239 | 240 | expected_prompt = self.default_expected_system_message + \ 241 | "\n" + self.mock_markdown_file_content 242 | mock_image_generation.assert_called_once_with( 243 | expected_prompt, "dall-e-3") 244 | 245 | # The markdown file will be updated with the generated image URL 246 | expected_markdown_file_content = self.mock_markdown_file_content + \ 247 | dedent(""" 248 | # GPT Response (model: dall-e-3, system: default) 249 | A revised mock image prompt 250 | 251 | ![Generated Image](https://generated.image.url/image.png) 252 | 253 | # User Response 254 | """) 255 | 256 | assert self.markdown_file.read_text() == expected_markdown_file_content 257 | 258 | def test_command_models(self, mock_stdout): 259 | """ 260 | Test for `mark models` 261 | """ 262 | 263 | mark_cli(['models'], None, None, False) 264 | 265 | call = mock_stdout.call_args_list[0] 266 | assert 'OpenAI Chat' in call[0][0] 267 | -------------------------------------------------------------------------------- /tests/test_scraper.py: -------------------------------------------------------------------------------- 1 | import pyppeteer 2 | from mark import scraper 3 | from unittest.mock import patch 4 | 5 | 6 | def test_page_scrape(mock_web_page): 7 | html_content = """ 8 | 9 | 10 | 11 | Basic HTML Page 12 | 13 | 14 |

Welcome to My Page

def test_timeout_error_handling():
    """scraper.get() should degrade gracefully when rendering times out."""
    # Patch the coroutine entry point so no browser is launched; the
    # TimeoutError must be converted into a placeholder page body.
    with patch('mark.scraper._render_page', side_effect=pyppeteer.errors.TimeoutError):
        page = scraper.get('https://timeout-test.com')

    assert page.body == 'Timeout while fetching page'
    assert page.title is None, "Expected no title when a TimeoutError occurs"
    assert page.url == 'https://timeout-test.com', "URL should be correct even when timeout occurs"