├── .github
│   ├── dependabot.yml
│   └── workflows
│       └── python-package.yml
├── .gitignore
├── CHANGELOG.md
├── LICENSE
├── README.md
├── mark
│   ├── __main__.py
│   ├── cli.py
│   ├── config.py
│   ├── llm.py
│   ├── llm_request.py
│   ├── llm_response.py
│   ├── markdown_file.py
│   └── scraper.py
├── poetry.lock
├── pyproject.toml
├── templates
│   └── default_system_prompt.md
└── tests
    ├── conftest.py
    ├── test_cli.py
    └── test_scraper.py
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | # To get started with Dependabot version updates, you'll need to specify which
2 | # package ecosystems to update and where the package manifests are located.
3 | # Please see the documentation for all configuration options:
4 | # https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
5 |
6 | version: 2
7 | updates:
8 | - package-ecosystem: "pip" # See documentation for possible values
9 | directory: "/" # Location of package manifests
10 | schedule:
11 | interval: "weekly"
12 |
--------------------------------------------------------------------------------
/.github/workflows/python-package.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
3 |
4 | name: Python package
5 |
6 | on:
7 | push:
8 | branches: [ "main" ]
9 | pull_request:
10 | branches: [ "main" ]
11 |
12 | jobs:
13 | build:
14 |
15 | runs-on: ubuntu-latest
16 | strategy:
17 | fail-fast: false
18 | matrix:
19 | python-version: ["3.10", "3.11"]
20 |
21 | steps:
22 | - uses: actions/checkout@v4
23 | - name: Set up Python ${{ matrix.python-version }}
24 | uses: actions/setup-python@v3
25 | with:
26 | python-version: ${{ matrix.python-version }}
27 | - name: Install dependencies
28 | run: |
29 | python -m pip install --upgrade pip
30 | python -m pip install poetry flake8 pytest
31 | poetry install --with dev
32 | - name: Lint with flake8
33 | run: |
34 | # stop the build if there are Python syntax errors or undefined names
35 | poetry run flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
36 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
37 | poetry run flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
38 | - name: Test with pytest
39 | run: |
40 | export OPENAI_API_KEY=test_key && poetry run python -m pytest
41 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 |
162 | thread.md
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | ## Changelog
2 | All notable changes to this project will be documented in this file.
3 |
4 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
5 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6 |
7 | ### [0.10.2] - 2025-03-10
8 | #### Changed
9 | - Bump llm to 0.25
10 | - Bump pytest from 8.3.4 to 8.3.5
11 |
12 | ### [0.10.1] - 2025-03-05
13 | #### Fixed
14 | - Fixed issue where the OPENAI_BASE_URL was no longer being set correctly in the 0.10.0 update.
15 | - Streaming is disabled for all completion requests to prevent issues
16 |
17 | ### [0.10.0] - 2025-02-18
18 | #### Added
19 | - `mark models` subcommand lists all available LLM models
20 |
21 | ### [0.9.0] - 2025-02-18
22 | #### Fixed
23 | - OpenAI reasoning models now work with Mark
24 |
25 | #### Added
26 | - The `llm` package is now a dependency, expanding the models available to Mark and delegating low-level model interaction.
27 | - Added model specification test
28 |
29 | #### Removed
30 | - Direct import of `openai` (now referencing `llm`'s dependencies instead)
31 |
32 | ### [0.8.3] - 2025-01-20
33 | #### Removed
34 | - Removed the import of `image_to_data_url` from LangChain.
35 |
36 | ### [0.8.2] - 2024-08-13
37 | #### Fixed
38 | - Slightly improved the errors raised when scraping pages on low-memory hardware
39 |
40 | ### [0.8.1] - 2024-08-13
41 | #### Changed
42 | - Bump langchain from 0.2.14 to 0.2.15
43 | - Bump httpx from 0.27.0 to 0.27.2
44 | - Bump openai from 1.42.0 to 1.43.0
45 | - Bump ipython from 8.26.0 to 8.27.0
46 | - Bump langchain-community from 0.2.4 to 0.2.12
47 | - Bump openai from 1.41.1 to 1.42.0
48 | - Bump openai from 1.14.2 to 1.41.1
49 | - Bump langchain from 0.2.1 to 0.2.14
50 | - Bump markdownify from 0.12.1 to 0.13.1
51 | - Bump pyyaml from 6.0.1 to 6.0.2
52 | - Bump langchain-community from 0.2.1 to 0.2.4
53 | - Bump ipython from 8.21.0 to 8.26.0
54 | - Bump flake8 from 7.1.0 to 7.1.1
55 | - Bump pytest from 6.2.5 to 8.3.
56 |
57 | ### [0.8.0] - 2024-08-13
58 | #### Added
59 | - Support for `--model` option to allow for selecting a specific OpenAI model
60 |
61 | ### [0.7.3] - 2024-07-24
62 | #### Added
63 | - Support for `--version` option in the CLI
64 |
65 | ### [0.7.2] - 2024-07-24
66 | #### Added
67 | - Aliases for cli options `--system` (`-s`) and `--generate-images` (`-i`)
68 |
69 | ### [0.7.1] - 2024-06-28
70 | #### Fixed
71 | - Gracefully handle timeouts when fetching urls
72 |
73 | ### [0.7.0] - 2024-06-27
74 | #### Changed
75 | - Updated the scraping logic to render pages as clean markdown, which exposes the LLM to URLs on the page.
76 |
77 | ### [0.6.3] - 2024-06-20
78 | #### Fixed
79 | - Gracefully handle broken links in markdown files.
80 |
81 | ### [0.6.2] - 2024-06-20
82 | #### Added
83 | - Cleaner OpenAI error handling for common issues
84 |
85 | ### [0.6.1] - 2024-06-20
86 | #### Removed
87 | - Response log for image generation
88 |
89 | ### [0.6.0] - 2024-06-20
90 | #### Added
91 | - Ability to override the OpenAI endpoint with OPENAI_API_BASE_URL env var
92 |
93 | ### [0.5.0] - 2024-06-18
94 | #### Added
95 | - Adding experimental support for DALL-E image generation
96 |
97 | ### [0.4.0] - 2024-06-06
98 | #### Added
99 | - Requests are now logged to `~/.mark/logs/`
100 |
101 | ### [0.3.6] - 2024-06-06
102 | #### Fixed
103 | - USER_AGENT warning
104 |
105 | ### [0.3.5] - 2024-06-06
106 | #### Added
107 | - Included additional files in the project for `templates/default_system_prompt.md`.
108 |
109 | #### Changed
110 | - Updated default system prompt and refactored into the templates directory.
111 |
112 | ### [0.3.4] - 2024-06-04
113 | #### Added
114 | - Returned a pretty error if no `OPENAI_API_KEY` is found.
115 |
116 | ### [0.3.3] - 2024-06-04
117 | #### Fixed
118 | - Fixed stdin use case.
119 |
120 | ### [0.3.2] - 2024-06-04
121 | #### Added
122 | - Added local file references to page links.
123 |
124 | ### [0.3.1] - 2024-06-04
125 | #### Added
126 | - Utilized LangChain image utilities for local image encoding.
127 |
128 | ### [0.3.0] - 2024-06-04
129 | #### Added
130 | - Bumped version.
131 | - Added new dependencies: `langchain ^0.2.1` and `langchain-community ^0.2.1`.
132 |
133 | ### [0.2.3] - 2024-05-29
134 | #### Changed
135 | - Updated model.
136 |
137 | ### [0.2.2] - 2024-05-03
138 | #### Fixed
139 | - Fixed issue handling malformed image tags.
140 |
141 | ### [0.2.1] - 2024-04-29
142 | #### Fixed
143 | - Fixed pathing issue with images.
144 |
145 | ### [0.2.0] - 2024-04-23
146 | #### Added
147 | - Added parsing support for images in markdown text.
148 | - Added new dependencies: `beautifulsoup4 ^4.12.3` and `markdown ^3.6`.
149 |
150 | ### [0.1.0] - 2024-03-25
151 | #### Added
152 | - Initial setup with dependencies: `python ^3.8`, `PyYAML 5.4.1`, `ipython 8.21.0`, `openai 1.14.2`.
153 | - Replaced `typer` with `click` for the CLI tool.
154 | - Setup CLI interface with entry point `agent_gpt.__main__:cli`.
155 | - Added development dependencies: `pytest ^6.2.5`.
156 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 Ryan Elston
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 | 
3 |
4 | # Mark
5 | **Mark lets you seamlessly use markdown, images and links to interact with LLMs**
6 |
7 | > "Markdown is still the best format for feeding into language models"
- Simon Willison (co-creator of Django)
8 |
9 | Why Markdown + LLMs? For a full rundown of the benefits and use cases, check out [Introducing 'Mark', a Markdown CLI tool for LLMs](https://relston.github.io/markdown/gpt4o/cli/2024/06/07/introducing-mark.html)
10 |
11 | ## Key Features
12 | - Interact with LLMs using Markdown
13 | - Visual recognition of markdown image references via multi-modal LLMs
14 | - Local and remote links are scraped for context
15 | - LLM responses appended directly into Markdown files
16 | - `stdin` and `stdout` support for piping
17 | - Support for Image Generation
18 |
19 | ## Example
20 | ```markdown
21 | Describe ![Tiger](./tiger.png) to me in one short sentence.
22 |
23 | # GPT Response (model: gpt-4o-2024-05-13, system: default)
24 | A Bengal tiger is walking on a sandy path with greenery in the background.
25 |
26 | # User Response
27 | List each section of content on [this page](https://en.wikipedia.org/wiki/Bengal_tiger)
28 |
29 | # GPT Response (model: gpt-4o-2024-05-13, system: default)
30 | ## Sections of Content on the Bengal Tiger Wikipedia Page
31 |
32 | 1. **Taxonomy**
33 | - Genetic ancestry
34 |
35 | 2. **Characteristics**
36 | - Body weight and size
37 |
38 | 3. **Distribution and habitat**
39 | - India
40 | - Bangladesh
41 | ...
42 | ```
43 |
44 | This allows you to carry on a conversation directly in the markdown file - or simply integrate the response back into the document you're working on. It's a powerful, flexible, and natural way to interact with LLMs that unlocks countless use cases. `mark`'s focus on explicitly using Markdown as both the prompt and response medium for LLMs is a unique approach. Blending interactive, version-controlled Markdown documents with LLM responses in a CLI tool offers an innovative workflow for anyone looking to integrate LLM interactions into their development and documentation processes.
45 |
46 | # Install the Mark CLI
47 | ```bash
48 | pip install mark
49 | ```
50 |
51 | # Usage
52 | By default, `mark` reads a markdown file, extracts any image and link references for context, and sends it all to the LLM. The response is then appended to the markdown file.
53 | ```bash
54 | mark path/to/markdown.md
55 | ```
56 | *Requires an OpenAI API key in the `OPENAI_API_KEY` environment variable*
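For example, set the key in your shell before running `mark` (the key value below is a placeholder):
```bash
export OPENAI_API_KEY="sk-..."
mark path/to/markdown.md
```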
57 |
58 | `mark` also supports `stdin` and `stdout` for piping LLM responses into other tools
59 | ```bash
60 | cat path/to/markdown.md | mark
61 | # LLM response....
62 | ```
63 |
64 | ## Use a specific LLM model
65 | You can specify a different LLM model to use with the `--model` (or `-m`) flag
66 | ```bash
67 | mark path/to/markdown.md --model gpt-4o-2024-05-13
68 | ```
69 |
70 | ## Custom system prompts
71 | The system prompts folder is located at `~/.mark/system_prompts` and it includes a `default.md` prompt. You can add any additional system prompts you'd like to use in this folder and use them with the `--system` (or `-s`) flag.
72 | ```bash
73 | # ~/.mark/system_prompts/custom.md
74 | mark path/to/markdown.md --system custom
75 | ```
76 |
77 | ## Override the OpenAI API endpoint
78 | If you want to use a different LLM API endpoint that is fully compatible with the OpenAI API, set the `OPENAI_API_BASE_URL` environment variable to that endpoint value. This should enable you to use OpenAI proxy services like [credal.ai](https://www.credal.ai/), or other LLMs that are compatible with the OpenAI SDK.
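For example, to point `mark` at a hypothetical OpenAI-compatible proxy (the URL below is illustrative):
```bash
export OPENAI_API_BASE_URL="https://llm-proxy.example.com/v1"
mark path/to/markdown.md
```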
79 |
80 | ## Image Generation
81 | To generate an image based on the input just add the `--generate-image` (or `-i`) flag to the command
82 | ```bash
83 | mark path/to/markdown.md --generate-image
84 | ```
85 | This will generate an image using the 'dall-e-3' model and append it to the markdown file.
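The appended block follows the same response format as text completions; with image generation it looks roughly like this (the URL and revised prompt here are illustrative):
```markdown
# GPT Response (model: dall-e-3, system: default)
A revised version of your prompt, as rewritten by the model

![Generated Image](https://example.com/generated-image.png)

# User Response
```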
86 |
87 | # Development
88 | ## Local Setup
89 | ```bash
90 | poetry install
91 | ```
92 | *[Requires poetry](https://python-poetry.org/docs/)*
93 |
94 | ## Run the CLI Tool locally
95 | ```bash
96 | poetry run mark path/to/markdown.md
97 | ```
98 |
99 | ## Run the tests
100 | ```bash
101 | poetry run python -m pytest
102 | ```
103 |
104 | ## Auto-fix lint errors
105 | ```bash
106 | poetry run autopep8 --in-place --aggressive --aggressive --recursive .
107 | ```
108 |
--------------------------------------------------------------------------------
/mark/__main__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/relston/mark/149eca99d91275a1bf05f273cb77e46d87003f9a/mark/__main__.py
--------------------------------------------------------------------------------
/mark/cli.py:
--------------------------------------------------------------------------------
1 | import click
2 | from click_default_group import DefaultGroup
3 | from click.testing import CliRunner
4 | from mark import llm
5 | from llm.cli import cli as llm_cli
6 | from mark.llm_request import LLMRequest
7 | from mark.markdown_file import MarkdownFile
8 | from mark.config import get_config
9 | from importlib.metadata import version, PackageNotFoundError
10 |
11 | try:
12 | package_version = version("mark")
13 | except PackageNotFoundError:
14 | package_version = "unknown"
15 |
16 | DEFAULT_MODEL = "gpt-4o"
17 | DALL_E_MODEL = "dall-e-3"
18 |
19 |
20 | @click.group(
21 | cls=DefaultGroup,
22 | default="down",
23 | default_if_no_args=True,
24 | )
25 | @click.version_option(version=package_version)
26 | def mark_cli():
27 | """Markdown powered LLM CLI - Multi-modal AI text generation tool"""
28 |
29 |
30 | @mark_cli.command(name="down")
31 | @click.argument('file', type=click.File())
32 | @click.option('--system', '-s', type=click.STRING,
33 | default='default', help='The system prompt to use')
34 | @click.option('--model', '-m', type=click.STRING, help='The llm model')
35 | @click.option('--generate-image', '-i', is_flag=True, default=False,
36 | help='EXPERIMENTAL: Generate an image using DALL-E.')
37 | def down(file, system, model, generate_image):
38 | """
39 | Default: Process markdown file or stdin
40 |
41 | In-document Thread Example:
42 | mark path/to/markdown.md
43 |
44 | stdin Example:
45 | echo "Hello, World!" | mark -
46 | """
47 | system_prompt = get_config().system_prompts().get(system, 'default')
48 | markdown_file = MarkdownFile(file)
49 |
50 | if not model:
51 | model = DALL_E_MODEL if generate_image else DEFAULT_MODEL
52 |
53 | request = LLMRequest(model) \
54 | .with_prompt(markdown_file.content) \
55 | .with_system_message(system_prompt)
56 |
57 | [request.with_image(image) for image in markdown_file.images]
58 | [request.with_link(link) for link in markdown_file.links]
59 |
60 | if generate_image:
61 | response = llm.generate_image(request)
62 | else:
63 | response = llm.get_completion(request)
64 |
65 | response.with_system(system)
66 |
67 | if markdown_file.file_path:
68 | with open(markdown_file.file_path, "a") as file:
69 | file.write(response.to_markdown())
70 | else:
71 | click.echo(response.content)
72 |
73 |
74 | @mark_cli.command("models")
75 | def models_command():
76 | """List available llm models"""
77 | runner = CliRunner()
78 | result = runner.invoke(llm_cli, ["models"])
79 | if result.exception:
80 | raise click.ClickException(str(result.exception))
81 | click.echo(result.output)
82 |
83 |
84 | if __name__ == "__main__":
85 | mark_cli()
86 |
--------------------------------------------------------------------------------
/mark/config.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | import os
3 | from importlib.resources import read_text
4 |
5 |
6 | class Config:
7 | DEFAULT_SYSTEM_PROMPT_TEMPLATE_PATH = 'templates/default_system_prompt.md'
8 |
9 | def __init__(self):
10 | self.config_dir = os.getenv(
11 | 'MARK_CONFIG_PATH',
12 | os.path.expanduser("~/.mark"))
13 | self.system_prompts_dir = f"{self.config_dir}/system_prompts"
14 | self.default_system_prompt = f"{self.system_prompts_dir}/default.md"
15 | self.log_folder = f"{self.config_dir}/logs"
16 |
17 | if not os.path.exists(self.system_prompts_dir):
18 | os.makedirs(self.system_prompts_dir)
19 |
20 | if not os.path.exists(self.default_system_prompt):
21 | default_config = read_text('templates', 'default_system_prompt.md')
22 |
23 | with open(os.path.expanduser(self.default_system_prompt), "w") as file:
24 | file.write(default_config)
25 |
26 | if not os.path.exists(self.log_folder):
27 | os.makedirs(self.log_folder)
28 |
29 | def system_prompts(self):
30 | system_prompts = {}
31 | for filename in os.listdir(self.system_prompts_dir):
32 | filepath = os.path.join(self.system_prompts_dir, filename)
33 | with open(filepath, "r") as file:
34 | system_prompt_name = os.path.splitext(filename)[0]
35 | system_prompts[system_prompt_name] = file.read()
36 | return system_prompts
37 |
38 | def log(self, content):
39 | # Get current date and time as string
40 | dt_string = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
41 | log_file = f"{self.log_folder}/{dt_string}.md"
42 | with open(log_file, "w") as file:
43 | file.write(content)
44 |
45 |
46 | _config = None
47 |
48 |
49 | def reset():
50 | """
51 | Reset the config object.
52 | """
53 | global _config
54 | _config = None
55 |
56 |
57 | def get_config():
58 | """
59 | Return memoized config object.
60 | """
61 | global _config
62 | if not _config:
63 | _config = Config()
64 | return _config
65 |
--------------------------------------------------------------------------------
/mark/llm.py:
--------------------------------------------------------------------------------
1 | import os
2 | import click
3 | import llm
4 | from llm.default_plugins.openai_models import openai, Chat, AsyncChat
5 | from mark.config import get_config
6 | from mark.llm_request import LLMRequest
7 | from mark.llm_response import LLMResponse, LLMImageResponse
8 |
9 | # TODO: Remove this. Only needed to support image generation.
10 | # Should defer to llm model registration
11 | OPENAI_BASE_URL = os.getenv('OPENAI_API_BASE_URL', openai.base_url)
12 | OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
13 | if not OPENAI_API_KEY:
14 | click.echo(
15 | "OpenAI API key not found. Please set the OPENAI_API_KEY environment variable.")
16 | exit(1)
17 |
18 | client = openai.OpenAI(
19 | api_key=OPENAI_API_KEY,
20 | base_url=OPENAI_BASE_URL
21 | )
22 |
23 |
24 | def handle_openai_errors(func):
25 | def error_handler(*args, **kwargs):
26 | try:
27 | return func(*args, **kwargs)
28 | except openai.APIConnectionError as e:
29 | click.echo(f"{OPENAI_BASE_URL} could not be reached")
30 | click.echo(e.__cause__)
31 | exit(1)
32 | except openai.RateLimitError:
33 | click.echo("RateLimitError was received; we should back off a bit.")
34 | exit(1)
35 | except openai.BadRequestError as e:
36 | click.echo("BadRequestError was received")
37 | click.echo(e.message)
38 | exit(1)
39 | except openai.APIStatusError as e:
40 | click.echo("Another non-200-range status code was received")
41 | click.echo(e.status_code)
42 | click.echo(e.response)
43 | exit(1)
44 |
45 | return error_handler
46 |
47 |
48 | def get_completion(llm_request):
49 | """
50 | Get completion from the OpenAI model for the given prompt and agent.
51 | """
52 | get_config().log(llm_request.to_log())
53 |
54 | response_text = _llm_call_completion(llm_request)
55 |
56 | return LLMResponse(response_text, llm_request.model)
57 |
58 |
59 | def generate_image(llm_request):
60 | get_config().log(llm_request.to_log())
61 |
62 | response = _call_generate_image(
63 | llm_request.to_flat_prompt(),
64 | llm_request.model)
65 |
66 | return LLMImageResponse(
67 | response.url,
68 | llm_request.model,
69 | response.revised_prompt)
70 |
71 |
72 | @handle_openai_errors
73 | def _call_generate_image(prompt, model):
74 | # TODO: Can I manually register the dall-e-3 using the llm api?
75 | response = client.images.generate(
76 | prompt=prompt,
77 | model=model,
78 | size="1024x1024",
79 | n=1
80 | )
81 |
82 | return response.data[0]
83 |
84 |
85 | @handle_openai_errors
86 | def _llm_call_completion(llm_request: LLMRequest) -> str:
87 | model = llm.get_model(llm_request.model)
88 | if isinstance(model, (Chat, AsyncChat)) and model.api_base is None:
89 | # Backwards compatible with the older override
90 | model.api_base = OPENAI_BASE_URL
91 |
92 | attachment = []
93 | for image in llm_request.images:
94 | if image.is_web_reference():
95 | attachment.append(llm.Attachment(url=image.src))
96 | else:
97 | attachment.append(llm.Attachment(path=image.src))
98 |
99 | # llm.Attachment(path="pelican.jpg"),
100 | # llm.Attachment(url="https://static.simonwillison.net/static/2024/pelicans.jpg"),
101 | # llm.Attachment(content=b"binary image content here")
102 | return model.prompt(
103 | llm_request.prompt,
104 | system=llm_request.system_content(),
105 | attachments=attachment,
106 | stream=False # we do not support streaming
107 | )
108 |
--------------------------------------------------------------------------------
/mark/llm_request.py:
--------------------------------------------------------------------------------
1 | from textwrap import dedent
2 | from .markdown_file import Image, Link
3 | from typing import (
4 | List,
5 | Optional
6 | )
7 |
8 |
9 | class LLMRequest:
10 | model: str
11 | prompt: Optional[str]
12 | system_message: Optional[str]
13 | images: List[Image]
14 | links: List[Link]
15 |
16 | def __init__(self, model):
17 | """
18 | Can serialize itself into a payload that can be sent to the OpenAI API (potentially others in the future)
19 | """
20 | self.system_message = None
21 | self.prompt = None
22 | self.model = model
23 | self.images = []
24 | self.links = []
25 |
26 | def with_system_message(self, system_message):
27 | self.system_message = system_message
28 | return self
29 |
30 | def with_prompt(self, prompt):
31 | self.prompt = prompt
32 | return self
33 |
34 | def with_image(self, image):
35 | self.images.append(image)
36 | return self
37 |
38 | def with_link(self, document):
39 | self.links.append(document)
40 | return self
41 |
42 | def system_content(self) -> str:
43 | system_content = ""
44 |
45 | if self.links:
46 | link_content_block = "---".join([str(link) for link in self.links])
47 | system_content += link_content_block
48 |
49 | if self.system_message:
50 | system_content += "\n" + self.system_message
51 |
52 | return system_content
53 |
54 | def to_payload(self):
55 | system_message = {"role": "system", "content": self.system_content()}
56 |
57 | if self.images:
58 | user_content = [{'type': 'text', 'text': self.prompt}]
59 | for image in self.images:
60 | if image.url:
61 | user_content.append(
62 | {'type': 'image_url', 'image_url': {'url': image.url}})
63 | else:
64 | user_content = self.prompt
65 |
66 | user_message = {"role": "user", "content": user_content}
67 | return [system_message, user_message]
68 |
69 | def to_flat_prompt(self) -> str:
70 | return self.system_content() + "\n" + self.prompt
71 |
72 | def to_log(self) -> str:
73 | return dedent("""
74 | # System message
75 | ---
76 | """) \
77 | + self.system_content() \
78 | + dedent("""
79 | ---
80 | # User Message
81 | ---
82 | """) \
83 | + self.prompt \
84 | + dedent("""
85 | ---
86 | # Images
87 | ---
88 | """) \
89 | + "\n".join([image.url for image in self.images])
90 |
--------------------------------------------------------------------------------
/mark/llm_response.py:
--------------------------------------------------------------------------------
1 | from textwrap import dedent
2 |
3 |
4 | class LLMResponse(object):
5 | RESPONSE_TEMPLATE = dedent(
6 | """
7 | # GPT Response (model: {model}, system: {system})
8 | {content}
9 |
10 | # User Response
11 | """
12 | )
13 |
14 | def __init__(self, content, model):
15 | self.model = model
16 | self.content = content
17 | self.system = 'default'
18 |
19 | def with_system(self, system):
20 | self.system = system
21 | return self
22 |
23 | def to_markdown(self):
24 | content = self.content
25 | return self.RESPONSE_TEMPLATE.format(
26 | model=self.model, system=self.system, content=content)
27 |
28 |
29 | class LLMImageResponse(LLMResponse):
30 | def __init__(self, image_url, model, revised_prompt=None):
31 | super().__init__(image_url, model)
32 | self.revised_prompt = revised_prompt
33 |
34 | def to_markdown(self):
35 | content = f"![Generated Image]({self.content})"
36 |
37 | if self.revised_prompt:
38 | content = f"{self.revised_prompt}\n\n{content}"
39 |
40 | return self.RESPONSE_TEMPLATE.format(
41 | model=self.model, system=self.system, content=content)
42 |
--------------------------------------------------------------------------------
/mark/markdown_file.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | from langchain_core.documents import Document
4 | from io import TextIOWrapper
5 | from textwrap import dedent
6 | import click
7 | from mark import scraper
8 | import base64
9 | import mimetypes
10 |
11 | """
12 | MarkdownFile
13 | Parses the markdown and extracts image elements from the file, resolving the paths of local images.
14 | """
15 |
16 |
17 | class MarkdownFile:
18 | def __init__(self, file_wrapper: TextIOWrapper):
19 | """
20 | Initializes the MarkdownFile object with a TextIOWrapper, usually obtained from opening a file.
21 | """
22 | self.file_path = None
23 | self.file_dir = None
24 | if hasattr(file_wrapper, 'name') and file_wrapper.name != '':
25 | self.file_path = file_wrapper.name
26 | self.file_dir = os.path.dirname(file_wrapper.name)
27 | else:
28 | self.file_dir = os.getcwd()
29 | self.file_content = file_wrapper.read()
30 | self._images = None
31 | self._links = None
32 |
33 | @property
34 | def content(self):
35 | """
36 | Returns the content of the markdown file as a string.
37 | """
38 | return self.file_content
39 |
40 | @property
41 | def images(self):
42 | if not self._images:
43 | self._images = self._parse_elements(Image)
44 | return self._images
45 |
46 | @property
47 | def links(self):
48 | if not self._links:
49 | self._links = self._parse_elements(Link)
50 | return self._links
51 |
52 | def _parse_elements(self, cls):
53 | matches = re.findall(cls.REGX_PATTERN, self.file_content)
54 | return [
55 | cls.from_reference_folder(self.file_dir)
56 | .with_src(src)
57 | .with_text(text)
58 | for text, src in matches
59 | ]
60 |
61 |
62 | class PageReference:
63 | @classmethod
64 | def from_reference_folder(cls, folder):
65 | return cls(folder)
66 |
67 | def __init__(self, reference_folder, src=None):
68 | self.reference_folder = reference_folder
69 | self.src = src
70 | self.uri = None
71 | if src:
72 | self._resolve_uri()
73 |
74 | def with_src(self, src):
75 | self.src = src
76 | self._resolve_uri()
77 | return self
78 |
79 | def with_text(self, text):
80 | self.link_text = text
81 | return self
82 |
83 | def is_web_reference(self):
84 | return self.src.startswith("http")
85 |
86 | def _resolve_uri(self):
87 | if self.is_web_reference():
88 | self.uri = self.src
89 | else:
90 | self.uri = os.path.normpath(
91 | os.path.join(
92 | self.reference_folder,
93 | self.src))
94 |
95 |
96 | class Image(PageReference):
97 | # Regular expression to find Markdown image syntax with alt text
98 | REGX_PATTERN = r'!\[(.*?)\]\((.*?)\)'
99 |
100 | @property
101 | def url(self):
102 | if self.is_web_reference():
103 | return self.uri
104 | else:
105 | try:
106 | return Image.image_to_data_url(self.uri)
107 | except (FileNotFoundError, IsADirectoryError):
108 | click.echo(f"Image Reference {self.src} not found. Skipping")
109 | return ''
110 |
111 | @classmethod
112 | def encode_image(cls, image_path: str) -> str:
113 | """Get base64 string from image URI.
114 |
115 | Args:
116 | image_path: The path to the image.
117 |
118 | Returns:
119 | The base64 string of the image.
120 | """
121 | with open(image_path, "rb") as image_file:
122 | return base64.b64encode(image_file.read()).decode("utf-8")
123 |
124 | @classmethod
125 | def image_to_data_url(cls, image_path: str) -> str:
126 | """Get data URL from image URI.
127 |
128 | Args:
129 | image_path: The path to the image.
130 |
131 | Returns:
132 | The data URL of the image.
133 | """
134 | encoding = cls.encode_image(image_path)
135 | mime_type = mimetypes.guess_type(image_path)[0]
136 | return f"data:{mime_type};base64,{encoding}"
137 |
138 |
139 | class Link(PageReference):
140 | # Regular expression to find Markdown link syntax
141 | # it will match `[text](url)` but not `![text](url)`
142 | REGX_PATTERN = r'(?<!\!)\[(.*?)\]\((.*?)\)'
143 |
144 |     def __str__(self):
145 |         page = self.get_page()
146 |         return (
147 |             f"\nLink Text: {self.link_text}"
148 |             f"\nSRC: {self.src}"
149 |             f"\nPage Title: {page.title}"
150 |             f"\nPage Content:\n{page.body}\n"
151 |         )
152 |
153 |     def get_page(self):
154 |         if self.is_web_reference():
155 |             return scraper.get(self.uri)
156 |         try:
157 |             with open(self.uri, "r") as file:
158 |                 content = file.read()
159 |         except (FileNotFoundError, IsADirectoryError):
160 |             click.echo(f"Link Reference {self.src} not found. Skipping")
161 |             content = ''
162 |         return scraper.Page(self.src, body=content) \
163 |             .with_title(os.path.basename(self.src))
164 |
--------------------------------------------------------------------------------
/mark/scraper.py:
--------------------------------------------------------------------------------
1 | """
2 | scraper
3 | Renders pages in a headless browser (pyppeteer) and converts the
4 | rendered HTML into clean markdown so the LLM can see the links on
5 | the page.
6 | """
7 | import asyncio
8 | import re
9 | import click
10 | import pyppeteer
11 | import pyppeteer.errors
12 | from bs4 import BeautifulSoup
13 | from markdownify import MarkdownConverter
14 |
15 | # Identify as a regular desktop browser so sites serve full content
16 | # (representative value; any modern browser UA string works here)
17 | DEFAULT_USER_AGENT = (
18 |     "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
19 |     "AppleWebKit/537.36 (KHTML, like Gecko) "
20 |     "Chrome/125.0.0.0 Safari/537.36"
21 | )
22 |
23 |
24 | class Page:
25 |     """A fetched page: url, optional title, and markdown body."""
26 |
27 |     def __init__(self, url, body=None, soup=None):
28 |         self.url = url
29 |         self.body = body
30 |         self.soup = soup
31 |         self.title = None
32 |
33 |     def with_title(self, title):
34 |         self.title = title
35 |         return self
36 |
37 | def get(url: str) -> Page:
38 | raw_html = get_rendered_html(url)
39 | clean_soup = _clean_soup_from_html(raw_html)
40 | markdown = _markdown_from_soup(clean_soup)
41 | page = Page(url, body=markdown, soup=clean_soup)
42 |
43 | if title := clean_soup.find('title'):
44 | page.with_title(title.text)
45 |
46 | return page
47 |
48 |
49 | def get_rendered_html(url: str) -> str:
50 | try:
51 | return asyncio.run(_render_page(url))
52 | except pyppeteer.errors.BrowserError:
53 | click.echo(f"BrowserError while fetching {url}")
54 | return "BrowserError while fetching"
55 | except pyppeteer.errors.TimeoutError:
56 | click.echo(f"Timeout while fetching {url}")
57 | return "Timeout while fetching page"
58 |
59 |
60 | async def _render_page(url: str) -> str:
61 | browser = None
62 | try:
63 | browser = await pyppeteer.launch()
64 | page = await browser.newPage()
65 | await page.setUserAgent(DEFAULT_USER_AGENT)
66 | await page.goto(url)
67 | rendered_html = await page.content()
68 | finally:
69 | if browser:
70 | await browser.close()
71 | return rendered_html
72 |
73 |
74 | def _clean_soup_from_html(html: str) -> BeautifulSoup:
75 | # warnings.filterwarnings("ignore")
76 |
77 | soup = BeautifulSoup(html, 'html.parser')
78 |
79 | # List of tags to decompose
80 | tags_to_decompose = ['script', 'meta', 'link', 'style']
81 |
82 | for tag in soup.find_all(True):
83 | # Remove class attributes
84 | if 'class' in tag.attrs:
85 | del tag['class']
86 |
87 | # Remove style attributes
88 | if 'style' in tag.attrs:
89 | del tag['style']
90 |
91 | # Decompose unwanted tags
92 | if tag.name in tags_to_decompose:
93 | tag.decompose()
94 |
95 | return soup
96 |
97 |
98 | def _markdown_from_soup(soup: BeautifulSoup) -> str:
99 | raw_markdown_text = MarkdownConverter().convert_soup(soup)
100 | return re.sub(r'\n{3,}', '\n\n', raw_markdown_text)
101 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "mark"
3 | version = "0.10.2"
4 | description = "Mark lets you seamlessly use markdown, images and links to interact with LLMs"
5 | readme = "README.md"
6 | requires-python = ">=3.10"
7 | classifiers = [
8 | "Programming Language :: Python :: 3",
9 | "Operating System :: OS Independent",
10 | ]
11 | license = "MIT"
12 | license-files = ["LICEN[CS]E*"]
13 |
14 | [project.urls]
15 | Homepage = "https://github.com/relston/mark"
16 | Issues = "https://github.com/relston/mark/issues"
17 |
18 | [tool.poetry]
19 | name = "mark"
20 | version = "0.10.2"
21 | description = "Mark lets you seamlessly use markdown, images and links to interact with LLMs"
22 | authors = ["Ryan Elston "]
23 |
24 | # Include additional files
25 | include = [
26 | "templates/default_system_prompt.md"
27 | ]
28 |
29 | [tool.poetry.dependencies]
30 | python = "^3.10"
31 | PyYAML = "6.0.2"
32 | ipython = "8.32.0"
33 | click = "^8.1.8"
34 | beautifulsoup4 = "^4.12.3"
35 | langchain = "^0.2.16"
36 | langchain-community = "^0.2.15"
37 | pyppeteer = "^2.0.0"
38 | markdownify = "^0.14.1"
39 | llm = "^0.25"
40 | click-default-group = "^1.2.4"
41 |
42 | [tool.poetry.scripts]
43 | mark = "mark.cli:mark_cli"
44 |
45 | [tool.poetry.group.dev.dependencies]
46 | pytest = "^8.3.4"
47 | respx = "^0.22.0"
48 | httpx = "^0.28.1"
49 | flake8 = "^7.1.1"
50 | autopep8 = "^2.3.1"
51 |
52 | [build-system]
53 | requires = ["poetry-core>=1.0.0"]
54 | build-backend = "poetry.core.masonry.api"
--------------------------------------------------------------------------------
/templates/default_system_prompt.md:
--------------------------------------------------------------------------------
1 | You are a helpful LLM agent that will receive user input in the form of a markdown file.
2 | The contents of the file will be used as context and the specific prompt from the user will be located at the end of the file.
3 | Your response to the user's request should also be written in markdown format.
4 |
5 | RULES:
6 | - Do not echo back any of the input into your response to the user.
7 | - If using a heading in your response, start with a level 2 heading
8 |
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import os
3 | from unittest.mock import patch
4 | import llm
5 |
6 |
7 | @pytest.fixture(autouse=True)
8 | def mock_openai_key():
9 | os.environ['OPENAI_API_KEY'] = 'test_key'
10 |
11 |
12 | @pytest.fixture(autouse=True)
13 | def mock_cwd(tmp_path):
14 | with patch('os.getcwd') as mock:
15 | mock.return_value = tmp_path
16 | yield mock
17 |
18 |
19 | @pytest.fixture
20 | def mock_stdout():
21 | with patch('click.echo') as mock:
22 | yield mock
23 |
24 |
25 | @pytest.fixture
26 | def mock_llm_response():
27 | with patch('llm.models._Model.prompt') as mock:
28 | yield mock
29 |
30 |
31 | @pytest.fixture
32 | def mock_llm_get_model():
33 | get_model_method = llm.get_model
34 |
35 | with patch('llm.get_model') as mock:
36 | mock.side_effect = get_model_method
37 | yield mock
38 |
39 |
40 | @pytest.fixture
41 | def mock_image_generation():
42 | with patch('mark.llm._call_generate_image') as mock:
43 | yield mock
44 |
45 |
46 | @pytest.fixture
47 | def create_file(tmp_path):
48 | def _create_file(file_path, content, binary=False):
49 | file = tmp_path / file_path
50 | file.parent.mkdir(parents=True, exist_ok=True)
51 | if binary:
52 | file.write_bytes(content)
53 | else:
54 | file.write_text(content, encoding="utf-8")
55 | return file
56 | return _create_file
57 |
58 |
59 | @pytest.fixture
60 | def mock_web_page():
61 | url_to_content = {}
62 |
63 | def _mock(url, page_content):
64 | url_to_content[url] = page_content
65 |
66 | with patch('mark.scraper.get_rendered_html') as mock:
67 | def side_effect(url):
68 | return url_to_content[url]
69 | mock.side_effect = side_effect
70 | yield _mock
71 |
--------------------------------------------------------------------------------
/tests/test_cli.py:
--------------------------------------------------------------------------------
1 | from mark.cli import mark_cli
2 | from textwrap import dedent
3 | from mark import config
4 | import pytest
5 | import os
6 | import sys
7 | import io
8 | import llm
9 | from unittest.mock import Mock
10 |
11 | """
12 | These tests are meant to act as 'functional-lite'. Maximizing code coverage for
13 | each of the main use cases of the CLI command and minimizing the number of
14 | basic unit tests needed for each individual module.
15 |
16 | We just mock out the files and the OpenAI API calls, and then test the CLI
17 | """
18 |
19 |
20 | class TestCLI:
21 | @pytest.fixture(autouse=True)
22 | def use_tmp_config_path(self, tmp_path):
23 | # MARK_CONFIG_PATH defaults to ~/.mark
24 | # for all tests we use a temporary directory
25 | self.config_path = tmp_path / 'config'
26 | os.environ['MARK_CONFIG_PATH'] = str(self.config_path)
27 |
28 | @pytest.fixture(autouse=True)
29 | def define_files(
30 | self,
31 | create_file,
32 | mock_llm_response,
33 | mock_web_page,
34 | mock_image_generation):
35 | config.reset()
36 |
37 | # Given a markdown file with the following content
38 | self.mock_markdown_file_content = dedent("""
39 | A Markdown file with various images and links
40 |
41 | Local image:
42 | ![Sample](./images/sample.png)
43 |
44 | Remote image:
45 | ![Remote](https://example.com/image.png)
46 |
47 | Relative image outside directory:
48 | ![Outside](../images/outside.png)
49 |
50 | External url link:
51 | [External URL](https://example.com/some-article)
52 |
53 | Local file link:
54 | [Another Reference](./docs/another-reference.md)
55 | """)
56 |
57 | # and the files exists in the file system
58 | self.markdown_file = create_file(
59 | "test.md", self.mock_markdown_file_content)
60 | create_file("./images/sample.png", b"sample image data", binary=True)
61 | create_file(
62 | "../images/outside.png",
63 | b"outside image data",
64 | binary=True)
65 | create_file("./docs/another-reference.md", "Another reference content")
66 |
67 | # and the external url link returns this response
68 | html_content = """
69 | <!DOCTYPE html>
70 | <html>
71 | <head>
72 | <title>Basic HTML Page</title>
73 | </head>
74 | <body>
75 | <h1>Welcome to My Page</h1>
76 | <a href="https://www.example.com">Visit Example.com</a>
77 | </body>
78 | </html>
79 | """
80 | mock_web_page('https://example.com/some-article', html_content)
81 |
82 | # and llm returning this response
83 | mock_llm_response.return_value = "Test completion"
84 | mock_image_generation.return_value = Mock(
85 | url='https://generated.image.url/image.png',
86 | revised_prompt='A revised mock image prompt'
87 | )
88 |
89 | self.default_system_prompt = dedent(
90 | """
91 | You are a helpful LLM agent that will receive user input in the form of a markdown file.
92 | The contents of the file will be used as context and the specific prompt from the user will be located at the end of the file.
93 | Your response to the user's request should also be written in markdown format.
94 |
95 | RULES:
96 | - Do not echo back any of the input into your response to the user.
97 | - If using a heading in your response, start with a level 2 heading
98 | """
99 | )
100 |
101 | self.default_expected_context = dedent(
102 | """
103 | Link Text: External URL
104 | SRC: https://example.com/some-article
105 | Page Title: Basic HTML Page
106 | Page Content:
107 |
108 |
109 | Basic HTML Page
110 |
111 | Welcome to My Page
112 | ==================
113 |
114 | [Visit Example.com](https://www.example.com)
115 |
116 |
117 | ---
118 | Link Text: Another Reference
119 | SRC: ./docs/another-reference.md
120 | Page Title: another-reference.md
121 | Page Content:
122 | Another reference content
123 | """
124 | )
125 |
126 | self.default_expected_system_message = self.default_expected_context + \
127 | self.default_system_prompt
128 |
129 | self.default_expected_llm_request = [
130 | {'role': 'system', 'content': self.default_expected_system_message},
131 | {'role': 'user', 'content': [
132 | {'type': 'text', 'text': self.mock_markdown_file_content},
133 | {'type': 'image_url', 'image_url': {'url': 'data:image/png;base64,c2FtcGxlIGltYWdlIGRhdGE='}},
134 | {'type': 'image_url', 'image_url': {'url': 'https://example.com/image.png'}},
135 | {'type': 'image_url', 'image_url': {'url': 'data:image/png;base64,b3V0c2lkZSBpbWFnZSBkYXRh'}},
136 | ]
137 | }
138 | ]
139 | self.default_expected_attachments = [
140 | llm.Attachment(path='./images/sample.png'),
141 | llm.Attachment(url='https://example.com/image.png'),
142 | llm.Attachment(path='../images/outside.png')
143 | ]
144 |
145 | def test_command_default(self, mock_llm_response):
146 | """Test CLI command without specifying an agent (default agent should be used)."""
147 |
148 | # Run the CLI command with only the markdown file
149 | mark_cli([str(self.markdown_file)], None, None, False)
150 |
151 | mock_llm_response.assert_called_once_with(
152 | self.mock_markdown_file_content,
153 | system=self.default_expected_system_message,
154 | attachments=self.default_expected_attachments,
155 | stream=False
156 | )
157 |
158 | # The markdown file will be updated with the response
159 | expected_markdown_file_content = self.mock_markdown_file_content + \
160 | dedent("""
161 | # GPT Response (model: gpt-4o, system: default)
162 | Test completion
163 |
164 | # User Response
165 | """)
166 |
167 | assert self.markdown_file.read_text() == expected_markdown_file_content
168 |
169 | def test_command_with_stdin(self, mock_llm_response, mock_stdout):
170 | byte_string = self.mock_markdown_file_content.encode('utf-8')
171 | input = io.TextIOWrapper(io.BytesIO(byte_string), encoding='utf-8')
172 | sys.stdin = input
173 |
174 | mark_cli(['-'], None, None, False)
175 |
176 | mock_llm_response.assert_called_once_with(
177 | self.mock_markdown_file_content,
178 | system=self.default_expected_system_message,
179 | attachments=self.default_expected_attachments,
180 | stream=False
181 | )
182 |
183 | mock_stdout.assert_called_once_with("Test completion")
184 |
185 | def test_command_custom_model(self, mock_llm_get_model, mock_llm_response):
186 | """
187 | mark --model o1 path/to/markdown.md
188 | """
189 |
190 | mark_cli(['--model', 'o1', str(self.markdown_file)], None, None, False)
191 |
192 | mock_llm_get_model.assert_called_once_with('o1')
193 |
194 | mock_llm_response.assert_called_once_with(
195 | self.mock_markdown_file_content,
196 | system=self.default_expected_system_message,
197 | attachments=self.default_expected_attachments,
198 | stream=False
199 | )
200 |
201 | def test_command_custom_agent(self, create_file, mock_llm_response):
202 | # Define a custom agent
203 | create_file(
204 | self.config_path / 'system_prompts/custom.md',
205 | """You're a custom agent that ....."""
206 | )
207 |
208 | # Run the CLI command with the custom agent
209 | mark_cli([str(self.markdown_file), '--system=custom'],
210 | None, None, False)
211 |
212 | expected_system_message = self.default_expected_context + \
213 | "\nYou're a custom agent that ....."
214 |
215 | mock_llm_response.assert_called_once_with(
216 | self.mock_markdown_file_content,
217 | system=expected_system_message,
218 | attachments=self.default_expected_attachments,
219 | stream=False
220 | )
221 |
222 | # The markdown file will be updated indicating the custom agent
223 | expected_markdown_file_content = self.mock_markdown_file_content + \
224 | dedent("""
225 | # GPT Response (model: gpt-4o, system: custom)
226 | Test completion
227 |
228 | # User Response
229 | """)
230 | assert self.markdown_file.read_text() == expected_markdown_file_content
231 |
232 | def test_command_generate_image(self, mock_image_generation):
233 | """
234 | Test CLI command with the --generate-image option.
235 | """
236 |
237 | mark_cli([str(self.markdown_file), '--generate-image'],
238 | None, None, False)
239 |
240 | expected_prompt = self.default_expected_system_message + \
241 | "\n" + self.mock_markdown_file_content
242 | mock_image_generation.assert_called_once_with(
243 | expected_prompt, "dall-e-3")
244 |
245 | # The markdown file will be updated with the generated image URL
246 | expected_markdown_file_content = self.mock_markdown_file_content + \
247 | dedent("""
248 | # GPT Response (model: dall-e-3, system: default)
249 | A revised mock image prompt
250 |
251 | ![Generated Image](https://generated.image.url/image.png)
252 |
253 | # User Response
254 | """)
255 |
256 | assert self.markdown_file.read_text() == expected_markdown_file_content
257 |
258 | def test_command_models(self, mock_stdout):
259 | """
260 | Test for `mark models`
261 | """
262 |
263 | mark_cli(['models'], None, None, False)
264 |
265 | call = mock_stdout.call_args_list[0]
266 | assert 'OpenAI Chat' in call[0][0]
267 |
--------------------------------------------------------------------------------
/tests/test_scraper.py:
--------------------------------------------------------------------------------
1 | import pyppeteer
2 | from mark import scraper
3 | from unittest.mock import patch
4 |
5 |
6 | def test_page_scrape(mock_web_page):
7 | html_content = """
8 | <!DOCTYPE html>
9 | <html>
10 | <head>
11 | <title>Basic HTML Page</title>
12 | </head>
13 | <body>
14 | <h1>Welcome to My Page</h1>
15 | <a href="https://www.example.com">Visit Example.com</a>
16 | </body>
17 | </html>
18 | """
19 |
20 | mock_web_page('https://supercool.com', html_content)
21 |
22 | page = scraper.get('https://supercool.com')
23 |
24 | assert page.title == 'Basic HTML Page'
25 | assert page.url == 'https://supercool.com'
26 | assert page.body == '\n\nBasic HTML Page\n\nWelcome to My Page\n' + \
27 | '==================\n\n[Visit Example.com](https://www.example.com)\n\n'
28 |
29 |
30 | def test_timeout_error_handling():
31 | with patch('mark.scraper._render_page', side_effect=pyppeteer.errors.TimeoutError):
32 | page = scraper.get('https://timeout-test.com')
33 |
34 | assert page.body == 'Timeout while fetching page'
35 | assert page.title is None, "Expected no title when a TimeoutError occurs"
36 | assert page.url == 'https://timeout-test.com', "URL should be correct even when timeout occurs"
37 |
--------------------------------------------------------------------------------