├── .github └── workflows │ └── ci.yml ├── .gitignore ├── LICENSE.txt ├── Makefile ├── README.md ├── minithesis.py ├── requirements.txt └── test_minithesis.py /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: ci 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | workflow_dispatch: 9 | 10 | jobs: 11 | build: 12 | 13 | runs-on: ubuntu-latest 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | python-version: [3.8, 3.9] 18 | 19 | steps: 20 | - uses: actions/checkout@v2 21 | - name: Set up Python ${{ matrix.python-version }} 22 | uses: actions/setup-python@v2 23 | with: 24 | python-version: ${{ matrix.python-version }} 25 | - name: Install dependencies 26 | run: | 27 | python -m pip install --upgrade pip 28 | python -m pip install flake8 pytest 29 | pip install -r requirements.txt 30 | - name: Test with pytest 31 | run: | 32 | pytest 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | .pytest_cache/ 3 | __pycache__/ 4 | .minithesis-cache.* 5 | venv 6 | .pyc 7 | *.egg-info 8 | __pycache__ 9 | .coverage 10 | .hypothesis/ 11 | .pytest_cache/ 12 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2020, David R. MacIver 2 | 3 | All code in this repository is released under the Mozilla Public License v 2.0. You can obtain a copy at https://mozilla.org/MPL/2.0/. 
4 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | venv/bin/python: 2 | virtualenv venv 3 | 4 | venv/installed: venv/bin/python requirements.txt 5 | venv/bin/pip install -r requirements.txt 6 | touch venv/installed 7 | 8 | .PHONY: update-requirements 9 | update-requirements: venv/installed 10 | venv/bin/pip freeze > requirements.txt 11 | 12 | 13 | .PHONY: test 14 | test: venv/installed 15 | venv/bin/python -m coverage run --include=minithesis.py --branch -m pytest test_minithesis.py --ff --maxfail=1 -m 'not hypothesis' --durations=100 --verbose 16 | venv/bin/coverage report --show-missing --fail-under=100 17 | 18 | .PHONY: format 19 | format: venv/installed 20 | venv/bin/isort *.py 21 | venv/bin/black *.py 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Minithesis 2 | 3 | This is an incredibly minimal implementation of the core idea of [Hypothesis](https://github.com/HypothesisWorks/hypothesis). 4 | It is mostly intended to provide a minimal implementation of a powerful modern property-based testing library 5 | that is easy to port to new languages. 6 | 7 | Minithesis supports the following features absent from most property-based testing libraries: 8 | 9 | * Generating arbitrary values inline in the test, including based on previous test results 10 | * Fully generic shrinking 11 | * A test database 12 | * Targeted Property-Based Testing 13 | 14 | It achieves all of this in less than 500 lines of relatively straightforward Python (plus comments). 15 | 16 | ## Notes 17 | 18 | * The algorithms for both shrinking and targeting are a bit naive but they're not *terrible* - they should be good enough that using them is better than not having them. 
19 | * The database uses Python's DBM module for simplicity. 20 | * This is probably best read after or in tandem with [our paper about test-case reduction in Hypothesis](https://drmaciver.github.io/papers/reduction-via-generation-preview.pdf) 21 | * This does not necessarily track the core Hypothesis implementation that closely and is more an "in spirit" implementation. 22 | * This probably doesn't work all that well - it's tolerably well tested, but nobody has ever used it for real and probably nobody ever will because why would they when Hypothesis exists? 23 | * Pull requests to improve clarity *extremely* welcome. It probably won't ever grow many features (I might implement the database at some point) though because you're not supposed to use it in anger. 24 | * I've used the (sadly defunct) Hypothesis-for-Ruby naming conventions because those are better than the Python ones because I actually put some thought into them. 25 | * There is a fairly minimal generator library just to get you started but it's nothing resembling comprehensive and is unlikely to ever be. 26 | 27 | 28 | ## Minithesis Ports 29 | 30 | There are a number of ports of minithesis (:tada:). The following are the ones I'm aware of: 31 | 32 | * Martin Janiczek's Elm port, [elm-minithesis](https://github.com/Janiczek/elm-minithesis) 33 | * Jack Firth's racket port, [miniracksis](https://github.com/jackfirth/miniracksis/) 34 | * Amanda Walker's Haskell port, [haskell-minithesis](https://github.com/AnOctopus/haskell-minithesis) 35 | * Dmitry Dygalo and Rik de Kort's Rust port, [minithesis-rust](https://github.com/Rik-de-Kort/minithesis-rust) 36 | * Justin Blank's Java port, [jiminy-thesis](https://github.com/hyperpape/jiminy-thesis) 37 | * Valentin Bogad's Julia port, [MiniThesis.jl](https://github.com/Seelengrab/MiniThesis.jl) 38 | 39 | If you write a port, please submit a pull request to add it to the list! 
40 | -------------------------------------------------------------------------------- /minithesis.py: -------------------------------------------------------------------------------- 1 | # This file is part of Minithesis, which may be found at 2 | # https://github.com/DRMacIver/minithesis 3 | # 4 | # This work is copyright (C) 2020 David R. MacIver. 5 | # 6 | # This Source Code Form is subject to the terms of the Mozilla Public License, 7 | # v. 2.0. If a copy of the MPL was not distributed with this file, You can 8 | # obtain one at https://mozilla.org/MPL/2.0/. 9 | 10 | """ 11 | This file implements a simple property-based testing library called 12 | minithesis. It's not really intended to be used as is, but is instead 13 | a proof of concept that implements as much of the core ideas of 14 | Hypothesis in a simple way that is designed for people who want to 15 | implement a property-based testing library for non-Python languages. 16 | 17 | minithesis is always going to be self-contained in a single file 18 | and consist of < 1000 sloc (as measured by cloc). This doesn't 19 | count comments and I intend to comment on it extensively. 20 | 21 | 22 | ============= 23 | PORTING NOTES 24 | ============= 25 | 26 | minithesis supports roughly the following features, more or less 27 | in order of most to least important: 28 | 29 | 1. Test case generation. 30 | 2. Test case reduction ("shrinking") 31 | 3. A small library of primitive possibilities (generators) and combinators. 32 | 4. A Test case database for replay between runs. 33 | 5. Targeted property-based testing 34 | 6. A caching layer for mapping choice sequences to outcomes 35 | 36 | 37 | Anything that supports 1 and 2 is a reasonably good first porting 38 | goal. You'll probably want to port most of the possibilities library 39 | because it's easy and it helps you write tests, but don't worry 40 | too much about the specifics. 
class Database(Protocol):
    """Structural type for the key/value store that ``run_test`` uses to
    remember a failing choice sequence between runs.

    Keys are strings (``run_test`` passes the test function's
    ``__name__``) and values are opaque ``bytes``. A plain ``dict``
    already satisfies this protocol."""

    def __setitem__(self, key: str, value: bytes) -> None:
        """Store ``value`` under ``key``."""
        ...

    def get(self, key: str) -> Optional[bytes]:
        """Return the value stored under ``key``, or ``None`` if there
        is none."""
        ...

    def __delitem__(self, key: str) -> None:
        """Remove ``key``. ``run_test`` catches ``KeyError`` around this
        call, so implementations may raise it when ``key`` is absent."""
        ...
114 | It will either run silently or print drawn values and then 115 | fail with an exception if minithesis finds some test case 116 | that fails. 117 | 118 | The test will be run immediately, unlike in Hypothesis where 119 | @given wraps a function to expose it to the test runner. 120 | If you don't want it to be run immediately wrap it inside a 121 | test function yourself. 122 | 123 | Arguments: 124 | 125 | * max_examples: the maximum number of valid test cases to run for. 126 | Note that under some circumstances the test may run fewer test 127 | cases than this. 128 | * random: An instance of random.Random that will be used for all 129 | nondeterministic choices. 130 | * database: A dict-like object in which results will be cached and resumed 131 | from, ensuring that if a test is run twice it fails in the same way. 132 | * quiet: Will not print anything on failure if True. 133 | """ 134 | 135 | def accept(test: Callable[[TestCase], None]) -> None: 136 | def mark_failures_interesting(test_case: TestCase) -> None: 137 | try: 138 | test(test_case) 139 | except Exception: 140 | if test_case.status is not None: 141 | raise 142 | test_case.mark_status(Status.INTERESTING) 143 | 144 | state = TestingState( 145 | random or Random(), mark_failures_interesting, max_examples 146 | ) 147 | 148 | if database is None: 149 | # If the database is not set, use a standard cache directory 150 | # location to persist examples. 
class TestCase(object):
    """Represents a single generated test case, which consists
    of an underlying set of choices that produce possibilities."""

    @classmethod
    def for_choices(
        cls,
        choices: Sequence[int],
        print_results: bool = False,
    ) -> TestCase:
        """Returns a test case that makes this series of choices.

        No source of randomness is supplied and ``max_size`` is set to
        ``len(choices)``, so asking for more choices than were provided
        marks the test case as ``Status.OVERRUN`` instead of drawing
        fresh values."""
        return TestCase(
            prefix=choices,
            random=None,
            max_size=len(choices),
            print_results=print_results,
        )

    def __init__(
        self,
        prefix: Sequence[int],
        random: Optional[Random],
        max_size: float = float("inf"),
        print_results: bool = False,
    ):
        """Create a test case.

        Arguments:

        * prefix: choices to replay, in order, before any randomness
          is consulted.
        * random: source of randomness used once ``prefix`` has been
          exhausted. May only be None if ``max_size == len(prefix)``.
        * max_size: the maximum number of choices that may be made;
          attempting one more marks the test case as OVERRUN.
        * print_results: if True, top-level draws are printed.
        """
        self.prefix = prefix
        # XXX Need a cast because below we assume self.random is not None;
        # it can only be None if max_size == len(prefix)
        self.random: Random = cast(Random, random)
        self.max_size = max_size
        # Every choice actually made so far, as unsigned 64-bit integers.
        self.choices: array[int] = array("Q")
        # None while the test case is still running.
        self.status: Optional[Status] = None
        self.print_results = print_results
        # Nesting level of ``any`` calls; only depth 0 draws are printed.
        self.depth = 0
        self.targeting_score: Optional[int] = None

    def choice(self, n: int) -> int:
        """Returns a number in the range [0, n]"""
        result = self.__make_choice(n, lambda: self.random.randint(0, n))
        if self.__should_print():
            print(f"choice({n}): {result}")
        return result

    def weighted(self, p: float) -> bool:
        """Return True with probability ``p``.

        When ``p <= 0`` or ``p >= 1`` the outcome is certain, so it is
        recorded with ``forced_choice`` rather than drawn. The result is
        always a ``bool``, whichever branch produced it (``bool`` is a
        subclass of ``int``, so callers treating it as 0/1 still work)."""
        if p <= 0:
            result = bool(self.forced_choice(0))
        elif p >= 1:
            result = bool(self.forced_choice(1))
        else:
            result = bool(self.__make_choice(1, lambda: int(self.random.random() <= p)))
        if self.__should_print():
            print(f"weighted({p}): {result}")
        return result

    def forced_choice(self, n: int) -> int:
        """Inserts a fake choice into the choice sequence, as if
        some call to choice() had returned ``n``. You almost never
        need this, but sometimes it can be a useful hint to the
        shrinker.

        Raises ValueError if ``n`` is negative or wider than 64 bits,
        and Frozen if the test case has already completed."""
        self.__check_can_choose(n)
        self.choices.append(n)
        return n

    def reject(self) -> NoReturn:
        """Mark this test case as invalid."""
        self.mark_status(Status.INVALID)

    def assume(self, precondition: bool) -> None:
        """If this precondition is not met, abort the test and
        mark this test case as invalid."""
        if not precondition:
            self.reject()

    def target(self, score: int) -> None:
        """Set a score to maximize. Multiple calls to this function
        will override previous ones.

        The name and idea come from Löscher, Andreas, and Konstantinos
        Sagonas. "Targeted property-based testing." ISSTA. 2017, but
        the implementation is based on that found in Hypothesis,
        which is not that similar to anything described in the paper.
        """
        self.targeting_score = score

    def any(self, possibility: Possibility[U]) -> U:
        """Return a possible value from ``possibility``."""
        try:
            # Track nesting so that only the outermost draw is printed.
            self.depth += 1
            result = possibility.produce(self)
        finally:
            self.depth -= 1

        if self.__should_print():
            print(f"any({possibility}): {result}")
        return result

    def mark_status(self, status: Status) -> NoReturn:
        """Set the status and raise StopTest.

        Raises Frozen instead if a status has already been set."""
        if self.status is not None:
            raise Frozen()
        self.status = status
        raise StopTest()

    def __should_print(self) -> bool:
        """True if draws made right now should be echoed to stdout."""
        return self.print_results and self.depth == 0

    def __check_can_choose(self, n: int) -> None:
        """Shared precondition for recording a choice bounded by ``n``:
        ``n`` must fit in an unsigned 64-bit integer, the test case must
        still be running, and there must be room for another choice
        (otherwise the test case is marked OVERRUN and StopTest is
        raised)."""
        if n.bit_length() > 64 or n < 0:
            raise ValueError(f"Invalid choice {n}")
        if self.status is not None:
            raise Frozen()
        if len(self.choices) >= self.max_size:
            self.mark_status(Status.OVERRUN)

    def __make_choice(self, n: int, rnd_method: Callable[[], int]) -> int:
        """Make a choice in [0, n], by calling rnd_method if
        randomness is needed."""
        self.__check_can_choose(n)
        if len(self.choices) < len(self.prefix):
            # Still replaying the prefix we were given.
            result = self.prefix[len(self.choices)]
        else:
            result = rnd_method()
        self.choices.append(result)
        # A replayed value may be out of range for this particular
        # choice; the choice is still recorded but the test is invalid.
        if result > n:
            self.mark_status(Status.INVALID)
        return result
def lists(
    elements: Possibility[U],
    min_size: int = 0,
    max_size: float = float("inf"),
) -> Possibility[List[U]]:
    """Any lists whose elements are possible values from ``elements`` are possible.

    The length of the list is between ``min_size`` and ``max_size``,
    both inclusive."""

    def produce(test_case: TestCase) -> List[U]:
        result: List[U] = []
        while True:
            if len(result) < min_size:
                # We must continue, so record that as a forced "keep
                # going" choice to keep the choice sequence regular.
                test_case.forced_choice(1)
            elif len(result) >= max_size:
                # The list is full. Stop only once max_size elements
                # have actually been drawn, so that a list of exactly
                # max_size elements is reachable.
                test_case.forced_choice(0)
                break
            elif not test_case.weighted(0.9):
                break
            result.append(test_case.any(elements))
        return result

    return Possibility[List[U]](produce, name=f"lists({elements.name})")


def just(value: U) -> Possibility[U]:
    """Only ``value`` is possible."""
    return Possibility[U](lambda tc: value, name=f"just({value})")


def nothing() -> Possibility[NoReturn]:
    """No possible values. i.e. Any call to ``any`` will reject
    the test case."""

    def produce(tc: TestCase) -> NoReturn:
        tc.reject()

    return Possibility(produce)


def mix_of(*possibilities: Possibility[T]) -> Possibility[T]:
    """Possible values can be any value possible for one of ``possibilities``."""
    if not possibilities:
        # XXX Need a cast since NoReturn isn't a T (though perhaps it should be)
        return cast(Possibility[T], nothing())
    return Possibility(
        lambda tc: tc.any(possibilities[tc.choice(len(possibilities) - 1)]),
        # This must be an f-string, otherwise the repr is the literal
        # template text rather than the names of the alternatives.
        name=f"mix_of({', '.join(p.name for p in possibilities)})",
    )


# XXX This signature requires PEP 646
def tuples(*possibilities: Possibility[Any]) -> Possibility[Any]:
    """Any tuple t of length len(possibilities) such that t[i] is possible
    for possibilities[i] is possible."""
    return Possibility(
        lambda tc: tuple(tc.any(p) for p in possibilities),
        # This must be an f-string, otherwise the repr is the literal
        # template text rather than the names of the components.
        name=f"tuples({', '.join(p.name for p in possibilities)})",
    )
class CachedTestFunction(object):
    """Wraps a test function so that it can be called with a bare
    choice sequence and returns the resulting ``Status``, remembering
    what it has seen. Because the outcomes are stored as a tree of
    choices, it can often predict the result for a sequence it has
    never been given exactly, without running the test again.

    You can safely omit implementing this at the cost of
    somewhat increased shrinking time.
    """

    def __init__(self, test_function: Callable[[TestCase], None]):
        self.test_function = test_function

        # Each tree node is a dict keyed by the value of the choice
        # made at that point. The value is either the node reached
        # after making that choice, or a Status recording that
        # mark_status was called there, so nothing after it matters.
        #
        # Note that a better implementation of this would use
        # a Patricia trie, which implements long non-branching
        # paths as an array inline. For simplicity we don't
        # do that here.
        # XXX The type of self.tree is recursive
        self.tree: Dict[int, Union[Status, Dict[int, Any]]] = {}

    def __recall(self, choices: Sequence[int]) -> Optional[Status]:
        """Return the status already known for ``choices``, or None if
        the tree cannot answer and the test has to be run."""
        # XXX The type of cursor is problematic
        cursor: Any = self.tree
        for value in choices:
            try:
                cursor = cursor[value]
            except KeyError:
                # Unknown territory: we have to run the test.
                return None
            if isinstance(cursor, Status):
                # mark_status was called here, so whatever choices
                # follow would be ignored.
                assert cursor != Status.OVERRUN
                return cursor
        # Every choice was known and none of them ended the test, so
        # the test would go on to ask for a choice we do not have.
        return Status.OVERRUN

    def __call__(self, choices: Sequence[int]) -> Status:
        remembered = self.__recall(choices)
        if remembered is not None:
            return remembered

        # The tree could not tell us, so run the test for real.
        test_case = TestCase.for_choices(choices)
        self.test_function(test_case)
        assert test_case.status is not None
        outcome = test_case.status

        # Record the choices that were actually made in the tree.
        made = test_case.choices
        final_index = len(made) - 1
        cursor: Any = self.tree
        for index, value in enumerate(made):
            if index < final_index or outcome == Status.OVERRUN:
                # Interior node (on overrun every node is interior):
                # descend, creating the child if we have not been here.
                try:
                    cursor = cursor[value]
                except KeyError:
                    child: Dict[int, Any] = {}
                    cursor[value] = child
                    cursor = child
            else:
                # The last choice made is where the status was decided.
                cursor[value] = outcome
        return outcome
None: 538 | self.best_scoring = relevant_info 539 | else: 540 | best, _ = self.best_scoring 541 | if test_case.targeting_score > best: 542 | self.best_scoring = relevant_info 543 | 544 | if test_case.status == Status.INTERESTING and ( 545 | self.result is None or sort_key(test_case.choices) < sort_key(self.result) 546 | ): 547 | self.result = test_case.choices 548 | 549 | def target(self) -> None: 550 | """If any test cases have had ``target()`` called on them, do a simple 551 | hill climbing algorithm to attempt to optimise that target score.""" 552 | if self.result is not None or self.best_scoring is None: 553 | return 554 | 555 | def adjust(i: int, step: int) -> bool: 556 | """Can we improve the score by changing choices[i] by ``step``?""" 557 | assert self.best_scoring is not None 558 | score, choices = self.best_scoring 559 | if choices[i] + step < 0 or choices[i].bit_length() >= 64: 560 | return False 561 | attempt = array("Q", choices) 562 | attempt[i] += step 563 | test_case = TestCase( 564 | prefix=attempt, random=self.random, max_size=BUFFER_SIZE 565 | ) 566 | self.test_function(test_case) 567 | assert test_case.status is not None 568 | return ( 569 | test_case.status >= Status.VALID 570 | and test_case.targeting_score is not None 571 | and test_case.targeting_score > score 572 | ) 573 | 574 | while self.should_keep_generating(): 575 | i = self.random.randrange(0, len(self.best_scoring[1])) 576 | sign = 0 577 | for k in [1, -1]: 578 | if not self.should_keep_generating(): 579 | return 580 | if adjust(i, k): 581 | sign = k 582 | break 583 | if sign == 0: 584 | continue 585 | 586 | k = 1 587 | while self.should_keep_generating() and adjust(i, sign * k): 588 | k *= 2 589 | 590 | while k > 0: 591 | while self.should_keep_generating() and adjust(i, sign * k): 592 | pass 593 | k //= 2 594 | 595 | def run(self) -> None: 596 | self.generate() 597 | self.target() 598 | self.shrink() 599 | 600 | def should_keep_generating(self) -> bool: 601 | return ( 602 | not 
self.test_is_trivial 603 | and self.result is None 604 | and self.valid_test_cases < self.max_examples 605 | and 606 | # We impose a limit on the maximum number of calls as 607 | # well as the maximum number of valid examples. This is 608 | # to avoid taking a prohibitively long time on tests which 609 | # have hard or impossible to satisfy preconditions. 610 | self.calls < self.max_examples * 10 611 | ) 612 | 613 | def generate(self) -> None: 614 | """Run random generation until either we have found an interesting 615 | test case or hit the limit of how many test cases we should 616 | evaluate.""" 617 | while self.should_keep_generating() and ( 618 | self.best_scoring is None or self.valid_test_cases <= self.max_examples // 2 619 | ): 620 | self.test_function( 621 | TestCase(prefix=(), random=self.random, max_size=BUFFER_SIZE) 622 | ) 623 | 624 | def shrink(self) -> None: 625 | """If we have found an interesting example, try shrinking it 626 | so that the choice sequence leading to our best example is 627 | shortlex smaller than the one we originally found. This improves 628 | the quality of the generated test case, as per our paper. 629 | 630 | https://drmaciver.github.io/papers/reduction-via-generation-preview.pdf 631 | """ 632 | if not self.result: 633 | return 634 | 635 | # Shrinking will typically try the same choice sequences over 636 | # and over again, so we cache the test function in order to 637 | # not end up reevaluating it in those cases. This also allows 638 | # us to catch cases where we try something that is e.g. a prefix 639 | # of something we've previously tried, which is guaranteed 640 | # not to work. 
641 | cached = CachedTestFunction(self.test_function) 642 | 643 | def consider(choices: array[int]) -> bool: 644 | if choices == self.result: 645 | return True 646 | return cached(choices) == Status.INTERESTING 647 | 648 | assert consider(self.result) 649 | 650 | # We are going to perform a number of transformations to 651 | # the current result, iterating until none of them make any 652 | # progress - i.e. until we make it through an entire iteration 653 | # of the loop without changing the result. 654 | prev = None 655 | while prev != self.result: 656 | prev = self.result 657 | 658 | # A note on weird loop order: We iterate backwards 659 | # through the choice sequence rather than forwards, 660 | # because later bits tend to depend on earlier bits 661 | # so it's easier to make changes near the end and 662 | # deleting bits at the end may allow us to make 663 | # changes earlier on that we'd have missed. 664 | # 665 | # Note that we do not restart the loop at the end 666 | # when we find a successful shrink. This is because 667 | # things we've already tried are less likely to work. 668 | # 669 | # If this guess is wrong, that's OK, this isn't a 670 | # correctness problem, because if we made a successful 671 | # reduction then we are not at a fixed point and 672 | # will restart the loop at the end the next time 673 | # round. In some cases this can result in performance 674 | # issues, but the end result should still be fine. 675 | 676 | # First try deleting each choice we made in chunks. 677 | # We try longer chunks because this allows us to 678 | # delete whole composite elements: e.g. deleting an 679 | # element from a generated list requires us to delete 680 | # both the choice of whether to include it and also 681 | # the element itself, which may involve more than one 682 | # choice. Some things will take more than 8 choices 683 | # in the sequence. That's too bad, we may not be 684 | # able to delete those. 
In Hypothesis proper we 685 | # record the boundaries corresponding to ``any`` 686 | # calls so that we can try deleting those, but 687 | # that's pretty high overhead and also a bunch of 688 | # slightly annoying code that it's not worth porting. 689 | # 690 | # We could instead do a quadratic amount of work 691 | # to try all boundaries, but in general we don't 692 | # want to do that because even a shrunk test case 693 | # can involve a relatively large number of choices. 694 | k = 8 695 | while k > 0: 696 | i = len(self.result) - k - 1 697 | while i >= 0: 698 | if i >= len(self.result): 699 | # Can happen if we successfully lowered 700 | # the value at i - 1 701 | i -= 1 702 | continue 703 | attempt = self.result[:i] + self.result[i + k :] 704 | assert len(attempt) < len(self.result) 705 | if not consider(attempt): 706 | # This fixes a common problem that occurs 707 | # when you have dependencies on some 708 | # length parameter. e.g. draw a number 709 | # between 0 and 10 and then draw that 710 | # many elements. This can't delete 711 | # everything that occurs that way, but 712 | # it can delete some things and often 713 | # will get us unstuck when nothing else 714 | # does. 715 | if i > 0 and attempt[i - 1] > 0: 716 | attempt[i - 1] -= 1 717 | if consider(attempt): 718 | i += 1 719 | i -= 1 720 | k -= 1 721 | 722 | def replace(values: Mapping[int, int]) -> bool: 723 | """Attempts to replace some indices in the current 724 | result with new values. Useful for some purely lexicographic 725 | reductions that we are about to perform.""" 726 | assert self.result is not None 727 | attempt = array("Q", self.result) 728 | for i, v in values.items(): 729 | # The size of self.result can change during shrinking. 730 | # If that happens, stop attempting to make use of these 731 | # replacements because some other shrink pass is better 732 | # to run now. 
733 | if i >= len(attempt): 734 | return False 735 | attempt[i] = v 736 | return consider(attempt) 737 | 738 | # Now we try replacing blocks of choices with zeroes. 739 | # Note that unlike the above we skip k = 1 because we 740 | # handle that in the next step. Often (but not always) 741 | # a block of all zeroes is the shortlex smallest value 742 | # that a region can be. 743 | k = 8 744 | 745 | while k > 1: 746 | i = len(self.result) - k 747 | while i >= 0: 748 | if replace({j: 0 for j in range(i, i + k)}): 749 | # If we've succeeded then all of [i, i + k] 750 | # is zero so we adjust i so that the next region 751 | # does not overlap with this at all. 752 | i -= k 753 | else: 754 | # Otherwise we might still be able to zero some 755 | # of these values but not the last one, so we 756 | # just go back one. 757 | i -= 1 758 | k -= 1 759 | 760 | # Now try replacing each choice with a smaller value 761 | # by doing a binary search. This will replace n with 0 or n - 1 762 | # if possible, but will also more efficiently replace it with 763 | # a smaller number than doing multiple subtractions would. 764 | i = len(self.result) - 1 765 | while i >= 0: 766 | # Attempt to replace 767 | bin_search_down(0, self.result[i], lambda v: replace({i: v})) 768 | i -= 1 769 | 770 | # NB from here on this is just showing off cool shrinker tricks and 771 | # you probably don't need to worry about it and can skip these bits 772 | # unless they're easy and you want bragging rights for how much 773 | # better you are at shrinking than the local QuickCheck equivalent. 774 | 775 | # Try sorting out of order ranges of choices, as ``sort(x) <= x``, 776 | # so this is always a lexicographic reduction. 
def bin_search_down(lo: int, hi: int, f: Callable[[int], bool]) -> int:
    """Binary search downwards for a locally minimal ``n`` in
    ``[lo, hi]`` for which ``f(n)`` is True.

    ``f(hi)`` is taken on trust to be True and is never evaluated.

    If ``f(lo)`` is True then ``lo`` is returned straight away.
    Otherwise the result ``n`` is such that ``f(n)`` is True and
    ``f(n - 1)`` is False. That is the only promise made: when
    ``f`` is not monotonic the result is a local minimum and not
    necessarily the smallest value satisfying ``f``.
    """
    if f(lo):
        return lo

    # Invariant: ``f(rejected)`` has been seen to be False, and
    # ``f(accepted)`` is True (either observed, or assumed in the
    # case of the initial ``hi``).
    rejected, accepted = lo, hi
    while accepted - rejected > 1:
        probe = rejected + (accepted - rejected) // 2
        if f(probe):
            accepted = probe
        else:
            rejected = probe
    return accepted
class DirectoryDB:
    """A very basic key/value store that just uses a file system
    directory to store values. You absolutely don't have to copy this
    and should feel free to use a more reasonable key/value store
    if you have easy access to one.

    Each key is stored in its own file inside ``directory``. The file
    name is the first ten hex digits of the SHA-1 of the UTF-8 encoded
    key, and the file contents are the raw value bytes.
    """

    def __init__(self, directory: str):
        """Use ``directory`` as the backing store, creating it (and
        any missing parent directories) if it does not exist yet."""
        self.directory = directory
        # exist_ok makes reopening an existing database a no-op.
        os.makedirs(directory, exist_ok=True)

    def __to_file(self, key: str) -> str:
        """Return the path of the file that holds the value for ``key``."""
        return os.path.join(
            self.directory, hashlib.sha1(key.encode("utf-8")).hexdigest()[:10]
        )

    def __setitem__(self, key: str, value: bytes) -> None:
        """Store ``value`` under ``key``, replacing any previous value."""
        with open(self.__to_file(key), "wb") as o:
            o.write(value)

    def get(self, key: str) -> Optional[bytes]:
        """Return the bytes stored under ``key``, or None if there is
        no entry for it."""
        # Just try to open the file rather than checking for existence
        # first, so that a file removed in between cannot turn a
        # missing key into an unexpected error.
        try:
            with open(self.__to_file(key), "rb") as i:
                return i.read()
        except FileNotFoundError:
            return None

    def __delitem__(self, key: str) -> None:
        """Remove the entry for ``key``.

        Raises ``KeyError(key)`` if there is no such entry.
        """
        try:
            os.unlink(self.__to_file(key))
        except FileNotFoundError:
            # The missing file is an implementation detail; callers
            # only need to know which key was absent.
            raise KeyError(key) from None
isort==5.13.2 10 | more-itertools==10.2.0 11 | mypy-extensions==1.0.0 12 | packaging==24.0 13 | pathspec==0.12.1 14 | platformdirs==4.2.0 15 | pluggy==1.4.0 16 | py==1.11.0 17 | pyparsing==3.1.2 18 | pytest==8.1.1 19 | regex==2023.12.25 20 | six==1.16.0 21 | sortedcontainers==2.4.0 22 | toml==0.10.2 23 | typed-ast==1.5.5 24 | wcwidth==0.2.13 25 | zipp==3.18.0 26 | -------------------------------------------------------------------------------- /test_minithesis.py: -------------------------------------------------------------------------------- 1 | # This file is part of Minithesis, which may be found at 2 | # https://github.com/DRMacIver/minithesis 3 | # 4 | # This work is copyright (C) 2020 David R. MacIver. 5 | # 6 | # This Source Code Form is subject to the terms of the Mozilla Public License, 7 | # v. 2.0. If a copy of the MPL was not distributed with this file, You can 8 | # obtain one at https://mozilla.org/MPL/2.0/. 9 | 10 | from collections import defaultdict 11 | from random import Random 12 | 13 | import pytest 14 | from hypothesis import HealthCheck, Phase, given, note, reject, settings 15 | from hypothesis import strategies as st 16 | 17 | import minithesis as mt 18 | from minithesis import CachedTestFunction, DirectoryDB, Frozen, Possibility, Status 19 | from minithesis import TestCase as TC 20 | from minithesis import TestingState as State 21 | from minithesis import ( 22 | Unsatisfiable, 23 | integers, 24 | just, 25 | lists, 26 | mix_of, 27 | nothing, 28 | run_test, 29 | tuples, 30 | ) 31 | 32 | 33 | @Possibility 34 | def list_of_integers(test_case): 35 | result = [] 36 | while test_case.weighted(0.9): 37 | result.append(test_case.choice(10000)) 38 | return result 39 | 40 | 41 | @pytest.mark.parametrize("seed", range(10)) 42 | def test_finds_small_list(capsys, seed): 43 | 44 | with pytest.raises(AssertionError): 45 | 46 | @run_test(database={}, random=Random(seed)) 47 | def _(test_case): 48 | ls = test_case.any(lists(integers(0, 10000))) 49 | assert 
sum(ls) <= 1000 50 | 51 | captured = capsys.readouterr() 52 | 53 | assert captured.out.strip() == "any(lists(integers(0, 10000))): [1001]" 54 | 55 | 56 | @pytest.mark.parametrize("seed", range(10)) 57 | def test_finds_small_list_even_with_bad_lists(capsys, seed): 58 | """Minithesis can't really handle shrinking arbitrary 59 | monadic bind, but length parameters are a common case 60 | of monadic bind that it has a little bit of special 61 | casing for. This test ensures that that special casing 62 | works. 63 | 64 | The problem is that if you generate a list by drawing 65 | a length and then drawing that many elements, you can 66 | end up with something like ``[1001, 0, 0]`` then 67 | deleting those zeroes in the middle is a pain. minithesis 68 | will solve this by first sorting those elements, so that 69 | we have ``[0, 0, 1001]``, and then lowering the length 70 | by two, turning it into ``[1001]`` as desired. 71 | """ 72 | 73 | with pytest.raises(AssertionError): 74 | 75 | @Possibility 76 | def bad_list(test_case): 77 | n = test_case.choice(10) 78 | return [test_case.choice(10000) for _ in range(n)] 79 | 80 | @run_test(database={}, random=Random(seed)) 81 | def _(test_case): 82 | ls = test_case.any(bad_list) 83 | assert sum(ls) <= 1000 84 | 85 | captured = capsys.readouterr() 86 | 87 | assert captured.out.strip() == "any(bad_list): [1001]" 88 | 89 | 90 | def test_reduces_additive_pairs(capsys): 91 | 92 | with pytest.raises(AssertionError): 93 | 94 | @run_test(database={}, max_examples=10000) 95 | def _(test_case): 96 | m = test_case.choice(1000) 97 | n = test_case.choice(1000) 98 | assert m + n <= 1000 99 | 100 | captured = capsys.readouterr() 101 | 102 | assert [c.strip() for c in captured.out.splitlines()] == [ 103 | "choice(1000): 1", 104 | "choice(1000): 1000", 105 | ] 106 | 107 | 108 | def test_reuses_results_from_the_database(tmpdir): 109 | db = DirectoryDB(tmpdir) 110 | count = 0 111 | 112 | def run(): 113 | with pytest.raises(AssertionError): 114 | 115 | 
@run_test(database=db) 116 | def _(test_case): 117 | nonlocal count 118 | count += 1 119 | assert test_case.choice(10000) < 10 120 | 121 | run() 122 | 123 | assert len(tmpdir.listdir()) == 1 124 | prev_count = count 125 | 126 | run() 127 | 128 | assert len(tmpdir.listdir()) == 1 129 | assert count == prev_count + 2 130 | 131 | 132 | def test_test_cases_satisfy_preconditions(): 133 | @run_test() 134 | def _(test_case): 135 | n = test_case.choice(10) 136 | test_case.assume(n != 0) 137 | assert n != 0 138 | 139 | 140 | def test_error_on_too_strict_precondition(): 141 | with pytest.raises(Unsatisfiable): 142 | 143 | @run_test() 144 | def _(test_case): 145 | n = test_case.choice(10) 146 | test_case.reject() 147 | 148 | 149 | def test_error_on_unbounded_test_function(monkeypatch): 150 | monkeypatch.setattr(mt, "BUFFER_SIZE", 10) 151 | with pytest.raises(Unsatisfiable): 152 | 153 | @run_test(max_examples=5) 154 | def _(test_case): 155 | while True: 156 | test_case.choice(10) 157 | 158 | 159 | def test_function_cache(): 160 | def tf(tc): 161 | if tc.choice(1000) >= 200: 162 | tc.mark_status(Status.INTERESTING) 163 | if tc.choice(1) == 0: 164 | tc.reject() 165 | 166 | state = State(Random(0), tf, 100) 167 | 168 | cache = CachedTestFunction(state.test_function) 169 | 170 | assert cache([1, 1]) == Status.VALID 171 | assert cache([1]) == Status.OVERRUN 172 | assert cache([1000]) == Status.INTERESTING 173 | assert cache([1000]) == Status.INTERESTING 174 | assert cache([1000, 1]) == Status.INTERESTING 175 | 176 | assert state.calls == 2 177 | 178 | 179 | @pytest.mark.parametrize("max_examples", range(1, 100)) 180 | def test_max_examples_is_not_exceeded(max_examples): 181 | """Targeting has a number of places it checks for 182 | whether we've exceeded the generation limits. This 183 | makes sure we've checked them all. 
@pytest.mark.parametrize("seed", range(100))
def test_finds_a_local_maximum(seed):
    """The target score here has a single maximum, at ``(500, 500)``,
    and the assertion only fails at exactly that point. So for the
    test to fail (as we require, for every seed) targeting has to
    climb all the way to the maximum within the example budget.
    """

    with pytest.raises(AssertionError):

        @run_test(database={}, random=Random(seed), max_examples=200, quiet=True)
        def _(tc):
            x = tc.choice(1000)
            y = tc.choice(1000)
            # Negated because targeting maximises the score, and we
            # want to minimise the distance from (500, 500).
            squared_distance = (x - 500) ** 2 + (y - 500) ** 2
            tc.target(-squared_distance)
            assert (x, y) != (500, 500)
| test_case.choice(1000) 258 | score = test_case.choice(big) 259 | test_case.target(score) 260 | assert score < big 261 | 262 | captured = capsys.readouterr() 263 | 264 | assert [c.strip() for c in captured.out.splitlines()] == [ 265 | "choice(1000): 0", 266 | "choice(1000): 0", 267 | f"choice({big}): {big}", 268 | ] 269 | 270 | 271 | def test_can_target_a_score_downwards(capsys): 272 | with pytest.raises(AssertionError): 273 | 274 | @run_test(database={}, max_examples=1000) 275 | def _(test_case): 276 | n = test_case.choice(1000) 277 | m = test_case.choice(1000) 278 | score = n + m 279 | test_case.target(-score) 280 | assert score > 0 281 | 282 | captured = capsys.readouterr() 283 | 284 | assert [c.strip() for c in captured.out.splitlines()] == [ 285 | "choice(1000): 0", 286 | "choice(1000): 0", 287 | ] 288 | 289 | 290 | def test_prints_a_top_level_weighted(capsys): 291 | with pytest.raises(AssertionError): 292 | 293 | @run_test(database={}, max_examples=1000) 294 | def _(test_case): 295 | assert test_case.weighted(0.5) 296 | 297 | captured = capsys.readouterr() 298 | assert captured.out.strip() == "weighted(0.5): False" 299 | 300 | 301 | def test_errors_when_using_frozen(): 302 | tc = TC.for_choices([0]) 303 | tc.status = Status.VALID 304 | 305 | with pytest.raises(Frozen): 306 | tc.mark_status(Status.INTERESTING) 307 | 308 | with pytest.raises(Frozen): 309 | tc.choice(10) 310 | 311 | with pytest.raises(Frozen): 312 | tc.forced_choice(10) 313 | 314 | 315 | def test_errors_on_too_large_choice(): 316 | tc = TC.for_choices([0]) 317 | with pytest.raises(ValueError): 318 | tc.choice(2 ** 64) 319 | 320 | 321 | def test_can_choose_full_64_bits(): 322 | @run_test() 323 | def _(tc): 324 | tc.choice(2 ** 64 - 1) 325 | 326 | 327 | def test_mapped_possibility(): 328 | @run_test() 329 | def _(tc): 330 | n = tc.any(integers(0, 5).map(lambda n: n * 2)) 331 | assert n % 2 == 0 332 | 333 | 334 | def test_selected_possibility(): 335 | @run_test() 336 | def _(tc): 337 | n = 
tc.any(integers(0, 5).satisfying(lambda n: n % 2 == 0)) 338 | assert n % 2 == 0 339 | 340 | 341 | def test_bound_possibility(): 342 | @run_test() 343 | def _(tc): 344 | m, n = tc.any( 345 | integers(0, 5).bind(lambda m: tuples(just(m), integers(m, m + 10),)) 346 | ) 347 | 348 | assert m <= n <= m + 10 349 | 350 | 351 | def test_cannot_witness_nothing(): 352 | with pytest.raises(Unsatisfiable): 353 | 354 | @run_test() 355 | def _(tc): 356 | tc.any(nothing()) 357 | 358 | 359 | def test_cannot_witness_empty_mix_of(): 360 | with pytest.raises(Unsatisfiable): 361 | 362 | @run_test() 363 | def _(tc): 364 | tc.any(mix_of()) 365 | 366 | 367 | def test_can_draw_mixture(): 368 | @run_test() 369 | def _(tc): 370 | m = tc.any(mix_of(integers(-5, 0), integers(2, 5))) 371 | assert -5 <= m <= 5 372 | assert m != 1 373 | 374 | 375 | def test_target_and_reduce(capsys): 376 | """This test is very hard to trigger without targeting, 377 | and targeting will tend to overshoot the score, so we 378 | will see multiple interesting test cases before 379 | shrinking.""" 380 | with pytest.raises(AssertionError): 381 | 382 | @run_test(database={}) 383 | def _(tc): 384 | m = tc.choice(100000) 385 | tc.target(m) 386 | assert m <= 99900 387 | 388 | captured = capsys.readouterr() 389 | 390 | assert captured.out.strip() == "choice(100000): 99901" 391 | 392 | 393 | def test_impossible_weighted(): 394 | with pytest.raises(Failure): 395 | 396 | @run_test(database={}) 397 | def _(tc): 398 | tc.choice(1) 399 | for _ in range(10): 400 | if tc.weighted(0.0): 401 | assert False 402 | if tc.choice(1): 403 | raise Failure() 404 | 405 | 406 | def test_guaranteed_weighted(): 407 | with pytest.raises(Failure): 408 | 409 | @run_test(database={}) 410 | def _(tc): 411 | if tc.weighted(1.0): 412 | tc.choice(1) 413 | raise Failure() 414 | else: 415 | assert False 416 | 417 | 418 | def test_size_bounds_on_list(): 419 | @run_test(database={}) 420 | def _(tc): 421 | ls = tc.any(lists(integers(0, 10), min_size=1, 
@settings(
    suppress_health_check=list(HealthCheck),
    deadline=None,
    report_multiple_bugs=False,
    max_examples=50,
)
@given(st.data())
def test_give_minithesis_a_workout(data):
    """Use Hypothesis to drive minithesis with an arbitrary test function.

    The test function is represented as a lazily built tree: each node
    holds a method call to make on the test case (drawn from Hypothesis
    the first time the node is reached) and a mapping from the result of
    that call to the child node to continue with.

    Unless the generated function failed, we check that minithesis did
    not count more valid examples than ``max_examples`` and did not make
    more than ``max_examples * 10`` calls in total.

    If anything goes wrong, a note is attached which renders the tree
    as the source code of a standalone test reproducing the run.
    """
    seed = data.draw(st.integers(0, 1000))
    rnd = Random(seed)
    max_examples = data.draw(st.integers(1, 100))

    # Each entry is (method name, *arguments) for a call on the test case.
    method_call = st.one_of(
        st.tuples(
            st.just("mark_status"),
            st.sampled_from((Status.INVALID, Status.VALID, Status.INTERESTING)),
        ),
        st.tuples(st.just("target"), st.floats(0.0, 1.0)),
        st.tuples(st.just("choice"), st.integers(0, 1000)),
        st.tuples(st.just("weighted"), st.floats(0.0, 1.0)),
    )

    def new_node():
        # [method call (None until first visited), children by call result]
        return [None, defaultdict(new_node)]

    tree = new_node()

    database = {}
    failed = False
    call_count = 0
    valid_count = 0

    try:
        try:

            @run_test(
                max_examples=max_examples, random=rnd, database=database, quiet=True,
            )
            def test_function(test_case):
                nonlocal call_count, valid_count, failed
                node = tree
                call_count += 1

                # Walk down the tree, extending it as needed, until one
                # of the calls ends the test case by raising.
                while True:
                    if node[0] is None:
                        node[0] = data.draw(method_call)
                    if node[0] == ("mark_status", Status.INTERESTING):
                        failed = True
                        raise Failure()
                    if node[0] == ("mark_status", Status.VALID):
                        valid_count += 1
                    name, *rest = node[0]

                    result = getattr(test_case, name)(*rest)
                    node = node[1][result]

        except Failure:
            failed = True
        except Unsatisfiable:
            reject()

        if not failed:
            assert valid_count <= max_examples
            assert call_count <= max_examples * 10
    except Exception:

        @note
        def tree_as_code():
            """If the test fails, print out a test that will trigger that
            failure rather than making me hand-edit it into something useful."""

            # Pick the first test name not already defined in this module.
            i = 1
            while True:
                test_name = f"test_failure_from_hypothesis_{i}"
                if test_name not in globals():
                    break
                i += 1

            # The body of ``_`` is emitted at indent 12 (see the call
            # to ``recur`` below), so the enclosing lines sit at
            # indents 0, 4, 8 and 8.
            lines = [
                f"def {test_name}():",
                "    with pytest.raises(Failure):",
                f"        @run_test(max_examples=1000, database={{}}, random=Random({seed}))",
                "        def _(tc):",
            ]

            varcount = 0

            def recur(indent, node):
                nonlocal varcount

                pad = " " * indent

                if node[0] is None:
                    # Never reached during the run, so we don't know
                    # what would have happened here.
                    lines.append(pad + "tc.reject()")
                    return

                method, *args = node[0]
                if method == "mark_status":
                    if args[0] == Status.INTERESTING:
                        lines.append(pad + "raise Failure()")
                    elif args[0] == Status.VALID:
                        lines.append(pad + "return")
                    elif args[0] == Status.INVALID:
                        lines.append(pad + "tc.reject()")
                    else:
                        lines.append(pad + f"tc.mark_status(Status.{args[0].name})")
                elif method == "target":
                    lines.append(pad + f"tc.target({args[0]})")
                    # There is at most one child here, stored under
                    # whatever the ``target`` call returned.
                    for child in node[1].values():
                        recur(indent, child)
                elif method == "weighted":
                    cond = f"tc.weighted({args[0]})"
                    assert len(node[1]) > 0
                    if len(node[1]) == 2:
                        lines.append(pad + f"if {cond}:")
                        recur(indent + 4, node[1][True])
                        lines.append(pad + "else:")
                        recur(indent + 4, node[1][False])
                    elif True in node[1]:
                        lines.append(pad + f"if {cond}:")
                        recur(indent + 4, node[1][True])
                    else:
                        assert False in node[1]
                        lines.append(pad + f"if not {cond}:")
                        recur(indent + 4, node[1][False])
                else:
                    varcount += 1
                    varname = f"n{varcount}"
                    lines.append(
                        pad + f"{varname} = tc.{method}({', '.join(map(repr, args))})"
                    )
                    first = True
                    for k, v in node[1].items():
                        # Invalid children are covered by the final
                        # catch-all rejection.
                        if v[0] == ("mark_status", Status.INVALID):
                            continue
                        lines.append(
                            pad + ("if" if first else "elif") + f" {varname} == {k}:"
                        )
                        first = False
                        recur(indent + 4, v)
                    if first:
                        # No branch was emitted, so an ``else:`` would
                        # have no ``if`` to attach to.
                        lines.append(pad + "tc.reject()")
                    else:
                        lines.append(pad + "else:")
                        lines.append(" " * (indent + 4) + "tc.reject()")

            recur(12, tree)
            return "\n".join(lines)

        # Bare raise so the original traceback is left untouched.
        raise