├── cabal.project ├── shell.nix ├── .gitmodules ├── CHANGELOG.md ├── .gitignore ├── src ├── Trynocular.hs ├── Trynocular │ ├── TestHarness.hs │ ├── Generable.hs │ ├── PartialKeySet.hs │ ├── Key.hs │ ├── Generator.hs │ └── Standardizer.hs └── Demo.hs ├── nix ├── sources.json └── sources.nix ├── default.nix ├── LICENSE ├── trynocular.cabal ├── README.md ├── Probabilities.md └── test └── Main.hs /cabal.project: -------------------------------------------------------------------------------- 1 | packages: 2 | . 3 | thirdparty/* -------------------------------------------------------------------------------- /shell.nix: -------------------------------------------------------------------------------- 1 | (import ./default.nix { }).shell 2 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "thirdparty/StrictCheck"] 2 | path = thirdparty/StrictCheck 3 | url = git@github.com:shapr/StrictCheck.git 4 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Revision history for trynocular 2 | 3 | ## 0.1.0.0 -- YYYY-mm-dd 4 | 5 | * First version. Released on an unsuspecting world. 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | dist-* 3 | cabal-dev 4 | *.o 5 | *.hi 6 | *.hie 7 | *.chi 8 | *.chs.h 9 | *.dyn_o 10 | *.dyn_hi 11 | .hpc 12 | .hsenv 13 | .cabal-sandbox/ 14 | cabal.sandbox.config 15 | *.prof 16 | *.aux 17 | *.hp 18 | *.eventlog 19 | .stack-work/ 20 | cabal.project.local 21 | cabal.project.local~ 22 | .HTF/ 23 | .ghc.environment.* 24 | -------------------------------------------------------------------------------- /src/Trynocular.hs: -------------------------------------------------------------------------------- 1 | -- | TODO: Module documentation 2 | module Trynocular 3 | ( module Trynocular.Generator, 4 | module Trynocular.Key, 5 | module Trynocular.Generable, 6 | module Trynocular.PartialKeySet 7 | ) 8 | where 9 | 10 | import Trynocular.Generable 11 | import Trynocular.Generator 12 | import Trynocular.Key 13 | import Trynocular.PartialKeySet 14 | -------------------------------------------------------------------------------- /nix/sources.json: -------------------------------------------------------------------------------- 1 | { 2 | "nixpkgs": { 3 | "branch": "nixpkgs-unstable", 4 | "description": "Nix Packages collection", 5 | "homepage": null, 6 | "owner": "nixos", 7 | "repo": "nixpkgs", 8 | "rev": "7d050b98e51cdbdd88ad960152d398d41c7ff5b4", 9 | "sha256": "1lwlnd0a48n07qbyn2f30zspn9is6s2r87ygchn27h30k8xqk2q8", 10 | "type": "tarball", 11 | "url": "https://github.com/nixos/nixpkgs/archive/7d050b98e51cdbdd88ad960152d398d41c7ff5b4.tar.gz", 12 | "url_template": "https://github.com///archive/.tar.gz" 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /default.nix: -------------------------------------------------------------------------------- 1 | { compiler ? "ghc928" }: 2 | 3 | let 4 | sources = import ./nix/sources.nix; 5 | pkgs = import sources.nixpkgs { }; 6 | 7 | gitignore = pkgs.nix-gitignore.gitignoreSourcePure [ ./.gitignore ]; 8 | 9 | myHaskellPackages = pkgs.haskell.packages.${compiler}.override { 10 | overrides = hself: hsuper: { 11 | "trynocular" = hself.callCabal2nix "trynocular" (gitignore ./.) { }; 12 | }; 13 | }; 14 | 15 | shell = myHaskellPackages.shellFor { 16 | packages = p: [ p."trynocular" ]; 17 | buildInputs = [ 18 | myHaskellPackages.haskell-language-server 19 | pkgs.haskellPackages.cabal-install 20 | pkgs.haskellPackages.ghcid 21 | pkgs.haskellPackages.ormolu 22 | pkgs.haskellPackages.hlint 23 | pkgs.haskellPackages.hasktags 24 | pkgs.niv 25 | pkgs.nixpkgs-fmt 26 | ]; 27 | withHoogle = true; 28 | }; 29 | 30 | exe = pkgs.haskell.lib.justStaticExecutables (myHaskellPackages."trynocular"); 31 | 32 | docker = pkgs.dockerTools.buildImage { 33 | name = "trynocular"; 34 | config.Cmd = [ "${exe}/bin/trynocular" ]; 35 | }; 36 | in { 37 | inherit shell; 38 | inherit exe; 39 | inherit docker; 40 | inherit myHaskellPackages; 41 | "trynocular" = myHaskellPackages."trynocular"; 42 | } 43 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2022, Shae Erisson 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /trynocular.cabal: -------------------------------------------------------------------------------- 1 | cabal-version: 2.4 2 | name: trynocular 3 | version: 0.1.0.0 4 | -- synopsis: 5 | -- description: 6 | -- category: 7 | bug-reports: https://github.com/shapr/trynocular/issues 8 | license: MIT 9 | author: Shae Erisson and Chris Smith 10 | maintainer: shae@scannedinavian.com 11 | 12 | extra-source-files: 13 | CHANGELOG.md 14 | README.md 15 | 16 | library 17 | exposed-modules: Trynocular 18 | , Trynocular.Generable 19 | , Trynocular.Generator 20 | , Trynocular.Key 21 | , Trynocular.PartialKeySet 22 | , Trynocular.Standardizer 23 | , Trynocular.TestHarness 24 | , Demo 25 | build-depends: base 26 | , fingertree 27 | , hpc 28 | , math-functions 29 | , random 30 | , semigroupoids 31 | , universe-base 32 | , megaparsec 33 | , bytestring 34 | , containers 35 | hs-source-dirs: src 36 | default-language: GHC2021 37 | ghc-options: -Wall 38 | 39 | test-suite trynocular-test 40 | type: exitcode-stdio-1.0 41 | main-is: Main.hs 42 | build-depends: base 43 | , hspec 44 | , random 45 | , random-shuffle 46 | , trynocular 47 | , QuickCheck 48 | , bytestring 49 | , containers 50 | hs-source-dirs: test 51 | default-language: GHC2021 52 | ghc-options: -Wall 53 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # trynocular 2 | library for lazy generators with observable demand 3 | 4 | # Why? 5 | 6 | I want to mutate an input in some way that the new input runs more of the program being tested. 7 | 8 | # the inspiration 9 | 10 | Could I run gradient descent on random test inputs? 11 | 12 | To be able to do that, I would need some way to generate an input and then find a "nearby" input to exercise parts of the program that haven't been evaluated yet. 13 | 14 | Trynocular is an attempt to define "nearby" as well as a framework for detecting which parts of the input were used by the program under test. 15 | 16 | # the big idea 17 | You can represent any value by a binary tree of the choices you made to reach that value. 18 | 19 | # `Generator`, `GenKey`, and Strictness 20 | 21 | The fundamental idea here is that a `rerunGenerator` constructs values from 22 | `GenKey`s in a strictness-preserving way. That is to say that partially 23 | forcing the result of `rerunGenerator` will force the nodes of the `GenKey` 24 | structure only when corresponding parts of the resulting value are forced. 25 | 26 | # Use plan 27 | 28 | To test with a Generator, one would: 29 | 30 | 1. Initialize probabilities to 0.5 for each Choice node of the generator. 31 | 2. Initialize the coverage expectation to some initial value. 32 | 3. Loop as follows: 33 | 1. Generate a random test case that differs from past test cases in some 34 | portion that was forced by that test case. 35 | 2. Run the test and see if it fails. 36 | 3. Observe the demand on the key, and the percent of additional coverage 37 | generated by the test. 38 | 4. Compare the observed additional coverage to the expectation. 39 | a. If the observed additional coverage is less than the expectation, 40 | update probabilities of each Choice node that was forced in the key 41 | to decrease their probability. 42 | b. If the observed additional coverage is greater than the expectation, 43 | update probabilities of each Choice node that was forced in the key 44 | to increase their probability. 45 | 5. Update the coverage expectation using the observed additional coverage, 46 | using an exponentially weighted moving average. That is, new coverage 47 | expectation = alpha * old coverage expectation + (1 - alpha) * observed 48 | new coverage. 49 | 6. Repeat until some termination condition is satisfied. 50 | -------------------------------------------------------------------------------- /src/Trynocular/TestHarness.hs: -------------------------------------------------------------------------------- 1 | module Trynocular.TestHarness where 2 | 3 | import Trace.Hpc.Reflect (examineTix) 4 | import Trace.Hpc.Tix (Tix (..), TixModule (..)) 5 | import Trynocular.Generator 6 | ( Generator, 7 | adjustProbability, 8 | fromKey, 9 | keyProbability, 10 | pickKey, 11 | ) 12 | import Trynocular.Key (PartialKey, spy) 13 | import Trynocular.Standardizer 14 | ( NormalStandardizer, 15 | Standardizer (..), 16 | normalStandardizer, 17 | ) 18 | 19 | data TestState 20 | = forall params. 21 | Show params => 22 | TestState 23 | Int 24 | (Generator params) 25 | (params -> IO ()) 26 | NormalStandardizer 27 | 28 | initialTestState :: 29 | forall params. 30 | Show params => 31 | Generator params -> 32 | (params -> IO ()) -> 33 | TestState 34 | initialTestState generator action = 35 | TestState 36 | 0 37 | generator 38 | action 39 | (normalStandardizer 5 5 0.1) 40 | 41 | updateGenerator :: 42 | -- | quantile for the benefit 43 | Double -> 44 | -- | demanded portion of the key 45 | PartialKey -> 46 | -- | generator to update 47 | Generator a -> 48 | -- | new updated generator 49 | Generator a 50 | updateGenerator pct pkey gen = adjustProbability pkey targetKeyProb gen 51 | where 52 | priorKeyProb = keyProbability gen pkey 53 | priorBenefitProb = 54 | priorKeyProb * 0.5 + (1 - priorKeyProb) * (1 - pct) 55 | idealKeyProb = 0.5 * priorKeyProb / priorBenefitProb 56 | targetKeyProb = (priorKeyProb + idealKeyProb) / 2 57 | 58 | testHarness :: TestState -> IO () 59 | testHarness (TestState n generator action coverage) = do 60 | key <- pickKey generator 61 | print $ fromKey generator key 62 | (pkey, ((), observedCoverage)) <- 63 | spy key (observeCoverage . action . fromKey generator) 64 | putStrLn $ " " <> show observedCoverage 65 | let (coverage', coveragePct) = 66 | percentile coverage $ fromIntegral observedCoverage 67 | let generator' = updateGenerator coveragePct pkey generator 68 | let state' = TestState (n + 1) generator' action coverage' 69 | if (shouldContinue state') 70 | then testHarness state' 71 | else do 72 | putStrLn $ "Ran this many test cases: " <> show (n + 1) 73 | 74 | -- | at the 95 percentile, we should expect at least 1 additional coverage 75 | shouldContinue :: TestState -> Bool 76 | shouldContinue (TestState _ _ _ standardizer) = 77 | maybe False (> threshold) (fromPercentile standardizer targetPercentile) 78 | where 79 | targetPercentile = 0.99 -- percentile where we expect more coverage 80 | threshold = 1 -- number of additional measured regions we expect to see 81 | 82 | observeCoverage :: IO a -> IO (a, Int) 83 | observeCoverage a = do 84 | preTix <- examineTix 85 | a' <- a 86 | afterTix <- examineTix 87 | pure (a', additionalTix preTix afterTix) 88 | 89 | additionalTix :: Tix -> Tix -> Int 90 | additionalTix old new = tixCount new - tixCount old 91 | 92 | -- How many regions were executed at least once for this module? 93 | tixModuleCount :: TixModule -> Int 94 | tixModuleCount (TixModule _ _ _ regions) = sum $ 1 <$ filter (> 0) regions 95 | 96 | -- How many regions were executed at least once for all these modules? 97 | tixCount :: Tix -> Int 98 | tixCount (Tix ms) = sum $ map tixModuleCount ms 99 | 100 | smartCheck :: Show params => Generator params -> (params -> IO ()) -> IO () 101 | smartCheck generator action = testHarness (initialTestState generator action) 102 | -------------------------------------------------------------------------------- /src/Trynocular/Generable.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DefaultSignatures #-} 2 | {-# LANGUAGE ExplicitNamespaces #-} 3 | 4 | -- | The 'Generable' type class contains types that have a default 'Generator' 5 | -- capable of generating any value of that type. 6 | module Trynocular.Generable (Generable (..), GenericGenerable (..), genGeneric) where 7 | 8 | import Data.Char (chr) 9 | import Data.Functor.Alt (Alt (..)) 10 | import Data.Int (Int16, Int32, Int64, Int8) 11 | import Data.Word (Word16, Word32, Word64, Word8) 12 | import GHC.Generics 13 | ( Generic (..), 14 | K1 (K1), 15 | M1 (M1), 16 | U1 (..), 17 | type (:*:) (..), 18 | type (:+:) (..), 19 | ) 20 | import Trynocular.Generator 21 | import Unsafe.Coerce 22 | 23 | -- | A 'Generator' that produces an arbitrary 'Integer'. 24 | genInteger :: Generator Integer 25 | genInteger = pure 0 genPositive negate <$> genPositive 26 | 27 | -- | A 'Generator' that produces an arbitrary 'Float'. 28 | genFloat :: Generator Float 29 | genFloat 30 | | isIEEE (undefined :: Float) 31 | && floatRadix (undefined :: Float) == 2 32 | && floatDigits (undefined :: Float) == 24 33 | && floatRange (undefined :: Float) == (-125, 128) = 34 | unsafeCoerce <$> (genAny @Word32) 35 | | otherwise = error "Float is not IEEE single-precision on this platform!" 36 | 37 | -- | A 'Generator' that produces an arbitrary 'Double'. 38 | genDouble :: Generator Double 39 | genDouble 40 | | isIEEE (undefined :: Double) 41 | && floatRadix (undefined :: Double) == 2 42 | && floatDigits (undefined :: Double) == 53 43 | && floatRange (undefined :: Double) == (-1021, 1024) = 44 | unsafeCoerce <$> (genAny @Word64) 45 | | otherwise = error "Double is not IEEE double-precision on this platform!" 46 | 47 | -- | A type class for types that can be generated by a 'Generator'. For types 48 | -- that are an instance of this class, 'genAny' can be used as a generator for 49 | -- an arbitrary value of that type. 50 | class Generable a where 51 | genAny :: Generator a 52 | default genAny :: (Generic a, GenericGenerable (Rep a)) => Generator a 53 | genAny = genGeneric 54 | 55 | instance Generable () 56 | 57 | instance Generable Bool 58 | 59 | instance Generable Char where genAny = fmap chr (genRange (0, 0x10FFFF)) 60 | 61 | instance Generable Word8 where genAny = genBoundedIntegral 62 | 63 | instance Generable Word16 where genAny = genBoundedIntegral 64 | 65 | instance Generable Word32 where genAny = genBoundedIntegral 66 | 67 | instance Generable Word64 where genAny = genBoundedIntegral 68 | 69 | instance Generable Word where genAny = genBoundedIntegral 70 | 71 | instance Generable Int8 where genAny = genBoundedIntegral 72 | 73 | instance Generable Int16 where genAny = genBoundedIntegral 74 | 75 | instance Generable Int32 where genAny = genBoundedIntegral 76 | 77 | instance Generable Int64 where genAny = genBoundedIntegral 78 | 79 | instance Generable Int where genAny = genBoundedIntegral 80 | 81 | instance Generable Integer where genAny = genInteger 82 | 83 | instance Generable Float where genAny = genFloat 84 | 85 | instance Generable Double where genAny = genDouble 86 | 87 | instance Generable a => Generable (Maybe a) 88 | 89 | instance (Generable a, Generable b) => Generable (Either a b) 90 | 91 | instance (Generable a, Generable b) => Generable (a, b) 92 | 93 | instance (Generable a, Generable b, Generable c) => Generable (a, b, c) 94 | 95 | instance 96 | (Generable a, Generable b, Generable c, Generable d) => 97 | Generable (a, b, c, d) 98 | 99 | instance 100 | (Generable a, Generable b, Generable c, Generable d, Generable e) => 101 | Generable (a, b, c, d, e) 102 | 103 | instance Generable a => Generable [a] 104 | 105 | -- | A type class for GHC Generics constructors that can be used to generate a 106 | -- Generable instance for a type. 107 | class GenericGenerable f where 108 | genGenericRep :: Generator (f p) 109 | 110 | instance GenericGenerable U1 where 111 | genGenericRep = pure U1 112 | 113 | instance 114 | (GenericGenerable f, GenericGenerable g) => 115 | GenericGenerable (f :+: g) 116 | where 117 | genGenericRep = L1 <$> genGenericRep R1 <$> genGenericRep 118 | 119 | instance 120 | (GenericGenerable f, GenericGenerable g) => 121 | GenericGenerable (f :*: g) 122 | where 123 | genGenericRep = (:*:) <$> genGenericRep <*> genGenericRep 124 | 125 | instance Generable a => GenericGenerable (K1 i a) where 126 | genGenericRep = K1 <$> genAny 127 | 128 | instance GenericGenerable f => GenericGenerable (M1 i t f) where 129 | genGenericRep = M1 <$> genGenericRep 130 | 131 | genGeneric :: (Generic a, GenericGenerable (Rep a)) => Generator a 132 | genGeneric = to <$> genGenericRep 133 | -------------------------------------------------------------------------------- /src/Trynocular/PartialKeySet.hs: -------------------------------------------------------------------------------- 1 | module Trynocular.PartialKeySet 2 | ( PartialKeySet, 3 | empty, 4 | singleton, 5 | fromList, 6 | insert, 7 | member, 8 | ) 9 | where 10 | 11 | import Data.Foldable (Foldable (..)) 12 | import Data.Functor.Identity (Identity (..)) 13 | import Trynocular.Key (Key, KeyF (..), PartialKey) 14 | 15 | newtype PartialKeySet = PartialKeySet (PKTrie ()) 16 | 17 | empty :: PartialKeySet 18 | empty = PartialKeySet EmptyPKTrie 19 | 20 | singleton :: PartialKey -> PartialKeySet 21 | singleton k = insert k empty 22 | 23 | fromList :: [PartialKey] -> PartialKeySet 24 | fromList = foldr insert empty 25 | 26 | insert :: PartialKey -> PartialKeySet -> PartialKeySet 27 | insert pk (PartialKeySet pkt) = PartialKeySet (insertPKTrie pk () pkt) 28 | 29 | member :: Key -> PartialKeySet -> Bool 30 | member k (PartialKeySet pkt) = not (null (lookupPKTrie k pkt)) 31 | 32 | data PKTrie a = EmptyPKTrie | NonEmptyPKTrie (NEPKTrie a) 33 | 34 | insertPKTrie :: PartialKey -> a -> PKTrie a -> PKTrie a 35 | insertPKTrie pk x EmptyPKTrie = 36 | NonEmptyPKTrie (singletonNEPKTrie pk x) 37 | insertPKTrie pk x (NonEmptyPKTrie nepkt) = 38 | NonEmptyPKTrie (insertNEPKTrie pk x nepkt) 39 | 40 | lookupPKTrie :: Key -> PKTrie a -> [a] 41 | lookupPKTrie _ EmptyPKTrie = [] 42 | lookupPKTrie k (NonEmptyPKTrie nepkt) = lookupNEPKTrie k nepkt 43 | 44 | newtype NEPKTrie a = NEPKTrie (SumTrie Identity KeyFTrie a) 45 | 46 | singletonNEPKTrie :: PartialKey -> a -> NEPKTrie a 47 | singletonNEPKTrie Nothing x = NEPKTrie (LTrie (Identity x)) 48 | singletonNEPKTrie (Just k) x = NEPKTrie (RTrie (singletonKeyFTrie k x)) 49 | 50 | insertNEPKTrie :: PartialKey -> a -> NEPKTrie a -> NEPKTrie a 51 | insertNEPKTrie k x = alterNEPKTrie k x (const x) 52 | 53 | alterNEPKTrie :: PartialKey -> a -> (a -> a) -> NEPKTrie a -> NEPKTrie a 54 | alterNEPKTrie Nothing new old (NEPKTrie t) = 55 | NEPKTrie (onLeft (Identity new) (\(Identity x) -> Identity (old x)) t) 56 | alterNEPKTrie (Just k) new old (NEPKTrie t) = 57 | NEPKTrie (onRight (singletonKeyFTrie k new) (alterKeyFTrie k new old) t) 58 | 59 | lookupNEPKTrie :: Key -> NEPKTrie a -> [a] 60 | lookupNEPKTrie (Identity k) (NEPKTrie t) = fromBoth toList (lookupKeyFTrie k) t 61 | 62 | data SumTrie f g a = LTrie (f a) | RTrie (g a) | BTrie (f a) (g a) 63 | 64 | onLeft :: f a -> (f a -> f a) -> SumTrie f g a -> SumTrie f g a 65 | onLeft _ old (LTrie t) = LTrie (old t) 66 | onLeft new _ (RTrie t) = BTrie new t 67 | onLeft _ old (BTrie l r) = BTrie (old l) r 68 | 69 | onRight :: g a -> (g a -> g a) -> SumTrie f g a -> SumTrie f g a 70 | onRight new _ (LTrie t) = BTrie t new 71 | onRight _ old (RTrie t) = RTrie (old t) 72 | onRight _ old (BTrie l r) = BTrie l (old r) 73 | 74 | fromLeft :: Monoid m => (f a -> m) -> SumTrie f g a -> m 75 | fromLeft f (LTrie t) = f t 76 | fromLeft f (BTrie l _) = f l 77 | fromLeft _ _ = mempty 78 | 79 | fromRight :: Monoid m => (g a -> m) -> SumTrie f g a -> m 80 | fromRight f (RTrie t) = f t 81 | fromRight f (BTrie _ r) = f r 82 | fromRight _ _ = mempty 83 | 84 | fromBoth :: Semigroup s => (f a -> s) -> (g a -> s) -> SumTrie f g a -> s 85 | fromBoth f _ (LTrie t) = f t 86 | fromBoth _ g (RTrie t) = g t 87 | fromBoth f g (BTrie l r) = f l <> g r 88 | 89 | newtype ProductTrie f g a = ProductTrie (f (g a)) 90 | 91 | data KeyFTrie a 92 | = TrivialTrie a 93 | | ChoiceTrie (SumTrie NEPKTrie NEPKTrie a) 94 | | BothTrie (ProductTrie NEPKTrie NEPKTrie a) 95 | 96 | singletonKeyFTrie :: KeyF Maybe -> a -> KeyFTrie a 97 | singletonKeyFTrie TrivialF x = TrivialTrie x 98 | singletonKeyFTrie (LeftF k) x = ChoiceTrie (LTrie (singletonNEPKTrie k x)) 99 | singletonKeyFTrie (RightF k) x = ChoiceTrie (RTrie (singletonNEPKTrie k x)) 100 | singletonKeyFTrie (BothF k1 k2) x = 101 | BothTrie (ProductTrie (singletonNEPKTrie k1 (singletonNEPKTrie k2 x))) 102 | 103 | alterKeyFTrie :: KeyF Maybe -> a -> (a -> a) -> KeyFTrie a -> KeyFTrie a 104 | alterKeyFTrie TrivialF _ old (TrivialTrie x) = TrivialTrie (old x) 105 | alterKeyFTrie (LeftF k) new old (ChoiceTrie t) = 106 | ChoiceTrie (onLeft (singletonNEPKTrie k new) (alterNEPKTrie k new old) t) 107 | alterKeyFTrie (RightF k) new old (ChoiceTrie t) = 108 | ChoiceTrie (onRight (singletonNEPKTrie k new) (alterNEPKTrie k new old) t) 109 | alterKeyFTrie (BothF k1 k2) new old (BothTrie (ProductTrie t)) = 110 | BothTrie 111 | ( ProductTrie 112 | ( alterNEPKTrie 113 | k1 114 | (singletonNEPKTrie k2 new) 115 | (alterNEPKTrie k2 new old) 116 | t 117 | ) 118 | ) 119 | alterKeyFTrie _ _ _ _ = error "Inconsistent keys" 120 | 121 | lookupKeyFTrie :: KeyF Identity -> KeyFTrie a -> [a] 122 | lookupKeyFTrie TrivialF (TrivialTrie x) = [x] 123 | lookupKeyFTrie (LeftF k) (ChoiceTrie t) = fromLeft (lookupNEPKTrie k) t 124 | lookupKeyFTrie (RightF k) (ChoiceTrie t) = fromRight (lookupNEPKTrie k) t 125 | lookupKeyFTrie (BothF k1 k2) (BothTrie (ProductTrie t)) = 126 | concatMap (lookupNEPKTrie k2) (lookupNEPKTrie k1 t) 127 | lookupKeyFTrie _ _ = [] 128 | -------------------------------------------------------------------------------- /src/Demo.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE LambdaCase #-} 2 | 3 | module Demo where 4 | 5 | import Data.ByteString (ByteString) 6 | import Data.ByteString qualified as B 7 | import Data.ByteString.Builder (Builder) 8 | import Data.ByteString.Builder qualified as BB 9 | import Data.ByteString.Lazy qualified as BL 10 | import Data.Char (chr, ord) 11 | import Data.Foldable (foldl') 12 | import Data.Map.Strict qualified as M 13 | import Data.Maybe (isJust) 14 | import Data.Void (Void) 15 | import Data.Word (Word8) 16 | import GHC.Generics (Generic) 17 | import Text.Megaparsec 18 | ( MonadParsec (takeP, takeWhileP, try), 19 | Parsec, 20 | manyTill, 21 | optional, 22 | parse, 23 | satisfy, 24 | (<|>), 25 | ) 26 | import Text.Megaparsec.Byte qualified as MB 27 | 28 | type Parser = Parsec Void ByteString 29 | 30 | -- "i10e s3abce" 31 | data Bencode 32 | = BString ByteString 33 | | BInteger Integer 34 | | BList [Bencode] 35 | | BDict (M.Map ByteString Bencode) 36 | deriving (Show, Eq, Ord, Generic) 37 | 38 | bencode :: Parser Bencode 39 | bencode = 40 | BString <$> try bString 41 | <|> BInteger <$> try bInteger 42 | <|> BList <$> try bList 43 | <|> BDict <$> bDict 44 | 45 | prop_roundtrip :: Bencode -> Bool 46 | prop_roundtrip benc = do 47 | parsed == Right benc 48 | where 49 | bs = BL.toStrict . BB.toLazyByteString $ write benc 50 | parsed = parse bencode "" bs 51 | 52 | -- 3:abc -> "abc" 53 | bString :: Parser ByteString 54 | bString = do 55 | len <- number 56 | _ <- char_ ':' 57 | takeP Nothing (fromIntegral len) 58 | 59 | -- i10e -> 10 60 | bInteger :: Parser Integer 61 | bInteger = do 62 | _ <- char_ 'i' 63 | i <- 64 | optional (char_ '-') >>= \case 65 | Nothing -> number 66 | Just _ -> negate <$> positiveNumber 67 | _ <- char_ 'e' 68 | pure i 69 | 70 | number :: Parser Integer 71 | number = try positiveNumber <|> (0 <$ char_ '0') 72 | 73 | positiveNumber :: Parser Integer 74 | positiveNumber = do 75 | d1 <- satisfy isNonZeroDigit 76 | ds <- takeWhileP Nothing isDigit 77 | let i = read @Integer $ map (chr . fromIntegral) (d1 : B.unpack ds) 78 | pure i 79 | 80 | isDigit :: Word8 -> Bool 81 | isDigit x = x >= 48 && x <= 57 82 | 83 | isNonZeroDigit :: Word8 -> Bool 84 | isNonZeroDigit x = x >= 49 && x <= 57 85 | 86 | bList :: Parser [Bencode] 87 | bList = do 88 | _ <- char_ 'l' 89 | manyTill bencode (char_ 'e') 90 | 91 | bDict :: Parser (M.Map ByteString Bencode) 92 | bDict = do 93 | _ <- char_ 'd' 94 | M.fromList <$> manyTill ((,) <$> bString <*> bencode) (char_ 'e') 95 | 96 | char_ :: Char -> Parser Word8 97 | char_ c = MB.char $ fromIntegral $ ord c 98 | 99 | delimit :: Char -> Builder -> Builder 100 | delimit d bs = BB.char8 d <> bs <> BB.char8 'e' 101 | 102 | write :: Bencode -> Builder 103 | write (BString bs) = (BB.intDec . B.length $ bs) <> BB.char8 ':' <> BB.byteString bs 104 | write (BInteger i) = delimit 'i' $ BB.integerDec i 105 | write (BList bs) = delimit 'l' $ foldMap write bs 106 | write (BDict m) = delimit 'd' $ M.foldMapWithKey (\k v -> write (BString k) <> write v) m 107 | 108 | data RBTree a 109 | = Branch RB (RBTree a) a (RBTree a) 110 | | Nil 111 | deriving (Show, Eq, Ord, Generic) 112 | 113 | data RB = Red | Black deriving (Show, Eq, Ord, Generic) 114 | 115 | rbEmpty :: RBTree a 116 | rbEmpty = Nil 117 | 118 | rbInsert :: Ord a => a -> RBTree a -> RBTree a 119 | rbInsert new t = blacken (go t) 120 | where 121 | go Nil = Branch Red Nil new Nil 122 | go (Branch c l y r) = case compare new y of 123 | LT -> balance c (go l) y r 124 | GT -> balance c l y (go r) 125 | EQ -> Branch c l y r 126 | 127 | blacken (Branch _ l y r) = Branch Black l y r 128 | blacken Nil = Nil 129 | 130 | balance Black (Branch Red (Branch Red a x b) y c) z d = 131 | Branch Red (Branch Black a x b) y (Branch Black c z d) 132 | balance Black (Branch Red a x (Branch Red b y c)) z d = 133 | Branch Red (Branch Black a x b) y (Branch Black c z d) 134 | balance Black a x (Branch Red (Branch Red b y c) z d) = 135 | Branch Red (Branch Black a x b) y (Branch Black c z d) 136 | balance Black a x (Branch Red b y (Branch Red c z d)) = 137 | Branch Red (Branch Black a x b) y (Branch Black c z d) 138 | balance c a x b = Branch c a x b 139 | 140 | rbFromList :: Ord a => [a] -> RBTree a 141 | rbFromList = foldl' (flip rbInsert) rbEmpty 142 | 143 | rbMember :: Ord a => RBTree a -> a -> Bool 144 | rbMember Nil _ = False 145 | rbMember (Branch _ l y r) x = case compare x y of 146 | LT -> rbMember l x 147 | GT -> rbMember r x 148 | EQ -> True 149 | 150 | isValid :: RBTree a -> Bool 151 | isValid tree = rootIsBlack tree && noRedOnRed tree && isJust (blackLen tree) 152 | where 153 | rootIsBlack Nil = True 154 | rootIsBlack (Branch Black _ _ _) = True 155 | rootIsBlack _ = False 156 | 157 | noRedOnRed Nil = True 158 | noRedOnRed (Branch c l _ r) = 159 | (c == Black || (rootIsBlack l && rootIsBlack r)) 160 | && noRedOnRed l 161 | && noRedOnRed r 162 | 163 | blackLen :: RBTree a -> Maybe Int 164 | blackLen Nil = Just 1 165 | blackLen (Branch c l _ r) = 166 | (if c == Black then succ else id) 167 | <$> (consistent (blackLen l) (blackLen r)) 168 | 169 | consistent a b 170 | | a == b = a 171 | | otherwise = Nothing 172 | -------------------------------------------------------------------------------- /nix/sources.nix: -------------------------------------------------------------------------------- 1 | # This file has been generated by Niv. 2 | 3 | let 4 | 5 | # 6 | # The fetchers. fetch_ fetches specs of type . 7 | # 8 | 9 | fetch_file = pkgs: spec: 10 | if spec.builtin or true then 11 | builtins_fetchurl { inherit (spec) url sha256; } 12 | else 13 | pkgs.fetchurl { inherit (spec) url sha256; }; 14 | 15 | fetch_tarball = pkgs: name: spec: 16 | let 17 | ok = str: ! builtins.isNull (builtins.match "[a-zA-Z0-9+-._?=]" str); 18 | # sanitize the name, though nix will still fail if name starts with period 19 | name' = stringAsChars (x: if ! ok x then "-" else x) "${name}-src"; 20 | in 21 | if spec.builtin or true then 22 | builtins_fetchTarball { name = name'; inherit (spec) url sha256; } 23 | else 24 | pkgs.fetchzip { name = name'; inherit (spec) url sha256; }; 25 | 26 | fetch_git = spec: 27 | builtins.fetchGit { url = spec.repo; inherit (spec) rev ref; }; 28 | 29 | fetch_builtin-tarball = name: throw 30 | ''[${name}] The niv type "builtin-tarball" is deprecated. You should instead use `builtin = true`. 31 | $ niv modify ${name} -a type=tarball -a builtin=true''; 32 | 33 | fetch_builtin-url = name: throw 34 | ''[${name}] The niv type "builtin-url" will soon be deprecated. You should instead use `builtin = true`. 35 | $ niv modify ${name} -a type=file -a builtin=true''; 36 | 37 | # 38 | # Various helpers 39 | # 40 | 41 | # The set of packages used when specs are fetched using non-builtins. 42 | mkPkgs = sources: 43 | let 44 | sourcesNixpkgs = 45 | import (builtins_fetchTarball { inherit (sources.nixpkgs) url sha256; }) {}; 46 | hasNixpkgsPath = builtins.any (x: x.prefix == "nixpkgs") builtins.nixPath; 47 | hasThisAsNixpkgsPath = == ./.; 48 | in 49 | if builtins.hasAttr "nixpkgs" sources 50 | then sourcesNixpkgs 51 | else if hasNixpkgsPath && ! hasThisAsNixpkgsPath then 52 | import {} 53 | else 54 | abort 55 | '' 56 | Please specify either (through -I or NIX_PATH=nixpkgs=...) or 57 | add a package called "nixpkgs" to your sources.json. 58 | ''; 59 | 60 | # The actual fetching function. 61 | fetch = pkgs: name: spec: 62 | 63 | if ! builtins.hasAttr "type" spec then 64 | abort "ERROR: niv spec ${name} does not have a 'type' attribute" 65 | else if spec.type == "file" then fetch_file pkgs spec 66 | else if spec.type == "tarball" then fetch_tarball pkgs name spec 67 | else if spec.type == "git" then fetch_git spec 68 | else if spec.type == "builtin-tarball" then fetch_builtin-tarball name 69 | else if spec.type == "builtin-url" then fetch_builtin-url name 70 | else 71 | abort "ERROR: niv spec ${name} has unknown type ${builtins.toJSON spec.type}"; 72 | 73 | # Ports of functions for older nix versions 74 | 75 | # a Nix version of mapAttrs if the built-in doesn't exist 76 | mapAttrs = builtins.mapAttrs or ( 77 | f: set: with builtins; 78 | listToAttrs (map (attr: { name = attr; value = f attr set.${attr}; }) (attrNames set)) 79 | ); 80 | 81 | # https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/lists.nix#L295 82 | range = first: last: if first > last then [] else builtins.genList (n: first + n) (last - first + 1); 83 | 84 | # https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/strings.nix#L257 85 | stringToCharacters = s: map (p: builtins.substring p 1 s) (range 0 (builtins.stringLength s - 1)); 86 | 87 | # https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/strings.nix#L269 88 | stringAsChars = f: s: concatStrings (map f (stringToCharacters s)); 89 | concatStrings = builtins.concatStringsSep ""; 90 | 91 | # fetchTarball version that is compatible between all the versions of Nix 92 | builtins_fetchTarball = { url, name, sha256 }@attrs: 93 | let 94 | inherit (builtins) lessThan nixVersion fetchTarball; 95 | in 96 | if lessThan nixVersion "1.12" then 97 | fetchTarball { inherit name url; } 98 | else 99 | fetchTarball attrs; 100 | 101 | # fetchurl version that is compatible between all the versions of Nix 102 | builtins_fetchurl = { url, sha256 }@attrs: 103 | let 104 | inherit (builtins) lessThan nixVersion fetchurl; 105 | in 106 | if lessThan nixVersion "1.12" then 107 | fetchurl { inherit url; } 108 | else 109 | fetchurl attrs; 110 | 111 | # Create the final "sources" from the config 112 | mkSources = config: 113 | mapAttrs ( 114 | name: spec: 115 | if builtins.hasAttr "outPath" spec 116 | then abort 117 | "The values in sources.json should not have an 'outPath' attribute" 118 | else 119 | spec // { outPath = fetch config.pkgs name spec; } 120 | ) config.sources; 121 | 122 | # The "config" used by the fetchers 123 | mkConfig = 124 | { sourcesFile ? ./sources.json 125 | , sources ? builtins.fromJSON (builtins.readFile sourcesFile) 126 | , pkgs ? mkPkgs sources 127 | }: rec { 128 | # The sources, i.e. the attribute set of spec name to spec 129 | inherit sources; 130 | 131 | # The "pkgs" (evaluated nixpkgs) to use for e.g. non-builtin fetchers 132 | inherit pkgs; 133 | }; 134 | in 135 | mkSources (mkConfig {}) // { __functor = _: settings: mkSources (mkConfig settings); } 136 | -------------------------------------------------------------------------------- /src/Trynocular/Key.hs: -------------------------------------------------------------------------------- 1 | -- | A 'Key' is a plan for building a value. There are two types of keys used 2 | -- here, though: a total 'Key', and a 'PartialKey', where the latter represents 3 | -- a partial value, some of whose subterms can be undefined. When both forms 4 | -- are possible, 'GeneralKey' can be used. In fact, 'Key' is a synonym for 5 | -- 'GeneralKey Identity', and 'PartialKey' is a synonym for 'GeneralKey Maybe'. 6 | -- 7 | -- One way to obtain a 'PartialKey' is with 'spy', which observe the strictness 8 | -- of an 'IO' action on a 'Key', producing a 'PartialKey' that expresses which 9 | -- parts of the 'Key' were forced by the action. 10 | module Trynocular.Key 11 | ( Key, 12 | keySimilarity, 13 | PartialKey, 14 | totalKey, 15 | partialKeys, 16 | subsumes, 17 | spy, 18 | GeneralKey, 19 | KeyF (..), 20 | mapSubkeys, 21 | traverseSubkeys, 22 | ) 23 | where 24 | 25 | import Control.Concurrent (MVar, newMVar, readMVar, swapMVar) 26 | import Data.Functor.Classes (Eq1 (..), Ord1 (..), Show1 (..)) 27 | import Data.Functor.Identity (Identity (..)) 28 | import GHC.Generics (Generic) 29 | import System.IO.Unsafe (unsafeInterleaveIO) 30 | 31 | -- | A 'GeneralKey' is a key with some context attached to each node, 32 | -- represented by a 'Functor' called @f@. 33 | type GeneralKey f = f (KeyF f) 34 | 35 | -- | A type representing the common structure of different kinds of 'GeneralKey' 36 | -- types, which represent specific decisions in a generator. The @f@ parameter 37 | -- represents context that can be applied at each level. 38 | data KeyF f 39 | = TrivialF 40 | | LeftF (GeneralKey f) 41 | | RightF (GeneralKey f) 42 | | BothF (GeneralKey f) (GeneralKey f) 43 | deriving (Generic) 44 | 45 | instance Show1 f => Show (KeyF f) where 46 | showsPrec _ TrivialF = showString "TrivialF" 47 | showsPrec p (LeftF k) = 48 | showParen (p > 10) $ 49 | showString "LeftF " . liftShowsPrec showsPrec showList 11 k 50 | showsPrec p (RightF k) = 51 | showParen (p > 10) $ 52 | showString "RightF " . liftShowsPrec showsPrec showList 11 k 53 | showsPrec p (BothF k1 k2) = 54 | showParen (p > 10) $ 55 | showString "BothF " 56 | . liftShowsPrec showsPrec showList 11 k1 57 | . showString " " 58 | . liftShowsPrec showsPrec showList 11 k2 59 | 60 | instance Eq1 f => Eq (KeyF f) where 61 | TrivialF == TrivialF = True 62 | LeftF a == LeftF b = liftEq (==) a b 63 | RightF a == RightF b = liftEq (==) a b 64 | BothF a b == BothF c d = liftEq (==) a c && liftEq (==) b d 65 | _ == _ = False 66 | 67 | instance Ord1 f => Ord (KeyF f) where 68 | compare TrivialF TrivialF = EQ 69 | compare TrivialF _ = LT 70 | compare (LeftF _) TrivialF = GT 71 | compare (LeftF a) (LeftF b) = liftCompare compare a b 72 | compare (LeftF _) _ = LT 73 | compare (RightF _) (BothF _ _) = LT 74 | compare (RightF a) (RightF b) = liftCompare compare a b 75 | compare (RightF _) _ = GT 76 | compare (BothF a b) (BothF c d) = 77 | case liftCompare compare a c of 78 | EQ -> liftCompare compare b d 79 | cmp -> cmp 80 | compare (BothF _ _) _ = GT 81 | 82 | -- | A unique identifier for a value produced by a 'Trynocular.Generator'. You 83 | -- can think of this as a "plan" that a 'Trynocular.Generator' can follow to 84 | -- produce a specific value. 85 | type Key = GeneralKey Identity 86 | 87 | -- | Produces a similarity score between two values. Similarity is between 0 88 | -- and 1, and a key has a similarity of 1 only to itself. 89 | keySimilarity :: Key -> Key -> Rational 90 | keySimilarity (Identity TrivialF) (Identity TrivialF) = 1 91 | keySimilarity (Identity (LeftF k1)) (Identity (LeftF k2)) = 92 | 0.5 + 0.5 * keySimilarity k1 k2 93 | keySimilarity (Identity (RightF k1)) (Identity (RightF k2)) = 94 | 0.5 + 0.5 * keySimilarity k1 k2 95 | keySimilarity (Identity (BothF k1 k2)) (Identity (BothF k3 k4)) = 96 | (keySimilarity k1 k3 + keySimilarity k2 k4) / 2 97 | keySimilarity _ _ = 0 98 | 99 | -- | A uniquely identifier for a partial value produced by a 100 | -- 'Trynocular.Generator'. 101 | type PartialKey = GeneralKey Maybe 102 | 103 | -- | Produces a 'PartialKey' that is actually total. 104 | totalKey :: Key -> PartialKey 105 | totalKey (Identity k) = Just (mapSubkeys totalKey k) 106 | 107 | -- | Returns all possible 'PartialKey's that subsume a given 'Key'. 108 | partialKeys :: Key -> [PartialKey] 109 | partialKeys (Identity k) = Nothing : (Just <$> traverseSubkeys partialKeys k) 110 | 111 | -- | Determines whether a 'PartialKey' subsumes another 'GeneralKey' (either a 112 | -- 'Key' or a 'PartialKey') subsumes another if any value of the first key 113 | -- agrees with the second except when the first key is partial. 114 | subsumes :: forall f. Foldable f => PartialKey -> GeneralKey f -> Bool 115 | Nothing `subsumes` _ = True 116 | Just key1 `subsumes` key2 = any (go key1) key2 117 | where 118 | go :: KeyF Maybe -> KeyF f -> Bool 119 | go TrivialF TrivialF = True 120 | go (LeftF k) (LeftF k') = k `subsumes` k' 121 | go (RightF k) (RightF k') = k `subsumes` k' 122 | go (BothF k1 k2) (BothF k1' k2') = k1 `subsumes` k1' && k2 `subsumes` k2' 123 | go _ _ = False 124 | 125 | -- A utility function that's useful for recursive conversion functions on 126 | -- keys. 127 | mapSubkeys :: (GeneralKey f1 -> GeneralKey f2) -> KeyF f1 -> KeyF f2 128 | mapSubkeys f k = runIdentity (traverseSubkeys (pure . f) k) 129 | 130 | -- A utility function that's useful for recursive effectful conversion functions 131 | -- on keys. 132 | traverseSubkeys :: 133 | Applicative m => 134 | (GeneralKey f1 -> m (GeneralKey f2)) -> 135 | KeyF f1 -> 136 | m (KeyF f2) 137 | traverseSubkeys _ TrivialF = pure TrivialF 138 | traverseSubkeys op (LeftF k) = LeftF <$> op k 139 | traverseSubkeys op (RightF k) = RightF <$> op k 140 | traverseSubkeys op (BothF k1 k2) = BothF <$> op k1 <*> op k2 141 | 142 | -- A 'GeneralKey' that contains all the information of a 'Key', and also keeps 143 | -- track of whether any given node has been observed. 144 | type ObservableKey = GeneralKey Observable 145 | 146 | -- A 'Functor' used to implement 'ObservableKey'. 147 | data Observable a = Observable (MVar Bool) a 148 | 149 | -- Given a 'Key', produces an 'ObservableKey' that can keep track of demand. 150 | makeObservable :: Key -> IO ObservableKey 151 | makeObservable (Identity k) = 152 | Observable <$> newMVar False <*> traverseSubkeys makeObservable k 153 | 154 | -- Given an 'ObservableKey', produces a 'PartialKey' that records how much of 155 | -- the key has been demanded. 156 | observe :: ObservableKey -> IO PartialKey 157 | observe (Observable var k) = do 158 | forced <- readMVar var 159 | if forced then Just <$> traverseSubkeys observe k else pure Nothing 160 | 161 | -- Given an 'ObservableKey', produces a 'Key' that behaves exactly like the 162 | -- original (on which 'makeObservable' was called), but silently tracks demand 163 | -- as it is consumed. 164 | makeSpy :: ObservableKey -> IO Key 165 | makeSpy (Observable var k) = unsafeInterleaveIO $ do 166 | _ <- swapMVar var True 167 | Identity <$> traverseSubkeys makeSpy k 168 | 169 | -- | Runs an action on a 'Key', and returns the demand on the 'Key' (as a 170 | -- 'PartialKey') in addition to the result of the action. 171 | spy :: Key -> (Key -> IO a) -> IO (PartialKey, a) 172 | spy k action = do 173 | ok <- makeObservable k 174 | x <- action =<< makeSpy ok 175 | (,x) <$> observe ok 176 | -------------------------------------------------------------------------------- /src/Trynocular/Generator.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE GADTs #-} 2 | {-# LANGUAGE LambdaCase #-} 3 | {-# LANGUAGE ViewPatterns #-} 4 | 5 | -- | A 'Generator' knows how to generate 'Key's and convert them into values for 6 | -- a type. This module contains the 'Generator' type, some operations on 7 | -- 'Generator's, and some helpers for building 'Generator's. 8 | module Trynocular.Generator 9 | ( -- * The 'Generator' type 10 | Generator, 11 | 12 | -- * Operations on 'Generator' 13 | pickKey, 14 | keys, 15 | fromKey, 16 | fromPartialKey, 17 | compatibleKey, 18 | pickValue, 19 | values, 20 | 21 | -- * Manual construction of 'Generator's 22 | genRange, 23 | genBoundedIntegral, 24 | genPositive, 25 | genNonNegative, 26 | 27 | -- * Adjusting probabilities 28 | keyProbability, 29 | adjustProbability, 30 | ) 31 | where 32 | 33 | import Data.Functor.Alt (Alt (..)) 34 | import Data.Functor.Identity (Identity (..)) 35 | import Data.Kind (Type) 36 | import Data.Universe.Helpers ((+*+), (+++)) 37 | import System.IO.Unsafe (unsafeInterleaveIO) 38 | import System.Random (randomRIO) 39 | import Trynocular.Key (Key, KeyF (..), PartialKey) 40 | 41 | -- | A 'Generator' for a type knows how to enumerate or pick values for the 42 | -- type in terms of their 'Key', and how to actually build those values using 43 | -- 'fromKey'. Crucially, 'fromKey' is maximally lazy in the 'Key'. 44 | data Generator :: Type -> Type where 45 | Trivial :: Generator () 46 | Choice :: Double -> Generator a -> Generator b -> Generator (Either a b) 47 | Both :: Generator a -> Generator b -> Generator (a, b) 48 | Apply :: (a -> b) -> Generator a -> Generator b 49 | 50 | instance Functor Generator where 51 | fmap = Apply 52 | 53 | instance Alt Generator where 54 | a b = either id id <$> Choice 0.5 a b 55 | 56 | instance Applicative Generator where 57 | pure x = 58 | Apply 59 | (\() -> x) -- Strictness is deliberate here, so that demand of the 60 | -- value is observable in demand of the key. 61 | Trivial 62 | f <*> x = uncurry ($) <$> Both f x 63 | 64 | -- | Choose a single 'Key' corresponding to a value from a 'Generator'. 65 | -- 66 | -- This uses lazy IO to generate the random path, so it is cheap if you don't 67 | -- force too much of the resulting key. 68 | pickKey :: Generator a -> IO Key 69 | pickKey = unsafeInterleaveIO . go 70 | where 71 | go :: Generator a -> IO Key 72 | go Trivial = pure (Identity TrivialF) 73 | go (Choice p ga gb) = do 74 | (< p) <$> randomRIO (0, 1) >>= \case 75 | True -> Identity . LeftF <$> pickKey ga 76 | False -> Identity . RightF <$> pickKey gb 77 | go (Both ga gb) = Identity <$> (BothF <$> pickKey ga <*> pickKey gb) 78 | go (Apply _ ga) = go ga 79 | 80 | -- | List all 'Key's corresponding to values from a 'Generator'. 81 | -- 82 | -- TODO: This currently fails on generators that contain infinite paths in the 83 | -- left branch of a choice. See issue #1. 84 | keys :: Generator a -> [Key] 85 | keys Trivial = [Identity TrivialF] 86 | keys (Choice _ ga gb) = 87 | (Identity . LeftF <$> keys ga) +++ (Identity . RightF <$> keys gb) 88 | keys (Both ga gb) = 89 | Identity <$> (uncurry BothF <$> keys ga +*+ keys gb) 90 | keys (Apply _ g) = keys g 91 | 92 | -- | Using a 'Generator', convert a 'Key' to a value. 93 | -- 94 | -- This is a partial function. If the provided 'Key' is not compatible with 95 | -- the given generator, it will fail to return a total value. 96 | -- 97 | -- The conversion is maximally lazy in the 'Key', in the sense that it forces 98 | -- only as much of the key as needed to satisfy the demand on the resulting 99 | -- value. 100 | fromKey :: Generator a -> Key -> a 101 | fromKey Trivial (Identity TrivialF) = () 102 | fromKey (Choice _ ga _) (Identity (LeftF k)) = Left (fromKey ga k) 103 | fromKey (Choice _ _ gb) (Identity (RightF k)) = Right (fromKey gb k) 104 | fromKey (Both ga gb) (Identity (BothF k1 k2)) = 105 | (fromKey ga k1, fromKey gb k2) 106 | fromKey (Apply f ga) k = f (fromKey ga k) 107 | fromKey _ _ = error "key doesn't match generator" 108 | 109 | -- | Using a 'Generator', convert a 'PartialKey' to a value. 110 | -- 111 | -- This is just like 'fromKey', except that it can produce values containing 112 | -- 'undefined' when the key isn't specified. 113 | fromPartialKey :: Generator a -> PartialKey -> a 114 | fromPartialKey _ Nothing = undefined 115 | fromPartialKey Trivial (Just TrivialF) = () 116 | fromPartialKey (Choice _ ga _) (Just (LeftF k)) = Left (fromPartialKey ga k) 117 | fromPartialKey (Choice _ _ gb) (Just (RightF k)) = Right (fromPartialKey gb k) 118 | fromPartialKey (Both ga gb) (Just (BothF k1 k2)) = 119 | (fromPartialKey ga k1, fromPartialKey gb k2) 120 | fromPartialKey (Apply f ga) k = f (fromPartialKey ga k) 121 | fromPartialKey _ _ = error "key doesn't match generator" 122 | 123 | -- | Checks whether a given 'Key' is compatible with this 'Generator'. If the 124 | -- result is 'True', then 'fromKey' will succeed and produce a total value. 125 | -- Otherwise, it will produce a value containing bottoms. 126 | compatibleKey :: Generator a -> Key -> Bool 127 | compatibleKey Trivial (Identity TrivialF) = True 128 | compatibleKey (Choice _ ga _) (Identity (LeftF k)) = compatibleKey ga k 129 | compatibleKey (Choice _ _ gb) (Identity (RightF k)) = compatibleKey gb k 130 | compatibleKey (Both ga gb) (Identity (BothF k1 k2)) = 131 | compatibleKey ga k1 && compatibleKey gb k2 132 | compatibleKey (Apply _ ga) k = compatibleKey ga k 133 | compatibleKey _ _ = False 134 | 135 | -- | Pick a value from a 'Generator'. 136 | -- 137 | -- This uses lazy IO to make random choices, so it is cheap if you don't force 138 | -- too much of the resulting value. 139 | pickValue :: Generator a -> IO a 140 | pickValue g = fromKey g <$> pickKey g 141 | 142 | -- | List all values for a 'Generator'. 143 | -- 144 | -- TODO: This currently fails on generators that contain infinite paths in the 145 | -- left branch of a choice. See issue #1. 146 | values :: Generator a -> [a] 147 | values g = fromKey g <$> keys g 148 | 149 | -- | A 'Generator' that produces a value of an 'Integral' type from the given 150 | -- inclusive range. 151 | genRange :: Integral t => (t, t) -> Generator t 152 | genRange (toInteger -> lo, toInteger -> hi) = 153 | fmap (fromInteger . (+ lo)) $ go (hi - lo + 1) 154 | where 155 | go n 156 | | n == 1 = pure 0 157 | | n == 2 = pure 0 pure 1 158 | | even n = (\b x -> b * (n `div` 2) + x) <$> go 2 <*> go (n `div` 2) 159 | | otherwise = pure 0 succ <$> go (n - 1) 160 | 161 | -- | A 'Generator' that produces a value of an 'Integral' type from its full 162 | -- range. 163 | genBoundedIntegral :: forall t. (Bounded t, Integral t) => Generator t 164 | genBoundedIntegral = genRange (minBound, maxBound) 165 | 166 | -- | A 'Generator' that produces a positive 'Integer'. 167 | genPositive :: Generator Integer 168 | genPositive = (\x b -> 2 * x + b) <$> genNonNegative <*> (pure 1 pure 2) 169 | 170 | -- | A 'Generator' that produces a non-negative 'Integer'. 171 | genNonNegative :: Generator Integer 172 | genNonNegative = pure 0 genPositive 173 | 174 | -- | Returns the probability of a 'Generator' producing a given 'PartialKey'. 175 | keyProbability :: Generator a -> PartialKey -> Double 176 | keyProbability _ Nothing = 1 177 | keyProbability Trivial (Just TrivialF) = 1 178 | keyProbability (Choice p g _) (Just (LeftF k)) = p * keyProbability g k 179 | keyProbability (Choice p _ g) (Just (RightF k)) = (1 - p) * keyProbability g k 180 | keyProbability (Both ga gb) (Just (BothF a b)) = 181 | keyProbability ga a * keyProbability gb b 182 | keyProbability (Apply _ g) k = keyProbability g k 183 | keyProbability _ _ = error "key doesn't match generator" 184 | 185 | -- | Modifies a 'Generator' to have the given probability of producing the given 186 | -- 'PartialKey'. 187 | adjustProbability :: PartialKey -> Double -> Generator a -> Generator a 188 | adjustProbability key target generator = go key generator 189 | where 190 | go :: PartialKey -> Generator a -> Generator a 191 | go Nothing g = g 192 | go (Just TrivialF) Trivial = Trivial 193 | go (Just (LeftF k)) (Choice p ga gb) = Choice (adjusted p) (go k ga) gb 194 | go (Just (RightF k)) (Choice p ga gb) = 195 | Choice (1 - adjusted (1 - p)) ga (go k gb) 196 | go (Just (BothF k1 k2)) (Both ga gb) = Both (go k1 ga) (go k2 gb) 197 | go (Just k) (Apply f g) = Apply f (go (Just k) g) 198 | go _ _ = error "key doesn't match generator" 199 | 200 | -- determine what probability it has now 201 | prior = keyProbability generator key 202 | -- by what factor would we like to multiply the current probability of this key? 203 | multiplier = target / prior 204 | -- take current probability and divide by current probability 205 | adjusted p = p * multiplier ** (log p / log prior) 206 | -------------------------------------------------------------------------------- /src/Trynocular/Standardizer.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TypeFamilies #-} 2 | {-# LANGUAGE ViewPatterns #-} 3 | module Trynocular.Standardizer 4 | ( Standardizer (..), 5 | NormalStandardizer, 6 | normalStandardizer, 7 | BetaStandardizer, 8 | betaStandardizer, 9 | CompleteStandardizer, 10 | emptyCompleteStandardizer, 11 | ) 12 | where 13 | 14 | import Data.FingerTree 15 | import Data.FingerTree qualified as FingerTree 16 | import Data.Maybe (fromMaybe) 17 | import Numeric.SpecFunctions (erfc, incompleteBeta, invErfc) 18 | 19 | -- | A 'Standardizer' is an online method for estimating the percentile of new 20 | -- observations from a data set. 21 | class Standardizer std where 22 | type Datum std 23 | percentile :: std -> Datum std -> (std, Double) 24 | fromPercentile :: std -> Double -> Maybe (Datum std) 25 | 26 | -- | A `Standardizer` implementation that assumes the data is normally 27 | -- distributed, and discounts older observations by exponentially down-weighting 28 | -- them based on the given responsiveness. 29 | data NormalStandardizer = NormalStandardizer 30 | { _nsMean :: Double, 31 | _nsVariance :: Double, 32 | _nsResponsiveness :: Double 33 | } 34 | 35 | instance Standardizer NormalStandardizer where 36 | type Datum NormalStandardizer = Double 37 | 38 | percentile (NormalStandardizer mean variance responsiveness) x = 39 | ( NormalStandardizer mean' variance' responsiveness, 40 | 1 - erfc (zscore / sqrt 2) / 2 41 | ) 42 | where 43 | mean' = responsiveness * x + (1 - responsiveness) * mean 44 | xvar = (x - mean) ^ (2 :: Int) 45 | variance' = responsiveness * xvar + (1 - responsiveness) * variance 46 | 47 | zscore = (x - mean') / sqrt variance' 48 | 49 | -- inverse of: 1 - erfc (zscore / sqrt 2) / 2 50 | fromPercentile (NormalStandardizer mean variance _) d = Just $ mean + zscore * stddev 51 | where zscore = (invErfc $ (1 - d) * 2) * sqrt 2 52 | stddev = sqrt variance 53 | 54 | normalStandardizer :: Double -> Double -> Double -> NormalStandardizer 55 | normalStandardizer = NormalStandardizer 56 | 57 | -- | A `Standardizer` implementation that assumes the data follows a beta 58 | -- distribution with a discrete component at each end, and discounts older 59 | -- observations by exponentially down-weighting them based on the given 60 | -- responsiveness. 61 | data BetaStandardizer = BetaStandardizer 62 | { _bsLow :: Double, 63 | _bsHigh :: Double, 64 | _bsLowProb :: Double, 65 | _bsHighProb :: Double, 66 | _bsScaledMean :: Double, 67 | _bsScaledVariance :: Double, 68 | _bsResponsiveness :: Double 69 | } 70 | 71 | instance Standardizer BetaStandardizer where 72 | type Datum BetaStandardizer = Double 73 | 74 | percentile 75 | (BetaStandardizer low high lowp highp mean variance responsiveness) 76 | x 77 | | x <= low = 78 | let lowp' = (1 - responsiveness) * lowp + responsiveness 79 | highp' = ((1 - responsiveness) * highp) 80 | in ( BetaStandardizer 81 | low 82 | high 83 | lowp' 84 | highp' 85 | mean 86 | variance 87 | responsiveness, 88 | lowp' / 2 89 | ) 90 | | x >= high = 91 | let lowp' = ((1 - responsiveness) * lowp) 92 | highp' = (1 - responsiveness) * highp + responsiveness 93 | in ( BetaStandardizer 94 | low 95 | high 96 | lowp' 97 | highp' 98 | mean 99 | variance 100 | responsiveness, 101 | 1 - highp' / 2 102 | ) 103 | | otherwise = 104 | let scaled = (x - low) / (high - low) 105 | 106 | mean' = responsiveness * scaled + (1 - responsiveness) * mean 107 | xvar = (scaled - mean) ^ (2 :: Int) 108 | variance' = 109 | responsiveness * xvar + (1 - responsiveness) * variance 110 | 111 | alpha = ((1 - mean') / variance' - 1 / mean') * mean' * mean' 112 | beta = alpha * (1 / mean' - 1) 113 | 114 | lowp' = (1 - responsiveness) * lowp 115 | highp' = (1 - responsiveness) * highp 116 | in ( BetaStandardizer 117 | low 118 | high 119 | lowp' 120 | highp' 121 | mean' 122 | variance' 123 | responsiveness, 124 | lowp' + (1 - lowp' - highp') * incompleteBeta alpha beta scaled 125 | ) 126 | fromPercentile _ _ = error "not yet implemented, this will not work for now" 127 | 128 | betaStandardizer :: 129 | (Double, Double) -> Double -> Double -> Double -> BetaStandardizer 130 | betaStandardizer (low, high) mean variance responsiveness = 131 | BetaStandardizer 132 | low 133 | high 134 | 0 135 | 0 136 | ((mean - low) / (high - low)) 137 | (variance / (high - low) ^ (2 :: Int)) 138 | responsiveness 139 | 140 | -- | A `Standardizer` implementation that makes no assumptions about 141 | -- distribution, and discounts older observations by exponentially 142 | -- down-weighting them based on the given responsiveness. This uses 143 | -- significantly more memory than the statistical estimators above, but is more 144 | -- versatile. 145 | data CompleteStandardizer a = CompleteStandardizer 146 | { _csDataPoints :: FingerTree (Maybe (Counted a)) (Counted a), 147 | _csCurrentWeight :: Double, 148 | _csResponsiveness :: Double 149 | } 150 | deriving (Show) 151 | 152 | data Counted a = Counted {countedVal :: a, countedWeight :: Double} 153 | deriving (Show) 154 | 155 | instance Ord a => Semigroup (Counted a) where 156 | Counted x xs <> Counted y ys = Counted (min x y) (xs + ys) 157 | 158 | instance Ord a => FingerTree.Measured (Maybe (Counted a)) (Counted a) where 159 | measure = Just 160 | 161 | emptyCompleteStandardizer :: Ord a => Double -> CompleteStandardizer a 162 | emptyCompleteStandardizer responsiveness = 163 | CompleteStandardizer FingerTree.empty 1e-50 responsiveness 164 | 165 | instance Ord a => Standardizer (CompleteStandardizer a) where 166 | type Datum (CompleteStandardizer a) = a 167 | 168 | percentile (CompleteStandardizer dataPoints weight responsiveness) x 169 | | weight > 1e50 = 170 | percentile 171 | ( CompleteStandardizer 172 | ( FingerTree.fmap' 173 | (\(Counted y w) -> Counted y (w / weight * 1e-50)) 174 | dataPoints 175 | ) 176 | 1e-50 177 | responsiveness 178 | ) 179 | x 180 | | otherwise = 181 | case FingerTree.search 182 | (\_ r -> fromMaybe True ((<) <$> Just x <*> fmap countedVal r)) 183 | dataPoints of 184 | FingerTree.Position l (Counted y w) r 185 | | x == y -> 186 | ( CompleteStandardizer 187 | (l <> (Counted y (w + weight) FingerTree.<| r)) 188 | (weight / (1 - responsiveness)) 189 | responsiveness, 190 | (totalWeight l + (w + weight) / 2) 191 | / (weight + totalWeight dataPoints) 192 | ) 193 | | otherwise -> 194 | ( CompleteStandardizer 195 | ( (l FingerTree.|> Counted y w) 196 | <> (Counted x weight FingerTree.<| r) 197 | ) 198 | (weight / (1 - responsiveness)) 199 | responsiveness, 200 | (totalWeight l + w + weight / 2) 201 | / (weight + totalWeight dataPoints) 202 | ) 203 | FingerTree.OnLeft -> 204 | ( CompleteStandardizer 205 | (Counted x weight FingerTree.<| dataPoints) 206 | (weight / (1 - responsiveness)) 207 | responsiveness, 208 | weight / (2 * (weight + totalWeight dataPoints)) 209 | ) 210 | _ -> error "CompleteStandardizer percentile: invariant violation" 211 | where 212 | totalWeight = maybe 0 countedWeight . FingerTree.measure 213 | fromPercentile (CompleteStandardizer points _ _) desiredPercentile = case FingerTree.split enough points of 214 | (_,FingerTree.viewl -> Counted x _ :< _) -> Just x 215 | (FingerTree.viewr -> _ :> Counted x _,_) -> Just x 216 | _ -> Nothing 217 | where desiredWeight = desiredPercentile * maybe 0 countedWeight (FingerTree.measure points) 218 | enough :: Maybe (Counted a) -> Bool 219 | enough x = maybe 0 countedWeight x < desiredWeight 220 | -------------------------------------------------------------------------------- /Probabilities.md: -------------------------------------------------------------------------------- 1 | # Problem overview 2 | 3 | For a given test case, we have a `Generator` and a `PartialKey`. 4 | 5 | - The `Generator` is a tree of decisions. However, unlike a traditional 6 | decision tree, it has `Choice` nodes where only one branch is taken, and also 7 | `Both` nodes, where both branches are taken. 8 | 9 | - The `PartialKey` is a record of the path through the tree. At `Choice` nodes 10 | it records left or right. At `Both` nodes, it splits into two paths: one for 11 | each subtree. 12 | 13 | For example, a generator for the type `Either ((), Bool) ()` would look like 14 | this: 15 | 16 | ``` 17 | Choice 18 | / \ 19 | / \ 20 | / \ 21 | Both Trivial 22 | / \ 23 | / \ 24 | / \ 25 | Trivial Choice 26 | / \ 27 | / \ 28 | / \ 29 | Trivial Trivial 30 | ``` 31 | 32 | These are the three possible total `Key`s: 33 | 34 | ``` 35 | LeftF 36 | / 37 | / 38 | / 39 | BothF 40 | / \ 41 | / \ 42 | / \ 43 | TrivialF LeftF 44 | / 45 | / 46 | / 47 | TrivialF 48 | ``` 49 | 50 | ``` 51 | LeftF 52 | / 53 | / 54 | / 55 | BothF 56 | / \ 57 | / \ 58 | / \ 59 | TrivialF RightF 60 | \ 61 | \ 62 | \ 63 | TrivialF 64 | ``` 65 | 66 | ``` 67 | RightF 68 | \ 69 | \ 70 | \ 71 | TrivialF 72 | ``` 73 | 74 | Consider this hypothetical key. The node labeled "-" is the point at which the 75 | key was no longer demanded, so this is essentially the leaf of the path. There 76 | is no point in updating nodes lower than that one. 77 | 78 | ``` 79 | o p_Left = 50% 80 | / 81 | / 82 | o p_Left = 50% 83 | \ 84 | \ 85 | o p_Left = 50% 86 | / 87 | / 88 | - 89 | ``` 90 | 91 | Currently, this key is chosen with a 12.5% probability. We will observe the 92 | test and determine its *benefit*. (The current idea is to base the benefit on 93 | code coverage from hpc, but this calculation makes no assumptions about the 94 | nature of the benefit.) If the benefit is greater than we have seen when 95 | testing with other keys, then we want to increase the probability of generating 96 | keys similar to this one. If it is less than we have typically seen, though, 97 | then we want to decrease the probability of generating keys like this one. 98 | 99 | # Choosing a new target probability for a key 100 | 101 | Bayes' Law tells us that for any events A and B, P(A) * P(B|A) = P(B) * P(A|B). 102 | Consider the following events for a test with a random key: 103 | 104 | * A = a test uses the observed `PartialKey` 105 | * B = a test yields at least the observed benefit 106 | 107 | We take P(B|A) to be 50% by assumption. Although we observed the benefit and 108 | know its exact value, it is right on the line and it makes sense to assign half 109 | credit to preserve continuity. 110 | 111 | P(A) is computed exactly as the product of the prior probabilities associated 112 | with all choice nodes of the `Generator` that were forced by the test. 113 | 114 | P(B|~A) is estimated by comparing the observed benefit with typical benefit. 115 | The `Quantiler` class abstracts over various techniques for estimating this 116 | probability. It can be done with statistical methods (e.g., exponentially 117 | weighted moving average) or by storing the entire set of past benefits. 118 | Whatever the method for estimating P(B|~A), P(B) is then calculated easily: 119 | P(B) = P(A) * P(B|A) + (1 - P(A)) * P(B|~A). 120 | 121 | Applying Bayes' Law, we can now estimate P(A|B), the conditional probability of 122 | using the observed key given that a test yields at least the observed benefit. 123 | In a perfect world, we would adjust the `Generator` so that it generates the 124 | observed `PartialKey` with this probability (and therefore other similar values 125 | more or less often as well). However, because the chosen value for P(B) was 126 | only an estimate, it behooves us to be more careful, and simply adjust in the 127 | direction of this target probability by some configurable step size. 128 | 129 | For example, using the key above with a prior probability of 12.5%. suppose we 130 | estimate P(B|~A) = 30%. This was an above average example. Then: 131 | 132 | * P(B|A) = 50% 133 | * P(A) = 0.5 * 0.5 * 0.5 = 12.5% 134 | * P(B|~A) = 30% 135 | * P(B) = 0.125 * 0.5 + 0.875 * 0.3 = 32.5% 136 | * P(A|B) = 0.5 * 0.125 / 0.325 = 19.23% 137 | 138 | As expected, the probability of this key was increased to reflect that it does 139 | better than the previous average. On the other hand, if we estimate P(B|~A) = 140 | 70%, making this a below average key, then: 141 | 142 | * P(B|A) = 50% 143 | * P(A) = 0.5 * 0.5 * 0.5 = 12.5% 144 | * P(B|~A) = 70% 145 | * P(B) = 0.125 * 0.5 + 0.875 * 0.7 = 67.5% 146 | * P(A|B) = 0.5 * 0.125 / 0.675 = 9.26% 147 | 148 | The below average key had its probability decreased. Finally, suppose the 149 | observed benefit is remarkably high, so P(B|~A) is only 1%. Now: 150 | 151 | * P(B|A) = 50% 152 | * P(A) = 0.5 * 0.5 * 0.5 = 12.5% 153 | * P(B|~A) = 1% 154 | * P(B) = 0.125 * 0.5 + 0.875 * 0.01 = 7.125% 155 | * P(A|B) = 0.5 * 0.125 / 0.07125 = 87.72% 156 | 157 | Do we really want an 88% chance of choosing this specific key? No! In fact, 158 | this would just result in choosing the same key over and over again, and the 159 | test harness would skip these redundant tests. The issue here is that when 160 | P(B|~A) is extreme, it's more likely that the estimate of P(B|~A) is wrong than 161 | that the chosen key is really that great. This illustrates why we might choose 162 | a smaller step size to move in the direction of the target probability without 163 | stepping all the way there in a single iteration. 164 | 165 | # How to adjust the target key probability 166 | 167 | Recall the example key from above. 168 | 169 | ``` 170 | o p_Left = 50% 171 | / 172 | / 173 | o p_Left = 50% 174 | \ 175 | \ 176 | o p_Left = 50% 177 | / 178 | / 179 | - 180 | ``` 181 | 182 | The probability of choosing this key is 12.5%. Suppose we want to adjust that 183 | probability to a new target, such as 20%. We must accomplish this by adjusting 184 | the `p_Left` value at each individual node of the tree. There are, of course, 185 | many ways that we could change these three probabilities so that they have the 186 | desired product. Which shall we choose? 187 | 188 | We seek to make the desired adjustment by changing the probabilities of only 189 | those `Choice` nodes in the generator that were reached by the given test. 190 | (Notably, a `Choice` node is reached if and only if it appears in the generated 191 | `PartialKey`. A node that was reached to generate the total test input, but 192 | whose choice was never observed by the test itself, should not be adjusted.) 193 | The overall probability of the key is the product of the probabilities of making 194 | the observed choice at all of those choice nodes. Thus, we decompose the 195 | probability into `p = p_1 * p_2 * ... * p_n`, where `p_i` is the probability 196 | of making the choice at the `i`th `Choice` node. We can only directly modify 197 | the various `p_i` to have the desired cumulative effect on `p`. 198 | 199 | We can reason from the extremal case to see how to do this. If we wanted 200 | `p = 1` (multiplying it by a factor of `k = 1 / p`), then clearly all `p_i` must 201 | be changed to 1 as well. Then `p_1` should be multiplied by `1 / p_1`, which is 202 | `k ^ (log p_1 / log p)`, and `p_2` should be multiplied by `1 / p_2`, which is 203 | `k ^ (log p_2 / log p)`, and so on. Back in the general case, we can check that 204 | these same exponents still work to multiply `p` by an arbitrary factor `k`, so 205 | long as `p` is strictly between 0 and 1. 206 | 207 | (If `p = 0`, then it's impossible that we observed this key at all, so we can 208 | dispense with this case. If `p = 1`, then we will not adjust the probability. 209 | In practice, this means that the test never observed the result of a 210 | non-trivial `Choice` node at all. We treat `Choice` nodes with a probability 211 | equal to 1 as trivial. The approach described here ignores them, since the log 212 | of their probability is 0.) 213 | 214 | In the example above, the probability of the key is 12.5%. Suppose we want to 215 | adjust it to 25%, so `k = 2`. Then using base 2 logarithms for easier math, we 216 | have: 217 | 218 | * `p_1 = p_2 = p_3 = 0.5` 219 | * `log p = -3`, while `log p_1 = log p_2 = log p_3 = -1`. 220 | * All three `Choice` nodes have their selected direction multiplied by 221 | `2 ^ (1/3)`, the cube root of 2. 222 | * The resulting combined probability is multiplied by 2 as a result. 223 | 224 | On the other hand, suppose the three nodes had probabilities of `0.1`, `0.2`, 225 | and `0.3`. Then `p = 0.1 * 0.2 * 0.3 = 0.006`. Now if we wanted to multiply 226 | its probability by `k`, we would have: 227 | 228 | * `log p ≈ -7.38`, `log p_1 ≈ -3.32`, `log p_2 ≈ -2.32`, and `log p_3 ≈ -1.74` 229 | * `p_1` is multiplied by `k ^ 0.45` 230 | * `p_2` is multiplied by `k ^ 0.31` 231 | * `p_3` is multiplied by `k ^ 0.24` 232 | * These exponents sum to 1, so the combined probability is multiplied by `k`. 233 | -------------------------------------------------------------------------------- /test/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DataKinds #-} 2 | {-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} 3 | {-# OPTIONS_GHC -Wno-orphans #-} 4 | 5 | module Main where 6 | 7 | import Control.Exception (evaluate) 8 | import Data.ByteString qualified as B 9 | import Data.Foldable (foldl') 10 | import Data.Functor.Identity (Identity (..)) 11 | import Data.List (nub, sort) 12 | import Data.Map qualified as M 13 | import Data.Word (Word8) 14 | import Demo 15 | import GHC.Generics (Generic) 16 | import System.Random (mkStdGen) 17 | import System.Random.Shuffle (shuffle') 18 | import Test.Hspec 19 | ( Spec, 20 | describe, 21 | example, 22 | hspec, 23 | it, 24 | shouldBe, 25 | shouldReturn, 26 | shouldSatisfy, 27 | ) 28 | import Test.Hspec.QuickCheck (prop) 29 | import Test.QuickCheck 30 | ( Arbitrary (..), 31 | Gen, 32 | choose, 33 | elements, 34 | forAll, 35 | genericShrink, 36 | listOf, 37 | oneof, 38 | (===), 39 | (==>), 40 | ) 41 | import Trynocular.Generable (Generable (..), genGeneric) 42 | import Trynocular.Generator 43 | ( Generator, 44 | adjustProbability, 45 | fromKey, 46 | keyProbability, 47 | keys, 48 | values, 49 | ) 50 | import Trynocular.Key 51 | ( Key, 52 | KeyF (..), 53 | PartialKey, 54 | partialKeys, 55 | spy, 56 | subsumes, 57 | totalKey, 58 | ) 59 | import Trynocular.PartialKeySet qualified as PartialKeySet 60 | import Trynocular.Standardizer 61 | ( Standardizer (..), 62 | betaStandardizer, 63 | emptyCompleteStandardizer, 64 | normalStandardizer, 65 | ) 66 | import Trynocular.TestHarness (smartCheck) 67 | 68 | data Foo 69 | = Foo1 String Word 70 | | Foo2 [Integer] !Bool 71 | | Foo3 Foo Foo 72 | deriving (Generic, Eq, Show) 73 | 74 | instance Generable Foo 75 | 76 | generatorSpec :: Spec 77 | generatorSpec = do 78 | it "generates unit values" $ values genAny `shouldBe` [()] 79 | 80 | it "generates bool values" $ values genAny `shouldBe` [False, True] 81 | 82 | it "generates words" $ 83 | sort (values genAny) `shouldBe` ([0 .. 255] :: [Word8]) 84 | 85 | it "generates lists" $ 86 | take 5 (values genAny) 87 | `shouldBe` [[], [()], [(), ()], [(), (), ()], [(), (), (), ()]] 88 | 89 | it "generates ADTs" $ do 90 | let isFoo1 x = case x of (Foo1 _ _) -> True; _ -> False 91 | isFoo2 x = case x of (Foo2 _ _) -> True; _ -> False 92 | isFoo3 x = case x of (Foo3 _ _) -> True; _ -> False 93 | vals = take 100 (values genAny) 94 | 95 | any isFoo1 vals `shouldBe` True 96 | any isFoo2 vals `shouldBe` True 97 | any isFoo3 vals `shouldBe` True 98 | nub vals `shouldBe` vals 99 | 100 | describe "adjustProbability" $ do 101 | prop "reaches the desired probability" $ do 102 | let gen = genAny :: Generator [Bool] 103 | forAll (elements (take 100 (keys gen))) $ \tkey -> 104 | forAll (elements (partialKeys tkey)) $ \pkey -> 105 | forAll (choose (0, 1)) $ \target -> 106 | let p0 = keyProbability gen pkey 107 | p = keyProbability (adjustProbability pkey target gen) pkey 108 | in p0 < 1 ==> abs (p - target) < 1e-10 109 | 110 | spySpec :: Spec 111 | spySpec = do 112 | let getDemand :: forall a b. Generable a => Key -> (a -> b) -> IO PartialKey 113 | getDemand origKey f = 114 | fst <$> spy origKey (\key -> evaluate (f (fromKey genAny key))) 115 | 116 | describe "Just ()" $ do 117 | let key = Identity (RightF (Identity TrivialF)) 118 | 119 | it "has the correct key" $ do 120 | fromKey genAny key `shouldBe` Just () 121 | 122 | -- Demand lattice: 123 | -- 124 | -- Just () 125 | -- 126 | -- | 127 | -- 128 | -- Just ⊥ 129 | -- 130 | -- | 131 | -- 132 | -- ⊥ 133 | 134 | it "reports demand" $ do 135 | getDemand @(Maybe ()) key (\_ -> ()) `shouldReturn` Nothing 136 | 137 | it "reports demand" $ do 138 | getDemand @(Maybe ()) key (\(Just _) -> ()) 139 | `shouldReturn` Just (RightF Nothing) 140 | 141 | it "reports demand" $ do 142 | getDemand @(Maybe ()) key (\(Just ()) -> ()) 143 | `shouldReturn` Just (RightF (Just TrivialF)) 144 | 145 | describe "ADT" $ do 146 | let strKey = head . keys $ (genAny @String) 147 | let wordKey = head . keys $ (genAny @Word) 148 | let integerListKey = head . keys $ (genAny @[Integer]) 149 | let boolKey = head . keys $ (genAny @Bool) 150 | let subFooKey = head . keys $ (genAny @Foo) 151 | 152 | describe "Foo1 \"\" 0" $ do 153 | let key = Identity (LeftF (Identity (BothF strKey wordKey))) 154 | 155 | it "has the correct key" $ do 156 | fromKey genAny key `shouldBe` Foo1 "" 0 157 | 158 | -- Demand lattice: 159 | -- 160 | -- Foo1 "" 0 161 | -- 162 | -- / \ 163 | -- / \ 164 | -- 165 | -- Foo1 ⊥ 0 Foo1 "" ⊥ 166 | -- 167 | -- \ / 168 | -- \ / 169 | -- 170 | -- Foo1 ⊥ ⊥ 171 | -- 172 | -- | 173 | -- | 174 | -- 175 | -- ⊥ 176 | 177 | it "reports demand" $ do 178 | getDemand @Foo key (\_ -> ()) `shouldReturn` Nothing 179 | 180 | it "reports demand" $ do 181 | getDemand @Foo key (\(Foo1 _ _) -> ()) 182 | `shouldReturn` Just 183 | (LeftF (Just (BothF Nothing Nothing))) 184 | 185 | it "reports demand" $ do 186 | getDemand @Foo key (\(Foo1 "" _) -> ()) 187 | `shouldReturn` Just 188 | (LeftF (Just (BothF (totalKey strKey) Nothing))) 189 | 190 | it "reports demand" $ do 191 | getDemand @Foo key (\(Foo1 _ 0) -> ()) 192 | `shouldReturn` Just 193 | (LeftF (Just (BothF Nothing (totalKey wordKey)))) 194 | 195 | it "reports demand" $ do 196 | getDemand @Foo key (\(Foo1 "" 0) -> ()) 197 | `shouldReturn` Just 198 | (LeftF (Just (BothF (totalKey strKey) (totalKey wordKey)))) 199 | 200 | describe "Foo2 []" $ do 201 | let key = 202 | Identity 203 | ( RightF 204 | (Identity (LeftF (Identity (BothF integerListKey boolKey)))) 205 | ) 206 | 207 | it "has the correct key" $ do 208 | fromKey genAny key `shouldBe` Foo2 [] False 209 | 210 | -- Demand lattice (the Bool field is strict!): 211 | -- 212 | -- Foo2 [] False 213 | -- 214 | -- | 215 | -- | 216 | -- 217 | -- Foo2 ⊥ False 218 | -- 219 | -- | 220 | -- | 221 | -- 222 | -- ⊥ 223 | 224 | it "reports demand" $ do 225 | getDemand @Foo key (\_ -> ()) `shouldReturn` Nothing 226 | 227 | it "reports demand" $ do 228 | getDemand @Foo key (\(Foo2 _ _) -> ()) 229 | `shouldReturn` Just 230 | ( RightF 231 | ( Just 232 | ( LeftF 233 | ( Just 234 | ( BothF 235 | Nothing 236 | (totalKey boolKey) -- Strict field 237 | ) 238 | ) 239 | ) 240 | ) 241 | ) 242 | 243 | it "reports demand" $ do 244 | getDemand @Foo key (\(Foo2 [] _) -> ()) 245 | `shouldReturn` Just 246 | ( RightF 247 | ( Just 248 | ( LeftF 249 | ( Just 250 | ( BothF 251 | (totalKey integerListKey) 252 | (totalKey boolKey) -- Strict field 253 | ) 254 | ) 255 | ) 256 | ) 257 | ) 258 | 259 | describe "Foo3 (Foo1 [] 0) (Foo1 [] 0)" $ do 260 | let key = 261 | Identity 262 | ( RightF 263 | (Identity (RightF (Identity (BothF subFooKey subFooKey)))) 264 | ) 265 | 266 | it "has the correct key" $ do 267 | fromKey genAny key `shouldBe` Foo3 (Foo1 "" 0) (Foo1 "" 0) 268 | 269 | describe "[(), ()]" $ do 270 | let nil f = f (LeftF (f TrivialF)) 271 | let cons f x xs = f (RightF (f (BothF x xs))) 272 | let unit f = f TrivialF 273 | 274 | let key f = cons f (unit f) (cons f (unit f) (nil f)) 275 | 276 | it "has the correct key" $ do 277 | fromKey genAny (key Identity) `shouldBe` [(), ()] 278 | 279 | -- Demand lattice: 280 | -- 281 | -- () : () : [] 282 | -- 283 | -- / | \ 284 | -- / | \ 285 | -- 286 | -- ⊥ : () : [] () : ⊥ : [] () : () : ⊥ 287 | -- 288 | -- | \ / | \ / | 289 | -- | X | X | 290 | -- | / \ | / \ | 291 | -- 292 | -- ⊥ : ⊥ : [] ⊥ : () : ⊥ () : ⊥ : ⊥ 293 | -- 294 | -- \ | / | 295 | -- \ | / | 296 | -- \ | / | 297 | -- \ | / | 298 | -- 299 | -- ⊥ : ⊥ : ⊥ () : ⊥ 300 | -- 301 | -- \ / 302 | -- \ / 303 | -- \ / 304 | -- 305 | -- ⊥ : ⊥ 306 | -- 307 | -- | 308 | -- | 309 | -- 310 | -- ⊥ 311 | 312 | it "reports demand" $ do 313 | getDemand @[()] (key Identity) (\_ -> ()) 314 | `shouldReturn` Nothing 315 | 316 | it "reports demand" $ do 317 | getDemand @[()] (key Identity) (\(_ : _) -> ()) 318 | `shouldReturn` cons Just Nothing Nothing 319 | 320 | it "reports demand" $ do 321 | getDemand @[()] (key Identity) (\(_ : _ : _) -> ()) 322 | `shouldReturn` cons Just Nothing (cons Just Nothing Nothing) 323 | 324 | it "reports demand" $ do 325 | getDemand @[()] (key Identity) (\(() : _) -> ()) 326 | `shouldReturn` cons Just (unit Just) Nothing 327 | 328 | it "reports demand" $ do 329 | getDemand @[()] (key Identity) (\(_ : _ : []) -> ()) 330 | `shouldReturn` cons Just Nothing (cons Just Nothing (nil Just)) 331 | 332 | it "reports demand" $ do 333 | getDemand @[()] (key Identity) (\(_ : () : _) -> ()) 334 | `shouldReturn` cons Just Nothing (cons Just (unit Just) Nothing) 335 | 336 | it "reports demand" $ do 337 | getDemand @[()] (key Identity) (\(() : _ : _) -> ()) 338 | `shouldReturn` cons Just (unit Just) (cons Just Nothing Nothing) 339 | 340 | it "reports demand" $ do 341 | getDemand @[()] (key Identity) (\(() : () : _) -> ()) 342 | `shouldReturn` cons Just (unit Just) (cons Just (unit Just) Nothing) 343 | 344 | it "reports demand" $ do 345 | getDemand @[()] (key Identity) (\(() : _ : []) -> ()) 346 | `shouldReturn` cons Just (unit Just) (cons Just Nothing (nil Just)) 347 | 348 | it "reports demand" $ do 349 | getDemand @[()] (key Identity) (\(_ : () : []) -> ()) 350 | `shouldReturn` cons Just Nothing (cons Just (unit Just) (nil Just)) 351 | 352 | it "reports demand" $ do 353 | getDemand @[()] (key Identity) (\(() : () : []) -> ()) 354 | `shouldReturn` key Just 355 | 356 | instance Arbitrary (KeyF Identity) where 357 | arbitrary = 358 | oneof 359 | [ pure TrivialF, 360 | LeftF <$> arbitrary, 361 | RightF <$> arbitrary, 362 | BothF <$> arbitrary <*> arbitrary 363 | ] 364 | shrink = genericShrink 365 | 366 | instance Arbitrary (KeyF Maybe) where 367 | arbitrary = 368 | oneof 369 | [ pure TrivialF, 370 | LeftF <$> arbitrary, 371 | RightF <$> arbitrary, 372 | BothF <$> arbitrary <*> arbitrary 373 | ] 374 | shrink = genericShrink 375 | 376 | oneOfInfinitelyMany :: [a] -> Gen a 377 | oneOfInfinitelyMany [] = error "oneOfInfinitelyMany: empty list" 378 | oneOfInfinitelyMany [x] = pure x 379 | oneOfInfinitelyMany (x : xs) = oneof [pure x, oneOfInfinitelyMany xs] 380 | 381 | compatibleKeys :: Generator a -> Gen Key 382 | compatibleKeys g = oneOfInfinitelyMany (keys g) 383 | 384 | partialKeySetSpec :: Spec 385 | partialKeySetSpec = do 386 | prop "contains nothing in an empty set" $ 387 | \k -> k `PartialKeySet.member` PartialKeySet.empty === False 388 | 389 | it "contains an exact key" $ do 390 | let k = 391 | Identity 392 | ( BothF 393 | (Identity (LeftF (Identity TrivialF))) 394 | (Identity (RightF (Identity TrivialF))) 395 | ) 396 | let k2 = 397 | Identity 398 | ( BothF 399 | (Identity (RightF (Identity TrivialF))) 400 | (Identity (RightF (Identity TrivialF))) 401 | ) 402 | let k3 = 403 | Identity 404 | ( BothF 405 | (Identity (LeftF (Identity TrivialF))) 406 | (Identity (LeftF (Identity TrivialF))) 407 | ) 408 | let ks = PartialKeySet.singleton (totalKey k) 409 | 410 | k `PartialKeySet.member` ks `shouldBe` True 411 | k2 `PartialKeySet.member` ks `shouldBe` False 412 | k3 `PartialKeySet.member` ks `shouldBe` False 413 | 414 | prop "contains any exact key" $ \(k1 :: Key) (k2 :: Key) -> 415 | k2 `PartialKeySet.member` PartialKeySet.singleton (totalKey k1) 416 | === (k1 == k2) 417 | 418 | prop "contains a subsumed key" $ 419 | let k = 420 | Identity 421 | ( BothF 422 | (Identity (LeftF (Identity TrivialF))) 423 | (Identity (RightF (Identity TrivialF))) 424 | ) 425 | in forAll (elements (partialKeys k)) $ \pk -> 426 | k `PartialKeySet.member` PartialKeySet.singleton pk 427 | 428 | prop "contains any subsumed key" $ \(pk :: PartialKey) (k :: Key) -> 429 | k `PartialKeySet.member` PartialKeySet.singleton pk 430 | `shouldBe` pk `subsumes` k 431 | 432 | prop "contains any multiple exact keys" $ 433 | let g = genAny :: Generator [Bool] 434 | in forAll (listOf (compatibleKeys g)) $ \ks -> 435 | forAll (compatibleKeys g) $ \k -> 436 | k `PartialKeySet.member` PartialKeySet.fromList (totalKey <$> ks) 437 | === (k `elem` ks) 438 | 439 | standardizerSpec :: Spec 440 | standardizerSpec = do 441 | describe "NormalStandardizer" $ do 442 | it "estimates normal distributions based on z score" $ do 443 | -- Responsiveness of 0 ensures the dinitialStandardizeristribution isn't updated. 444 | let standardizer = normalStandardizer 0 1 0 445 | 446 | snd (percentile standardizer 0) `shouldBe` 0.5 447 | snd (percentile standardizer 1) `shouldBe` 0.8413447460685429 448 | snd (percentile standardizer (-1)) `shouldBe` 0.15865525393145707 449 | 450 | describe "BetaStandardizer" $ do 451 | it "estimates a uniform distribution" $ do 452 | -- Uniform distribution has a variance of 1/12. 453 | -- Responsiveness of 0 ensures the distribution isn't updated. 454 | let standardizer = betaStandardizer (0, 1) 0.5 (1 / 12) 0 455 | snd (percentile standardizer 0.00) `shouldBe` 0.00 456 | snd (percentile standardizer 0.25) `shouldBe` 0.25 457 | snd (percentile standardizer 0.50) `shouldBe` 0.50 458 | snd (percentile standardizer 0.75) `shouldBe` 0.75 459 | snd (percentile standardizer 1.00) `shouldBe` 1.00 460 | 461 | it "learns non-zero discrete component at the bottom" $ do 462 | let initialStandardizer = betaStandardizer (0, 1) 0.5 (1 / 12) 0.1 463 | trainedStandardizer = 464 | foldl' ((fst .) . percentile) initialStandardizer (replicate 100 0) 465 | abs (snd (percentile trainedStandardizer 0) - 0.5) `shouldSatisfy` (< 0.001) 466 | 467 | it "learns non-zero discrete component at the top" $ do 468 | let initialStandardizer = betaStandardizer (0, 1) 0.5 (1 / 12) 0.1 469 | trainedStandardizer = 470 | foldl' ((fst .) . percentile) initialStandardizer (replicate 100 1) 471 | abs (snd (percentile trainedStandardizer 1) - 0.5) `shouldSatisfy` (< 0.001) 472 | 473 | describe "CompleteStandardizer" $ do 474 | it "approximates a uniform distribution" $ 475 | example $ do 476 | let input = shuffle' [1 :: Int .. 10000] 10000 (mkStdGen 123) 477 | standardizer = 478 | foldl' 479 | (\s x -> fst (percentile s x)) 480 | (emptyCompleteStandardizer 0.05) 481 | input 482 | 483 | -- Accuracy isn't great here, both because the test modifies the 484 | -- distribution and because we're not generating a lot of values. 485 | abs (snd (percentile standardizer 0) - 0.00) `shouldSatisfy` (< 0.1) 486 | abs (snd (percentile standardizer 2500) - 0.25) `shouldSatisfy` (< 0.1) 487 | abs (snd (percentile standardizer 5000) - 0.50) `shouldSatisfy` (< 0.1) 488 | abs (snd (percentile standardizer 7500) - 0.75) `shouldSatisfy` (< 0.1) 489 | abs (snd (percentile standardizer 10000) - 1.00) `shouldSatisfy` (< 0.1) 490 | 491 | testHarnessSpec :: Spec 492 | testHarnessSpec = do 493 | it "tests bencode" $ 494 | smartCheck genGeneric (\b -> prop_roundtrip b `shouldBe` True) 495 | 496 | describe "tests RBTree" $ do 497 | it "produces valid trees" $ 498 | smartCheck genGeneric $ \(xs :: [Int]) -> 499 | isValid (rbFromList xs) `shouldBe` True 500 | 501 | it "finds nothing in empty set" $ 502 | smartCheck genAny $ \(x :: Int) -> 503 | rbMember rbEmpty x `shouldBe` False 504 | 505 | it "finds first inserted member in tree" $ 506 | smartCheck genGeneric $ \(x :: Int, xs :: [Int]) -> 507 | rbMember (rbFromList (x : xs)) x `shouldBe` True 508 | 509 | it "finds last inserted member in tree" $ 510 | smartCheck genGeneric $ \(x :: Int, xs :: [Int]) -> 511 | rbMember (rbFromList (xs ++ [x])) x `shouldBe` True 512 | 513 | instance (Ord k, Generable k, Generable v) => Generable (M.Map k v) where 514 | genAny = M.fromList <$> genAny 515 | 516 | instance Generable B.ByteString where 517 | genAny = B.pack <$> genAny 518 | 519 | instance Generable Bencode 520 | 521 | main :: IO () 522 | main = hspec $ do 523 | describe "Generator" generatorSpec 524 | describe "spy" spySpec 525 | describe "PartialKeySet" partialKeySetSpec 526 | describe "Standardizer" standardizerSpec 527 | describe "TestHarness" testHarnessSpec 528 | --------------------------------------------------------------------------------