├── Setup.hs ├── .gitignore ├── README.md ├── LICENSE ├── genprog.cabal └── src ├── GenProg ├── GenExpr.hs └── GenExpr │ └── Data.hs └── GenProg.hs /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | cabal-dev 3 | *.o 4 | *.hi 5 | *.chi 6 | *.chs.h 7 | .virthualenv 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | genprog: Genetic Programming Library 2 | ==================================== 3 | 4 | This package provides a *genetic programming* framework. Genetic programming is 5 | an evolutionary technique, inspired by biological evolution, to evolve programs 6 | for solving specific problems. A genetic program is represented by a value of 7 | an algebraic datatype and associated with a custom-defined *fitness* value 8 | indicating the quality of the solution. Starting from a randomly generated 9 | initial population of genetic programs, the genetic operators of *selection*, 10 | *crossover*, and *mutation* are used to evolve programs of increasingly better 11 | quality. 12 | 13 | Check out the documentation and examples on 14 | [Hackage](http://hackage.haskell.org/package/genprog). 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2010, Jan Snajder 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above 13 | copyright notice, this list of conditions and the following 14 | disclaimer in the documentation and/or other materials provided 15 | with the distribution. 16 | 17 | * Neither the name of Jan Snajder nor the names of other 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | -------------------------------------------------------------------------------- /genprog.cabal: -------------------------------------------------------------------------------- 1 | name: genprog 2 | version: 0.1.0.2 3 | synopsis: Genetic programming library 4 | license: BSD3 5 | license-file: LICENSE 6 | author: Jan Snajder 7 | maintainer: jan.snajder@fer.hr 8 | homepage: http://github.com/jsnajder/genprog 9 | copyright: (c) 2010 Jan Snajder 10 | category: AI, Algorithms, Optimisation 11 | stability: Experimental 12 | build-type: Simple 13 | cabal-version: >= 1.8 14 | description: 15 | This package provides a /genetic programming/ framework. Genetic 16 | programming is an evolutionary technique, inspired by biological 17 | evolution, to evolve programs for solving specific problems. A genetic 18 | program is represented by a value of an algebraic datatype and 19 | associated with a custom-defined /fitness/ value indicating the quality 20 | of the solution. Starting from a randomly generated initial population 21 | of genetic programs, the genetic operators of /selection/, /crossover/, 22 | and /mutation/ are used to evolve programs of increasingly better 23 | quality. 24 | 25 | library 26 | exposed-modules: 27 | GenProg, GenProg.GenExpr, GenProg.GenExpr.Data 28 | build-depends: 29 | base == 4.6.*, syb == 0.4.*, syz == 0.2.*, MonadRandom == 0.1.* 30 | hs-source-dirs: src 31 | extensions: 32 | MultiParamTypeClasses, FunctionalDependencies, 33 | NoMonomorphismRestriction, ScopedTypeVariables, FlexibleInstances, 34 | Rank2Types, UndecidableInstances, DeriveDataTypeable 35 | 36 | source-repository head 37 | type: git 38 | location: https://github.com/jsnajder/genprog 39 | -------------------------------------------------------------------------------- /src/GenProg/GenExpr.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Module : GenProg.GenExpr 3 | -- Copyright : (c) 2010 Jan Snajder 4 | -- License : BSD-3 (see the LICENSE file) 5 | -- 6 | -- Maintainer : Jan Snajder 7 | -- Stability : experimental 8 | -- Portability : non-portable 9 | -- 10 | -- An interface to genetically programmable expressions. 11 | -- 12 | ----------------------------------------------------------------------------- 13 | 14 | module GenProg.GenExpr ( 15 | GenExpr (..)) where 16 | 17 | import Control.Monad 18 | 19 | -- | This typeclass defines an interface to expressions 20 | -- that can be genetically programmed. The operations that must be 21 | -- provided by instances of this class are used for the generation 22 | -- of random individuals as well as crossover and mutation operations. 23 | -- (An instance for members of the @Data@ typeclass is provided in 24 | -- "GenProg.GenExpr.Data".) 25 | -- 26 | -- Minimal complete definition: 'exchange', 'nodeMapM', 'nodeMapQ', 27 | -- and 'nodeIndices'. 28 | class GenExpr e where 29 | -- | Exchanges subtrees of two expressions: 30 | -- @exchange e1 n1 e2 n2@ replaces the subexpression of @e1@ rooted in node 31 | -- @n1@ with the subexpression of @e2@ rooted in @n2@, and vice versa. 32 | exchange :: e -> Int -> e -> Int -> (e, e) 33 | -- | Maps a monadic transformation function over the immediate 34 | -- children of the given node. 35 | nodeMapM :: Monad m => (e -> m e) -> e -> m e 36 | -- | Maps a query function over the immediate children of the given 37 | -- node and returns a list of results. 38 | nodeMapQ :: (e -> a) -> e -> [a] 39 | -- | A list of indices of internal (functional) and external 40 | -- (terminal) nodes of an expression. 41 | nodeIndices :: e -> ([Int], [Int]) 42 | -- | Adjusts a subexpression rooted at the given node by applying a 43 | -- monadic transformation function. 44 | adjustM :: (Monad m) => (e -> m e) -> e -> Int -> m e 45 | -- | Number of nodes an expression has. 46 | nodes :: e -> Int 47 | -- | The depth of an expression. Equals 1 for single-node expressions. 48 | depth :: e -> Int 49 | 50 | 51 | -- | Default method (expensive because it calls exchange twice). 52 | adjustM f e n = replace e n `liftM` f (get e n) 53 | where get e n = fst $ exchange e 0 e n 54 | replace e1 n1 e2 = fst $ exchange e1 n1 e2 0 55 | 56 | nodes = (+1) . foldr (+) 0 . nodeMapQ nodes 57 | 58 | depth = (+1) . foldr max 0 . nodeMapQ depth 59 | 60 | -------------------------------------------------------------------------------- /src/GenProg/GenExpr/Data.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Module : GenProg.GenExpr.Data 3 | -- Copyright : (c) 2010 Jan Snajder 4 | -- License : BSD-3 (see the LICENSE file) 5 | -- 6 | -- Maintainer : Jan Snajder 7 | -- Stability : experimental 8 | -- Portability : non-portable 9 | -- 10 | -- Implementation of the @GenProg.GenExpr@ interface for members of 11 | -- the 'Data' typeclass. The implementation is based on SYB and SYZ 12 | -- generic programming frameworks (see 13 | -- and 14 | -- for details). 15 | -- 16 | -- NB: Subexpressions that are candidates for crossover points or 17 | -- mutation must be of the same type as the expression itself, and 18 | -- must be reachable from the root node by type-preserving traversal. 19 | -- See below for an example. 20 | -- 21 | ----------------------------------------------------------------------------- 22 | 23 | {-# LANGUAGE ScopedTypeVariables, FlexibleInstances, Rank2Types, 24 | UndecidableInstances, DeriveDataTypeable #-} 25 | 26 | module GenProg.GenExpr.Data ( 27 | -- | This module re-exports @GenExpr@ typeclass. 28 | GenExpr (..) 29 | -- * Example 30 | -- $Example 31 | ) where 32 | 33 | import Data.Generics 34 | import Data.Generics.Zipper 35 | import Data.Maybe 36 | import Control.Monad 37 | import GenProg.GenExpr 38 | 39 | moduleName = "GenProg.GenExpr.Data" 40 | 41 | instance (Data a) => GenExpr a where 42 | 43 | -- | Exchanges two expression nodes. Works by using two generic 44 | -- zippers and exchanging their holes. 45 | exchange e1 n1 e2 n2 = (fromZipper y1, fromZipper y2) 46 | where z1 = typeMoveForUnsafe n1 $ toZipper e1 47 | z2 = typeMoveForUnsafe n2 $ toZipper e2 48 | (y1,y2) = exchangeHoles z1 z2 49 | 50 | -- | Adjust an expression node. Works by applying a monadic 51 | -- tranformation on a zipper hole. 52 | adjustM f e n = fromZipper `liftM` transM (mkM f) z 53 | where z = typeMoveForUnsafe n (toZipper e) 54 | 55 | nodeMapM f = gmapM (mkM f) 56 | 57 | nodeMapQ q (x::a) = concat $ gmapQ ([] `mkQ` (\(y::a) -> [q y])) x 58 | 59 | nodeIndices = index 0 [] [] . toZipper 60 | 61 | -- Zipper moves 62 | 63 | type Move a = Zipper a -> Maybe (Zipper a) 64 | 65 | backtrack :: (Typeable a) => Move a 66 | backtrack z = do 67 | z2 <- up z 68 | right z2 `mplus` backtrack z2 69 | 70 | repeatM :: (Monad m) => Int -> (a -> m a) -> a -> m a 71 | repeatM 0 _ x = return x 72 | repeatM n f x = f x >>= repeatM (n - 1) f 73 | 74 | -- Moves zipper to next node in DFS order, but does not move down the 75 | -- zipper if node satisfies query 'q'. 76 | nextDfsQ :: Typeable a => GenericQ Bool -> Move a 77 | nextDfsQ q z = (if query q z then Nothing else down' z) 78 | `mplus` right z `mplus` backtrack z 79 | 80 | -- Moves the zipper to node 'n' from current position in DFS order, 81 | -- skipping nodes not satisfying query 'q2' and descending only down 82 | -- the nodes satisfying query 'q1'. 83 | moveForQ :: (Typeable a) => GenericQ Bool -> GenericQ Bool -> Int -> Move a 84 | moveForQ _ _ 0 z = Just z 85 | moveForQ q1 q2 n z = do 86 | z2 <- nextDfsQ q1 z 87 | moveForQ q1 q2 (if query q2 z2 then n - 1 else n) z2 88 | 89 | -- Moves the zipper to node 'n' from current position in DFS order, 90 | -- counting only nodes of type 'a', and not descending down the nodes 91 | -- of other type. 92 | typeMoveFor :: (Typeable a) => Int -> Move a 93 | typeMoveFor n (z::Zipper a) = 94 | moveForQ (True `mkQ` (\(_::a) -> False)) (False `mkQ` (\(_::a) -> True)) n z 95 | 96 | -- | Same as typeMoveFor, but throws an error if node index is out of 97 | -- bound. 98 | typeMoveForUnsafe :: (Typeable a) => Int -> Zipper a -> Zipper a 99 | typeMoveForUnsafe n z = fromMaybe 100 | (error $ moduleName ++ ".typeMoveForUnsafe: Nonexisting node.") 101 | (typeMoveFor n z) 102 | 103 | -- | Exchanges two zipper holes. 104 | exchangeHoles :: (Data a) => Zipper a -> Zipper a -> (Zipper a, Zipper a) 105 | exchangeHoles (z1::Zipper a) (z2::Zipper a) = (y1,y2) 106 | where Just h1 = getHole z1 :: Maybe a 107 | Just h2 = getHole z2 :: Maybe a 108 | y1 = setHole h2 z1 109 | y2 = setHole h1 z2 110 | 111 | index :: (Data a) => Int -> [Int] -> [Int] -> Zipper a -> ([Int], [Int]) 112 | index i is es (z :: Zipper a) = 113 | maybe (is2,es2) (index (i + 1) is2 es2) (typeMoveFor 1 z) 114 | where Just h = getHole z :: Maybe a 115 | (is2,es2) = if terminalQ h then (is,i:es) else (i:is,es) 116 | 117 | terminalQ :: (Data a) => a -> Bool 118 | terminalQ = null . nodeMapQ id 119 | 120 | {- $Example 121 | 122 | Suppose you have a datatype defined as 123 | 124 | @ 125 | data E = A E E 126 | | B String [E] 127 | | C 128 | deriving (Eq,Show,Typeable,Data) 129 | @ 130 | 131 | and an expression defined as 132 | 133 | @ 134 | e = A (A C C) (B \"abc\" [C,C]) 135 | @ 136 | 137 | The subexpressions of a @e@ are considered to be only the subvalues of 138 | @e@ that are of the same type as @e@. Thus, the number of nodes of 139 | expression @e@ is 140 | 141 | >>> nodes e 142 | 5 143 | 144 | because subvalues of node @B@ are of different type than expression 145 | @e@ and therefore not considered as subexpressions. 146 | 147 | Consequently, during a genetic programming run, subexpressions that 148 | are of a different type than the expression itself, or subexpression 149 | that cannot be reached from the root node by a type-preserving 150 | traversal, cannot be chosen as crossover points nor can they be 151 | mutated. 152 | 153 | -} 154 | -------------------------------------------------------------------------------- /src/GenProg.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Module : GenProg 3 | -- Copyright : (c) 2010 Jan Snajder 4 | -- License : BSD-3 (see the LICENSE file) 5 | -- 6 | -- Maintainer : Jan Snajder 7 | -- Stability : experimental 8 | -- Portability : non-portable 9 | -- 10 | -- The Genetic Programming Library. 11 | -- 12 | -- /Genetic programming/ is an evolutionary optimization technique 13 | -- inspired by biological evolution. It is similar to /genetic algorithms/ 14 | -- except that the individual solutions are programs (or, more generally, 15 | -- /expressions/) representing a solution to a given problem. A genetic 16 | -- program is represented as an /abstract syntax tree/ and associated 17 | -- with a custom-defined /fitness/ value indicating the quality of the 18 | -- solution. Starting from a randomly generated initial population of 19 | -- genetic programs, the genetic operators of /selection/, /crossover/, 20 | -- and (occasionally) /mutation/ are used to evolve programs of 21 | -- increasingly better quality. 22 | -- 23 | -- Standard reference is: John Koza. /Genetic programming:/ 24 | -- /On the Programming of Computers by Means of Natural Selection/. 25 | -- MIT Press, 1992. 26 | -- 27 | -- In GenProg, a genetic program is represented by a value of an 28 | -- algebraic datatype. To use a datatype as a genetic program, it 29 | -- suffices to define it as an instance of the 'GenProg' typeclass. 30 | -- A custom datatype can be made an instance of the 'GenProg' 31 | -- typeclass, provided it is an instance of the 'Data' typeclass (see 32 | -- "GenProg.GenExpr.Data"). 33 | -- 34 | -- An example of how to use this library is given below. 35 | -- 36 | ----------------------------------------------------------------------------- 37 | 38 | {-# LANGUAGE MultiParamTypeClasses, FunctionalDependencies, 39 | NoMonomorphismRestriction #-} 40 | 41 | module GenProg ( 42 | -- * Genetic programs 43 | GenProg (..), 44 | -- * Expressions 45 | generateFullExpr, 46 | generateGrownExpr, 47 | depth, 48 | nodes, 49 | -- * Individuals 50 | Ind, 51 | unInd, 52 | mkInd, 53 | aFitness, 54 | sFitness, 55 | -- * Population 56 | Pop, 57 | unPop, 58 | mkPop, 59 | generatePop, 60 | replenishPop, 61 | mergePop, 62 | best, 63 | avgFitness, 64 | avgDepth, 65 | avgNodes, 66 | -- * Genetic operators 67 | -- | The following functions are not meant to be used directly. 68 | -- They are exposed for debugging purposes. 69 | crossoverInd, 70 | mutateInd, 71 | crossoverPop, 72 | mutatePop, 73 | -- * Evolution state 74 | EvolState (..), 75 | -- * Control parameters 76 | Fitness, 77 | Mutate, 78 | defaultMutation, 79 | Terminate, 80 | tSuccess, 81 | tFitness, 82 | tGeneration, 83 | EvolParams (..), 84 | defaultEvolParams, 85 | -- * Evolution 86 | evolve, 87 | evolveFrom, 88 | evolveTrace, 89 | evolveTraceFrom 90 | -- * Example 91 | -- $Example 92 | ) where 93 | 94 | import Data.List 95 | import Data.Ord 96 | import Data.Maybe 97 | import Control.Monad 98 | import Control.Monad.Random 99 | import GenProg.GenExpr 100 | 101 | -- | A typeclass defining a genetic program interface. Datatypes @e@ 102 | -- that are to be used as genetic programs must be instances of the 103 | -- 'GenExpr' typeclass and must implement this interface. 104 | class (Eq e, GenExpr e, MonadRandom m) => GenProg m e | e -> m where 105 | -- | Generates a random terminal @T@. 106 | terminal :: m e 107 | -- | Generates a random nonterminal (functional) node @F(T,...,T)@ whose 108 | -- arguments are again terminals (this condition is not verified). 109 | nonterminal :: m e 110 | 111 | ----------------------------------------------------------------------------- 112 | -- Expressions 113 | 114 | -- | Generates a random expression of a given maximum depth. 115 | generateExpr :: (GenProg m e) => m e -> Int -> m e 116 | generateExpr g d 117 | | d < 1 = error "GenProg.generateExpr: Invalid expression depth" 118 | | otherwise = nonterminal >>= step (d - 1) 119 | where step 0 _ = terminal 120 | step d e = nodeMapM (const g >=> step (d - 1)) e 121 | 122 | -- | Generates a random expression fully expanded to the specified depth. 123 | generateFullExpr :: (GenProg m e) => Int -> m e 124 | generateFullExpr = generateExpr nonterminal 125 | 126 | -- | Generates a random expression of limited depth. The maximum depth of 127 | -- the resulting expression may be less than the specified depth 128 | -- limit, and paths may be of different length. 129 | generateGrownExpr :: (GenProg m e) => Int -> m e 130 | generateGrownExpr d = do 131 | t <- getRandom 132 | generateExpr (if t then terminal else nonterminal) d 133 | 134 | ----------------------------------------------------------------------------- 135 | -- Individuals 136 | 137 | -- | A genetically programmed individual, representing a basic unit 138 | -- of evolution. (Basically a wrapper around a genetically programmable 139 | -- expression.) 140 | data Ind e = Ind { 141 | -- | Returns the expression wrapped by an individual. 142 | unInd :: e, 143 | -- | Adjusted fitness of an individual. Adjusted fitness equals 144 | -- @1/(1+s)@, where @s@ is the standardized fitness as computed by 145 | -- 'fitness'. To reduce computational costs, this value is computed 146 | -- only once and then cached. 147 | aFitness :: Double, 148 | -- The indices of inner (functional) nodes of an individual's expression. 149 | iNodes :: [Int], 150 | -- The indices of external (terminal) nodes of an individual's expression. 151 | eNodes :: [Int] } 152 | deriving (Show) 153 | 154 | instance (Eq e) => Eq (Ind e) where 155 | i1 == i2 = unInd i1 == unInd i2 156 | 157 | instance (Eq e) => Ord (Ind e) where 158 | compare = comparing aFitness 159 | 160 | -- | Wraps an expression into an individual. 161 | mkInd :: (GenProg m e) => Fitness e -> e -> Ind e 162 | mkInd f e = Ind e (adjust $ f e) fs ts 163 | where (fs,ts) = nodeIndices e 164 | 165 | -- Adjusts fitness. 166 | adjust :: Double -> Double 167 | adjust f = 1 / (1 + max 0 f) 168 | 169 | -- Unadjusts fitness (the inverse of adjustFitness). 170 | unadjust :: Double -> Double 171 | unadjust f = 1 / f - 1 172 | 173 | -- | Standardized fitness of an individual as computed by 'fitness' 174 | sFitness :: Ind e -> Double 175 | sFitness = unadjust . aFitness 176 | 177 | ----------------------------------------------------------------------------- 178 | -- Population 179 | 180 | -- | A population of individuals. (Basically a wrapper around a list of 181 | -- individuals.) 182 | data Pop e = Pop 183 | { unPop :: [Ind e] -- ^ Unwraps a population. 184 | , dist_ :: [Double] -- ^ Fitness distribution. 185 | } deriving (Show, Eq) 186 | 187 | -- | Wraps a list of individuals into a population. 188 | mkPop :: [Ind e] -> Pop e 189 | mkPop is = Pop is ds 190 | where ds = map snd . distribution $ 191 | map (\i -> (unInd i, aFitness i)) is 192 | 193 | -- | Generate population of given size and given depth limit using 194 | -- /ramped half-and-half/ method (Koza, 1992): for each depth value from 0 to 195 | -- the initial depth limit 'iDepth', 50% of individuals are generated using 196 | -- 'generateFullExpr' and 50% are generated using 197 | -- 'generateGrownExpr'. Afterwards, duplicates are removed, thus the 198 | -- size of the resulting population may actually be less than the 199 | -- specified size. 200 | generatePop :: (GenProg m e) => EvolParams m e -> m (Pop e) 201 | generatePop p 202 | | s < 2 || n==0 = error "GenProg.generatePop: Invalid population size" 203 | | otherwise = do 204 | iss <- forM [2..di] $ \i -> do 205 | is1 <- replicateM n (mkInd (fitness p) `liftM` generateFullExpr di) 206 | is2 <- replicateM n (mkInd (fitness p) `liftM` generateGrownExpr di) 207 | return $ is1 ++ is2 208 | return . mkPop . nub $ concat iss 209 | where n = s `div` (2 * (di - 1)) 210 | s = popSize p 211 | di = iDepth p 212 | 213 | -- | Replenishes a population up to 'popSize' by randomly 214 | -- generating new individuals. 215 | replenishPop :: (GenProg m e) => EvolParams m e -> Pop e -> m (Pop e) 216 | replenishPop p pop1 = do 217 | pop2 <- generatePop p 218 | return . mkPop $ unPop pop1 ++ drop s (unPop pop2) 219 | where s = length $ unPop pop1 220 | 221 | -- | Merges two populations by taking 'popSize' best-fitted individuals 222 | -- from the union of the two populations. 223 | mergePop :: (GenProg m e) => EvolParams m e -> Pop e -> Pop e -> Pop e 224 | mergePop p pop1 pop2 = mkPop $ take (popSize p) is 225 | where is = sortBy (flip $ comparing aFitness) $ unPop pop1 ++ unPop pop2 226 | 227 | -- | Population's best-fitted individual. 228 | best :: Pop e -> Ind e 229 | best = maximumBy (comparing aFitness) . unPop 230 | 231 | avg :: (Fractional a) => [a] -> a 232 | avg xs = sum xs / realToFrac n 233 | where n = length xs 234 | 235 | -- | Population's average standardized fitness. 236 | avgFitness :: Pop e -> Double 237 | avgFitness = avg . map (unadjust . aFitness) . unPop 238 | 239 | -- | Average depth of expressions in the population. 240 | avgDepth :: (GenProg m e) => Pop e -> Double 241 | avgDepth = avg . map (realToFrac . depth . unInd) . unPop 242 | 243 | -- | Average number of expression nodes in the population. 244 | avgNodes :: (GenProg m e) => Pop e -> Double 245 | avgNodes = avg . map (realToFrac . nodes . unInd) . unPop 246 | 247 | ----------------------------------------------------------------------------- 248 | -- Genetic operators 249 | 250 | -- Selects at random an index of an expression node. Functional 251 | -- (internal) nodes are selected with probability 'pci', whereas 252 | -- terminal (external) nodes are selecred with probability '1-pi'. 253 | selectNode :: (GenProg m e, MonadRandom m) => Double -> Ind e -> m Int 254 | selectNode pi i 255 | | null $ iNodes i = oneof $ eNodes i 256 | | otherwise = choice pi (oneof $ iNodes i) (oneof $ eNodes i) 257 | 258 | -- | Crossover operation of two individuals, resulting in two 259 | -- offsprings. Crossover is performed by choosing at random two nodes 260 | -- in each expressions, and then by exchanging the subexpressions 261 | -- rooted at these nodes between the two individuals. The probability 262 | -- that an internal (functional) node is chosen as crossover point is 263 | -- set by the 'ciProb' parameter in 'EvolParams', whereas the 264 | -- probability that an external (terminal) node is chosen equals 265 | -- @1-ciProb@. Among internal and external nodes, nodes are chosen 266 | -- uniformly at random. If the depth of a created offspring exceeds 267 | -- the depth limit 'cDepth' specified by evolution parameters 268 | -- 'EvolParams', that offspring is discarded and a parent is 269 | -- reproduced (i.e., copied as-is). 270 | crossoverInd :: (GenProg m e) => 271 | EvolParams m e -> Ind e -> Ind e -> m (Ind e, Ind e) 272 | crossoverInd p i1 i2 = do 273 | n1 <- selectNode (ciProb p) i1 274 | n2 <- selectNode (ciProb p) i2 275 | let (r1,r2) = exchange (unInd i1) n1 (unInd i2) n2 276 | return (if depth r1 <= cDepth p then mkInd (fitness p) r1 else i1, 277 | if depth r2 <= cDepth p then mkInd (fitness p) r2 else i2) 278 | 279 | -- | Mutates an individual by applying the mutation function @mutate@ 280 | -- to a randomly selected node. The probability that an internal 281 | -- (functional) node is chosen for muration is set by the 'miProb' 282 | -- parameter in 'EvolParams', whereas the probability that an external 283 | -- (terminal) node is chosen equals @1-miProb@. Among internal and 284 | -- external nodes, nodes are chosen uniformly at random. If the depth 285 | -- of the mutated expression exceeds the depth limit 'cDepth' 286 | -- specified by evolution parameters 'EvolParams', the individual is 287 | -- left unaltered. 288 | mutateInd :: (GenProg m e) => EvolParams m e -> Ind e -> m (Ind e) 289 | mutateInd p i = do 290 | n <- selectNode (miProb p) i 291 | e2 <- adjustM (mutate p) e1 n 292 | return . mkInd (fitness p) $ if depth e2 <= cDepth p then e2 else e1 293 | where e1 = unInd i 294 | 295 | -- Discrete distribution. 296 | type Distribution a = [(a, Double)] 297 | 298 | -- Computes distribution from a weighted list. 299 | -- The weights need not sum to 1. 300 | distribution :: [(a, Double)] -> Distribution a 301 | distribution xs = [(x,f i) | ((x,_),i) <- zip xs [1..]] 302 | where f i = sum . map snd $ take i ys 303 | s = sum $ map snd xs 304 | ys = map (\(x, w) -> (x, w/s)) xs 305 | 306 | -- Samples a value from a discrete distribution. 307 | choose :: (MonadRandom m) => Distribution a -> m a 308 | choose xs = do 309 | p <- getRandomR (0,1) 310 | return . fst . fromJust $ find ((>= p) . snd) xs 311 | 312 | -- Chose first action with probability 'p' and second with probability 313 | -- 1-p. 314 | choice :: (MonadRandom m) => Double -> m a -> m a -> m a 315 | choice p a1 a2 = do 316 | r <- getRandomR (0,1) 317 | if r <= p then a1 else a2 318 | 319 | oneof :: (MonadRandom m) => [a] -> m a 320 | oneof xs = (xs!!) `liftM` getRandomR (0,length xs-1) 321 | 322 | -- Fitness-proportionate selection of an individual from a population. 323 | selectInd :: (MonadRandom m) => Pop e -> m (Ind e) 324 | selectInd pop = choose (zip (unPop pop) (dist_ pop)) 325 | 326 | reproducePop :: (MonadRandom m) => Pop e -> m (Ind e) 327 | reproducePop = selectInd 328 | 329 | -- | Applies crossover to two randomly chosen individuals from a 330 | -- population. The probability of an individual being chosen as parent 331 | -- is fitness-proportionate (individuals with better fitness have 332 | -- better chanches of being chosen for crossover). 333 | crossoverPop :: (GenProg m e) => EvolParams m e -> Pop e -> m (Ind e,Ind e) 334 | crossoverPop p pop = do 335 | i1 <- selectInd pop 336 | i2 <- selectInd pop 337 | crossoverInd p i1 i2 338 | 339 | -- | Applies mutation operation to individuals from a population. The 340 | -- probability of mutating each individual is determined by 'mProb' parameter 341 | -- from 'EvalParams'. 342 | mutatePop :: (GenProg m e) => EvolParams m e -> Pop e -> m (Pop e) 343 | mutatePop p pop 344 | | mProb p == 0 = return pop 345 | | otherwise = liftM mkPop . forM (unPop pop) $ \i -> 346 | choice (mProb p) (mutateInd p i) (return i) 347 | 348 | ----------------------------------------------------------------------------- 349 | -- Evolution state 350 | 351 | -- | The state of the evolution. 352 | data EvolState e = EvolState 353 | { pop :: Pop e -- ^ Current population. 354 | , iter :: Int -- ^ Iteration (current generation number). 355 | , cachedBest :: Ind e -- ^ Best individual evolved so far. 356 | } deriving (Show,Eq) 357 | 358 | initState :: Pop e -> EvolState e 359 | initState pop = 360 | EvolState { pop = pop, iter = 0, cachedBest = best pop } 361 | 362 | -- | Advances to next evolution state. 363 | nextState :: (GenProg m e ) => 364 | EvolParams m e -> EvolState e -> m (EvolState e) 365 | nextState p es1 = do 366 | pop2 <- evolvePop p pop1 367 | return $ es1 { pop = pop2, iter = iter es1 + 1, 368 | cachedBest = max (cachedBest es1) (best pop1) } 369 | where pop1 = pop es1 370 | 371 | ----------------------------------------------------------------------------- 372 | -- Control parameters 373 | 374 | -- | Standardized fitness. It takes on values from 0 (best fitness) to 375 | -- +infinity (worst fitness). 376 | type Fitness e = e -> Double 377 | 378 | -- | A function to mutate a chosen expression node. 379 | type Mutate m e = e -> m e 380 | 381 | -- | Default mutation. Replaces a node, irrespective of its value, 382 | -- with a randomly generated subexpression whose depth is limited to 383 | -- 'iDepth'. 384 | defaultMutation :: (GenProg m e) => EvolParams m e -> Mutate m e 385 | defaultMutation p = const $ generateGrownExpr (iDepth p) 386 | 387 | -- | Termination predicate. 388 | type Terminate e = EvolState e -> Bool 389 | 390 | -- | Termination predicate: terminate if any individual satisfies the 391 | -- specified predicate. 392 | tSuccess :: (e -> Bool) -> Terminate e 393 | tSuccess c = any (c . unInd) . unPop . pop 394 | 395 | -- | Termination predicate: terminate if best individual's 396 | -- standardized fitness is greater than or equal to the specified value. 397 | tFitness :: (GenProg m e) => Double -> Terminate e 398 | tFitness f = (>= f) . unadjust . aFitness . cachedBest 399 | 400 | -- | Termination predicate: terminate after running for the specified 401 | -- number of iterations. 402 | tGeneration :: Int -> Terminate e 403 | tGeneration n = (>=n) . iter 404 | 405 | -- | Parameters governing the evolution. 406 | -- 407 | -- Default evolution parameters, 408 | -- as used in (Koza, 1992), are defined by 'defaultEvolParams' 409 | -- and indicated below. At least the fitness function 'fitness' should 410 | -- be overriden. 411 | data EvolParams m e = EvolParams { 412 | -- | Population size (number of individuals). Default is @500@. 413 | popSize :: Int, 414 | -- | Depth of expressions in initial population. Default is @6@. 415 | iDepth :: Int, 416 | -- | Maximum depth of expressions created during the evolution. 417 | -- Default is @17@. 418 | cDepth :: Int, 419 | -- | Probability of crossover. Default is @0.9@. If crossover is not 420 | -- chosen, an individual is simply reproduced (copied as-is) into 421 | -- the next generation. 422 | cProb :: Double, 423 | -- | Probability that an internal (functional) node is chosen as a 424 | -- crossover point. Default is @0.9@. If an internal node is not 425 | -- chosen, an external (terminal) node is 426 | -- chosen. 427 | ciProb :: Double, 428 | -- | Probability that an individual gets mutated. Default is @0@ 429 | -- (no mutation). 430 | mProb :: Double, 431 | -- | Probability that an internal (functional) node is chosen for 432 | -- mutation. Default is @0.1@. 433 | miProb :: Double, 434 | -- | Standardized fitness function. Default value is @undefined@ 435 | -- (must be overriden). 436 | fitness :: Fitness e, 437 | -- | Mutation function. Defines how to change a randomly chosen 438 | -- node. Default is @defaultMutation defaultEvolParams@ 439 | -- (replacement of a chosen node with a randomly generated subexpression). 440 | mutate :: Mutate m e, 441 | -- | Elitist factor: number of best-fitted individuals that are preserved 442 | -- from each generation (reproduced as-is into next evolution state). 443 | -- Default is @0@. 444 | elitists :: Int, 445 | -- | Termination predicate. Default is @50@ (terminate after 50 generations). 446 | terminate :: Terminate e } 447 | 448 | defaultEvolParams = EvolParams 449 | { popSize = 500 450 | , iDepth = 6 451 | , cDepth = 17 452 | , cProb = 0.9 453 | , ciProb = 0.9 454 | , mProb = 0.0 455 | , miProb = 0.1 456 | , terminate = tGeneration 50 457 | , fitness = error "GenProg.defaultEvolParams: fitness function is undefined" 458 | , mutate = const $ generateGrownExpr (iDepth defaultEvolParams) 459 | , elitists = 0 } 460 | 461 | ----------------------------------------------------------------------------- 462 | -- Evolution 463 | 464 | untilM :: (Monad m) => (a -> Bool) -> (a -> m a) -> a -> m a 465 | untilM p f x | p x = return x 466 | | otherwise = f x >>= untilM p f 467 | 468 | iterateUntilM :: (Monad m) => (a -> Bool) -> (a -> m a) -> a -> m [a] 469 | iterateUntilM p f x 470 | | p x = return [] 471 | | otherwise = do y <- f x 472 | ys <- iterateUntilM p f y 473 | return (y:ys) 474 | 475 | -- | Evolves one population from another one by performing a single 476 | -- evolution step. 477 | evolvePop :: (GenProg m e) => EvolParams m e -> Pop e -> m (Pop e) 478 | evolvePop p pop1 = do 479 | pop2 <- mkPop `liftM` untilM ((>= s) . length) step [] 480 | pop3 <- mutatePop p pop2 481 | return $ mkPop (elite ++ unPop pop3) 482 | where s = popSize p - length elite 483 | elite = take (elitists p) topRanked 484 | topRanked = sortBy (flip $ comparing aFitness) $ unPop pop1 485 | step is | length is == s - 1 = (:is) `liftM` reproducePop pop1 486 | | otherwise = choice (cProb p) 487 | (do (i1,i2) <- crossoverPop p pop1; return (i1:i2:is)) 488 | ((:is) `liftM` reproducePop pop1) 489 | 490 | -- | Creates an initial population and evolves it until termination 491 | -- predicate is satisfied, returning the last evolution state. 492 | evolve :: (GenProg m e) => EvolParams m e -> m (EvolState e) 493 | evolve p = -- generatePop p >>= evolveFrom p 494 | last `liftM` evolveTrace p 495 | 496 | -- | Evolves a given initial population until termination 497 | -- predicate is satisfied, returning the last evolution state. 498 | -- If the size of the initial population is less than 499 | -- 'popSize', the population will be replenished (see 'replenishPop'). 500 | evolveFrom :: (GenProg m e) => EvolParams m e -> Pop e -> m (EvolState e) 501 | evolveFrom p pop = -- untilM (terminate p) (nextState p) . initState 502 | last `liftM` evolveTraceFrom p pop 503 | 504 | -- | Runs evolution on a given initial population until termination 505 | -- predicate is satisfied and returns a list of successive evolution 506 | -- states. If the size of the initial population is less than 507 | -- 'popSize', the population will be replenished (see 'replenishPop'). 508 | evolveTraceFrom :: (GenProg m e) => EvolParams m e -> Pop e -> m [EvolState e] 509 | evolveTraceFrom p pop1 = 510 | iterateUntilM (terminate p) (nextState p) . initState =<< replenishPop p pop1 511 | 512 | -- | Creates an initial population and runs evolution until 513 | -- termination predicate is satisfied. Returns a list of successive 514 | -- evolution states. 515 | evolveTrace :: (GenProg m e) => EvolParams m e -> m [EvolState e] 516 | evolveTrace p = generatePop p >>= evolveTraceFrom p 517 | 518 | ----------------------------------------------------------------------------- 519 | -- Example 520 | 521 | {- $Example 522 | 523 | This is a simple, worked through example of how to use the GenProg 524 | library. Given a target number @n@, out aim is to evolve an arithmetic 525 | expression that evaluates to @n@. For example, given @13@ as the 526 | target number, one possible solution is @(3 * 5) - 2@. The constants 527 | allowed to appear in the expression are restricted to integers from 1 528 | to 9. The allowed operations are @+@, @-@, @*@, and integer division 529 | without remainder. 530 | 531 | We begin by defining the datatype for the genetically programed 532 | expression: 533 | 534 | @ 535 | -- The following language extensions need to be enabled: 536 | -- DeriveDataTypeable, FlexibleInstances, MultiParamTypeClasses 537 | 538 | import GenProg 539 | import Data.Generics 540 | import Control.Monad 541 | import Control.Monad.Random 542 | 543 | data E = Plus E E 544 | | Minus E E 545 | | Times E E 546 | | Div E E 547 | | Const Int 548 | deriving (Typeable,Data,Eq,Show) 549 | @ 550 | 551 | In order to evolve arithmetic expressions, we need to be able to 552 | compute their values. To this end we define 553 | 554 | @ 555 | eval :: E -> Maybe Int 556 | eval (Const c) = Just c 557 | eval (Plus e1 e2) = liftM2 (+) (eval e1) (eval e2) 558 | eval (Minus e1 e2) = liftM2 (-) (eval e1) (eval e2) 559 | eval (Times e1 e2) = liftM2 (*) (eval e1) (eval e2) 560 | eval (Div e1 e2) | ok = liftM2 div x1 x2 561 | | otherwise = Nothing 562 | where (x1,x2) = (eval e1,eval e2) 563 | ok = x2 /= Just 0 && liftM2 mod x1 x2 == Just 0 564 | @ 565 | 566 | Dividing by zero and dividing with a remainder are not allowed and in 567 | such cases we return @Nothing@. 568 | 569 | Because we have made @E@ an instance of the 'Data' typeclass, it can 570 | be readily used as a genetically programmable expression. Next step is 571 | to make 'E' an instance of the 'GenProg' typeclass: 572 | 573 | @ 574 | instance GenProg (Rand StdGen) E where 575 | terminal = Const `liftM` getRandomR (1,9) 576 | nonterminal = do 577 | r <- getRandomR (0,3) 578 | [liftM2 Plus terminal terminal, 579 | liftM2 Minus terminal terminal, 580 | liftM2 Times terminal terminal, 581 | liftM2 Div terminal terminal] !! r 582 | @ 583 | 584 | Thus, a random terminal node contains one of the constants from 1 to 585 | 9. A nonterminal node can be one of the four arithmetic operations, 586 | each with terminal nodes as arguments. Note that computations are run 587 | within the standard random generator monad (@Rand StdGen@). 588 | 589 | The fitness function evaluates the accurateness of the arithmetic 590 | expression with respect to the target number. If the value of the 591 | expression is far off from the target number @n@, the standardized 592 | fitness should be high. Moreover, we would like to keep the expression 593 | as simple as possible. To this end, we include a /parsimony factor/ 594 | that is proportional to the number of nodes an expression has. We 595 | define the overall standardized fitness as 596 | 597 | @ 598 | myFitness :: Int -> E -> Double 599 | myFitness n e = error + size 600 | where error = realToFrac $ maybe maxBound (abs . (n-)) (eval e) 601 | size = (realToFrac $ nodes e) / 100 602 | @ 603 | 604 | The number of nodes is divided by a factor of 100 to make it less 605 | important than the numeric accuracy of the expression. 606 | 607 | We now have everything in place to get the evolution going. We will use 608 | default evolution parameters and choose @12345@ as the target number: 609 | 610 | >>> let params = defaultEvolParams { fitness = myFitness 12345 } 611 | 612 | Let us first create a random number generator: 613 | 614 | >>> let g = mkStdGen 0 615 | 616 | We are doing this because we want our results to be reproducible, and 617 | because we want to be able to compare the results of different 618 | evolution runs. Normally, you would use @getStdGen@ to get a random 619 | generator with random seed. 620 | 621 | To run the evolution and get the best evolved individual, we type 622 | 623 | >>> let i = cachedBest $ evalRand (evolve params) g 624 | 625 | To check out its standardized fitness, we type 626 | 627 | >>> sFitness i 628 | 39.61 629 | 630 | Let us see how the actual expression looks like: 631 | 632 | >>> unInd i 633 | Times (Minus (Minus (Minus (Plus (Const 4) (Const 4)) (Plus (Const 6) 634 | (Const 7))) (Minus (Minus (Const 5) (Const 9)) (Plus (Minus (Const 5) 635 | (Const 9)) (Minus (Const 4) (Const 4))))) (Plus (Times (Plus (Const 5) 636 | (Const 1)) (Const 6)) (Times (Plus (Const 9) (Const 3)) (Minus (Const 1) 637 | (Const 8))))) (Div (Times (Plus (Plus (Const 3) (Const 5)) (Times (Const 4) 638 | (Const 7))) (Plus (Const 4) (Const 4))) (Minus (Minus (Plus (Const 2) 639 | (Const 8)) (Plus (Const 6) (Const 7))) (Plus (Minus (Const 5) (Const 9)) 640 | (Minus (Const 4) (Const 4))))) 641 | 642 | The number of nodes is 643 | 644 | >>> nodes $ unInd i 645 | 61 646 | 647 | Let us see to what number the expression evaluates: 648 | 649 | >>> eval $ unInd i 650 | Just 12384 651 | 652 | So in this run we didn't get a perfect match, but we were close. Let 653 | us see if we can do better. 654 | 655 | When doing genetic programming, it is always a good idea to experiment 656 | a bit with the parameters. There are no parameters that work best for 657 | any given problem. You can learn a lot about how parameters influence 658 | the evolution by analysing how the evolution progresses in time. This 659 | can be accomplised by evolving an evolution trace: 660 | 661 | >>> let trace = evalRand (evolveTrace params) g 662 | 663 | We can now analyse how the standardized fitness of the 664 | best individual improves during the evolution: 665 | 666 | >>> map (sFitness . best . pop) trace 667 | [9591.35,2343.59,1935.59,2343.59,903.51,903.45,585.59,585.59,327.45,225.41, 668 | 225.41,135.43,57.49,39.61,39.61,39.61,39.61,39.61,57.43,57.47,57.43,57.45, 669 | 57.33,57.43,57.43,57.45,57.43,57.43,57.35,57.35,57.43,57.27,57.33,57.33,57.43, 670 | 57.29,57.33,57.41,57.29,57.43,57.33,57.35,57.35,57.33,57.39,57.39,57.39,57.33, 671 | 57.37,57.37] 672 | 673 | We see that at some point the fitness decreases and then increases 674 | again. This indicates that the best fitted individual was lost by 675 | evolving from one generation to the other. We can prevent this by 676 | employing the /elitist strategy/. Let us see what happens if we 677 | preserve a best fitted individual in each generation: 678 | 679 | >>> let trace = evalRand (evolveTrace params {elitists = 1}) g 680 | >>> map (sFitness . best . pop) trace 681 | [9591.35,2343.59,711.61,711.61,711.61,711.61,57.55,57.53,57.39,57.39,57.39, 682 | 57.39,57.37,57.37,57.37,57.37,57.37,57.37,57.37,57.37,57.35,57.35,57.35, 683 | 57.35,57.35,57.35,57.35,57.35,57.35,57.35,57.33,57.33,57.33,57.33,57.33, 684 | 57.33,57.33,57.33,57.33,25.31,25.31,25.31,25.31,25.31,25.31,25.296,25.296, 685 | 25.296,25.296,25.296] 686 | 687 | This gives us better fitness, but still not an exact match: 688 | 689 | >>> let i = cachedBest $ last trace 690 | >>> eval $ unInd i 691 | Just 12320 692 | 693 | In the previous evolution run fitness converged relatively fast, but then 694 | remained stuck. To stir up things a little, let us allow for some 695 | mutation. Setting mutation probability to 5%, while retaining the 696 | elitist strategy, we get 697 | 698 | >>> let trace = evalRand (evolveTrace params {elitists = 1, mProb = 0.05}) g 699 | >>> map (sFitness . best . pop) trace 700 | [9591.35,9591.35,9591.35,9591.35,9591.35,9591.35,9159.35,8403.23,7239.11, 701 | 6087.15,6087.15,1479.13,819.21,60.13,51.19,5.19,5.19,5.19,5.19,5.19,1.23, 702 | 1.23,1.23,1.23,1.23,1.23,1.21,1.21,1.21,1.21,0.23998,0.23998,0.23998,0.23998, 703 | 0.23998,0.23998,0.23998,0.23998,0.23998,0.23998,0.23998,0.23998,0.23998, 704 | 0.23998,0.23998,0.23998,0.23998,0.23998,0.23998,0.23998] 705 | 706 | This time we've got a perfect match: 707 | 708 | >>> let i = cachedBest $ last trace 709 | >>> eval $ unInd i 710 | Just 12345 711 | 712 | while at the same time the expression is rather compact: 713 | 714 | >>> unInd i 715 | Plus (Times (Const 4) (Plus (Const 9) (Const 4))) (Plus (Plus (Times 716 | (Plus (Const 4) (Const 3)) (Times (Times (Const 3) (Const 9)) (Times 717 | (Const 5) (Plus (Const 9) (Const 4))))) (Const 3)) (Const 5)) 718 | >>> nodes $ unInd i 719 | 23 720 | 721 | -} 722 | --------------------------------------------------------------------------------