├── .gitignore
├── tests
    ├── Main.hs
    └── AI
    │   ├── VersionSpaceTests.hs
    │   └── Tests.hs
├── Setup.hs
├── README.md
├── src
    └── AI
    │   ├── LogicHelpers.hs
    │   ├── Examples.hs
    │   └── VersionSpaces.hs
├── LICENSE
├── HaVSA.cabal
└── documents
    └── intro.md


/.gitignore:
--------------------------------------------------------------------------------
1 | dist
2 | cabal-dev
3 | 


--------------------------------------------------------------------------------
/tests/Main.hs:
--------------------------------------------------------------------------------
 1 | module Main where
 2 | 
 3 | import qualified AI.Tests as LogicHelpers
 4 | import qualified AI.VersionSpaceTests as VersionSpaces
 5 | 
 6 | import Test.Framework ( defaultMain )
 7 | 
 8 | main :: IO ()
 9 | main = defaultMain [ LogicHelpers.tests
10 |                    , VersionSpaces.tests
11 |                    ]
12 | 


--------------------------------------------------------------------------------
/Setup.hs:
--------------------------------------------------------------------------------
 1 | import Distribution.Simple
 2 | main = defaultMain
 3 | 
 4 | -- import Distribution.Simple
 5 | -- import System.Cmd (system)
 6 | -- import System.Exit
 7 | -- import System
 8 | 
 9 | -- ghcCmd =  "ghc --make -odir dist/build -hidir dist/build -idist/build:src src/Unit.hs -main-is Unit.runTests -o unit"
10 | 
11 | -- main :: IO ()
12 | -- main = defaultMainWithHooks (simpleUserHooks { runTests = quickCheck } )
13 | --     where
14 | --       quickCheck _ _ _ _ = do ec <- system $ ghcCmd
15 | --                               case ec of
16 | --                                 ExitSuccess -> system "./unit"
17 | --                                 _           -> return ec
18 | --                               return () 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | HaVSA (Have-Saa) is a Haskell implementation of the Version Space
 2 | Algebra Machine Learning technique described in Tessa Lau's PhD thesis
 3 | (the link is for a journal version):
 4 | 
 5 | > Tessa Lau, Steven Wolfman, Pedro Domingos, and Daniel S. Weld,
 6 | > Programming by Demonstration using Version Space Algebra, Machine
 7 | > Learning, 2003. (http://tlau.org/research/papers/mlj01-draft.pdf)
 8 | 
 9 | # Documentation
10 |    * [VSIntro Introduction to Version Space Algebra]
11 | 
12 | ## Publications about Version Space Algebra
13 | 
14 | A number of Version Spaces papers that I've come across are listed [here](http://www.citeulike.org/user/creswick/tag/version_spaces)
15 | 
16 | # Contributors
17 | 
18 | HaVSA was created by [Rogan Creswick](http://blog.ciscavate.org)
19 | 
20 | # Other Implementations
21 | 
22 | If you're looking for an implementation in Java, you should check out [JVersionSpaces](http://code.google.com/p/jversionspaces)
23 | 


--------------------------------------------------------------------------------
/src/AI/LogicHelpers.hs:
--------------------------------------------------------------------------------
 1 | module AI.LogicHelpers (
 2 |      fairInts
 3 |    , choices
 4 |    , absMinVal
 5 |    , module Control.Monad.Logic
 6 |   ) where
 7 | 
 8 | import Control.Monad.Logic
 9 | import Data.Function (on)
10 | 
11 | choices :: MonadPlus m => [a] -> m a
12 | choices = msum . map return
13 | 
14 | -- | Generates all @Int@ values in the specified range, inclusive,
15 | -- steadily increasing in absolute value.
16 | fairInts :: Int -> Int -> Logic Int
17 | fairInts a b = return start `mplus` (choices (sTail $ mkList start (max a b)) `interleave`
18 |                                      choices (sTail $ mkList start (min a b)))
19 |   where start = absMinVal a b
20 |         sTail []       = []
21 |         sTail xs@(x:_) = tail xs
22 |         mkList s e = if s > e
23 |                      then [s, pred s..e]
24 |                      else [s..e]
25 | 
26 | -- | Find the value with minimum absolute value in the range @[a b]@
27 | absMinVal :: Int -> Int -> Int
28 | absMinVal a b = case (compare `on` signum) a b of
29 |   LT -> 0 -- signs are different, so 0 is in the range.
30 |   EQ -> case (compare `on` abs) a b of
31 |          LT -> a
32 |          EQ -> a
33 |          GT -> b
34 |   GT -> 0 -- signs are different, so 0 is in the range.


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2011, Eugene Rogan Creswick
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are
 6 | met:
 7 | 
 8 | Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
 9 | 
10 | Redistributions in binary form must reproduce the above copyright
11 | notice, this list of conditions and the following disclaimer in the
12 | documentation and/or other materials provided with the distribution.
13 | 
14 | Neither the name of Eugene Rogan Creswick nor the names of his
15 | contributors may be used to endorse or promote products derived from
16 | this software without specific prior written permission.
17 | 
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


--------------------------------------------------------------------------------
/HaVSA.cabal:
--------------------------------------------------------------------------------
 1 | -- cabal configure --prefix=$HOME --user
 2 | -- cabal build
 3 | name:                HaVSA
 4 | version:             0.1.0.2
 5 | synopsis:            An implementation of the Version Space Algebra learning framework.
 6 | description:         HaVSA (Have-Saa) is a Haskell implementation of the Version Space
 7 |                      Algebra Machine Learning technique described by Tessa Lau.  The
 8 |                      canonical description is in:
 9 |                      .
10 |                      Tessa Lau, Steven Wolfman, Pedro Domingos, and Daniel S. Weld,
11 |                      Programming by Demonstration using Version Space Algebra,
12 |                      Machine Learning, 2003. (http://tlau.org/research/papers/mlj01-draft.pdf)
13 | category:            AI
14 | license:             BSD3
15 | License-file:        LICENSE
16 | author:              Rogan Creswick
17 | maintainer:          creswick@gmail.com
18 | Cabal-Version:       >=1.8.0.6
19 | build-type:          Simple
20 | 
21 | Library
22 |    Build-depends:    base >= 4 && < 6,
23 |                      logict >= 0.4.2 && < 0.5
24 | 
25 |    Exposed-modules:  AI.VersionSpaces
26 |    Other-modules:    AI.Examples,
27 |                      AI.LogicHelpers
28 | 
29 |    ghc-options:      -Wall
30 |    hs-source-dirs:   src
31 | 
32 | Executable tests
33 |   Main-Is:           Main.hs
34 |   hs-source-dirs:    tests,
35 |                      src
36 |   Other-modules:     AI.Tests,
37 |                      AI.VersionSpaceTests
38 | 
39 |   Build-Depends:     base >= 4 && < 6,
40 |                      logict >= 0.4.2 && < 0.5,
41 |                      QuickCheck >= 1.1 && < 2,
42 |                      HUnit >= 1.2.2 && < 1.2.3,
43 |                      test-framework >= 0.3.3 && < 0.4,
44 |                      test-framework-quickcheck >= 0.2.7 && < 0.3,
45 |                      test-framework-hunit >= 0.2.6 && < 0.3
46 | 
47 |   ghc-options:       -Wall
48 | 
49 | 
50 | source-repository head
51 |   type:     git
52 |   location: git://github.com/creswick/HaVSA.git


--------------------------------------------------------------------------------
/tests/AI/VersionSpaceTests.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE RankNTypes #-}
 2 | module AI.VersionSpaceTests where
 3 | 
 4 | import AI.VersionSpaces
 5 | import AI.Examples
 6 | 
 7 | import Test.Framework                 (Test, testGroup)
 8 | import Test.Framework.Providers.HUnit
 9 | import Test.Framework.Providers.QuickCheck (testProperty)
10 | 
11 | -- import Test.QuickCheck
12 | import Test.HUnit ( (@=?), Assertion )
13 | 
14 | tests :: Test
15 | tests = testGroup "VersionSpace tests" [
16 |           testCase "Union empties" test_emptyUnion1
17 |         , testCase "Union Empty foo == id" test_emptyUnion2
18 |         , testCase "Union foo Empty == id" test_emptyUnion3
19 |         , testCase "Join Empty foo == Empty" test_emptyJoin1
20 |         , testCase "Join foo Empty == Empty" test_emptyJoin2
21 |         , testCase "Tr doesn't generate hypotheses" test_emptyTRisEmpty
22 |         , testProperty "Tr id id id is 'id'" prop_IDtransform
23 |         ]
24 | 
25 | -- | Check that the union operator on Empty version spaces behaves as expected.
26 | test_emptyUnion1 :: Assertion
27 | test_emptyUnion1 = length []                     @=? length (hypotheses $ union Empty Empty)
28 | 
29 | test_emptyUnion2 :: Assertion
30 | test_emptyUnion2 = length (hypotheses constIdVS) @=? length (hypotheses $ union Empty constIdVS)
31 | 
32 | test_emptyUnion3 :: Assertion
33 | test_emptyUnion3 = length (hypotheses constIdVS) @=? length (hypotheses $ union constIdVS Empty)
34 | 
35 | -- | Check that the join operator on Empty version spaces behaves as expected.
36 | test_emptyJoin1 :: Assertion
37 | test_emptyJoin1 = length [] @=? length (hypotheses $ join emptyVS constIdVS)
38 | 
39 | test_emptyJoin2 :: Assertion
40 | test_emptyJoin2 = length [] @=? length (hypotheses $ join constIdVS emptyVS)
41 | 
42 | test_emptyTRisEmpty :: Assertion
43 | test_emptyTRisEmpty = 0 @=? length (hypotheses $ tr id id id emptyVS)
44 | 
45 | prop_IDtransform :: Int -> Int -> Int -> Bool
46 | prop_IDtransform x y z = let vs   = VS intHs
47 |                              vsTr = tr id id id vs
48 |                              -- | Train and execute a versionspace on the inputs:
49 |                              eval vs = runVS (train vs x y) z
50 |                              types = (x :: Int, y :: Int, z :: Int)
51 |                          in eval vs == eval vsTr
52 | 
53 | -- | This is necessary to make the type checker happy in some cases.
54 | emptyVS :: VersionSpace Int Int
55 | emptyVS = Empty
56 | 
57 | -- | Version space that always returns @id@
58 | constIdVS :: VersionSpace Int Int
59 | constIdVS = VS $ BSR { storage = undefined
60 |                      , narrow = \bsr _ _ -> bsr
61 |                      , hypos = \_ -> [id]
62 |                      }


--------------------------------------------------------------------------------
/src/AI/Examples.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE TypeSynonymInstances, FunctionalDependencies, MultiParamTypeClasses  #-}
 2 | module AI.Examples where
 3 | 
 4 | 
 5 | import AI.VersionSpaces
 6 | import AI.LogicHelpers (choices, fairInts, observeAll)
 7 | 
 8 | import GHC.Real (infinity)
 9 | 
10 | -- | Version space that learns fixed or relative
11 | -- offsets into an input region:
12 | sizeVS :: VersionSpace Int Int
13 | sizeVS = (VS intHs) `union` (intFromRatTr $ VS ratHs)
14 | 
15 | -- | Transform to adapt Rational VSs to Integral VSs
16 | intFromRatTr :: VersionSpace Rational Rational -> VersionSpace Int Int
17 | intFromRatTr = Tr fromIntegral fromIntegral round
18 | 
19 | -- | Define a rectangle type to simplify the syntax and add semantics:
20 | data Rectangle = Rect {x_coord :: Int,
21 |                        y_coord :: Int,
22 |                        width   :: Int,
23 |                        height  :: Int
24 |                       } deriving (Show, Eq)
25 |                                  
26 | -- | Define a 2-D Region type, also to add semantics and simplify syntax.
27 | type Region1D = (Int, Int)
28 | 
29 | -- | Rectangle VS learns rectangles contained in a rectangular region.
30 | -- This is simply a join of two 1-D regions, wrapped in a transform.
31 | rectangleVS :: VersionSpace Rectangle Rectangle
32 | rectangleVS = rectTr $ region1d `join` region1d
33 |               where 
34 |                 rectTr = Tr decompose decompose compose
35 |                 compose ((x, w), (y, h)) = Rect x y w h
36 |                 decompose (Rect x y w h) = ((x, w), (y, h))
37 | 
38 | -- | The core components of RectangleVS: (1-D regions)
39 | region1d :: VersionSpace Region1D Region1D
40 | region1d = sizeVS `join` sizeVS -- offset and width.
41 | 
42 | -- | Hypothesis space of constant int functions.  This is a bit
43 | -- wastefull, since the bounds collapse to be equal on one
44 | -- example. However, it serves as an example of a BSR representation
45 | -- that may be instructive to others.
46 | intHs :: BSR (Int, Int) i Int
47 | intHs = BSR { storage = (minBound :: Int, maxBound :: Int)
48 |             , narrow = narrowIntHs
49 |             , hypos = hyposIntHs
50 |             }
51 |         
52 | narrowIntHs :: BSR (Int, Int) i Int -> i -> Int -> BSR (Int, Int) i Int
53 | narrowIntHs EmptyBSR _ _          = EmptyBSR
54 | narrowIntHs (BSR (l, u) f g) _ exOut 
55 |          | exOut < l || u < exOut = EmptyBSR
56 |          | otherwise              = BSR (exOut, exOut) f g
57 | 
58 | hyposIntHs :: BSR (Int, Int) i Int -> [(i -> Int)]
59 | hyposIntHs EmptyBSR        = []
60 | hyposIntHs (BSR (l,u) _ _) = [\_-> y | y <- observeAll $ fairInts l u] 
61 | 
62 | -- | Hypothesis space of ratio functions.
63 | ratHs :: BSR (Rational, Rational) Rational Rational
64 | ratHs =  BSR { storage = (-infinity, infinity)
65 |              , narrow = narrowRatHs
66 |              , hypos = hyposRatHs
67 |              }
68 | 
69 | narrowRatHs :: BSR (Rational, Rational) Rational Rational 
70 |                -> Rational 
71 |                -> Rational 
72 |                -> BSR (Rational, Rational) Rational Rational
73 | narrowRatHs EmptyBSR         _ _  = EmptyBSR
74 | narrowRatHs bsr@(BSR (n, d) f g) exIn exOut 
75 |   | d == infinity         = bsr { storage = (exOut, exIn) }
76 |   | exOut / exIn == n / d = bsr
77 |   | otherwise             = EmptyBSR
78 |     
79 |     
80 | -- | exOut < l || u < exOut = EmptyBSR
81 |          -- | otherwise              = BSR (exOut, exOut) f g
82 | 
83 | -- | TODO ERC: pull in the code that uses Logic to intercalate values from 0.
84 | hyposRatHs :: BSR (Rational, Rational) Rational Rational -> [Rational -> Rational]
85 | hyposRatHs EmptyBSR        = []
86 | -- | TODO ERC: this is not correct..
87 | hyposRatHs (BSR (n, d) _ _) | d == infinity = [\_-> y | y <- [n .. d]]
88 |                             | n == 0        = [\_ -> 0]
89 |                             | otherwise     = [\x -> x * (n / d)]
90 | 


--------------------------------------------------------------------------------
/tests/AI/Tests.hs:
--------------------------------------------------------------------------------
  1 | module AI.Tests where
  2 | 
  3 | import AI.VersionSpaces
  4 | import AI.Examples
  5 | import AI.LogicHelpers
  6 | 
  7 | import Control.Monad (liftM4, liftM2, liftM)
  8 | import Data.List
  9 | 
 10 | import Control.Monad.Logic
 11 | 
 12 | import Test.Framework                      (defaultMain, testGroup)
 13 | -- import Test.Framework.Providers.HUnit
 14 | -- import Test.HUnit
 15 | import Test.Framework.Providers.QuickCheck (testProperty)
 16 | 
 17 | import Test.QuickCheck
 18 | 
 19 | tests = testGroup "LogicHelpers tests" [
 20 |          testGroup "absMinVal properties" [
 21 |                           testProperty "same sign GT" prop_absMinVal_sameSignGt
 22 |                         , testProperty "same sign LT" prop_absMinVal_sameSignLt
 23 |                         , testProperty "zero-span" prop_absMinVal_span0
 24 |                         ]
 25 |         , testGroup "fairInts properties" [
 26 |                           testProperty "associate" prop_fairInts_associate
 27 |                         , testProperty "unique" prop_fairInts_unique
 28 |                         ]
 29 |         ]
 30 | 
 31 | prop_absMinVal_sameSignGt :: Int -> Int -> Property
 32 | prop_absMinVal_sameSignGt x y =
 33 |   x > 0 && y > 0 ==> absMinVal x y == min x y
 34 | 
 35 | prop_absMinVal_sameSignLt :: Int -> Int -> Property
 36 | prop_absMinVal_sameSignLt x y =
 37 |   x < 0 && y < 0 ==> absMinVal x y == max x y
 38 | 
 39 | prop_absMinVal_span0 :: Int -> Int -> Property
 40 | prop_absMinVal_span0 x y =
 41 |   signum x /= signum y ==> absMinVal x y == 0
 42 | 
 43 | prop_fairInts_associate :: Int -> Int -> Property
 44 | prop_fairInts_associate x y =
 45 |   abs (x - y) < 10000 ==> -- stop the tests before they get huge
 46 |    (observeAll $ fairInts x y) == (observeAll $ fairInts y x)
 47 | 
 48 | prop_fairInts_unique :: Int -> Int -> Property
 49 | prop_fairInts_unique x y =
 50 |   abs (x - y) < 10000 ==> -- stop the tests before they get huge
 51 |   let ints = (observeAll $ fairInts x y)
 52 |   in nub ints == ints
 53 | 
 54 | 
 55 | -- | threw an @*** Exception: Ratio.%: zero denominator@ initially.
 56 | checkSizes = let screen800 = Rect 0 0 800 600
 57 |                  example = (Rect 0 0 80 60)
 58 |                  rvs = train rectangleVS screen800 example
 59 |                  -- this is not yet demanded, because the condition
 60 |                  -- fails too soon:
 61 |                  results = runVS rvs screen800
 62 |               in
 63 |               -- Only 0,0,80x60 is valid, but it can be generated two ways:
 64 |               (length results == 2) &&
 65 |               (results!!0 == example) &&
 66 |               (results!!1 == example)
 67 | 
 68 | -- arbitraryBSR :: BSR a i o => Gen a
 69 | -- arbitraryBSR = oneof [AnyInt, AnyRat]
 70 | 
 71 | -- instance Arbitrary (VersionSpace i o) where
 72 | --   arbitrary = sized arbitraryVS
 73 | 
 74 | -- arbitraryVS :: Int -> Gen (VersionSpace i o)
 75 | -- arbitraryVS n | n <= 0    = liftM VS arbitraryBSR
 76 | --               | otherwise = oneof [ liftM2 join (arbitraryVS n/2) (arbitraryVS n/2) 
 77 | --                                   , liftM2 union (arbitraryVS n/2) (arbitraryVS n/2)
 78 | --                                     -- we should reduce the tr size a bit, but halving it may be excessive.
 79 | --                                   , liftM4 tr (return id) (return id) (return id) (arbitraryVS n/2)
 80 | --                                   ]
 81 | 
 82 | 
 83 | -- Quickcheck property ideas:
 84 | --
 85 | --   * every hypotheses is consistent with some training input, or no hypotheses exist:
 86 | --      case hypotheses (train v i o) of
 87 | --         Empty -> True
 88 | --         hs    -> map (\f -> f i) hs == take (length hs) $ repeat o
 89 | --
 90 | --   * Hypotheses sets shrink monotonically:
 91 | --      length $ hypotheses v >= length $ hypotheses $ train v i o
 92 | --
 93 | --   * Joining two version spaces results in hypotheses that are the cross product of the inputs.
 94 | --     (even if some are Empty)
 95 | --       let l1 = length $ hypotheses v1
 96 | --           l2 = length $ hypotheses v2
 97 | --       in l1 * l2 == length $ hypotheses $ join v1 v2
 98 | --
 99 | --   * Unioning two version spaces is additive in the size of the hypotheses.
100 | --       let l1 = length $ hypotheses v1
101 | --           l2 = length $ hypotheses v2
102 | --       in l1 + l2 == length $ hypotheses $ union v1 v2
103 | --
104 | 


--------------------------------------------------------------------------------
/documents/intro.md:
--------------------------------------------------------------------------------
  1 | # Version Space Algebra #
  2 | 
  3 | Version Space Algebra is a set of operations that allow multiple
  4 | version spaces to be combined.  Before examining these operations we
  5 | should first talk about version spaces themselves.
  6 | 
  7 | ## Version Spaces ##
  8 | 
  9 | A version space is simply a set of hypotheses.  
 10 | 
 11 | Given a collection of examples we would like to form a hypothesis that
 12 | explains the data.  Hopefully, there is one hypothesis that perfectly
 13 | explains all data seen, and all data that will appear in the future
 14 | (for that problem).
 15 | 
 16 | Version Spaces are collections of all possible hypotheses for a given
 17 | class of problems.  These collections are narrowed down as data is
 18 | provided, eliminating hypotheses that are inconsistent with the
 19 | example data (these hypotheses don't adequately explain those
 20 | examples).
 21 | 
 22 | Atomic version spaces (or just "Version Spaces") are typically very
 23 | large -- potentially containing an uncountably infinite number of
 24 | hypotheses.  This poses a problem when narrowing hypotheses, unless
 25 | some criteria are met.  
 26 | 
 27 | (Cite boundary-set-representable paper.)
 28 | 
 29 | __________ shows that version spaces can be efficiently represented
 30 | and narrowed by ordering the hypotheses by some generality criteria.
 31 | This ordering needs to be engineered such that the boundaries move
 32 | gradually inward as examples are seen.
 33 | 
 34 | ## Composing Version Spaces ##
 35 | 
 36 | Tessa Lau developed a technique for composing version spaces with
 37 | three different operations:
 38 | 
 39 |    * Transform
 40 |    * Union
 41 |    * Join
 42 | 
 43 | The detailed explanation of these operations is given in [ML VS
 44 | paper], but a brief overview is given below.
 45 | 
 46 | Version Space algebra helps to move the complexity of atomic version
 47 | spaces (outlined above) from within the specific version spaces, and
 48 | into the *structure* of composite version spaces.  Atomic
 49 | version spaces are still used, but they are trivially simple when used
 50 | in Version Space Algebra.
 51 | 
 52 | ### Transform ###
 53 | 
 54 | Transform changes the input and output types of a version space to
 55 | match a new type.  Transforms can be used to:
 56 | 
 57 |    * Generate complex structures from tuple types
 58 |    * Convert types to match the expectations of a composite version space.
 59 | 
 60 | Transformations are performed with `VS.transform(VS, Transform)`.
 61 | 
 62 |     -- | Transform to adapt Rational VSs to Integral VSs
 63 |     intFromRatTr :: VersionSpace Rational Rational -> VersionSpace Int Int
 64 |     intFromRatTr # Tr fromIntegral fromIntegral round
 65 | 
 66 | 
 67 | ### Union ###
 68 | 
 69 | The simplest means to combine version spaces is with union.  Unioning
 70 | two version spaces generates a composite version space that, as
 71 | expected, contains the set of hypotheses in both version spaces.
 72 | 
 73 | Union is useful for generating a complex version space that consists
 74 | of hypotheses that generate results of the same *type* but through
 75 | much different *means*.  For example, an offset in a region can be
 76 | expressed as a fixed offset from one end or a ratio of the length of
 77 | the enclosing region.  Both approaches generate the same thing -- an
 78 | integer offset -- but they operate much differently, so the version
 79 | spaces should be separate.
 80 | 
 81 | This is used in sizeVS:
 82 | 
 83 |     -- | Version space that learns fixed or relative
 84 |     -- offsets into an input region:
 85 |     sizeVS :: VersionSpace Int Int
 86 |     sizeVS = (VS AnyInt) `union` (intFromRatTr $ VS AnyRat)
 87 | 
 88 | `union` takes two version spaces with the same input and
 89 | output types and generates a version space that encompasses both of
 90 | the provided version spaces.
 91 | 
 92 | In this example the input version spaces have types that are not
 93 | directly compatible, so a transform is needed to create version spaces
 94 | that take and produce the correct outputs.
 95 | 
 96 | ### Join ###
 97 | 
 98 | Two version spaces can also be combined with a *join* operation.
 99 | 
100 | Joining version spaces effectively generates the cartesian product of
101 | the two sets of hypotheses (one from each joined version space),
102 | although the full set of hypotheses is only created when a version
103 | space is executed.
104 | 
105 | Because `join` is defined in general terms, the input and
106 | output types are simply a tuple type of the inputs and outputs of the
107 | component types.  Since tuples are generally of limited use, it is
108 | typical to apply a transform to the result of a join in order to add
109 | structure (and type information) to the resulting composite version
110 | space.
111 | 
112 | The following snippet shows the construction of the `rectangle` version space from composite version spaces:
113 | 
114 | 
115 |     -- | Define some types to simplify the syntax and add semantics:
116 |     data Rectangle = Rect Int Int Int Int
117 |     type Region1D = (Int, Int)
118 |     
119 |     -- | Rectangle VS learns rectangles contained in a rectangular region.
120 |     -- This is simply a join of two 1-D regions, wrapped in a transform.
121 |     rectangleVS :: VersionSpace Rectangle Rectangle
122 |     rectangleVS = rectTr $ region1d `join` region1d
123 |                   where 
124 |                     rectTr = Tr decompose decompose compose
125 |                     compose ((x, w), (y, h)) = Rect x y w h
126 |                     decompose (Rect x y w h) = ((x, w), (y, h))
127 | 
128 |     -- | The core components of RectangleVS: (1-D regions)
129 |     region1d :: VersionSpace Region1D Region1D
130 |     region1d = sizeVS `join` sizeVS -- offset and width.
131 | 


--------------------------------------------------------------------------------
/src/AI/VersionSpaces.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE GADTs, TypeSynonymInstances, FunctionalDependencies, MultiParamTypeClasses  #-}
  2 | module AI.VersionSpaces where
  3 | 
  4 | import Control.Arrow ((***))
  5 | 
  6 | -- | Representation of a traditional version space, as described by
  7 | -- Hirsh: Hirsh, H.: 1991, 'Theoretical Underpinnings of Version
  8 | -- Spaces'. In: Proceedings of the Twelfth International Joint
  9 | -- Conference on Artificial Intelligence. pp.  665–670.
 10 | data BSR a i o = EmptyBSR
 11 |                | BSR { storage :: a
 12 |                      , narrow :: BSR a i o -> i -> o -> BSR a i o
 13 |                      , hypos  :: BSR a i o -> [i -> o]
 14 |                      }
 15 | 
 16 | -- | Renders a BSR to a string to show whether the BSR is empty or
 17 | -- not.  Additional details place undesirable restrictions on the
 18 | -- state storage.
 19 | showBSR :: BSR a i o -> String
 20 | showBSR EmptyBSR    = "Empty"
 21 | showBSR (BSR _ _ _) = "non-empty"
 22 | 
 23 | -- | Union two versionspaces, generating a third.
 24 | union :: VersionSpace a b -> VersionSpace a b -> VersionSpace a b
 25 | union Empty y = y
 26 | union x Empty = x
 27 | union x y     = Union x y
 28 | 
 29 | -- | Join two versionspaces, generating a third.
 30 | join :: (Eq b, Eq d) => VersionSpace a b -> VersionSpace c d -> VersionSpace (a, c) (b, d)
 31 | join Empty _ = Empty
 32 | join _ Empty = Empty
 33 | join x y     = Join x y
 34 | 
 35 | -- | Transform a version space to mutate the input and/or output types.
 36 | -- Transforms require that three functions be specified:
 37 | --
 38 | --  [@i -> a@] Transform the input of the resulting version space to the input of the initial versionspace.
 39 | --
 40 | --  [@o -> b@] Transform the output of the initial versionspace into the output of the resulting versionspace.
 41 | --
 42 | --  [@b -> o@] Transform the output of the /resulting/ versionspace
 43 | --  into the output of the /initial/ versionspace.  This is necessary
 44 | --  to support training: the training examples will be in terms of the
 45 | --  resulting versionspace, so the output must be transformed back
 46 | --  into the terms of the initial versionspace.
 47 | tr :: (Eq b) => (i -> a) -> (o -> b) -> (b -> o) -> VersionSpace a b -> VersionSpace i o
 48 | tr _   _    _    Empty = Empty
 49 | tr tin tout fout vs    = Tr tin tout fout vs
 50 | 
 51 | -- | Version Space algebraic operators:
 52 | data VersionSpace i o where
 53 |   -- The empty, or collapsed versionspace.
 54 |   Empty :: VersionSpace i o
 55 |   -- A basic leaf versionspace, this just wraps a 'BSR'
 56 |   VS :: BSR a i o -> VersionSpace i o
 57 |   -- The Join of two versionspaces.  This should not be used directly, rather, use the 'join' function.
 58 |   Join :: (Eq d, Eq b) => VersionSpace a b -> VersionSpace c d -> VersionSpace (a, c) (b, d)
 59 |   -- The union of two versionspaces.  This should not be used directly, rather, use the 'union' function.
 60 |   Union :: VersionSpace a b -> VersionSpace a b -> VersionSpace a b
 61 |   -- The transform of two versionspaces.  This should not be used directly, rather, use the 'tr' function.
 62 |   Tr :: (Eq b) => (i -> a) -> (o -> b) -> (b -> o) -> VersionSpace a b -> VersionSpace i o
 63 | 
 64 | -- | Serializes a versionspace to a human-readable string, for certain values of 'human'.
 65 | showVS :: VersionSpace i o -> String
 66 | showVS Empty           = "Empty"
 67 | showVS (VS hs)         = showBSR hs
 68 | showVS (Union vs1 vs2) = "["++showVS vs1++" U "++showVS vs2++"]"
 69 | showVS (Join vs1 vs2)  = "["++showVS vs1++" |><| "++showVS vs2++"]"
 70 | showVS (Tr _ _ _ vs)   = "[TR "++showVS vs++"]"
 71 | 
 72 | -- | Train a version space, reducing the set of valid hypotheses.  We
 73 | -- handle the Empty VS cases prior to the corresponding non-empty
 74 | -- cases because the Empties are simplifying cases, so logic can be
 75 | -- short-circuited by collapsing parts of the hierarchy before
 76 | -- recursing.
 77 | train :: (Eq o) => VersionSpace i o -> i -> o -> VersionSpace i o
 78 | train Empty  _ _ = Empty
 79 | train (VS b) i o = case (narrow b) b i o of
 80 |   EmptyBSR -> Empty
 81 |   bsr      -> VS bsr
 82 | 
 83 | -- | The join of an empty VS with any other VS is empty.
 84 | train (Join Empty _)    _       _       = Empty
 85 | train (Join _ Empty)    _       _       = Empty
 86 | train (Join vs1 vs2)   (i1,i2) (o1, o2) = join (train vs1 i1 o1) (train vs2 i2 o2)
 87 | 
 88 | -- | Unioning a VS with an empty VS is just the non-empty VS.
 89 | train (Union vs1 Empty) _       _       = vs1
 90 | train (Union Empty vs2) _       _       = vs2
 91 | train (Union vs1 vs2)   i       o       = union (train vs1 i o) (train vs2 i o)
 92 | 
 93 | -- | Any transform on an empty VS is just an empty VS.
 94 | train (Tr _ _ _ Empty)           _ _  = Empty
 95 | train (Tr tin tout fout innerVS) i o  = tr tin tout fout trainedVS
 96 |     where trainedVS = train innerVS (tin i) (tout o)
 97 | 
 98 | -- | Retrieve the valid hypotheses for a version space.
 99 | hypotheses :: VersionSpace i o -> [(i -> o)] -- could be i -> [o]
100 | hypotheses Empty              = []
101 | hypotheses (VS hs)            = (hypos hs) hs
102 | hypotheses (Join vs1 vs2)     = zipWith (***) (hypotheses vs1) (hypotheses vs2)
103 | hypotheses (Union vs1 vs2)    = hypotheses vs1 ++ hypotheses vs2
104 | hypotheses (Tr fin _ fout vs) = map (\x->fout . x . fin) $ hypotheses vs
105 | 
106 | -- | Runs all valid hypotheses from the version space
107 | -- on the specified input.
108 | runVS :: VersionSpace a b -> a -> [b]
109 | runVS vs input = map (\x->x $ input) $ hypotheses vs
110 | 
111 | {-
112 | 
113 | Notes regarding error-tolerance:
114 | 
115 |   * The Similar function happens at the narrowing stage - I think this
116 | can be done in the narrow fun. on BSR.  However, this assumes that
117 | user error on different aspects of the input is independent of the
118 | other aspects of error in the same way that the components of the
119 | trained hypotheses are independent.
120 | 
121 |   * The Aggregate function operates on sets of hypotheses.  It
122 | converts a set of hypotheses into a new, potentially smaller, set of
123 | hypotheses.  It could probably do better if it could use the
124 | demonstrations to determine the best aggregation.  I don't think
125 | Aggregation can happen solely at the leaves (although it may be
126 | worthwhile to aggregate at that level).  Rather, I think it may be
127 | necessary to aggregate on higher-evel types.
128 | 
129 |    This might not be a problem though, since the only reasons I can
130 | think of to aggregate at a higher level are due to dependencies
131 | between sibling version spaces.
132 | 
133 | -}


--------------------------------------------------------------------------------