├── test
    ├── gold
    │   ├── anbn-ab.txt
    │   ├── anbn-ba.txt
    │   ├── anbn-eps.txt
    │   ├── as-a.txt
    │   ├── as-eps.txt
    │   ├── dyck-a.txt
    │   ├── dyck-b.txt
    │   ├── dyck-c.txt
    │   ├── dyck-d.txt
    │   ├── dyck-e.txt
    │   ├── pp-pi.txt
    │   ├── pp-pig.txt
    │   ├── pp-ping.txt
    │   ├── pp-pink.txt
    │   ├── pps-pig.txt
    │   ├── pps-q.txt
    │   ├── anbn-aaabbb.txt
    │   ├── anbn-aaabbbb.txt
    │   ├── anbn-aabb.txt
    │   ├── anbn-aacbb.txt
    │   ├── asas-100.txt
    │   ├── asbas-aabaaa.txt
    │   ├── asbas-aabba.txt
    │   ├── asbs-aabbba.txt
    │   ├── asbs-aabbbb.txt
    │   ├── letters as0df.txt
    │   ├── letters asdf.txt
    │   ├── pps-pigpig.txt
    │   ├── pps-pigping.txt
    │   ├── letters asdf 40.txt
    │   └── pps-pinkpigpinkpigpig.txt
    ├── stats
    │   ├── anbn.txt
    │   ├── dyck.txt
    │   ├── fishy.txt
    │   ├── asas.txt
    │   ├── star-a.txt
    │   ├── letters.txt
    │   ├── asbs.txt
    │   └── asbas.txt
    ├── do-bench1
    ├── wizard.jpg
    ├── stats-tweak.el
    ├── do-bench
    ├── do-benches
    ├── ImageTest.hs
    ├── Benchmark.hs
    ├── GoldTests.hs
    └── Benchmarks
    │   ├── opt regexp.md
    │   ├── no-opt regexp.md
    │   ├── with isOne.md
    │   └── without isOne.md
├── errata.md
├── .gitignore
├── stack.yaml
├── src
    ├── Language.hs
    ├── MMap.hs
    ├── Stream.hs
    ├── GenInstances.inc
    ├── Examples.hs
    ├── Misc.hs
    ├── ShareMap.hs
    ├── Constrained.hs
    ├── RegExp.hs
    ├── Cofree.hs
    ├── Poly.hs
    └── Semi.hs
├── Changes.md
├── formatting.fmt
├── sections.tex
├── package.yaml
├── Makefile
├── readme.md
├── notes.md
├── macros.tex
├── reviews-icfp2019.md
├── todo.md
└── was1.lhs


/test/gold/anbn-ab.txt:
--------------------------------------------------------------------------------
1 | 1


--------------------------------------------------------------------------------
/test/gold/anbn-ba.txt:
--------------------------------------------------------------------------------
1 | 0


--------------------------------------------------------------------------------
/test/gold/anbn-eps.txt:
--------------------------------------------------------------------------------
1 | 1


--------------------------------------------------------------------------------
/test/gold/as-a.txt:
--------------------------------------------------------------------------------
1 | 1


--------------------------------------------------------------------------------
/test/gold/as-eps.txt:
--------------------------------------------------------------------------------
1 | 1


--------------------------------------------------------------------------------
/test/gold/dyck-a.txt:
--------------------------------------------------------------------------------
1 | 1


--------------------------------------------------------------------------------
/test/gold/dyck-b.txt:
--------------------------------------------------------------------------------
1 | 1


--------------------------------------------------------------------------------
/test/gold/dyck-c.txt:
--------------------------------------------------------------------------------
1 | 0


--------------------------------------------------------------------------------
/test/gold/dyck-d.txt:
--------------------------------------------------------------------------------
1 | 1


--------------------------------------------------------------------------------
/test/gold/dyck-e.txt:
--------------------------------------------------------------------------------
1 | 0


--------------------------------------------------------------------------------
/test/gold/pp-pi.txt:
--------------------------------------------------------------------------------
1 | 0


--------------------------------------------------------------------------------
/test/gold/pp-pig.txt:
--------------------------------------------------------------------------------
1 | 1


--------------------------------------------------------------------------------
/test/gold/pp-ping.txt:
--------------------------------------------------------------------------------
1 | 0


--------------------------------------------------------------------------------
/test/gold/pp-pink.txt:
--------------------------------------------------------------------------------
1 | 1


--------------------------------------------------------------------------------
/test/gold/pps-pig.txt:
--------------------------------------------------------------------------------
1 | 1


--------------------------------------------------------------------------------
/test/gold/pps-q.txt:
--------------------------------------------------------------------------------
1 | 0


--------------------------------------------------------------------------------
/test/gold/anbn-aaabbb.txt:
--------------------------------------------------------------------------------
1 | 1


--------------------------------------------------------------------------------
/test/gold/anbn-aaabbbb.txt:
--------------------------------------------------------------------------------
1 | 0


--------------------------------------------------------------------------------
/test/gold/anbn-aabb.txt:
--------------------------------------------------------------------------------
1 | 1


--------------------------------------------------------------------------------
/test/gold/anbn-aacbb.txt:
--------------------------------------------------------------------------------
1 | 0


--------------------------------------------------------------------------------
/test/gold/asas-100.txt:
--------------------------------------------------------------------------------
1 | 101


--------------------------------------------------------------------------------
/test/gold/asbas-aabaaa.txt:
--------------------------------------------------------------------------------
1 | 1


--------------------------------------------------------------------------------
/test/gold/asbas-aabba.txt:
--------------------------------------------------------------------------------
1 | 0


--------------------------------------------------------------------------------
/test/gold/asbs-aabbba.txt:
--------------------------------------------------------------------------------
1 | 0


--------------------------------------------------------------------------------
/test/gold/asbs-aabbbb.txt:
--------------------------------------------------------------------------------
1 | 1


--------------------------------------------------------------------------------
/test/gold/letters as0df.txt:
--------------------------------------------------------------------------------
1 | 0


--------------------------------------------------------------------------------
/test/gold/letters asdf.txt:
--------------------------------------------------------------------------------
1 | 1


--------------------------------------------------------------------------------
/test/gold/pps-pigpig.txt:
--------------------------------------------------------------------------------
1 | 1


--------------------------------------------------------------------------------
/test/gold/pps-pigping.txt:
--------------------------------------------------------------------------------
1 | 0


--------------------------------------------------------------------------------
/test/gold/letters asdf 40.txt:
--------------------------------------------------------------------------------
1 | 1


--------------------------------------------------------------------------------
/test/gold/pps-pinkpigpinkpigpig.txt:
--------------------------------------------------------------------------------
1 | 1


--------------------------------------------------------------------------------
/test/stats/anbn.txt:
--------------------------------------------------------------------------------
1 | \stat{| anbn |}{1.293 ms}{\hang}{12.12 ms}{2.770 $\mu{}$s}{}
2 | 


--------------------------------------------------------------------------------
/test/do-bench1:
--------------------------------------------------------------------------------
1 | #! /bin/bash
2 | 
3 | echo $1 : $2
4 | ./do-bench $1 "$2" > stats/$1.txt
5 | 


--------------------------------------------------------------------------------
/test/stats/dyck.txt:
--------------------------------------------------------------------------------
1 | \stat{| dyck |}{254.9 $\mu{}$s}{\hang}{24.77 ms}{3.062 $\mu{}$s}{}
2 | 


--------------------------------------------------------------------------------
/test/stats/fishy.txt:
--------------------------------------------------------------------------------
1 | \stat{| fishy |}{1.276 ms}{2.528 ms}{29.73 ms}{4.233 $\mu{}$s}{}
2 | 


--------------------------------------------------------------------------------
/test/wizard.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conal/convolution-paper/HEAD/test/wizard.jpg


--------------------------------------------------------------------------------
/test/stats/asas.txt:
--------------------------------------------------------------------------------
1 | \stat{| star a1 <.> star a1 |}{2.818 ms}{1.274 ms}{601.6 ms}{2.619 $\mu{}$s}{}
2 | 


--------------------------------------------------------------------------------
/test/stats/star-a.txt:
--------------------------------------------------------------------------------
1 | \stat{| star a1 |}{30.56 $\mu{}$s}{22.45 $\mu{}$s}{5.258 ms}{2.624 $\mu{}$s}{}
2 | 


--------------------------------------------------------------------------------
/test/stats/letters.txt:
--------------------------------------------------------------------------------
1 | \stat{| star letter |}{690.4 $\mu{}$s}{690.9 $\mu{}$s}{10.89 ms}{3.574 $\mu{}$s}{}
2 | 


--------------------------------------------------------------------------------
/test/stats/asbs.txt:
--------------------------------------------------------------------------------
1 | \stat{| star a1 <.> star b1 |}{52.26 $\mu{}$s}{36.59 $\mu{}$s}{14.40 ms}{2.789 $\mu{}$s}{}
2 | 


--------------------------------------------------------------------------------
/test/stats/asbas.txt:
--------------------------------------------------------------------------------
1 | \stat{| star a1 <.> b1 <.> star a1 |}{56.53 $\mu{}$s}{49.21 $\mu{}$s}{14.58 ms}{2.798 $\mu{}$s}{}
2 | 


--------------------------------------------------------------------------------
/errata.md:
--------------------------------------------------------------------------------
1 | ## Errata
2 | 
3 | *   Version of March 25, 2019:
4 |     *   Figure 5, first word should be "class" (Ed Kmett).
5 | 
6 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.cabal
2 | test/Benchmarks/*.html
3 | wizard-*.png
4 | supplemental-material
5 | convolution*.tex
6 | *.zip
7 | 
8 | supplemental.tar
9 | 


--------------------------------------------------------------------------------
/stack.yaml:
--------------------------------------------------------------------------------
1 | resolver: lts-13.4 # ghc-8.6.3
2 | 
3 | # Some other choices:
4 | 
5 | # resolver: lts-12.19 # ghc-8.4.4
6 | # resolver: lts-12.4  # ghc-8.4.3
7 | # resolver: lts-11.12 # ghc-8.2.2
8 | # resolver: lts-9.20  # ghc-8.0.2
9 | 


--------------------------------------------------------------------------------
/test/stats-tweak.el:
--------------------------------------------------------------------------------
1 | (defun stats-tweak ()
2 |   "Tweak running times for the non-terminating examples"
3 |   (interactive)
4 |   (beginning-of-buffer)
5 |   (forward-sexp 3)
6 |   (kill-sexp 2)
7 |   (insert "{\\hang}{\\hang}")
8 |   )
9 | 


--------------------------------------------------------------------------------
/test/do-bench:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | 
 3 | stack bench --benchmark-arguments "$1" >& o.tmp
 4 | 
 5 | echo -n "\\stat{|" $2 "|}{"
 6 | grep Group o.tmp | cut -c10-100 | sed 's/"/}{/' | tr -d '\n'
 7 | # grep time o.tmp | cut -c22-30 | sed 's/ /}{/g' | sed 's/μ/$\\mu{}$/g' | tr -d '\n'
 8 | grep time o.tmp | cut -c22-29 | tr '\n' "#" | sed 's/#/}{/g' | sed 's/μ/$\\mu{}$/g' | tr -d '\n'
 9 | echo "}"
10 | 


--------------------------------------------------------------------------------
/test/do-benches:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | 
 3 | mkdir -p stats
 4 | 
 5 | ./do-bench1 star-a   'star a1'
 6 | ./do-bench1 letters  'star letter'
 7 | ./do-bench1 fishy    'fishy'
 8 | ./do-bench1 asas     'star a1 <.> star a1'
 9 | ./do-bench1 asbs     'star a1 <.> star b1'
10 | ./do-bench1 asbas    'star a1 <.> b1 <.> star a1'
11 | ./do-bench1 dyck     'dyck'
12 | ./do-bench1 anbn     'anbn'
13 | 
14 | # # Rerender the paper
15 | # (cd .. ; make)
16 | 


--------------------------------------------------------------------------------
/src/Language.hs:
--------------------------------------------------------------------------------
 1 | -- {-# OPTIONS_GHC -Wno-unused-imports #-} -- TEMP
 2 | 
 3 | -- | Generalized "languages", which is mostly Semiring & friends
 4 | 
 5 | #NOTUSED
 6 | 
 7 | module Language where
 8 | 
 9 | import Semi
10 | 
11 | oneBool :: Additive x => (a -> x) -> a -> Bool -> x
12 | oneBool _ _ False = zero
13 | oneBool f a True  = f a
14 | 
15 | equal' :: (Eq a, Additive b) => a -> b -> a -> b
16 | equal' a b a' = if a == a' then b else zero
17 | 
18 | equal :: (Eq a, Semiring b) => a -> a -> b
19 | equal a = equal' a one
20 | 
21 | -- >>> splits (4 :: N)
22 | -- [(Sum 0,Sum 4),(Sum 1,Sum 3),(Sum 2,Sum 2),(Sum 3,Sum 1),(Sum 4,Sum 0)]
23 | 
24 | {--------------------------------------------------------------------
25 |     Temporary hack
26 | --------------------------------------------------------------------}
27 | 
28 | -- allVals :: [c]
29 | -- allVals = error "allVals not defined"
30 | 


--------------------------------------------------------------------------------
/Changes.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: Description of changes
 3 | ...
 4 | 
 5 | 
 6 | ## Submitted to ICFP (March 1, 2019)
 7 | 
 8 | *   Submitted version: [Git commit](https://github.com/conal/convolution-paper/commit/65c5fd3a08172b19e35eddda9f07c56fd2fc2f48) and [PDF](https://github.com/conal/convolution-paper/blob/master/icfp19-paper46-submitted.pdf).
 9 | 
10 | ## Updates
11 | 
12 | ### March 9, 2019
13 | 
14 | *   Errata noted and fixed:
15 |     *   In Figure 4, replace `Indexable` instances first two lines with the following line
16 | 
17 |             instance Indexable c (Cofree h b) (h (Cofree h b)) => Indexable [c] b (Cofree h b) where
18 | 
19 |     *   The definition of `lift2` in Section 11 shouldn't have "`\ w ->".
20 | *   Section 11 (Beyond Convolution):
21 |     *   In Figure 8 ("Functor and Applicative classes and some instances"): Added *Functor* and *Applicative* instances for *(→) a*, aside the instances for *(←) b*.
22 |     *   In Figure 9 ("The a → b and b ← a semirings"): Added 𝒫' type and *Semiring* instance.
23 |     *   Added function preimages example.
24 | 
25 | 


--------------------------------------------------------------------------------
/src/MMap.hs:
--------------------------------------------------------------------------------
 1 | 
 2 | {-# OPTIONS_GHC -Wall #-}
 3 | {-# OPTIONS_GHC -Wno-unused-imports #-} -- TEMP
 4 | 
 5 | -- | Finite maps with a more sensible Monoid instance
 6 | 
 7 | module MMap where
 8 | 
 9 | import qualified Data.Map as M
10 | 
11 | -- For now, but maybe use MMap in place of Map throughout
12 | import Semi
13 | 
14 | newtype Map k v = M (M.Map k v) deriving
15 |   ( Eq,Ord,Show,Functor,Foldable,Additive,Semiring
16 |   , DetectableZero, DetectableOne
17 |   , Indexable k v,Listable k v, HasSingle k v )
18 | 
19 | instance (Ord k, Semigroup v) => Semigroup (Map k v) where
20 |   M u <> M v = M (M.unionWith (<>) u v)
21 | 
22 | instance (Ord k, Monoid v) => Monoid (Map k v) where
23 |   mempty = M mempty
24 |   mappend = (<>)
25 | 
26 | toDescList :: Map k v -> [(k,v)]
27 | toDescList (M m) = M.toDescList m
28 | 
29 | toAscList :: Map k v -> [(k,v)]
30 | toAscList (M m) = M.toAscList m
31 | 
32 | toList :: Map k v -> [(k,v)]
33 | toList (M m) = M.toList m
34 | 
35 | fromList :: Ord k => [(k,v)] -> Map k v
36 | fromList ps = M (M.fromList ps)
37 | 
38 | keys :: Map k v -> [k]
39 | keys (M m) = M.keys m
40 | 


--------------------------------------------------------------------------------
/src/Stream.hs:
--------------------------------------------------------------------------------
 1 | {-# OPTIONS_GHC -Wno-unused-imports #-} -- Examples
 2 | 
 3 | -- | Streams (infinite lists)
 4 | 
 5 | module Stream where
 6 | 
 7 | import Control.Applicative (Applicative(..))
 8 | import Data.Functor.Identity (Identity(..))
 9 | 
10 | import Constrained
11 | import Semi
12 | 
13 | #include "GenInstances.inc"
14 | 
15 | infixr 1 :<
16 | data Stream b = b :< Stream b
17 | 
18 | instance Additive b => Additive (Stream b) where
19 |   zero = zero :< zero
20 |   (a :< dp) <+> (b :< dq) = a <+> b  :<  dp <+> dq
21 | 
22 | instance Semiring b => LeftSemimodule b (Stream b) where
23 |   scale s = go where go (b :< dp) = s <.> b :< go dp
24 | 
25 | instance (Additive b, DetectableZero b) => DetectableZero (Stream b) where
26 |   isZero (a :< dp) = isZero a && isZero dp
27 | 
28 | instance (Semiring b, DetectableZero b) => Semiring (Stream b) where
29 |   one = one :< zero
30 |   (a :< dp) <.> q = a .> q <+> (zero :< dp <.> q)
31 | 
32 | instance (Additive b, DetectableZero b, DetectableOne b) => DetectableOne (Stream b) where
33 |   isOne (a :< dp) = isOne a && isZero dp
34 | 
35 | instance (StarSemiring b, DetectableZero b) => StarSemiring (Stream b) where
36 |   star (a :< dp) = q where q = star a .> (one :< dp <.> q)
37 | 
38 | instance Additive b => HasSingle N b (Stream b) where
39 |   w +-> b = foldN (zero :<) (b :< zero) w
40 | 
41 | foldN :: (b -> b) -> b -> N -> b
42 | foldN h e 0 = e
43 | foldN h e n = foldN h (h e) (n-1)
44 | 
45 | instance Indexable N b (Stream b) where
46 |   (b :< bs) ! n = if n == 0 then b else bs ! (n-1)
47 | 


--------------------------------------------------------------------------------
/src/GenInstances.inc:
--------------------------------------------------------------------------------
 1 | -- CPP macros for generating instances of Semiring, etc
 2 | 
 3 | -- Instances for numeric types
 4 | #define Nums(t) \
 5 | instance Additive (t) where { (<+>) = (+) ; zero = 0 } ; \
 6 | instance DetectableZero (t) where { isZero = (== 0)} ; \
 7 | instance DetectableOne  (t) where { isOne  = (== 1)} ; \
 8 | instance Semiring (t) where { (<.>) = (*) ; one  = 1 }
 9 | 
10 | #define FunctorSemimodule(f) \
11 | instance (Semiring zz, Additive ((f) zz)) => LeftSemimodule zz ((f) zz)
12 | 
13 | #define FunctorStar(f) \
14 | instance (Semiring ((f) qq), Ok (f) qq, StarSemiring qq) => StarSemiring ((f) qq) where \
15 |   { star = fmapC star; plus = fmapC plus }
16 | 
17 | #define NullZero(f) \
18 | instance Additive ((f) qq) => DetectableZero ((f) qq) where \
19 |   { isZero = null } ; \
20 | 
21 | -- Additive, LeftSemimodule, Semiring from Applicative
22 | #define ApplSemi(f) \
23 | instance Additive zz => Additive ((f) zz) where \
24 |   { (<+>) = liftA2C (<+>) ; zero = pureC zero } ; \
25 | FunctorSemimodule(f) ; \
26 | instance Semiring zz => Semiring ((f) zz) where \
27 |   { (<.>) = liftA2C (<.>) ; one = pureC one } ; \
28 | FunctorStar(f)
29 | 
30 | -- TODO: Maybe rely on Pointed and Zip instead of Applicative here, considering
31 | -- these definitions.
32 | 
33 | -- Additive from Applicative+Monoid+Foldable
34 | #define ApplMono(f) \
35 | instance Monoid ((f) qq) => Additive ((f) qq) where \
36 |   { zero = mempty ; (<+>) = (<>) } ; \
37 | NullZero(f) ; \
38 | instance (ApplicativeC f, Ok f qq, Monoid qq) => Semiring ((f) qq) where \
39 |   { one = pureC mempty ; (<.>) = liftA2C (<>) } ; \
40 | FunctorStar(f)
41 | 


--------------------------------------------------------------------------------
/src/Examples.hs:
--------------------------------------------------------------------------------
 1 | {-# OPTIONS_GHC -Wno-unused-imports #-} -- TEMP
 2 | 
 3 | -- | Some polymorphic language examples
 4 | 
 5 | module Examples where
 6 | 
 7 | import Prelude hiding (sum,product)
 8 | 
 9 | import Data.Set (Set)
10 | import qualified Data.Set as S
11 | 
12 | import Data.Char (toUpper)
13 | 
14 | import Semi
15 | 
16 | a1, b1, pink, pig :: (HasSingle String b x, Semiring b) => x
17 | a1   = single "a"
18 | b1   = single "b"
19 | pink = single "pink"
20 | pig  = single "pig"
21 | 
22 | pp :: (HasSingle String b x, Additive x, Semiring b) => x
23 | pp   = pink <+> pig
24 | 
25 | as, ass, pps, asbs, asbas, asas, fishy :: (HasSingle String b x, StarSemiring x, StarSemiring b) => x
26 | as  = star a1
27 | ass = star as
28 | pps = star pp
29 | asbs = star a1 <.> star b1
30 | asbas = star a1 <.> b1 <.> star a1
31 | asas = star a1 <.> star a1
32 | fishy = star letter <.> single "fish" <.> star letter
33 | 
34 | anbn :: (HasSingle String b x, Semiring x, Semiring b) => x
35 | anbn = one <+> a1 <.> anbn <.> b1
36 | 
37 | singleChar :: (Ord c, HasSingle [c] b x, Semiring b) => [c] -> x
38 | singleChar cs = singles (S.fromList [[c] | c <- cs])
39 | 
40 | letter :: (HasSingle String b x, Semiring x, Semiring b) => x
41 | letter = singleChar ['a' .. 'z']
42 | -- letter = singles (S.fromList [[c] | c <- ['a' .. 'z']])  -- Should be much more efficient for SharedMap
43 | -- letter = sum [single [c] | c <- ['a' .. 'z']]
44 | 
45 | -- Balanced brackets <https://en.wikipedia.org/wiki/Dyck_language>
46 | dyck :: (HasSingle String b x, StarSemiring x, Semiring b) => x
47 | dyck = star (single "[" <.> dyck <.> single "]")
48 | 
49 | -- Will dyck get repeatedly reconstructed, considering polymorphism?
50 | 
51 | -- TODO: try other formulations, including an explicit local recursion and star.
52 | 
53 | starL :: Semiring b => b -> b
54 | starL b = one <+> starL b <.> b
55 | 
56 | starR :: Semiring b => b -> b
57 | starR b = one <+> b <.> starR b
58 | 


--------------------------------------------------------------------------------
/src/Misc.hs:
--------------------------------------------------------------------------------
 1 | -- {-# OPTIONS_GHC -Wno-unused-imports #-} -- TEMP
 2 | 
 3 | -- | Miscellany
 4 | 
 5 | module Misc where
 6 | 
 7 | import GHC.TypeLits (KnownNat,natVal)
 8 | import Data.Typeable (Proxy(..))
 9 | import GHC.Exts (Constraint)
10 | 
11 | infixl 7 :*
12 | infixl 6 :+
13 | 
14 | type (:*) = (,)
15 | type (:+) = Either
16 | 
17 | type Unop a = a -> a
18 | 
19 | bool :: a -> a -> Bool -> a
20 | bool t e b = if b then t else e
21 | 
22 | -- | Handy universal constraint alias
23 | class    (forall u. con u => con (h u)) => Con1 con h
24 | instance (forall u. con u => con (h u)) => Con1 con h
25 | 
26 | -- | Handy universal constraint alias
27 | class    (forall u v. con u v => con (h u) (h v)) => Con2 con h
28 | instance (forall u v. con u v => con (h u) (h v)) => Con2 con h
29 | 
30 | cats :: Monoid a => Int -> a -> a
31 | cats n a = mconcat (replicate n a)
32 | 
33 | nat :: forall n. KnownNat n => Integer
34 | nat = natVal (Proxy @n)
35 | {-# INLINE nat #-}
36 | 
37 | int :: forall n. KnownNat n => Int
38 | int = fromIntegral (nat @n)
39 | {-# INLINE int #-}
40 | 
41 | type ConF con f = (forall a. con a => con (f a) :: Constraint)
42 | 
43 | -- type ShowF f = (forall a. Show a => Show (f a) :: Constraint)
44 | -- type ShowF f = ConF Show f
45 | 
46 | {--------------------------------------------------------------------
47 |     Invertible monoids
48 | --------------------------------------------------------------------}
49 | 
50 | class Monoid t => Splittable t where
51 |   -- Whether equal to 'mempty'
52 |   isEmpty :: t -> Bool
53 |   -- | The inverse of 'mappend'
54 |   splits :: t -> [(t,t)]
55 | 
56 | instance Splittable [a] where
57 |   isEmpty = null
58 |   splits []     = [([],[])]
59 |   splits (a:as) = ([],a:as) : [((a:l),r) | (l,r) <- splits as]
60 | 
61 | -- Equivalently,
62 | 
63 | --   splits as@(a:as') = ([],as) : map (first (a:)) (splits as')
64 | 
65 | --   splits' as = ([],as) : go as
66 | --    where
67 | --      go []       = []
68 | --      go (a:as')  = [((a:l),r) | (l,r) <- splits' as']
69 | 


--------------------------------------------------------------------------------
/formatting.fmt:
--------------------------------------------------------------------------------
 1 | % -*- text -*-
 2 | %% Misc lhs2TeX directives
 3 | 
 4 | %format family = "\mathbf{family}"
 5 | 
 6 | %% spaces (in 18ths of a quad): \, = 3, \: = 4, \; = 5, \! = -3
 7 | 
 8 | %format liftA2
 9 | 
10 | %format =~ = " \simeq "
11 | 
12 | %format NOP = "{}"
13 | 
14 | %% optional double-dollar spelling, to avoid $ confusing emacs latex-mode.
15 | %format <$$> = <$>
16 | %format <#> = <$>
17 | 
18 | %format <$> = "\mathbin{<\!\!\!\$\!\!\!>}"
19 | %format <*> = "\mathbin{<\!\!\!*\!\!\!>}"
20 | 
21 | %% \usepackage{amssymb}
22 | 
23 | %% hack: add missing space, e.g., before "{" in data type decl
24 | %format SPC = "\ "
25 | %format SPC2 = SPC SPC
26 | %format SPC4 = SPC2 SPC2
27 | %format SPC8 = SPC4 SPC4
28 | 
29 | %format mempty = "\varepsilon"
30 | %format <> = " \diamond "
31 | %format `mappend` = " \diamond "
32 | %format mappend = "(\diamond)"
33 | 
34 | %format :* = " \times "
35 | %format :+ = " + "
36 | 
37 | %% %format *^ = *"\!\hat{}"
38 | %% %format *^ = "\cdot"
39 | 
40 | %% %format <.> = "<\!\!\!\cdot\!\!\!>"
41 | 
42 | %% %format @ = "{\ @}"
43 | %% %format @@ = "@"
44 | 
45 | %format <=> = "\Longleftrightarrow"
46 | 
47 | %format ldq = "\dq\!\!"
48 | %format rdq = "\!\!\dq"
49 | 
50 | %format (qq (op)) = ldq op rdq
51 | 
52 | %% %format not = "\Varid{not}"
53 | 
54 | %format ~ = "\mathbin{\sim}"
55 | 
56 | %format BACKex = "\hspace{-1ex}"
57 | %format BACKquad = "\hspace{-2.2ex}"
58 | 
59 | %format == = =
60 | %format /= = "\not="
61 | 
62 | % Got from Andres L:
63 | \newcommand{\calculationcomments}{%
64 |   \let\onelinecomment=\onelinecommentchars
65 |   \def\commentbegin{\ \{ }%
66 |   \def\commentend{\}}%
67 | }
68 | \calculationcomments
69 | 
70 | %% %format * = "\times"
71 | 
72 | %format >=> = "\mathbin{>\!\!=\!\!\!>}"
73 | 
74 | %format ... = "\ldots"
75 | %format ...^ = "\cdots"
76 | 
77 | %% Add a thin space after lambda
78 | %format \ = "\lambda\, "
79 | 
80 | %% Treat exists like forall, with following period showing as ".", not "\circ"
81 | %format exists(x) = "\exists " x "\hsforall "
82 | %% Add a space following the period used by forall and exists
83 | %format period_   = ".\:"
84 | 


--------------------------------------------------------------------------------
/sections.tex:
--------------------------------------------------------------------------------
 1 | %% For handy insertion
 2 | 
 3 | \secref{Introduction}
 4 | \secref{Monoids, Semirings and Semimodules}
 5 | \secref{Monoids}
 6 | \secref{Additive Monoids}
 7 | \secref{Semirings}
 8 | \secref{Star Semirings}
 9 | \secref{Semimodules}
10 | \secref{Function-like Types and Singletons}
11 | \secref{Calculating Instances from Homomorphisms}
12 | \secref{Languages and the Monoid Semiring}
13 | \secref{Finite maps}
14 | \secref{Decomposing Functions from Lists}
15 | \secref{Regular Expressions}
16 | \secref{Tries}
17 | \secref{Convolution}
18 | \secref{Beyond Convolution}
19 | \secref{The Free Semimodule Monad}
20 | \secref{More Applications}
21 | \secref{Polynomials}
22 | \secref{Miscellaneous Notes}
23 | \secref{Proofs}
24 | 
25 | \deflabel{monoid homomorphism}
26 | \deflabel{additive monoid homomorphism}
27 | \thmlabel{curry additive}
28 |  \deflabel{semiring homomorphism}
29 | \thmlabel{curry semiring}
30 | \deflabel{star semiring homomorphism}
31 | \lemlabel{affine over semiring}
32 | \deflabel{left semimodule homomorphism}
33 | \lemlabel{affine over semimodule}
34 | \lemlabel{decomp +->}
35 | \thmlabel{Semiring (b <-- a)}
36 | \lemlabel{decomp (b <-- [c])}
37 | \lemlabel{atEps b <-- [c]}
38 | \lemlabel{deriv b <-- [c]}
39 | \thmlabel{semiring decomp b <-- [c]}
40 | \thmlabel{semiring decomp generalized}
41 | \thmlabel{RegExpFun}
42 | \thmlabel{LTrie}
43 | \thmlabel{Fourier}
44 | \lemlabel{deriv b <-- [c]} specializes for |b <-- N| as follows:
45 | \thmlabel{Stream}
46 | \thmlabel{standard FunApp}
47 | \thmlabel{poly fun}
48 | \prooflabel{theorem:curry additive}
49 | \prooflabel{lemma:decomp +->}
50 | \prooflabel{theorem:Semiring (b <-- a)}
51 | \prooflabel{lemma:decomp (b <-- [c])}
52 | \prooflabel{lemma:atEps b <-- [c]}
53 | \prooflabel{lemma:deriv b <-- [c]}
54 | \lemlabel{deriv +->}
55 | \prooflabel{theorem:semiring decomp b <-- [c]}
56 | \lemlabel{atEps and deriv via (<:)}
57 | \prooflabel{theorem:LTrie}
58 | \prooflabel{theorem:Fourier}
59 | \prooflabel{theorem:decomp (b <-- N)}
60 | \prooflabel{lemma:deriv (b <-- N)}
61 | \lemlabel{deriv +-> Nat}
62 | \prooflabel{theorem:standard FunApp}
63 | \prooflabel{theorem:poly fun}
64 | \lemlabel{poly +->}
65 | 
66 | \figrefdef{<--}{The monoid semiring}
67 | \figrefdef{Map}{Finite maps}
68 | \figrefdef{RegExp}{Semiring-generalized regular expressions}
69 | \figrefdef{LTrie}{Tries as |[c] -> b| and as |[c] -> b|}
70 | \figrefdef{Stream}{Streams}
71 | \figrefdef{FunApp}{|Functor| and |Applicative| classes and some instances}
72 | 


--------------------------------------------------------------------------------
/test/ImageTest.hs:
--------------------------------------------------------------------------------
 1 | 
 2 | {-# OPTIONS_GHC -Wall #-}
 3 | {-# OPTIONS_GHC -Wno-unused-imports #-} -- TEMP
 4 | 
 5 | -- | 
 6 | 
 7 | module Main where
 8 | 
 9 | import qualified Data.Vector as V
10 | import Codec.Picture (convertRGB8, readImage, savePngImage)
11 | import Codec.Picture.Types
12 |   ( DynamicImage(ImageYF), Image(..), PixelF , dynamicMap
13 |   , pixelAt, promoteImage , extractLumaPlane )
14 | import Data.Vector.Storable (convert)
15 | 
16 | import Semi hiding ((^))
17 | 
18 | main :: IO ()
19 | main = do convolve "original" ident "wizard"
20 |           convolve "blur" (boxBlur 5) "wizard"
21 |           convolve "sharpen" sharpen "wizard"
22 |           convolve "edge-detect" edgy "wizard"
23 | 
24 | convolve :: String -> Arr Double -> FilePath -> IO ()
25 | convolve opName kernel origName =
26 |   do img <- readArr (origName ++ ".jpg")
27 |      saveArr (origName ++ "-" ++ opName ++ ".png") (img <.> kernel)
28 | 
29 | {--------------------------------------------------------------------
30 |     Kernels
31 | --------------------------------------------------------------------}
32 | 
33 | ident :: Arr Double
34 | ident = [[1]]
35 | 
36 | boxBlur :: Int -> Arr Double
37 | boxBlur n = (fmap.fmap) (/ fromIntegral (n*n)) ((replicate n . replicate n) 1)
38 | 
39 | sharpen :: Arr Double
40 | sharpen = [[ 0,-1, 0]
41 |           ,[-1, 5,-1]
42 |           ,[ 0,-1, 0]]
43 | 
44 | edgy :: Arr Double
45 | edgy = [[-1,-1,-1]
46 |        ,[-1, 8,-1]
47 |        ,[-1,-1,-1]]
48 | 
49 | {--------------------------------------------------------------------
50 |     Conversion between [[b]] and DynamicImage (JuicyPixels)
51 | --------------------------------------------------------------------}
52 | 
53 | -- TODO: use statically sized vectors.
54 | 
55 | -- | A 2D array represented as a list of lists
56 | type Arr b = [[b]]
57 | 
58 | gen :: Int -> (Int -> a) -> [a]
59 | gen dim f = f <$> [0 .. dim-1]
60 | 
61 | imgToArr :: Fractional b => DynamicImage -> Arr b
62 | imgToArr im =
63 |   gen height $ \ y ->
64 |   gen width  $ \ x ->
65 |   realToFrac $ pixelAt dat x y
66 |   where
67 |     width  = dynamicMap imageWidth  im
68 |     height = dynamicMap imageHeight im
69 |     dat    = (promoteImage . extractLumaPlane . convertRGB8) im :: Image PixelF
70 | 
71 | -- Assume arr is rectangular and nonempty.
72 | arrToImg :: Real a => Arr a -> DynamicImage
73 | arrToImg arr = ImageYF $ Image
74 |   { imageWidth  = length (head arr)
75 |   , imageHeight = length arr
76 |   , imageData   = (convert . V.fromList . map realToFrac . concat) arr
77 |   }
78 | 
79 | readArr :: FilePath -> IO (Arr Double)
80 | readArr path =
81 |   fmap (either (error ("couldn't read " ++ path)) imgToArr) (readImage path)
82 | 
83 | -- For now just PNG
84 | saveArr :: FilePath -> Arr Double -> IO ()
85 | saveArr path arr = savePngImage path (arrToImg arr)
86 | 


--------------------------------------------------------------------------------
/test/Benchmark.hs:
--------------------------------------------------------------------------------
 1 | {-# OPTIONS_GHC -Wno-unused-imports #-} -- TEMP
 2 | 
 3 | -- | Benchmarks
 4 | 
 5 | module Main where
 6 | 
 7 | import Control.DeepSeq (NFData)
 8 | import Data.Map (Map)
 9 | import System.Directory (createDirectoryIfMissing)
10 | import Criterion.Main
11 | import Criterion.Types (Config(..),Verbosity(..))
12 | 
13 | import Misc (cats)
14 | import Semi
15 | import RegExp
16 | import Cofree
17 | import ShareMap
18 | 
19 | import Examples
20 | 
21 | main :: IO ()
22 | main =
23 |   defaultMainWith config [
24 |       bgroup "" []
25 | 
26 |     , group "star-a" (star a1) [] $
27 |         replicate 100 'a'
28 | 
29 |     , group "letters" (star letter) [] $
30 |         cats 25 "asdf" 
31 | 
32 |     , group "fishy" (star letter <.> single "fish" <.> star letter) [] $
33 |         take 48 (cycle az) ++ "fish" ++ take 48 (cycle az)
34 | 
35 |     , group "asas" (star a1 <.> star a1) [] $
36 |         replicate 50 'a' ++ replicate 50 'a'
37 | 
38 |     , group "asbs" (star a1 <.> star b1) [] $
39 |         replicate 50 'a' ++ replicate 50 'b'
40 | 
41 |     , group "asbas" (star a1 <.> b1 <.> star a1) [] $
42 |         replicate 50 'a' ++ "b" ++ replicate 49 'a'
43 | 
44 |       -- With O = N, the dyck examples don't work for RegExp:Function, while anbn
45 |       -- is okay.
46 |     , group "dyck" dyck ["RegExp:Map","RegExp:IntMap"] $
47 |         "[[[[[[]][[[[]]]][[[[]]]][[[]]][]][[[[]]]]]][]][[[]]][[[[]]]][[[[[[[]]][[[[]]]][[[]]][]][[[[]]]]]][]]"
48 | 
49 |     , group "anbn" anbn ["RegExp:Map","RegExp:IntMap"] $
50 |         replicate 50 'a' ++ replicate 50 'b'
51 | 
52 |     ]
53 |  where
54 |    config = defaultConfig
55 |      { 
56 |        timeLimit = 1 -- 5
57 |      , reportFile = Just "test/stats.html"
58 |      }
59 | 
60 | type Ok x b = (HasSingle String b x, StarSemiring x, StarSemiring b, NFData b)
61 | 
62 | group :: String -> (forall x b. Ok x b => x) -> [String] -> String -> Benchmark
63 | group groupName example omit str =
64 |   bgroup groupName
65 |     [ bgroup "" []
66 | 
67 |     -- , style @(RegExp ((->)   Char) O) "RegExp:Function"
68 |     -- , style @(RegExp (Map    Char) O) "RegExp:Map"
69 |     -- -- , style @(RegExp CharMap       O) "RegExp:IntMap"
70 | 
71 |     -- , style @(Cofree  ((->)     Char) O) "Cofree:Function"
72 |     , style @(Cofree  (Map      Char) O) "Cofree:Map"
73 |     -- , style @(Cofree  CharMap         O) "Cofree:IntMap"
74 |     , style @(Cofree  (ShareMap Char) O) "Cofree:ShareMap"
75 | 
76 |     ]
77 |  where
78 |    style :: forall x b. Ok x b => String -> Benchmark
79 |    style s | s `elem` omit = bench s (whnf id ())
80 |                              -- bgroup "" []
81 |            | otherwise     = bench s (nf (example @x @b !) str)
82 | 
83 | 
84 | type O = Bool -- N
85 | 
86 | -- TODO: Generate the style name from the type via TypeRep.
87 | 
88 | az :: String
89 | az = ['a'..'z']
90 | 


--------------------------------------------------------------------------------
/package.yaml:
--------------------------------------------------------------------------------
  1 | name:                paper
  2 | version:             0.1.0.0
  3 | github:              "conal/convolution-paper"
  4 | license:             BSD3
  5 | author:              "Conal Elliott"
  6 | maintainer:          "conal@conal.net"
  7 | copyright:           "2018 Conal Elliott"
  8 | 
  9 | synopsis:            Language recognition in the spirit of Brzowski's derivatives
 10 | category:            language, math
 11 | 
 12 | description:         Please see the README on GitHub at <https://github.com/conal/convolution-paper#readme>
 13 | 
 14 | default-extensions:
 15 |   - AllowAmbiguousTypes
 16 |   - CPP
 17 |   - ConstraintKinds
 18 |   - DefaultSignatures
 19 |   - DeriveFoldable
 20 |   - DeriveFunctor
 21 |   - FlexibleContexts
 22 |   - FlexibleInstances
 23 |   - FunctionalDependencies
 24 |   - GADTs
 25 |   - GeneralizedNewtypeDeriving
 26 |   - LambdaCase
 27 |   - MultiParamTypeClasses
 28 |   - OverloadedStrings  # for Poly, since dante doesn't pick up
 29 |   - PatternSynonyms
 30 |   - QuantifiedConstraints
 31 |   - RankNTypes
 32 |   - StandaloneDeriving
 33 |   - TupleSections
 34 |   - TypeApplications
 35 |   - TypeFamilies
 36 |   - TypeOperators
 37 |   - UndecidableInstances
 38 |   - ViewPatterns
 39 |   - ScopedTypeVariables
 40 | 
 41 | ghc-options:
 42 |   -Wall
 43 |   # -O2  # Use when benchmarking
 44 | 
 45 | cpp-options: -DEXAMPLES
 46 | 
 47 | library:
 48 |   dependencies:
 49 |     - base >= 4.7 && < 5
 50 |     - ghc-prim
 51 |     - deepseq
 52 |     - data-inttrie
 53 |     - MemoTrie
 54 |     - containers
 55 |     - multiset
 56 |     - finite-typelits
 57 |     - vector-sized
 58 |   other-modules: []
 59 |   include-dirs: src
 60 |   install-includes: GenInstances.inc
 61 |   source-dirs: src
 62 |   exposed-modules:
 63 |     - Misc
 64 |     - Constrained
 65 |     - Semi
 66 |     - RegExp
 67 |     - Cofree
 68 |     - MMap
 69 |     - ShareMap
 70 |     - Poly
 71 |     - Examples
 72 | 
 73 | tests:
 74 |   test:
 75 |     source-dirs:         test
 76 |     main:                GoldTests.hs
 77 |     other-modules: []
 78 |     dependencies:
 79 |     - base
 80 |     - Cabal >= 1.24.0.0
 81 |     - containers
 82 |     - bytestring
 83 |     - tasty
 84 |     - tasty-golden
 85 |     - paper
 86 | 
 87 | benchmarks:
 88 |   bench:
 89 |     ghc-options:
 90 |       -O2
 91 |     source-dirs:         test
 92 |     main:                Benchmark.hs
 93 |     other-modules: []
 94 |     dependencies:
 95 |     - base
 96 |     - Cabal >= 1.24.0.0
 97 |     - containers
 98 |     - criterion
 99 |     - directory
100 |     - deepseq
101 |     - MemoTrie
102 |     - paper
103 | 
104 | # stack bench
105 | 
106 | executables:
107 |   image-test:
108 |     ghc-options:
109 |       -O2
110 |     source-dirs:         test
111 |     main:                ImageTest.hs
112 |     other-modules: []
113 |     dependencies:
114 |     - base
115 |     - Cabal >= 1.24.0.0
116 |     - JuicyPixels
117 |     - vector
118 |     - paper
119 | 
120 | # In the test directory:
121 | # stack build :image-test && stack exec image-test
122 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
  1 | paper = convolution
  2 | 
  3 | # # breaks arXiv upload
  4 | # outdir=out
  5 | outdir=.
  6 | 
  7 | conf = $(outdir)/$(paper)-conf
  8 | long = $(outdir)/$(paper)-long
  9 | long-anon = $(outdir)/$(paper)-long-anon
 10 | 
 11 | pdf: $(long).pdf
 12 | pdf: $(conf).pdf
 13 | pdf: $(long-anon).pdf
 14 | 
 15 | see: $(long).see
 16 | # see: $(conf).see
 17 | # see: $(long-anon).see
 18 | 
 19 | long: $(long).pdf
 20 | conf: $(conf).pdf
 21 | long-anon: $(long-anon).pdf
 22 | 
 23 | all: conf
 24 | all: long
 25 | # all: long-anon
 26 | 
 27 | all.see: conf.see long.see long-anon.see
 28 | 
 29 | 
 30 | # # Doesn't work
 31 | # .PRECIOUS: $(outdir)/%.tex $(outdir)/%.pdf
 32 | 
 33 | .PRECIOUS: out/convolution.tex out/convolution.pdf
 34 | .PRECIOUS: out/was1.tex out/was1.pdf
 35 | .PRECIOUS: out/was2.tex out/was2.pdf
 36 | .PRECIOUS: out/was3.tex out/was3.pdf
 37 | .PRECIOUS: $(conf).tex $(conf).pdf
 38 | .PRECIOUS: $(long).tex $(long).pdf
 39 | .PRECIOUS: $(long-anon).tex $(long-anon).pdf
 40 | 
 41 | latex=latexmk -pdf
 42 | latex+= -outdir=${outdir}
 43 | # latex+= -synctex=1
 44 | latex+= -halt-on-error
 45 | # # Preview continuously
 46 | # latex+= -pvc
 47 | 
 48 | stats = $(wildcard test/stats/*.txt)
 49 | stats: $(stats)
 50 | 
 51 | see-stats:
 52 | 	echo $(stats)
 53 | 
 54 | %.pdf: %.tex macros.tex bib.bib Makefile
 55 | 	$(latex) $*.tex
 56 | 	touch $@
 57 | 
 58 | # The previous rule always ran. Passing "-d" (debug) to make revealed that the
 59 | # PDFs were not getting updated by latexmk, so their prerequisites stay newer.
 60 | # Workaround: touch the PDF.
 61 | 
 62 | texdeps = formatting.fmt Makefile $(stats)
 63 | 
 64 | $(conf).tex: $(paper).lhs $(texdeps)
 65 | 	lhs2tex --set=conf --set=anonymous -o $*.tex $(paper).lhs
 66 | 
 67 | $(long).tex: $(paper).lhs $(texdeps)
 68 | 	lhs2tex --set=long -o $*.tex $(paper).lhs
 69 | 
 70 | # --set=draft
 71 | 
 72 | $(long-anon).tex: $(paper).lhs $(texdeps)
 73 | 	lhs2tex --set=long --set=anonymous -o $*.tex $(paper).lhs
 74 | 
 75 | showpdf=skim
 76 | 
 77 | %.see: %.pdf
 78 | 	${showpdf} $<
 79 | 
 80 | conf.see: $(conf).see
 81 | long.see: $(long).see
 82 | long-anon.see: $(long-anon).see
 83 | 
 84 | pics = $(wildcard test/*.png)
 85 | 
 86 | long.zip: $(long).tex $(long).bbl macros.tex $(pics)
 87 | 	zip $@ $^
 88 | 
 89 | SHELL = bash
 90 | 
 91 | # clean:
 92 | # 	rm -f ${outdir}/*
 93 | 
 94 | clean:
 95 | 	rm -f {$(conf),$(long),$(long-anon)}.{tex,pdf,aux,nav,snm,ptb,log,out,toc,bbl,blg,fdb_latexmk,fls}
 96 | 
 97 | 
 98 | TAGS: *.tex *.lhs *.bib src/*.hs src/*.inc
 99 | 	etags $^
100 | 
101 | supp = supplemental-material
102 | $(supp): readme.md stack.yaml package.yaml src/*.hs src/*.inc test/*.hs test/gold/*.txt test/wizard.jpg out/convolution-long-anon.pdf
103 | 	mkdir -p $(supp)/{src,test}
104 | 	grep -vi conal package.yaml > $(supp)/package.yaml
105 | 	cp -p stack.yaml readme.md out/convolution-long-anon.pdf $(supp)
106 | 	cp -p src/*.hs src/*.inc $(supp)/src
107 | 	cp -p test/*.hs test/wizard.jpg $(supp)/test
108 | 	cp -rp test/gold $(supp)/test
109 | 
110 | # Supplemental tarball in progress
111 | tar: supplemental.tar
112 | 
113 | supplemental.tar: $(supp)
114 | 	tar -cvf $@ $(supp)
115 | 
116 | web: web-token
117 | 
118 | web-token: $(long).pdf
119 | 	scp $< conal@conal.net:/home/conal/domains/conal/htdocs/papers/$(paper)
120 | 	touch web-token
121 | 


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
 1 | ## Generalized Convolution and Efficient Language Recognition
 2 | 
 3 | This repo contains an unpublished [paper](http://conal.net/papers/convolution/) along with the source code appearing in the paper.
 4 | 
 5 | 
 6 | ### Abstract
 7 | 
 8 | *Convolution* is a broadly useful operation with applications including signal processing, machine learning, probability, optics, polynomial multiplication, and efficient parsing. Usually, however, this operation is understood and implemented in more specialized forms, hiding commonalities and limiting usefulness. This paper formulates convolution in the common algebraic framework of semirings and semimodules and populates that framework with various representation types. One of those types is the grand abstract template and itself generalizes to the free semimodule monad. Other representations serve varied uses and performance trade-offs, with implementations calculated from simple and regular specifications.
 9 | 
10 | Of particular interest is Brzozowski's method for regular expression matching. Uncovering the method's essence frees it from syntactic manipulations, while generalizing from boolean to weighted membership (such as multisets and probability distributions) and from sets to *n*-ary relations. The classic *trie* data structure then provides an elegant and efficient alternative to syntax. Pleasantly, polynomial arithmetic requires no additional implementation effort, works correctly with a variety of representations, and handles multivariate polynomials and power series with ease. Image convolution also falls out as a special case.
11 | 
12 | Pleasantly, polynomial arithmetic requires no additional implementation effort, works correctly with a variety of representations, and handles multivariate polynomials and power series with ease.
13 | Image convolution also falls out as a special case.
14 | 
15 | 
16 | ### Haskell source code
17 | 
18 | You can find the source code for the paper's functionality and examples in the `src` directory.
19 | 
20 | To try out the Haskell implementation, make sure you have [`stack`](https://docs.haskellstack.org/en/stable/README/) installed, and then
21 | 
22 | *   Compile: `stack build`
23 | *   Gold tests: `stack test`.
24 |     These gold tests check that all of the representations yield the same answer.
25 | *   Benchmarks: `stack bench`.
26 |     This one takes a while and generates stats.html as well as a lot of textual statistics.
27 |     You can make it go faster (at the cost of less accurate measurements) by lowing `timeLimit` in test/Benchmark.hs.
28 |     The examples (`anbn` and `dyck`) that don't terminate with `RegExp (Map Char)` appear to have running times of about 3 ns but really are skipped.
29 | *   To run the image convolution examples:
30 | 
31 |         (cd test; stack build :image-test && stack exec image-test)
32 | 
33 | 
34 | Some of the modules contain comments like the following (in src/Poly.hs):
35 | 
36 | ``` haskell
37 | -- >>> let p = single 1 <+> value 3 :: Poly1 Z
38 | -- >>> p
39 | -- x + 3
40 | -- 
41 | -- >>> p^3
42 | -- x^3 + 9 * x^2 + 27 * x + 27
43 | -- 
44 | -- >>> p^5
45 | -- x^5 + 15 * x^4 + 90 * x^3 + 270 * x^2 + 405 * x + 243
46 | ```
47 | 
48 | If you use Emacs and have [dante](https://github.com/jyp/dante) installed, you can run these examples in place via `dante-eval-block` (`C-c "`).
49 | 
50 | 
51 | 


--------------------------------------------------------------------------------
/test/GoldTests.hs:
--------------------------------------------------------------------------------
  1 | {-# OPTIONS_GHC -Wno-unused-binds -Wno-unused-imports #-}  -- TEMP
  2 | 
  3 | module Main where
  4 | 
  5 | import Prelude hiding (sum,product,(^))
  6 | 
  7 | import Data.Map.Lazy (Map)
  8 | import qualified Data.Map.Lazy as Map
  9 | 
 10 | import Data.IntMap.Lazy (IntMap)
 11 | 
 12 | import Data.ByteString.Lazy.Char8 (pack)
 13 | import Data.Semigroup ((<>))
 14 | import Test.Tasty (defaultMain, TestTree, testGroup)
 15 | import Test.Tasty.Golden
 16 | 
 17 | -- import Data.MemoTrie
 18 | 
 19 | import Misc   (cats)
 20 | import Semi
 21 | import RegExp (RegExp)
 22 | import Cofree  (Cofree)
 23 | import ShareMap (ShareMap)
 24 | 
 25 | import Examples
 26 | 
 27 | main :: IO ()
 28 | main = do
 29 |   defaultMain basicTests
 30 | 
 31 | basicTests :: TestTree
 32 | basicTests = testGroup "Various representations"
 33 |   [ testGroup "" []
 34 | 
 35 |   -- , tests @(String  ->              N) "Function"
 36 | 
 37 |   -- , tests @(RegExp ((->) Char)      N) "RegExpFun"
 38 |   -- , tests @(RegExp (Map  Char)      N) "RegExpMap"
 39 |   -- , tests @(RegExp CharMap          N) "RegExpIntMap"
 40 |   -- , tests @(RegExp (ShareMap Char)  N) "RegExpShareMap"
 41 | 
 42 |   -- , tests @(Cofree  ((->) Char)     N) "CofreeFun"
 43 |   , tests @(Cofree  (Map  Char)     N) "CofreeMap"
 44 |   -- , tests @(Cofree  CharMap         N) "CofreeIntMap"
 45 |   , tests @(Cofree  (ShareMap Char) N) "CofreeShareMap"
 46 | 
 47 |   ]
 48 | 
 49 | -- TODO: some tests with s other than Bool.
 50 | 
 51 | -- tests' :: forall x. Semiring x => String -> TestTree
 52 | -- tests' = undefined
 53 | 
 54 | tests :: forall x b.
 55 |   (HasSingle String b x, StarSemiring x, StarSemiring b, Show b)
 56 |   => String -> TestTree
 57 | tests group = testGroup group
 58 |   [ testGroup "" []
 59 | 
 60 |   , gold "as-eps"                         $ as  # ""
 61 |   , gold "as-a"                           $ as  # "a"
 62 | 
 63 |   , gold "pp-pi"                          $ pp # "pi"
 64 |   , gold "pp-pig"                         $ pp # "pig"
 65 |   , gold "pp-pig"                         $ pp # "pig"
 66 |   , gold "pp-pink"                        $ pp # "pink"
 67 |   , gold "pp-ping"                        $ pp # "ping"
 68 | 
 69 |   , gold "pps-q"                          $ pps # "q"
 70 |   , gold "pps-pig"                        $ pps # "pig"
 71 |   , gold "pps-pigpig"                     $ pps # "pigpig"
 72 |   , gold "pps-pigping"                    $ pps # "pigping"
 73 |   , gold "pps-pinkpigpinkpigpig"          $ pps # "pinkpigpinkpigpig"
 74 | 
 75 |   , gold "letters as0df"                  $ star letter # "as0df"
 76 |   , gold "letters asdf"                   $ star letter # "asdf"
 77 |   , gold "letters asdf 40"                $ star letter # cats 40 "asdf"
 78 | 
 79 |   , groupNot ["RegExpMap","RegExpIntMap"] $
 80 |     testGroup "anbn"
 81 |     [ gold "anbn-eps"                     $ anbn # ""
 82 |     , gold "anbn-ab"                      $ anbn # "ab"
 83 |     , gold "anbn-ba"                      $ anbn # "ba"
 84 |     , gold "anbn-aabb"                    $ anbn # "aabb"
 85 |     , gold "anbn-aacbb"                   $ anbn # "aacbb"
 86 |     , gold "anbn-aaabbb"                  $ anbn # "aaabbb"
 87 |     , gold "anbn-aaabbbb"                 $ anbn # "aaabbbb"
 88 |     ]
 89 | 
 90 |   , gold "asbs-aabbbb"  $ asbs # "aabbbb"
 91 |   , gold "asbs-aabbba"  $ asbs # "aabbba"
 92 | 
 93 |   , gold "asbas-aabaaa" $ asbas # "aabaaa"
 94 |   , gold "asbas-aabba"  $ asbas # "aabba"
 95 | 
 96 |   , gold "asas-100" $ asas # replicate 100 'a'
 97 | 
 98 |   , groupNot ["RegExpFun","RegExpMap","RegExpIntMap"] $
 99 |     testGroup "dyck"
100 |     [ gold "dyck-a"                       $ dyck # "[]"
101 |     , gold "dyck-b"                       $ dyck # "[[]]"
102 |     , gold "dyck-c"                       $ dyck # "[[a]]"
103 |     , gold "dyck-d"                       $ dyck # "[[]][]"
104 |     , gold "dyck-e"                       $ dyck # "[[]][[]"
105 |     ]
106 | 
107 |   ]
108 |  where
109 |    infixl 2 #
110 |    (#) :: x -> String -> b
111 |    (#) = (!)
112 |    groupNot :: [String] -> TestTree -> TestTree
113 |    groupNot gs | group `elem` gs = const (testGroup "" [])
114 |                | otherwise       = id
115 |    gold :: Show z => String -> z -> TestTree
116 |    gold nm = goldenVsString nm ("test/gold/" <> nm <> ".txt")
117 |              . pure . pack . show
118 | 


--------------------------------------------------------------------------------
/src/ShareMap.hs:
--------------------------------------------------------------------------------
  1 | {-# OPTIONS_GHC -Wno-unused-imports #-} -- TEMP
  2 | 
  3 | -- | Finite maps with sharing (for non-injectivity)
  4 | 
  5 | module ShareMap where
  6 | 
  7 | import Control.Arrow (second,(***))
  8 | import Data.Foldable (foldl')
  9 | import Data.Maybe (fromJust)
 10 | import Data.Functor.Classes (Show1(..),Show2(..))
 11 | 
 12 | import Data.Set (Set)
 13 | import qualified Data.Set as S
 14 | import Data.Map (Map)
 15 | import qualified Data.Map as M
 16 | 
 17 | import Semi
 18 | 
 19 | -- Point each key to a canonical representative, and point each representative
 20 | -- to the equivalence class and the map value.
 21 | data ShareMap k v = SM (Map k k) (Map k (Set k, v)) deriving (Show)
 22 | 
 23 | -- instance Show2 ShareMap where
 24 | --   liftShowsPrec2 _ _ _ _ = showsPrec
 25 | 
 26 | instance Ord k => Semigroup (ShareMap k v) where
 27 |   SM rep m <> SM rep' m' = SM (M.union rep rep') (M.union m m')
 28 | 
 29 | instance Ord k => Monoid (ShareMap k v) where
 30 |   mempty = SM M.empty M.empty
 31 |   mappend = (<>)
 32 | 
 33 | -- I could instead newtype-wrap a pair, and derive Semigroup and Monoid
 34 | 
 35 | empty :: ShareMap k v
 36 | empty = SM M.empty M.empty
 37 | 
 38 | type instance Key (ShareMap k) = k
 39 | 
 40 | instance (Ord k, Additive v) => Indexable k v (ShareMap k v) where
 41 |   SM reps m ! k = case M.lookup k reps of
 42 |                     Nothing -> zero
 43 |                     Just k' -> (snd <$> m) ! k'  -- split second map
 44 | 
 45 | -- TODO: move (*->) into HasSingle with defaults.
 46 | instance (Ord k, Additive v) => HasSingle k v (ShareMap k v) where
 47 |   ks *-> v = case S.minView ks of
 48 |                Nothing -> empty
 49 |                Just (k,_) -> SM (M.fromDistinctAscList ((,k) <$> S.toAscList ks))
 50 |                                 (M.singleton k (ks,v))
 51 | 
 52 | instance Functor (ShareMap k) where
 53 |   fmap f (SM reps m) = SM reps (fmap (second f) m)
 54 | 
 55 | -- Addition is trickier. See 2019-03-{05,06,09} journal notes.
 56 | 
 57 | type Chunk k v = (Set k, v)
 58 | type Chunks k v = [Chunk k v] -- disjoint k subsets
 59 | 
 60 | addChunks :: (Ord k, Additive v) => Chunks k v -> Chunks k v -> Chunks k v
 61 | addChunks p p' =
 62 |      [ (ks  `S.difference` support', x ) | (ks ,x ) <- p ]
 63 |   ++ [ (ks' `S.difference` support , x') | (ks',x') <- p']
 64 |   ++ [ (ks `S.intersection` ks', x <+> x') | (ks,x) <- p, (ks',x') <- p' ]
 65 |  where
 66 |    support  = chunksSupport p
 67 |    support' = chunksSupport p'
 68 | 
 69 | chunksSupport :: Ord k => Chunks k v -> Set k
 70 | chunksSupport = S.unions . map fst
 71 | 
 72 | -- TODO: pull support out of the keys of ShareMap' Map k k. Or even leave it there.
 73 | 
 74 | -- data ShareMap k v = SM (Map k k) (Map k (Set k, v)) deriving Show
 75 | 
 76 | -- Build a ShareMap from disjoint chunks.
 77 | shareMap :: forall k v. (Ord k, Additive v) => Chunks k v -> ShareMap k v
 78 | shareMap (filter (not . null . fst) -> chunks) = foldMap h (chunks `zip` maxes)
 79 |  where
 80 |    maxes :: [k]
 81 |    maxes = (fromJust . S.lookupMax . fst) <$> chunks
 82 |    h :: (Chunk k v, k) -> ShareMap k v
 83 |    h ((ks,v),maxk) =
 84 |      SM (M.fromList ((,maxk) <$> S.elems ks)) (M.singleton maxk (ks,v))
 85 | 
 86 | instance (Ord k, Additive v) => Additive (ShareMap k v) where
 87 |   zero = SM M.empty M.empty
 88 |   SM _ (M.elems -> p) <+> SM _ (M.elems -> q) = shareMap (p `addChunks` q)
 89 | 
 90 | -- >>> let a2 = 'a' +-> 2 :: ShareMap Char Z
 91 | -- >>> a2
 92 | -- SM (fromList [('a','a')]) (fromList [('a',(fromList "a",2))])
 93 | -- >>> let b3 = 'b' +-> 3 :: ShareMap Char Z
 94 | -- >>> b3
 95 | -- SM (fromList [('b','b')]) (fromList [('b',(fromList "b",3))])
 96 | -- >>> a2 <+> b3
 97 | -- SM (fromList [('a','a'),('b','b')]) (fromList [('a',(fromList "a",2)),('b',(fromList "b",3))])
 98 | -- >>> a2 <+> a2
 99 | -- SM (fromList [('a','a')]) (fromList [('a',(fromList "a",4))])
100 | -- >>> b3 <+> b3
101 | -- SM (fromList [('b','b')]) (fromList [('b',(fromList "b",6))])
102 | --
103 | -- >>> (a2 <+> b3) <+> b3
104 | -- SM (fromList [('a','a'),('b','b')]) (fromList [('a',(fromList "a",2)),('b',(fromList "b",6))])
105 | -- 
106 | -- >>> let a2c = [(S.fromList "a",2)] :: Chunks Char Z
107 | -- >>> let b3c = [(S.fromList "b",3)] :: Chunks Char Z
108 | -- >>> (a2c,b3c)
109 | -- ([(fromList "a",2)],[(fromList "b",3)])
110 | -- >>> addChunks a2c b3c
111 | -- [(fromList "a",2),(fromList "b",3),(fromList "",5)]
112 | -- >>> S.fromList "abcdef" *-> 3 :: ShareMap Char Z
113 | -- SM (fromList [('a','a'),('b','a'),('c','a'),('d','a'),('e','a'),('f','a')]) (fromList [('a',(fromList "abcdef",3))])
114 | 


--------------------------------------------------------------------------------
/src/Constrained.hs:
--------------------------------------------------------------------------------
  1 | {-# OPTIONS_GHC -Wno-unused-imports #-} -- TEMP
  2 | 
  3 | -- | Some constrained classes
  4 | 
  5 | module Constrained where
  6 | 
  7 | import Prelude hiding (sum)
  8 | 
  9 | import Control.Applicative (liftA2)
 10 | import Control.Monad (join)
 11 | import GHC.Types (Constraint)
 12 | import Data.Set (Set)
 13 | import qualified Data.Set as S
 14 | import Data.MultiSet (MultiSet)
 15 | import qualified Data.MultiSet as MS
 16 | import Data.Map (Map)
 17 | import qualified Data.Map as M
 18 | 
 19 | -- I'd like to make Constrained more primitive than Semi, so I may need to
 20 | -- shuffle some things.
 21 | import Semi
 22 | 
 23 | import Misc ((:*))
 24 | 
 25 | type Ok2 f a b = (Ok f a, Ok f b)
 26 | type Ok3 f a b c = (Ok2 f a b, Ok f c)
 27 | type Ok4 f a b c d = (Ok2 f a b, Ok2 f c d)
 28 | 
 29 | class FunctorC f where
 30 |   type Ok f a :: Constraint
 31 |   type Ok f a = ()
 32 |   fmapC :: Ok2 f a b => (a -> b) -> f a -> f b
 33 |   default fmapC :: Functor f => (a -> b) -> f a -> f b
 34 |   fmapC = fmap
 35 | 
 36 | class FunctorC f => ApplicativeC f where
 37 |   pureC :: Ok f a => a -> f a
 38 |   default pureC :: Applicative f => a -> f a
 39 |   pureC = pure
 40 |   liftA2C :: Ok3 f a b c => (a -> b -> c) -> f a -> f b -> f c
 41 |   default liftA2C :: Applicative f => (a -> b -> c) -> f a -> f b -> f c
 42 |   liftA2C = liftA2
 43 | 
 44 | infixl 1 >>==
 45 | class ApplicativeC f => MonadC f where
 46 |   joinC :: Ok f a => f (f a) -> f a
 47 |   default joinC :: Monad f => f (f a) -> f a
 48 |   joinC = join
 49 |   (>>==) :: Ok2 f a b => f a -> (a -> f b) -> f b
 50 |   default (>>==) :: Monad f => f a -> (a -> f b) -> f b
 51 |   (>>==) = (>>=)
 52 | 
 53 | bindViaJoin :: (MonadC f, Ok3 f a b (f b)) => f a -> (a -> f b) -> f b
 54 | bindViaJoin as f = joinC (fmapC f as)
 55 | 
 56 | joinViaBind :: (MonadC f, Ok2 f a (f a)) => f (f a) -> f a
 57 | joinViaBind q = q >>== id
 58 | 
 59 | class FunctorC f => MonoidalC f where
 60 |   unitC :: Ok f () => f ()
 61 |   crossC :: Ok2 f a b => f a -> f b -> f (a :* b)
 62 | 
 63 | pureViaUnit :: Ok2 f () a => MonoidalC f => a -> f a
 64 | pureViaUnit a = fmapC (const a) unitC
 65 | 
 66 | unitViaPure :: Ok f () => ApplicativeC f => f ()
 67 | unitViaPure = pureC ()
 68 | 
 69 | liftA2ViaCross :: (MonoidalC f, Ok4 f a b (a :* b) c) => (a -> b -> c) -> f a -> f b -> f c
 70 | liftA2ViaCross h as bs = fmapC (uncurry h) (as `crossC` bs)
 71 | 
 72 | crossViaLiftA2 :: (ApplicativeC f, Ok3 f a b (a :* b)) => f a -> f b -> f (a :* b)
 73 | crossViaLiftA2 = liftA2C (,)
 74 | 
 75 | instance FunctorC ((->) a)
 76 | instance ApplicativeC ((->) a)
 77 | instance MonadC ((->) a)
 78 | 
 79 | instance FunctorC []
 80 | instance ApplicativeC []
 81 | instance MonadC []
 82 | 
 83 | -- etc
 84 | 
 85 | instance MonoidalC [] where
 86 |   unitC = unitViaPure
 87 |   crossC = crossViaLiftA2
 88 | 
 89 | instance FunctorC Set where
 90 |   type Ok Set a = Ord a
 91 |   fmapC = S.map
 92 | 
 93 | instance MonoidalC Set where
 94 |   unitC = unitViaPure
 95 |   crossC = S.cartesianProduct
 96 | 
 97 | instance ApplicativeC Set where
 98 |   pureC = S.singleton
 99 |   liftA2C = liftA2ViaCross
100 | 
101 | instance MonadC Set where
102 |   joinC = S.unions . S.elems
103 |   (>>==) = bindViaJoin
104 | 
105 | instance FunctorC MultiSet where
106 |   type Ok MultiSet a = Ord a
107 |   fmapC = MS.map
108 | 
109 | instance MonoidalC MultiSet where
110 |   unitC = unitViaPure
111 |   crossC = crossViaLiftA2
112 |   -- as `crossC` bs =
113 |   --   MS.fromOccurList
114 |   --     [((a,b),m*n) | (a,m) <- MS.toOccurList as, (b,n) <- MS.toOccurList bs]
115 | 
116 | -- Maybe use the explicit crossC but with `fromDistinctAscOccurList`, since the
117 | -- list is ordered and distinct.
118 | 
119 | instance ApplicativeC MultiSet where
120 |   pureC = MS.singleton
121 |   liftA2C h as bs =
122 |     MS.fromOccurList
123 |       [(h a b,m*n) | (a,m) <- MS.toOccurList as, (b,n) <- MS.toOccurList bs]
124 |   -- liftA2C = liftA2ViaCross
125 | 
126 | instance MonadC MultiSet where
127 |   joinC = MS.join
128 |   (>>==) = bindViaJoin
129 | 
130 | -- newtype Pred s = Pred (s -> Bool)
131 | 
132 | -- Can we give a FunctorC instance for Pred? I guess we'd have to sum over the
133 | -- preimage of the function being mapped.
134 | 
135 | newtype Map' b a = M { unM :: Map a b }
136 |   deriving (Show, Additive, DetectableZero, LeftSemimodule b)
137 | 
138 | instance Additive b => FunctorC (Map' b) where
139 |   type Ok (Map' b) a = Ord a
140 |   fmapC h (M p) = M (sum [h a +-> p ! a | a <- M.keys p])
141 |  
142 | instance Semiring b => ApplicativeC (Map' b) where
143 |   pureC a = M (single a)
144 |   liftA2C h (M p) (M q) = M (sum [h a b +-> p!a <.> q!b | a <- M.keys p, b <- M.keys q])
145 | 
146 | instance (Semiring b, DetectableZero b, DetectableOne b) => MonadC (Map' b) where
147 |   -- F f >>= h = bigSum a f a .> h a
148 |   M m >>== h = sum [m!a .> h a | a <- M.keys m]
149 |   -- joinC is more demanding on b and Map'. Maybe eliminate it altogether.
150 |   -- I could give bindViaJoin an explicit join function as argument.
151 |   joinC = error "joinC on Map' b not yet implemented"
152 | 
153 | #if 0
154 | 
155 | M f :: Map' b a
156 | f :: Map a b
157 | h :: (a -> Map' b c)
158 | h a :: Map' b c
159 | 
160 | #endif
161 | 
162 | #if 0
163 | 
164 | instance FunctorC Map where
165 |   type Ok Map a = Ord a
166 |   fmapC = M.map
167 | 
168 | instance MonoidalC Map where
169 |   unitC = unitViaPure
170 |   crossC = crossViaLiftA2
171 |   -- as `crossC` bs =
172 |   --   M.fromOccurList
173 |   --     [((a,b),m*n) | (a,m) <- M.toOccurList as, (b,n) <- M.toOccurList bs]
174 | 
175 | -- Maybe use the explicit crossC but with `fromDistinctAscOccurList`, since the
176 | -- list is ordered and distinct.
177 | 
178 | instance ApplicativeC Map where
179 |   pureC = M.singleton
180 |   liftA2C h as bs =
181 |     M.fromOccurList
182 |       [(h a b,m*n) | (a,m) <- M.toOccurList as, (b,n) <- M.toOccurList bs]
183 |   -- liftA2C = liftA2ViaCross
184 | 
185 | instance MonadC Map where
186 |   joinC = MS.join
187 |   (>>==) = bindViaJoin
188 | 
189 | #endif
190 | 


--------------------------------------------------------------------------------
/src/RegExp.hs:
--------------------------------------------------------------------------------
  1 | #ifdef EXAMPLES
  2 | {-# OPTIONS_GHC -Wno-unused-imports #-} -- Examples
  3 | #endif
  4 | 
  5 | -- | Regular expressions
  6 | 
  7 | module RegExp where
  8 | 
  9 | import Prelude hiding (sum,product)
 10 | 
 11 | import Data.Map (Map,keys)
 12 | import Data.MemoTrie ((:->:))
 13 | 
 14 | import Semi
 15 | 
 16 | #ifdef EXAMPLES
 17 | import Examples
 18 | #endif
 19 | 
 20 | infixl 6 :<+>
 21 | infixl 7 :<.>
 22 | 
 23 | -- | Regular expression
 24 | data RegExp h b = Char (Key h)
 25 |                 | Value b
 26 |                 | RegExp h b :<+> RegExp h b
 27 |                 | RegExp h b :<.> RegExp h b
 28 |                 | Star (RegExp h b)
 29 |  deriving Functor
 30 | 
 31 | deriving instance (Show (Key h), Show b) => Show (RegExp h b)
 32 | deriving instance (Eq   (Key h), Eq   b) => Eq   (RegExp h b)
 33 | 
 34 | #define OPTIMIZE
 35 | 
 36 | #ifdef OPTIMIZE
 37 | 
 38 | type D0 b = DetectableZero b
 39 | type D1 b = DetectableOne b
 40 | 
 41 | instance DetectableZero b => DetectableZero (RegExp h b) where
 42 |   isZero (Value b) = isZero b
 43 |   isZero _         = False
 44 | 
 45 | instance (DetectableZero b, DetectableOne b) => DetectableOne (RegExp h b) where
 46 |   isOne (Value b) = isOne b
 47 |   isOne _         = False
 48 | 
 49 | #else
 50 | 
 51 | type D0 b = (() ~ ())
 52 | type D1 b = (() ~ ())
 53 | 
 54 | #endif
 55 | 
 56 | instance (D0 b, Additive b) => Additive (RegExp h b) where
 57 |   zero = Value zero
 58 | #ifdef OPTIMIZE
 59 |   p <+> q | isZero p  = q
 60 |           | isZero q  = p
 61 |           | otherwise = p :<+> q
 62 | #else
 63 |   (<+>) = (:<+>)
 64 | #endif
 65 | 
 66 | instance (Semiring b, D0 b, D1 b) => LeftSemimodule b (RegExp h b) where
 67 | #if 1
 68 |   scale b = fmap (b <.>)
 69 | #elif 1
 70 |   b `scale` e = Value b <.> e
 71 | #else
 72 |   scale b = go
 73 |    where
 74 |      go (Char c)   = Char c
 75 |      go (Value b') = Value (b <.> b')
 76 |      go (u :<+> v) = go u <+> go v
 77 |      go (u :<.> v) = go u <.> go v
 78 |      go (Star u)   = star (go u)
 79 | #endif
 80 | 
 81 | instance (D0 b, D1 b, Semiring b) => Semiring (RegExp h b) where
 82 |   one   = Value one
 83 | #ifdef OPTIMIZE
 84 |   p <.> q | isZero p = zero
 85 |           | isOne  p = q
 86 |           -- | isZero q  = zero
 87 |           -- | isOne  q  = p
 88 |           | otherwise = p :<.> q
 89 | #else
 90 |   (<.>) = (:<.>)
 91 | #endif
 92 | 
 93 | instance (D0 b, D1 b, Semiring b) => StarSemiring (RegExp h b) where
 94 | #if 0
 95 |   -- Slightly slower in all benchmarks I tested
 96 |   star p | isZero p  = one
 97 |          | otherwise = Star p
 98 | #else
 99 |   star = Star
100 | #endif
101 | 
102 | type FR h b = ( Additive (h (RegExp h b)), HasSingle (Key h) (RegExp h b) (h (RegExp h b))
103 |               , Functor h, DetectableZero b, DetectableOne b )
104 | 
105 | instance (FR h b, StarSemiring b, c ~ Key h, Eq c)
106 |       => Indexable [c] b (RegExp h b) where
107 |   e ! w = atEps (foldl ((!) . deriv) e w)
108 |   -- (!) e = atEps . foldl ((!) . deriv) e
109 | 
110 | instance (FR h b, StarSemiring b, c ~ Key h, Eq c)
111 |       => HasSingle [c] b (RegExp h b) where
112 |   w +-> b = b .> product (map Char w)
113 | 
114 | atEps :: StarSemiring b => RegExp h b -> b
115 | atEps (Char _)   = zero
116 | atEps (Value b)  = b
117 | atEps (p :<+> q) = atEps p <+> atEps q
118 | atEps (p :<.> q) = atEps p <.> atEps q
119 | atEps (Star p)   = star (atEps p)
120 | 
121 | deriv :: (FR h b, StarSemiring b)
122 |       => RegExp h b -> h (RegExp h b)
123 | deriv (Char c)   = single c
124 | deriv (Value _)  = zero
125 | deriv (p :<+> q) = deriv p <+> deriv q
126 | deriv (p :<.> q) = fmap (<.> q) (deriv p) <+> fmap (atEps p .>) (deriv q)
127 |                    -- fmap (atEps p .>) (deriv q) <+> fmap (<.> q) (deriv p)
128 | deriv (Star p)   = fmap (\ d -> star (atEps p) .> d <.> Star p) (deriv p)
129 | 
130 | -- | Interpret a regular expression
131 | regexp :: (StarSemiring x, HasSingle [Key h] b x, Semiring b) => RegExp h b -> x
132 | regexp (Char c)   = single [c]
133 | regexp (Value b)  = value b
134 | regexp (u :<+> v) = regexp u <+> regexp v
135 | regexp (u :<.> v) = regexp u <.> regexp v
136 | regexp (Star u)   = star (regexp u)
137 | 
138 | 
139 | -- Alternatively, use regexp to convert to Cofree, and then use (!).
140 | 
141 | #ifdef EXAMPLES
142 | 
143 | {--------------------------------------------------------------------
144 |     Examples
145 | --------------------------------------------------------------------}
146 | 
147 | type L = RegExp ((->) Char) Bool
148 | -- type L = RegExp (Map Char) Bool
149 | 
150 | star1 :: Semiring b => b -> b
151 | star1 b = one <+> b <.> star1 b
152 | 
153 | star2 :: L -> L
154 | star2 b = one <+> b <.> star2 b
155 | 
156 | star3 :: L -> L
157 | star3 b = Value True <+> b <.> star3 b
158 | 
159 | x1 :: L
160 | x1 = star1 (single "a")
161 | 
162 | -- Non-recursive examples are tidier with OPTIMIZE
163 | 
164 | -- >>> pig :: L
165 | -- Char 'p' :<.> (Char 'i' :<.> Char 'g')
166 | -- >>> pink :: L
167 | -- Char 'p' :<.> (Char 'i' :<.> (Char 'n' :<.> Char 'k'))
168 | -- >>> pp :: L
169 | -- Char 'p' :<.> (Char 'i' :<.> (Char 'n' :<.> Char 'k')) :<+> Char 'p' :<.> (Char 'i' :<.> Char 'g')
170 | 
171 | -- >>> pig :: L'
172 | -- C (Char 'p' :<.> (Char 'i' :<.> Char 'g'))
173 | -- >>> pink :: L'
174 | -- C (Char 'p' :<.> (Char 'i' :<.> (Char 'n' :<.> Char 'k')))
175 | -- >>> pp :: L'
176 | -- C (Char 'p' :<.> (Char 'i' :<.> (Char 'n' :<.> Char 'k')) :<+> Char 'p' :<.> (Char 'i' :<.> Char 'g'))
177 | 
178 | -- >>> pig :: L'
179 | -- C ((Char 'p' :<.> (Char 'i' :<.> (Char 'g' :<.> Value True))) :<.> Value True)
180 | -- >>> pink :: L'
181 | -- C ((Char 'p' :<.> (Char 'i' :<.> (Char 'n' :<.> (Char 'k' :<.> Value True)))) :<.> Value True)
182 | -- >>> pp :: L'
183 | -- C ((Char 'p' :<.> (Char 'i' :<.> (Char 'n' :<.> (Char 'k' :<.> Value True)))) :<.> Value True :<+> (Char 'p' :<.> (Char 'i' :<.> (Char 'g' :<.> Value True))) :<.> Value True)
184 | -- >>> (anbn :: L') ! ""
185 | -- True
186 | -- >>> deriv (anbn :: L')
187 | 
188 | -- The following examples wedge. I think they worked when we used functions
189 | -- instead of maps.
190 | 
191 | -- >>> (anbn :: L') ! "a"
192 | -- False
193 | -- >>> (anbn :: L') ! "ab"
194 | -- True
195 | -- >>> (anbn :: L') ! "aabb"
196 | -- True
197 | -- >>> (anbn :: L') ! "aaaaabbbbb"
198 | -- True
199 | 
200 | #endif
201 | 
202 | 
203 | #if 0
204 | #endif
205 | 


--------------------------------------------------------------------------------
/test/Benchmarks/opt regexp.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # Group "star-a"
  3 | 
  4 | ```
  5 | 
  6 | benchmarking "a"/RegExp:Function
  7 | time                 442.4 ns   (439.5 ns .. 445.7 ns)
  8 |                      0.999 R²   (0.999 R² .. 1.000 R²)
  9 | mean                 445.6 ns   (441.9 ns .. 453.1 ns)
 10 | std dev              12.18 ns   (6.401 ns .. 18.36 ns)
 11 | variance introduced by outliers: 36% (moderately inflated)
 12 | 
 13 | benchmarking "a"/RegExp:Map
 14 | time                 355.3 ns   (352.7 ns .. 359.3 ns)
 15 |                      0.999 R²   (0.998 R² .. 1.000 R²)
 16 | mean                 355.7 ns   (353.3 ns .. 362.4 ns)
 17 | std dev              10.04 ns   (4.439 ns .. 17.40 ns)
 18 | variance introduced by outliers: 38% (moderately inflated)
 19 | 
 20 | benchmarking "a"/RegExp:IntMap
 21 | time                 352.6 ns   (350.9 ns .. 354.6 ns)
 22 |                      1.000 R²   (1.000 R² .. 1.000 R²)
 23 | mean                 352.8 ns   (351.3 ns .. 354.6 ns)
 24 | std dev              4.295 ns   (3.512 ns .. 5.377 ns)
 25 | variance introduced by outliers: 10% (moderately inflated)
 26 | 
 27 | benchmarking a50/RegExp:Function
 28 | time                 15.00 μs   (14.89 μs .. 15.13 μs)
 29 |                      0.999 R²   (0.999 R² .. 1.000 R²)
 30 | mean                 15.06 μs   (14.96 μs .. 15.34 μs)
 31 | std dev              377.3 ns   (172.8 ns .. 715.1 ns)
 32 | variance introduced by outliers: 24% (moderately inflated)
 33 | 
 34 | benchmarking a50/RegExp:Map
 35 | time                 10.59 μs   (10.54 μs .. 10.63 μs)
 36 |                      1.000 R²   (1.000 R² .. 1.000 R²)
 37 | mean                 10.62 μs   (10.58 μs .. 10.67 μs)
 38 | std dev              114.1 ns   (84.24 ns .. 153.1 ns)
 39 | 
 40 | benchmarking a50/RegExp:IntMap
 41 | time                 10.27 μs   (10.21 μs .. 10.33 μs)
 42 |                      1.000 R²   (0.999 R² .. 1.000 R²)
 43 | mean                 10.29 μs   (10.23 μs .. 10.39 μs)
 44 | std dev              195.7 ns   (131.9 ns .. 276.0 ns)
 45 | variance introduced by outliers: 16% (moderately inflated)
 46 | 
 47 | ```
 48 | 
 49 | # Group "letters"
 50 | 
 51 | ```
 52 | 
 53 | benchmarking asdf-50/RegExp:Function
 54 | time                 3.994 ms   (3.951 ms .. 4.040 ms)
 55 |                      0.999 R²   (0.998 R² .. 1.000 R²)
 56 | mean                 4.023 ms   (3.990 ms .. 4.055 ms)
 57 | std dev              81.76 μs   (61.86 μs .. 108.6 μs)
 58 | 
 59 | benchmarking asdf-50/RegExp:Map
 60 | time                 3.231 ms   (3.190 ms .. 3.273 ms)
 61 |                      0.999 R²   (0.998 R² .. 0.999 R²)
 62 | mean                 3.287 ms   (3.259 ms .. 3.329 ms)
 63 | std dev              86.04 μs   (57.24 μs .. 129.6 μs)
 64 | 
 65 | benchmarking asdf-50/RegExp:IntMap
 66 | time                 3.215 ms   (3.043 ms .. 3.354 ms)
 67 |                      0.985 R²   (0.976 R² .. 0.992 R²)
 68 | mean                 3.006 ms   (2.939 ms .. 3.099 ms)
 69 | std dev              203.4 μs   (172.1 μs .. 259.5 μs)
 70 | variance introduced by outliers: 37% (moderately inflated)
 71 | 
 72 | ```
 73 | 
 74 | # Group "dyck"
 75 | 
 76 | ```
 77 | 
 78 | benchmarking "[]"/RegExp:Function
 79 | time                 1.824 μs   (1.793 μs .. 1.858 μs)
 80 |                      0.996 R²   (0.993 R² .. 0.998 R²)
 81 | mean                 1.863 μs   (1.823 μs .. 1.917 μs)
 82 | std dev              129.6 ns   (85.15 ns .. 219.4 ns)
 83 | variance introduced by outliers: 77% (severely inflated)
 84 | 
 85 | benchmarking "[[]]"/RegExp:Function
 86 | time                 4.037 μs   (4.008 μs .. 4.076 μs)
 87 |                      0.999 R²   (0.999 R² .. 1.000 R²)
 88 | mean                 4.055 μs   (4.023 μs .. 4.097 μs)
 89 | std dev              94.03 ns   (69.75 ns .. 125.5 ns)
 90 | variance introduced by outliers: 24% (moderately inflated)
 91 | 
 92 | benchmarking "[[a]]"/RegExp:Function
 93 | time                 3.054 μs   (3.018 μs .. 3.095 μs)
 94 |                      0.999 R²   (0.998 R² .. 1.000 R²)
 95 | mean                 3.059 μs   (3.025 μs .. 3.102 μs)
 96 | std dev              92.42 ns   (67.31 ns .. 126.8 ns)
 97 | variance introduced by outliers: 36% (moderately inflated)
 98 | 
 99 | benchmarking "[[]][]"/RegExp:Function
100 | time                 5.524 μs   (5.461 μs .. 5.599 μs)
101 |                      0.999 R²   (0.998 R² .. 1.000 R²)
102 | mean                 5.509 μs   (5.468 μs .. 5.567 μs)
103 | std dev              125.9 ns   (98.77 ns .. 165.2 ns)
104 | variance introduced by outliers: 23% (moderately inflated)
105 | 
106 | ```
107 | 
108 | # Group "anbn"
109 | 
110 | ```
111 | 
112 | benchmarking ""/RegExp:Function
113 | time                 27.07 ns   (26.89 ns .. 27.25 ns)
114 |                      1.000 R²   (1.000 R² .. 1.000 R²)
115 | mean                 27.02 ns   (26.88 ns .. 27.22 ns)
116 | std dev              397.1 ps   (275.1 ps .. 561.2 ps)
117 | variance introduced by outliers: 17% (moderately inflated)
118 | 
119 | benchmarking "ab"/RegExp:Function
120 | time                 1.486 μs   (1.475 μs .. 1.498 μs)
121 |                      1.000 R²   (0.999 R² .. 1.000 R²)
122 | mean                 1.485 μs   (1.473 μs .. 1.500 μs)
123 | std dev              31.58 ns   (21.79 ns .. 48.42 ns)
124 | variance introduced by outliers: 23% (moderately inflated)
125 | 
126 | benchmarking "aacbb"/RegExp:Function
127 | time                 2.666 μs   (2.645 μs .. 2.684 μs)
128 |                      1.000 R²   (1.000 R² .. 1.000 R²)
129 | mean                 2.654 μs   (2.641 μs .. 2.674 μs)
130 | std dev              37.57 ns   (27.44 ns .. 50.11 ns)
131 | variance introduced by outliers: 11% (moderately inflated)
132 | 
133 | benchmarking "aaabbb"/RegExp:Function
134 | time                 4.693 μs   (4.667 μs .. 4.717 μs)
135 |                      1.000 R²   (0.999 R² .. 1.000 R²)
136 | mean                 4.685 μs   (4.662 μs .. 4.707 μs)
137 | std dev              60.54 ns   (48.42 ns .. 83.94 ns)
138 | 
139 | benchmarking "aaabbbb"/RegExp:Function
140 | time                 4.780 μs   (4.739 μs .. 4.822 μs)
141 |                      0.999 R²   (0.999 R² .. 1.000 R²)
142 | mean                 4.762 μs   (4.724 μs .. 4.824 μs)
143 | std dev              122.1 ns   (84.35 ns .. 193.5 ns)
144 | variance introduced by outliers: 28% (moderately inflated)
145 | 
146 | benchmarking 30/RegExp:Function
147 | time                 278.9 μs   (277.7 μs .. 280.5 μs)
148 |                      0.999 R²   (0.998 R² .. 1.000 R²)
149 | mean                 283.1 μs   (280.7 μs .. 291.4 μs)
150 | std dev              10.39 μs   (4.486 μs .. 20.61 μs)
151 | variance introduced by outliers: 27% (moderately inflated)
152 | 
153 | ```
154 | 


--------------------------------------------------------------------------------
/test/Benchmarks/no-opt regexp.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # Group "star-a"
  3 | 
  4 | ```
  5 | 
  6 | benchmarking "a"/RegExp:Function
  7 | time                 368.5 ns   (365.8 ns .. 371.5 ns)
  8 |                      0.999 R²   (0.999 R² .. 1.000 R²)
  9 | mean                 369.7 ns   (366.8 ns .. 374.1 ns)
 10 | std dev              8.669 ns   (6.324 ns .. 12.29 ns)
 11 | variance introduced by outliers: 30% (moderately inflated)
 12 | 
 13 | benchmarking "a"/RegExp:Map
 14 | time                 353.3 ns   (349.8 ns .. 357.2 ns)
 15 |                      0.999 R²   (0.999 R² .. 1.000 R²)
 16 | mean                 352.4 ns   (350.0 ns .. 355.5 ns)
 17 | std dev              6.608 ns   (4.544 ns .. 9.254 ns)
 18 | variance introduced by outliers: 21% (moderately inflated)
 19 | 
 20 | benchmarking "a"/RegExp:IntMap
 21 | time                 355.2 ns   (353.0 ns .. 357.5 ns)
 22 |                      1.000 R²   (1.000 R² .. 1.000 R²)
 23 | mean                 356.1 ns   (354.0 ns .. 358.0 ns)
 24 | std dev              5.292 ns   (4.270 ns .. 6.328 ns)
 25 | variance introduced by outliers: 15% (moderately inflated)
 26 | 
 27 | benchmarking a50/RegExp:Function
 28 | time                 3.266 ms   (3.223 ms .. 3.323 ms)
 29 |                      0.998 R²   (0.996 R² .. 0.999 R²)
 30 | mean                 3.299 ms   (3.270 ms .. 3.338 ms)
 31 | std dev              89.00 μs   (66.92 μs .. 131.0 μs)
 32 | 
 33 | benchmarking a50/RegExp:Map
 34 | time                 17.97 μs   (17.82 μs .. 18.14 μs)
 35 |                      0.999 R²   (0.999 R² .. 1.000 R²)
 36 | mean                 18.02 μs   (17.87 μs .. 18.20 μs)
 37 | std dev              389.9 ns   (313.0 ns .. 482.6 ns)
 38 | variance introduced by outliers: 19% (moderately inflated)
 39 | 
 40 | benchmarking a50/RegExp:IntMap
 41 | time                 18.62 μs   (18.18 μs .. 19.10 μs)
 42 |                      0.997 R²   (0.996 R² .. 0.999 R²)
 43 | mean                 18.52 μs   (18.30 μs .. 18.90 μs)
 44 | std dev              695.1 ns   (462.1 ns .. 994.4 ns)
 45 | variance introduced by outliers: 41% (moderately inflated)
 46 | 
 47 | ```
 48 | 
 49 | # Group "letters"
 50 | 
 51 | ```
 52 | 
 53 | benchmarking asdf-50/RegExp:Function
 54 | time                 22.67 s    (22.00 s .. 24.17 s)
 55 |                      0.999 R²   (0.999 R² .. 1.000 R²)
 56 | mean                 22.39 s    (22.21 s .. 22.57 s)
 57 | std dev              227.9 ms   (99.00 ms .. 287.7 ms)
 58 | variance introduced by outliers: 19% (moderately inflated)
 59 | 
 60 | benchmarking asdf-50/RegExp:Map
 61 | time                 3.334 ms   (3.267 ms .. 3.405 ms)
 62 |                      0.998 R²   (0.996 R² .. 0.999 R²)
 63 | mean                 3.466 ms   (3.423 ms .. 3.527 ms)
 64 | std dev              127.9 μs   (87.80 μs .. 181.3 μs)
 65 | variance introduced by outliers: 15% (moderately inflated)
 66 | 
 67 | benchmarking asdf-50/RegExp:IntMap
 68 | time                 3.114 ms   (3.016 ms .. 3.233 ms)
 69 |                      0.996 R²   (0.993 R² .. 0.999 R²)
 70 | mean                 3.135 ms   (3.099 ms .. 3.170 ms)
 71 | std dev              91.81 μs   (74.63 μs .. 116.6 μs)
 72 | variance introduced by outliers: 11% (moderately inflated)
 73 | 
 74 | ```
 75 | 
 76 | # Group "dyck"
 77 | 
 78 | ```
 79 | 
 80 | benchmarking "[]"/RegExp:Function
 81 | time                 2.430 μs   (2.410 μs .. 2.450 μs)
 82 |                      0.999 R²   (0.999 R² .. 1.000 R²)
 83 | mean                 2.440 μs   (2.422 μs .. 2.461 μs)
 84 | std dev              51.17 ns   (40.01 ns .. 68.95 ns)
 85 | variance introduced by outliers: 22% (moderately inflated)
 86 | 
 87 | benchmarking "[[]]"/RegExp:Function
 88 | time                 10.70 μs   (10.48 μs .. 10.88 μs)
 89 |                      0.998 R²   (0.998 R² .. 0.999 R²)
 90 | mean                 10.56 μs   (10.46 μs .. 10.71 μs)
 91 | std dev              305.8 ns   (233.4 ns .. 442.1 ns)
 92 | variance introduced by outliers: 31% (moderately inflated)
 93 | 
 94 | benchmarking "[[a]]"/RegExp:Function
 95 | time                 17.56 μs   (17.47 μs .. 17.67 μs)
 96 |                      1.000 R²   (0.999 R² .. 1.000 R²)
 97 | mean                 17.71 μs   (17.59 μs .. 17.87 μs)
 98 | std dev              330.3 ns   (253.1 ns .. 448.6 ns)
 99 | variance introduced by outliers: 15% (moderately inflated)
100 | 
101 | benchmarking "[[]][]"/RegExp:Function
102 | time                 27.58 μs   (27.02 μs .. 28.02 μs)
103 |                      0.999 R²   (0.998 R² .. 0.999 R²)
104 | mean                 27.26 μs   (27.03 μs .. 27.60 μs)
105 | std dev              723.2 ns   (530.5 ns .. 940.6 ns)
106 | variance introduced by outliers: 24% (moderately inflated)
107 | 
108 | ```
109 | 
110 | # Group "anbn"
111 | 
112 | ```
113 | 
114 | benchmarking ""/RegExp:Function
115 | time                 28.50 ns   (28.24 ns .. 28.87 ns)
116 |                      0.999 R²   (0.999 R² .. 0.999 R²)
117 | mean                 28.72 ns   (28.40 ns .. 29.03 ns)
118 | std dev              787.8 ps   (681.4 ps .. 974.4 ps)
119 | variance introduced by outliers: 42% (moderately inflated)
120 | 
121 | benchmarking "ab"/RegExp:Function
122 | time                 2.294 μs   (2.264 μs .. 2.336 μs)
123 |                      0.998 R²   (0.997 R² .. 0.999 R²)
124 | mean                 2.315 μs   (2.283 μs .. 2.368 μs)
125 | std dev              102.2 ns   (68.07 ns .. 136.0 ns)
126 | variance introduced by outliers: 56% (severely inflated)
127 | 
128 | benchmarking "aacbb"/RegExp:Function
129 | time                 15.73 μs   (15.45 μs .. 16.05 μs)
130 |                      0.998 R²   (0.997 R² .. 0.999 R²)
131 | mean                 15.80 μs   (15.55 μs .. 16.05 μs)
132 | std dev              610.3 ns   (513.3 ns .. 727.4 ns)
133 | variance introduced by outliers: 43% (moderately inflated)
134 | 
135 | benchmarking "aaabbb"/RegExp:Function
136 | time                 24.06 μs   (23.92 μs .. 24.23 μs)
137 |                      0.999 R²   (0.998 R² .. 1.000 R²)
138 | mean                 24.28 μs   (24.02 μs .. 24.63 μs)
139 | std dev              757.8 ns   (497.2 ns .. 1.004 μs)
140 | variance introduced by outliers: 31% (moderately inflated)
141 | 
142 | benchmarking "aaabbbb"/RegExp:Function
143 | time                 36.65 μs   (36.48 μs .. 36.87 μs)
144 |                      1.000 R²   (0.999 R² .. 1.000 R²)
145 | mean                 36.85 μs   (36.59 μs .. 37.26 μs)
146 | std dev              802.4 ns   (506.3 ns .. 1.238 μs)
147 | variance introduced by outliers: 18% (moderately inflated)
148 | 
149 | benchmarking 30/RegExp:Function
150 | time                 96.66 ms   (94.61 ms .. 99.47 ms)
151 |                      0.999 R²   (0.998 R² .. 1.000 R²)
152 | mean                 92.75 ms   (90.01 ms .. 94.29 ms)
153 | std dev              2.748 ms   (1.469 ms .. 3.980 ms)
154 | variance introduced by outliers: 14% (moderately inflated)
155 | 
156 | ```
157 | 


--------------------------------------------------------------------------------
/notes.md:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: Notes for a paper about generalized convolution
  3 | substMap: [("<+>","+"),("<.>","·")]
  4 | ...
  5 | 
  6 | [*Derivatives of Regular Expressions*]: http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.98.4378 "paper by Janusz Brzozowski (1964"
  7 | 
  8 | \newcommand\set[1]{\{\,#1\,\}}
  9 | \newcommand\Pow{\mathcal{P}}
 10 | \newcommand\setop[1]{\mathbin{\hat{#1}}}
 11 | \newcommand\eps{\varepsilon}
 12 | \newcommand\closure[1]{#1^{\ast}}
 13 | \newcommand\mappend{\diamond}
 14 | \newcommand\cat{\cdot}
 15 | \newcommand\single\overline
 16 | \newcommand\union{+}
 17 | \newcommand\bigunion{\sum}
 18 | \newcommand\has[2]{\delta_{#1}\,#2}
 19 | \newcommand\del[1]{\has\eps{#1}}
 20 | \newcommand\consl[2]{\single{[#1]} \cat #2}
 21 | \newcommand\conslp[2]{\consl{#1}{(#2)}}
 22 | \newcommand\lquot{\setminus}
 23 | 
 24 | ## Paper outline
 25 | 
 26 | ### Contributions
 27 | 
 28 | Generalize and unify:
 29 | 
 30 | *   Parsing by "derivatives" for regular languages and beyond, generalized beyond *sets* of strings.
 31 |     Some other possibilities: multisets, fuzzy sets, parsing probability distributions.
 32 |     Can we get semantic analysis cheaply?
 33 | *   Includes probabilistic computing.
 34 | *   Maybe generalize parsing beyond strings to trees and other shapes.
 35 |     Perhaps 2D parsing?
 36 | *   Polynomial arithmetic (addition, multiplication, and I hope composition), generalized to multivariate case.
 37 | *   Convolution: higher dimensions, continuous and discrete, and other shaped spaces.
 38 | *   Super-memoization.
 39 | 
 40 | ### Languages
 41 | 
 42 | *   Identify the vocabulary of a "language" (singleton plus semiring).
 43 | *   Define where a language is set of strings.
 44 | *   Note the semiring interface.
 45 | 
 46 | ### Matching
 47 | 
 48 | *   The set-based language definition doesn't give an implementation, because the sets may be infinite.
 49 | *   Change to a predicate, and specify the new method definitions via homomorphism equations.
 50 |     Easy to solve, and gets an effective implementation (thanks to laziness).
 51 | *   Rephrase in terms of string predicates/recognizers, where $s \lquot p$ becomes $p \circ (s\,\mappend)$, which specializes to $p \circ (c:)$ when $s=[c]$.
 52 | 
 53 | ### List of successes
 54 | 
 55 | ### Efficient matching
 56 | 
 57 | *   First decomposition law: $p = \bigcup\limits_{s \in p} \single s$.
 58 | *   Second decomposition law: $p = \bigcup\limits_s \has s p$, where
 59 |     $\has s p =
 60 |     \begin{cases}
 61 |     \single s & \text{if $s \in p$} \\
 62 |     \emptyset & \text{otherwise}
 63 |     \end{cases}$.
 64 |     Specialize to empty strings: $\del p = \has \eps p$.
 65 | *   So far we can accommodate any monoid.
 66 |     Now focus on sequences.
 67 | *   "Derivative": $c \lquot p = \set {s \mid c:s \in p}$.
 68 | *   Third decomposition law [Brzozowski, 1964, Theorem 4.4]:
 69 |     $p = \del p \union \bigcup\limits_{c\,\in\,A} \single c \cat (c \lquot p)$.
 70 |     Holds for all languages, not just regular.
 71 | *   Maybe same for a free representation (regular expressions), though trivial.
 72 | *   Review (string) tries.
 73 |     Note the appearance of $p \eps$ and $p \circ (c:)$.
 74 |     Define the homomorphism equations, which are easy to solve, via trie isomorphism.
 75 |     Simplifying yields a simple and efficient implementation.
 76 | 
 77 | ### Generalizing
 78 | 
 79 | *   Semirings.
 80 | *   Convolution.
 81 | *   Beyond convolution: the free semimodule monad.
 82 | *   Variations: counting, probability distributions, temporal/spatial convolution.
 83 | 
 84 | ### Other applications
 85 | 
 86 | *   Univariate and multivariate polynomials.
 87 | *   Convolution: discrete and continuous, one- and multi-dimensional, dense and sparse.
 88 | *   2D parsing?
 89 | 
 90 | ## Miscellaneous notes
 91 | 
 92 | *   Summarize/review languages as sets, including singleton, union, concatenation, and star/closure.
 93 |     Survey some representations for parsing, including a naive one as predicates (requiring nondeterministic splitting).
 94 |     For regular languages specified in this vocabulary, the classic technique for efficient parsing is to generate a finite state machine.
 95 |     Another technique is Brzozowski's "derivatives of regular expressions", extended much more recently to context-free languages.
 96 |     Maybe revisit Brzozowski's technique; alternatively just mention, and compare in related work.
 97 |     Calculate a generalized variant from a simple specification.
 98 |     Key is a known but not widely used monadic structure, namely that of *free semimodules*.
 99 | 
100 | *   Once I have a restricted `Applicative` instance, I can make language itself be a monoid in a perfectly standard way, with `mempty = pure mempty` and `mappend = liftA2 mappend`.
101 |     Likewise, temporal and (multidimensional) spatial convolution is simply `liftA2 (+)`, which is a standard definition for `(+)` on applicatives.
102 |     We can give full instances for numeric classes in this style.
103 | *   For multivariate polynomials, I was thinking of using maps from exponent tuples.
104 |     Alternatively, replace tuples by statically sized vectors.
105 |     More generally, use a representable functor or even zippable.
106 |     I guess anything "summable", i.e., a monoid.
107 |     Perhaps whatever simplifies parsing and unparsing.
108 | *   What symbol to use for `mappend`?
109 |     Some candidates: \\cdot, \\diamond, \\ast, \\circledast, \\APLstar, several dingbats choices (\\ding), \\Snowflake.
110 | *   The `Comonad` interface captures Brzozowski's two main operations: contains-empty and derivative
111 |     ([notes](11-25#Parsing-with-derivatives-(A))).
112 | *   There's also a `Monad` for functions that's unlike the usual one and seems to support convolution
113 |     ([notes](11-25#Parsing-with-derivatives-(A))).
114 |     Is this monad known?
115 |     Maybe it corresponds to the vector space monad in one of Dan Piponi blog posts.
116 |     Yes; it's the "free vector space monad", and more generally the "free semimodule monad".
117 | *   I think the monad structure is more essential than the comonad structure.
118 |     ([notes](12-02#Parsing-with-derivatives-(A))).
119 | *   Generalize and use theorem 4.4 from [*Derivatives of Regular Expressions*]
120 |     ([notes](11-25#Parsing-with-derivatives-(B/))).
121 | *   Is Day convolution helpful?
122 | *   Can I define languages monadically and get efficient convolutional parsing?
123 |     Must values then be in a semiring?
124 | *   Multivariate polynomials and power series.
125 |     Rational? Streams?
126 | *   Come up with another data type besides lists having a useful binary operation.
127 |     Needn't be monoidal.
128 |     Trees?
129 | *   Discuss the function-of-monoid perspective explored in my journal notes for 2018-12-29 and 2018-12-30.
130 |     I like its duality with the *Comonad* instance for the same type.
131 | 
132 | ## Super-memoization
133 | 
134 | I suspect that my take on Brzozowski's technique is just one example of a much more general technique akin to memoization but in which we get partial sharing of work across calls to a function with *different* arguments (unlike regular memoization).
135 | 


--------------------------------------------------------------------------------
/macros.tex:
--------------------------------------------------------------------------------
  1 | \newcommand\nc\newcommand
  2 | \nc\rnc\renewcommand
  3 | 
  4 | %% https://www.conference-publishing.com/Help.php
  5 | \usepackage[utf8]{inputenc} 
  6 | \usepackage[T1]{fontenc}
  7 | \usepackage{microtype}
  8 | 
  9 | \usepackage{epsfig}
 10 | \usepackage{latexsym}
 11 | \usepackage{amsmath}
 12 | \usepackage{amssymb}
 13 | \usepackage{color}
 14 | 
 15 | \usepackage{subcaption}
 16 | 
 17 | \usepackage[us,12hr]{datetime}
 18 | \usepackage{setspace}
 19 | 
 20 | % bottom prevents floats (figures) below footnotes, while hang & flushmargin
 21 | % avoids indentation.
 22 | \usepackage[bottom,hang,flushmargin]{footmisc}
 23 | 
 24 | \nc\out[1]{}
 25 | 
 26 | %% \nc\noteOut[2]{\note{#1}\out{#2}}
 27 | 
 28 | %% To redefine for a non-draft
 29 | \nc\indraft[1]{#1}
 30 | 
 31 | \nc\note[1]{\indraft{\textcolor{red}{#1}}}
 32 | 
 33 | \nc\notefoot[1]{\note{\footnote{\note{#1}}}}
 34 | 
 35 | \nc\todo[1]{\note{To do: #1}}
 36 | 
 37 | \nc\eqnlabel[1]{\label{equation:#1}}
 38 | \nc\eqnref[1]{Equation~\ref{equation:#1}}
 39 | \nc\eqnreftwo[2]{Equations~\ref{equation:#1} and \ref{equation:#2}}
 40 | 
 41 | \nc\figlabel[1]{\label{fig:#1}}
 42 | \nc\figref[1]{Figure~\ref{fig:#1}}
 43 | \nc\figreftwo[2]{Figures~\ref{fig:#1} and \ref{fig:#2}}
 44 | 
 45 | \nc\seclabel[1]{\label{sec:#1}}
 46 | \nc\secref[1]{Section~\ref{sec:#1}}
 47 | \nc\secreftwo[2]{Sections~\ref{sec:#1} and~\ref{sec:#2}}
 48 | \nc\secrefs[2]{Sections \ref{sec:#1} through \ref{sec:#2}}
 49 | 
 50 | \nc\appref[1]{Appendix~\ref{sec:#1}}
 51 | 
 52 | %% The name \secdef is already taken
 53 | \nc\sectiondef[1]{\section{#1}\seclabel{#1}}
 54 | \nc\subsectiondef[1]{\subsection{#1}\seclabel{#1}}
 55 | \nc\subsubsectiondef[1]{\subsubsection{#1}\seclabel{#1}}
 56 | 
 57 | \nc\needcite{\note{[ref]}}
 58 | 
 59 | % \nc\myurl\texttt
 60 | 
 61 | %% For acmlarge, I have to put my figures side by side
 62 | 
 63 | %% http://tex.stackexchange.com/questions/5769/two-figures-side-by-side
 64 | 
 65 | \setlength{\fboxsep}{1ex}
 66 | \setlength{\fboxrule}{0.05ex} % gray or dotted might work better
 67 | 
 68 | %% \figone{fraction}{label}{caption}{content}
 69 | \nc\figoneW[4]{
 70 | \fbox{%
 71 | \begin{minipage}{#1\linewidth}
 72 |   \centering
 73 |   \setlength\mathindent{0ex}
 74 |   #4
 75 |   \vspace*{-5ex}
 76 |   \captionof{figure}{#3}
 77 |   \label{fig:#2}
 78 | \end{minipage}
 79 | }
 80 | }
 81 | \nc\figone{\figoneW{\stdWidth}}
 82 | 
 83 | %% %% \figo{fraction}{content}
 84 | %% \nc\figoW[2]{
 85 | %% \fbox{
 86 | %% \begin{minipage}{#1\linewidth}
 87 | %%   \centering
 88 | %%   \setlength\mathindent{0ex}
 89 | %%   #2
 90 | %% \end{minipage}
 91 | %% }
 92 | %% }
 93 | 
 94 | \nc\figo[1]{
 95 | \begin{figure}
 96 | \centering
 97 | #1
 98 | \end{figure}
 99 | }
100 | 
101 | \nc\figp[2]{\begin{figure}\centering #1 \hspace{-2ex} #2\end{figure}}
102 | 
103 | % Arguments: env, label, caption, body
104 | \nc\figdefG[4]{\begin{#1}[tbp]
105 | \begin{center}
106 | #4
107 | \end{center}
108 | \caption{#3}
109 | \figlabel{#2}
110 | \end{#1}}
111 | 
112 | % Arguments: label, caption, body
113 | \nc\figdef{\figdefG{figure}}
114 | \nc\figdefwide{\figdefG{figure*}}
115 | 
116 | % Arguments: label, caption, body
117 | \nc\figrefdef[3]{\figref{#1}\figdef{#1}{#2}{#3}}
118 | 
119 | \nc\figrefdefwide[3]{\figref{#1}\figdefwide{#1}{#2}{#3}}
120 | 
121 | %% %% \circdefW{frac}{label/file}{caption}{work}{depth}
122 | %% \nc\circdefW[5]{\figoneW{#1}{#2}{#3 \stats{#4}{#5}}{\incpic{#2}}}
123 | %% \nc\circdef{\circdefW{\stdWidth}}
124 | 
125 | \nc\stdWidth{0.46}
126 | 
127 | %% \figpair{frac1}{frac2}{label1}{caption1}{content1}{label2}{caption2}{content2}
128 | \nc\figpairW[8]{
129 | \begin{figure}
130 | \centering
131 | \figoneW{#1}{#3}{#4}{#5}
132 | %\hspace{.05\linewidth}
133 | %\hfill
134 | \figoneW{#2}{#6}{#7}{#8}
135 | \end{figure}
136 | }
137 | %% \figpair{label1}{caption1}{content1}{label2}{caption2}{content2}
138 | \nc\figpair{\figpairW{\stdWidth}{\stdWidth}}
139 | 
140 | \nc\incpic[1]{\includegraphics[width=\linewidth]{figures/#1}}
141 | 
142 | %% \incpicW{frac}{file}
143 | \nc\incpicW[2]{\includegraphics[width=#1\linewidth]{figures/#2}}
144 | 
145 | %%%%%%
146 | 
147 | %% \nc\symTwo[1]{\mathbin{#1\!\!\!#1}}
148 | %% \nc\symThree[1]{\mathbin{#1\!\!\!#1\!\!\!#1}}
149 | 
150 | \setlength\mathindent{4ex}
151 | \nc\db[1]{\llbracket#1\rrbracket}
152 | 
153 | \nc\smalltriangleup{\triangle}
154 | \nc\smalltriangledown{\triangledown}
155 | 
156 | %% Double quote symbol
157 | \nc\dq{\text{\tt\char34}}
158 | %% Quoted haskell string with formatted content
159 | \nc\hquoted[1]{\dq\!#1\!\dq}
160 | 
161 | \nc\sectionl[1]{\section{#1}\seclabel{#1}}
162 | \nc\subsectionl[1]{\subsection{#1}\seclabel{#1}}
163 | 
164 | \nc\workingHere{
165 | \vspace{1ex}
166 | \begin{center}
167 | \setlength{\fboxsep}{3ex}
168 | \setlength{\fboxrule}{4pt}
169 | \huge\textcolor{red}{\framebox{Working here}}
170 | \end{center}
171 | \vspace{1ex}
172 | }
173 | 
174 | \usepackage{amsthm}
175 | %% %% Roman font, and drop vertical spacing before & after.
176 | \theoremstyle{definition} % remark
177 | 
178 | \newtheoremstyle{plainstyle}
179 |   {\topsep} % Space above
180 |   {\topsep} % Space below
181 |   {} % Body font
182 |   {} % Indent amount
183 |   {\bfseries} % Theorem head font
184 |   {.} % Punctuation after theorem head
185 |   {.5em} % Space after theorem head
186 |   {} % Theorem head spec (can be left empty, meaning `normal')
187 | 
188 | %% \theoremstyle{plainstyle}
189 | 
190 | \newtheorem{definition}{Definition}%[section]
191 | \nc\deflabel[1]{\label{definition:#1}}
192 | \nc\defref[1]{Definition \ref{definition:#1}}
193 | \nc\defreftwo[2]{Definitions \ref{definition:#1} and \ref{definition:#2}}
194 | \nc\defrefs[2]{Definitions \ref{definition:#1} through \ref{definition:#2}}
195 | 
196 | \newtheorem{theorem}{Theorem}%[section]
197 | \nc\thmlabel[1]{\label{theorem:#1}}
198 | \nc\thmref[1]{Theorem \ref{theorem:#1}}
199 | \nc\thmreftwo[2]{Theorems \ref{theorem:#1} and \ref{theorem:#2}}
200 | \nc\thmrefs[2]{Theorems \ref{theorem:#1} through \ref{theorem:#2}}
201 | 
202 | \newtheorem{corollary}{Corollary}[theorem]
203 | \nc\corlabel[1]{\label{corollary:#1}}
204 | \nc\corref[1]{Corollary \ref{corollary:#1}}
205 | \nc\correftwo[2]{Corollaries \ref{corollary:#1} and \ref{corollary:#2}}
206 | \nc\correfs[2]{Corollaries \ref{corollary:#1} through \ref{corollary:#2}}
207 | 
208 | \newtheorem{lemma}[theorem]{Lemma}
209 | \nc\lemlabel[1]{\label{lemma:#1}}
210 | \nc\lemref[1]{Lemma \ref{lemma:#1}}
211 | \nc\lemreftwo[2]{Lemmas \ref{lemma:#1} and \ref{lemma:#2}}
212 | \nc\lemrefthree[3]{Lemmas \ref{lemma:#1}, \ref{lemma:#2}, and \ref{lemma:#3}}
213 | \nc\lemrefs[2]{Lemmas \ref{lemma:#1} through \ref{lemma:#2}}
214 | 
215 | \newtheorem{exercise}[theorem]{Exercise}
216 | \nc\exclabel[1]{\label{exercise:#1}}
217 | \nc\excref[1]{Exercise \ref{exercise:#1}}
218 | \nc\excreftwo[2]{Exercises \ref{exercise:#1} and \ref{exercise:#2}}
219 | \nc\excrefs[2]{Exercises \ref{exercise:#1} through \ref{exercise:#2}}
220 | 
221 | \definecolor{codesep}{gray}{0.85}
222 | \nc\codesep[1]{
223 | \begin{minipage}[b]{0ex}
224 | \color{codesep}{\rule[1ex]{0.8pt}{#1}}
225 | \end{minipage}}
226 | 
227 | \nc\twocol[4]{
228 | \\
229 | \begin{minipage}[c]{#1\textwidth}
230 | #2
231 | \vspace{-2ex}
232 | \end{minipage}
233 | \begin{minipage}[c]{#3\textwidth} % \mathindent1em
234 | #4
235 | \vspace{-2ex}
236 | \end{minipage}
237 | \\
238 | }
239 | 
240 | %% For multiple footnotes at a point. Adapted to recognize \notefoot as well
241 | %% as \footnote. See https://tex.stackexchange.com/a/71347,
242 | \let\oldFootnote\footnote
243 | \nc\nextToken\relax
244 | \rnc\footnote[1]{%
245 |     \oldFootnote{#1}\futurelet\nextToken\isFootnote}
246 | \nc\footcomma[1]{\ifx#1\nextToken\textsuperscript{,}\fi}
247 | \nc\isFootnote{%
248 |     \footcomma\footnote
249 |     \footcomma\notefoot
250 | }
251 | 
252 | %% https://tex.stackexchange.com/questions/336854/typesetting-a-wider-bar-sign
253 | \DeclareFontFamily{U}{mathx}{\hyphenchar\font45}
254 | \DeclareFontShape{U}{mathx}{m}{n}{
255 |       <5> <6> <7> <8> <9> <10>
256 |       <10.95> <12> <14.4> <17.28> <20.74> <24.88>
257 |       mathx10
258 |       }{}
259 | \DeclareSymbolFont{mathx}{U}{mathx}{m}{n}
260 | \DeclareFontSubstitution{U}{mathx}{m}{n}
261 | \DeclareMathAccent{\widebar}{0}{mathx}{"73} % "
262 | 
263 | \usepackage{footnotebackref}
264 | \usepackage{hyperref}
265 | 
266 | 


--------------------------------------------------------------------------------
/src/Cofree.hs:
--------------------------------------------------------------------------------
  1 | #ifdef EXAMPLES
  2 | {-# OPTIONS_GHC -Wno-unused-imports #-} -- Examples
  3 | #endif
  4 | 
  5 | -- | List tries using Convo
  6 | 
  7 | module Cofree where
  8 | 
  9 | import Prelude hiding (sum,product)
 10 | 
 11 | import Data.Functor.Classes (Show1(..),showsPrec1)
 12 | import GHC.Exts (coerce)
 13 | import Data.Map (Map)
 14 | import qualified Data.Map as M
 15 | 
 16 | import Misc
 17 | import Constrained
 18 | import Semi
 19 | 
 20 | #ifdef EXAMPLES
 21 | import Examples
 22 | import ShareMap (ShareMap)
 23 | import Data.Set (Set)
 24 | import qualified Data.Set as S
 25 | #endif
 26 | 
 27 | -- #include "GenInstances.inc"
 28 | 
 29 | -- TODO: maybe rename Cofree to "Cofree". I'd use Ed's Cofree from the "free" library,
 30 | -- but he defined Key (Cofree f) = Seq (Key f), and I want [Key f]. Oh well.
 31 | 
 32 | -- Move elsewhere
 33 | 
 34 | infix 1 <:
 35 | (<:) :: b -> (c -> ([c] -> b)) -> ([c] -> b)
 36 | b <: h = \ case { [] -> b ; c:cs  -> h c cs }
 37 | 
 38 | -- -- Experiment
 39 | -- infix 1 <#
 40 | -- (<#) :: (Indexable ([c] -> b) h, Key h ~ c)
 41 | --      => b -> h ([c] -> b) -> ([c] -> b)
 42 | -- b <# h = \ case { [] -> b ; c:cs  -> (h ! c) cs }
 43 | 
 44 | -- | List trie, denoting '[c] -> b'
 45 | infix 1 :<
 46 | data Cofree h b = b :< h (Cofree h b) deriving Functor
 47 | 
 48 | #if 0
 49 | -- Swiped from Control.Comonad.Cofree
 50 | instance (Show1 f, Show a) => Show (Cofree f a) where showsPrec = showsPrec1
 51 | instance Show1 f => Show1 (Cofree f) where
 52 |   liftShowsPrec sp sl = go
 53 |     where
 54 |       goList = liftShowList sp sl
 55 |       go d (a :< ds) = showParen (d > 5) $
 56 |         sp 6 a . showString " :< " . liftShowsPrec go goList 5 ds
 57 | #else
 58 | instance (ConF Show h, Show b) => Show (Cofree h b) where
 59 |   showsPrec p (a :< ds) = showParen (p > 5) $
 60 |     showsPrec 1 a . showString " :< " . showsPrec 1 ds
 61 | #endif
 62 | 
 63 | 
 64 | -- instance Functor h => Functor (Cofree h) where
 65 | --   fmap f = go where go (a :< dp) = f a :< fmap go dp
 66 | --   -- fmap f (a :< dp) = f a :< (fmap.fmap) f dp
 67 | --   -- fmap f (a :< dp) = f a :< fmap (fmap f) dp
 68 | 
 69 | -- TODO: I probably want FunctorC h, and inherit Ok.
 70 | instance Functor h => FunctorC (Cofree h)
 71 | 
 72 | instance Indexable c (Cofree h b) (h (Cofree h b)) => Indexable [c] b (Cofree h b) where
 73 |   -- (b :< _ ) ! [] = b
 74 |   -- (_ :< ts) ! (k:ks) = ts ! k ! ks
 75 |   -- (b :< dp) ! w = case w of { [] -> b ; c:cs -> dp ! c ! cs }
 76 |   -- (!) (b :< dp) = b <: (!) (fmap (!) dp)
 77 |   (!) (b :< dp) = b <: (!) . (!) dp
 78 | 
 79 | instance Listable c (Cofree h b) (h (Cofree h b)) =>  Listable [c] b (Cofree h b) where
 80 |   toList = go []
 81 |    where
 82 |      go cs (a :< dp) =
 83 |        (cs,a) : concatMap (\ (c,t) -> go (c:cs) t) (toList dp)
 84 | 
 85 | instance (Additive (h (Cofree h b)), Additive b) => Additive (Cofree h b) where
 86 |   zero = zero :< zero
 87 |   (a :< dp) <+> (b :< dq) = a <+> b  :<  dp <+> dq
 88 | 
 89 | -- FunctorSemimodule(Cofree h)
 90 | 
 91 | -- instance (Functor h, Semiring b) => LeftSemimodule b (Cofree h b) where scale s = fmap (s <.>)
 92 | 
 93 | -- instance (Functor h, Semiring b) => LeftSemimodule b (Cofree h b) where
 94 | --   s `scale` (b :< dp) = s <.> b :< fmap (s `scale`) dp
 95 | 
 96 | instance (Functor h, Semiring b) => LeftSemimodule b (Cofree h b) where
 97 |   scale s = fmap (s <.>)
 98 |   -- scale s = go where go (b :< dp) = s <.> b :< fmap go dp
 99 | 
100 | instance (Functor h, Additive (h (Cofree h b)), Semiring b, DetectableZero b, DetectableOne b) => Semiring (Cofree h b) where
101 |   one = one :< zero
102 |   (a :< dp) <.> q = a .> q <+> (zero :< fmap (<.> q) dp)
103 | 
104 | instance (Functor h, Additive (h (Cofree h b)), StarSemiring b, DetectableZero b, DetectableOne b) => StarSemiring (Cofree h b) where
105 |   star (a :< dp) = q where q = star a .> (one :< fmap (<.> q) dp)
106 | 
107 | instance ( HasSingle c (Cofree h b) (h (Cofree h b)), Additive (h (Cofree h b))
108 |          , Ord c, Semiring b )
109 |       => HasSingle [c] b (Cofree h b) where
110 |   w +-> b = foldr (\ c t -> zero :< c +-> t) (b :< zero) w
111 | #if 0
112 |   ws *-> b = fromBool eps
113 |              <+> zero :< sum [ c +-> ws' *-> b | (c,ws') <- M.toList ders]
114 |    where
115 |      (eps,ders) = unconsSet ws
116 | #else
117 |   ws *-> b =
118 |     fromBool eps <+>
119 |     zero :< sum [ prefixes *-> suffix +-> b | (suffix,prefixes) <- M.toList ders']
120 |    where
121 |      (eps,ders') = unconsSet' ws
122 | #endif
123 | 
124 | -- preimageM :: (Ord a, Ord b) => Map a b -> Map b (Set a)
125 | -- preimageM m = sum [M.singleton b (S.singleton a) | (a,b) <- M.toList m]
126 | 
127 | -- unconsSet :: Ord c => Set [c] -> Bool :* Map c (Set [c])
128 | 
129 | -- unconsSet' :: Ord c => Set [c] -> Bool :* Map [c] (Set c)
130 | 
131 | 
132 | instance (Additive (h (Cofree h b)), DetectableZero (h (Cofree h b)), DetectableZero b)
133 |       => DetectableZero (Cofree h b) where
134 |   isZero (a :< dp) = isZero a && isZero dp
135 | 
136 | instance (Functor h, Additive (h (Cofree h b)), DetectableZero b, DetectableZero (h (Cofree h b)), DetectableOne b)
137 |       => DetectableOne (Cofree h b) where
138 |   isOne (a :< dp) = isOne a && isZero dp
139 | 
140 | -- | Trim to a finite depth, for examination.
141 | trim :: (Functor h, Additive (h (Cofree h b)), Additive b, DetectableZero b) => Int -> Cofree h b -> Cofree h b
142 | trim 0 _ = zero
143 | trim n (c :< ts) = c :< fmap (trim (n-1)) ts
144 | 
145 | -- To remove
146 | unconsSet :: Ord c => Set [c] -> Bool :* Map c (Set [c])
147 | unconsSet s =
148 |   ( [] `S.member` s
149 |   , sum [M.singleton c (S.singleton cs) | (c:cs) <- S.toList s]
150 |   )
151 | 
152 | -- >>> unconsSet (S.fromList ["a","b","c","d"])
153 | -- (False,fromList [('a',fromList [""]),('b',fromList [""]),('c',fromList [""]),('d',fromList [""])])
154 | 
155 | -- >>> unconsSet (S.fromList ["act","art","cat","car","","cart"])
156 | -- (True,fromList [('a',fromList ["ct","rt"]),('c',fromList ["ar","art","at"])])
157 | 
158 | preimageM :: (Ord a, Ord b) => Map a b -> Map b (Set a)
159 | preimageM m = sum [M.singleton b (S.singleton a) | (a,b) <- M.toList m]
160 | 
161 | -- Similar to Brzozowski's derivative-based decomposition, but with inverted
162 | -- derivatives.
163 | unconsSet' :: Ord c => Set [c] -> Bool :* Map [c] (Set c)
164 | #if 0
165 | unconsSet' s = ( [] `S.member` s
166 |                , sum [M.singleton cs (S.singleton c) | (c:cs) <- S.toList s] )
167 | #else
168 | unconsSet' s = ( [] `S.member` s
169 |                , preimageM (M.fromList [(c,cs) | c:cs <- S.toList s]) )
170 | #endif
171 | 
172 | -- >>> preimageM
173 | 
174 | 
175 | #ifdef EXAMPLES
176 | 
177 | {--------------------------------------------------------------------
178 |     Examples
179 | --------------------------------------------------------------------}
180 | 
181 | type L = Cofree (Map Char) Bool
182 | 
183 | type LS = Cofree (ShareMap Char) Bool
184 | 
185 | -- >>> singleChar "abcd" :: LS
186 | -- False :< SM (fromList [('a','d'),('b','d'),('c','d'),('d','d')]) (fromList [('d',(fromList "abcd",True :< SM (fromList []) (fromList [])))])
187 | 
188 | -- >>> letter :: LS
189 | -- False :< SM (fromList [('a','z'),('b','z'),('c','z'),('d','z'),('e','z'),('f','z'),('g','z'),('h','z'),('i','z'),('j','z'),('k','z'),('l','z'),('m','z'),('n','z'),('o','z'),('p','z'),('q','z'),('r','z'),('s','z'),('t','z'),('u','z'),('v','z'),('w','z'),('x','z'),('y','z'),('z','z')]) (fromList [('z',(fromList "abcdefghijklmnopqrstuvwxyz",True :< SM (fromList []) (fromList [])))])
190 | 
191 | -- >>> pig :: LS
192 | -- False :< SM (fromList [('p','p')]) (fromList [('p',(fromList "p",False :< SM (fromList [('i','i')]) (fromList [('i',(fromList "i",False :< SM (fromList [('g','g')]) (fromList [('g',(fromList "g",True :< SM (fromList []) (fromList [])))])))])))])
193 | -- >>> pink :: LS
194 | -- False :< SM (fromList [('p','p')]) (fromList [('p',(fromList "p",False :< SM (fromList [('i','i')]) (fromList [('i',(fromList "i",False :< SM (fromList [('n','n')]) (fromList [('n',(fromList "n",False :< SM (fromList [('k','k')]) (fromList [('k',(fromList "k",True :< SM (fromList []) (fromList [])))])))])))])))])
195 | -- >>> pp :: LS
196 | -- False :< SM (fromList [('p','p')]) (fromList [('p',(fromList "p",False :< SM (fromList [('i','i')]) (fromList [('i',(fromList "i",False :< SM (fromList [('g','g'),('n','n')]) (fromList [('g',(fromList "g",True :< SM (fromList []) (fromList []))),('n',(fromList "n",False :< SM (fromList [('k','k')]) (fromList [('k',(fromList "k",True :< SM (fromList []) (fromList [])))])))])))])))])
197 | 
198 | -- >>> pig :: L
199 | -- False :< [('p',False :< [('i',False :< [('g',True :< [])])])]
200 | -- >>> pink :: L
201 | -- False :< [('p',False :< [('i',False :< [('n',False :< [('k',True :< [])])])])]
202 | -- >>> pp :: L
203 | -- False :< [('p',False :< [('i',False :< [('g',True :< []),('n',False :< [('k',True :< [])])])])]
204 | 
205 | -- >>> pig :: L
206 | -- False :< [('p',False :< [('i',False :< [('g',True :< [])])])]
207 | -- >>> pink :: L
208 | -- False :< [('p',False :< [('i',False :< [('n',False :< [('k',True :< [])])])])]
209 | -- >>> pp :: L
210 | -- False :< [('p',False :< [('i',False :< [('g',True :< []),('n',False :< [('k',True :< [])])])])]
211 | 
212 | -- >>> trimT 3 as :: L
213 | -- True :< [('a',True :< [('a',True :< [])])]
214 | -- >>> trimT 3 ass :: L
215 | -- True :< [('a',True :< [('a',True :< [])])]
216 | 
217 | -- >>> trimT 7 anbn :: L
218 | -- True :< [('a',False :< [('a',False :< [('a',False :< [('b',False :< [('b',False :< [('b',True :< [])])])]),('b',False :< [('b',True :< [])])]),('b',True :< [])])]
219 | 
220 | -- >>> (pig :: L) ! ""
221 | -- False
222 | -- >>> (pig :: L) ! "pi"
223 | -- False
224 | -- >>> (pig :: L) ! "pig"
225 | -- True
226 | -- >>> (pig :: L) ! "piggy"
227 | -- False
228 | 
229 | -- >>> (anbn :: L) ! ""
230 | -- True
231 | -- >>> (anbn :: L) ! "a"
232 | -- False
233 | -- >>> (anbn :: L) ! "ab"
234 | -- True
235 | -- >>> (anbn :: L) ! "aabb"
236 | -- True
237 | -- >>> (anbn :: L) ! "aaaaabbbbb"
238 | -- True
239 | 
240 | #endif
241 | 


--------------------------------------------------------------------------------
/src/Poly.hs:
--------------------------------------------------------------------------------
  1 | -- {-# OPTIONS_GHC -Wno-unused-imports #-} -- TEMP
  2 | 
  3 | -- | Playing with polynomials
  4 | 
  5 | module Poly where
  6 | 
  7 | import Prelude hiding ((^),sum)
  8 | 
  9 | import Data.List (intersperse,sortOn) -- intercalate,
 10 | import GHC.Exts(IsString(..))
 11 | import Data.Functor.Identity (Identity(..))
 12 | 
 13 | -- import Misc ((:*))
 14 | import Semi
 15 | import Cofree (Cofree(..))
 16 | 
 17 | import MMap (Map)  -- like Data.Map but better Monoid instance
 18 | import qualified MMap as M
 19 | 
 20 | {--------------------------------------------------------------------
 21 |     Univariate polynomials
 22 | --------------------------------------------------------------------}
 23 | 
 24 | newtype Poly1 z = Poly1 z deriving
 25 |   ( Additive, Semiring, Indexable n b, Listable n b, HasSingle n b
 26 |   , Num )  -- , Functor
 27 | 
 28 | instance ( Show b, Ord b, Num b, D01 b, Listable n b z
 29 |          , Show n, DetectableZero n, DetectableOne n )
 30 |       => Show (Poly1 z) where
 31 |   showsPrec d (Poly1 m) = showsPrec d (Terms (term <$> toList m))
 32 |    where
 33 |      term (i,b) = Term b (Pow (Name "x") i)
 34 | 
 35 | -- instance ( DetectableZero b, DetectableOne b, Show b, Listable n b z
 36 | --          , Show n, DetectableZero n, DetectableOne n )
 37 | --       => Show (Poly1 z) where
 38 | --   showsPrec d (Poly1 m) = showsPrec d (Terms (term <$> toList m))
 39 | --    where
 40 | --      term (i,b) = Term b (Pow (Name "x") i)
 41 | 
 42 | type P1 b = Poly1 (Map N b)
 43 | 
 44 | poly1 :: (Listable N b z, Semiring b) => Poly1 z -> b -> b
 45 | poly1 (Poly1 m) z = sum [b <.> z^i | (i,b) <- toList m]
 46 | 
 47 | -- >>> let p = single 1 <+> value 3 :: Poly1 (Map N Z)
 48 | -- >>> p
 49 | -- 3 + x
 50 | -- 
 51 | -- >>> p^3
 52 | -- 27 + 27 * x + 9 * x^2 + x^3
 53 | -- 
 54 | -- >>> p^4
 55 | -- 81 + 108 * x + 54 * x^2 + 12 * x^3 + x^4
 56 | -- 
 57 | -- >>> p^5
 58 | -- 243 + 405 * x + 270 * x^2 + 90 * x^3 + 15 * x^4 + x^5
 59 | -- 
 60 | -- >>> p^9
 61 | -- 19683 + 59049 * x + 78732 * x^2 + 61236 * x^3 + 30618 * x^4 + 10206 * x^5 + 2268 * x^6 + 324 * x^7 + 27 * x^8 + x^9
 62 | -- 
 63 | -- >>> poly1 p 10
 64 | -- 13
 65 | -- >>> poly1 (p^3) 10
 66 | -- 2197
 67 | -- >>> (poly1 p 10)^3
 68 | -- 2197
 69 | -- 
 70 | -- >>> poly1 (p^5) 17 == (poly1 p 17)^5
 71 | -- True
 72 | 
 73 | -- >>> let p = 1 <+> value 3 :: Poly1 [Z]
 74 | -- >>> p
 75 | -- 3 + x
 76 | -- >>> p^3
 77 | -- 27 + 27 * x + 9 * x^2 + x^3
 78 | -- 
 79 | -- >>> p^4
 80 | -- 27 + 27 * x + 9 * x^2 + x^3
 81 | -- 
 82 | -- >>> p^5
 83 | -- 243 + 405 * x + 270 * x^2 + 90 * x^3 + 15 * x^4 + x^5
 84 | -- 
 85 | -- >>> p^9
 86 | -- 19683 + 59049 * x + 78732 * x^2 + 61236 * x^3 + 30618 * x^4 + 10206 * x^5 + 2268 * x^6 + 324 * x^7 + 27 * x^8 + x^9
 87 | -- 
 88 | -- >>> poly1 p 10
 89 | -- 13
 90 | -- >>> poly1 (p^3) 10
 91 | -- 2197
 92 | -- >>> (poly1 p 10)^3
 93 | -- 2197
 94 | -- 
 95 | -- >>> poly1 (p^5) 17 == (poly1 p 17)^5
 96 | -- True
 97 | 
 98 | -- >>> let p = single 1 <+> value 3 :: Poly1 (Cofree Maybe Z)
 99 | -- >>> p
100 | -- 3 + x
101 | -- >>> p^3
102 | -- 27 + 27 * x + 9 * x^2 + x^3
103 | -- 
104 | -- >>> p^5
105 | -- 243 + 405 * x + 270 * x^2 + 90 * x^3 + 15 * x^4 + x^5
106 | -- 
107 | -- >>> poly1 p 10
108 | -- 13
109 | -- >>> poly1 (p^3) 10
110 | -- 2197
111 | -- >>> (poly1 p 10)^3
112 | -- 2197
113 | 
114 | -- poly :: (Listable n b x, HasPow x n b, Semiring b) => Poly1 x -> x -> b
115 | 
116 | poly :: (Listable n b x, Semiring b, HasPow z n b) => Poly1 x -> z -> b
117 | poly (Poly1 m) z = sum [b <.> z^#i | (i,b) <- toList m]
118 | 
119 | -- >>> take 50 $ show (single 1 :: Cofree Identity Z)
120 | -- "0 :< Identity (1 :< Identity (0 :< Identity (0 :< "
121 | 
122 | type PS1 b = Poly1 (Cofree Identity b) -- streams
123 | 
124 | type PL1 b = Poly1 (Cofree Maybe b)  -- nonempty lists
125 | 
126 | -- >>> let p = single 1 <+> value 3 :: PL1 Z
127 | -- >>> p
128 | -- 3 + x
129 | -- >>> p^3
130 | -- 27 + 27 * x + 9 * x^[(),()] + x^[(),(),()]
131 | 
132 | -- >>> single 3 :: Cofree Maybe Z
133 | -- 0 :< Just (0 :< Just (0 :< Just (1 :< Nothing)))
134 | 
135 | -- >>> p
136 | -- <interactive>:1088:2: error: Variable not in scope: p
137 | -- 
138 | -- >>> p^3
139 | -- <interactive>:1089:2: error: Variable not in scope: p
140 | -- 
141 | -- >>> p^5
142 | -- <interactive>:1090:2: error: Variable not in scope: p
143 | 
144 | -- type PolyL b = Poly1 [b]
145 | 
146 | -- As in Doug McIlroy's "The Music of Streams"
147 | integralL :: Fractional b => Poly1 [b] -> Poly1 [b]
148 | -- integralL (Poly1 []) = Poly1 []  -- Breaks ODE termination.
149 | integralL (Poly1 bs0) = Poly1 (0 : go 1 bs0)
150 |  where
151 |    go _ [] = []
152 |    go n (b : d) = b/n : go (n+1) d
153 | 
154 | derivativeL :: (Additive b, Fractional b) => Poly1 [b] -> Poly1 [b]
155 | derivativeL (Poly1 []) = zero
156 | derivativeL (Poly1 (_ : bs0)) = Poly1 (go 1 bs0)
157 |  where
158 |    go _ [] = []
159 |    go n (b : bs) = n * b : go (n+1) bs
160 | 
161 | -- integralL generalizes beyond Maybe, but derivativeL doesn't. TODO: fix.
162 | 
163 | sinL, cosL, expL :: Poly1 [Rational]
164 | sinL = integralL cosL
165 | cosL = 1 - integralL sinL
166 | expL = 1 + integralL expL
167 | 
168 | lop :: Show a => a -> IO ()
169 | lop = putStrLn . (++ " ...") . take 115 . show
170 | 
171 | -- >>> lop sinL
172 | -- x - 1 % 6 * x^3 + 1 % 120 * x^5 - 1 % 5040 * x^7 + 1 % 362880 * x^9 - 1 % 39916800 * x^11 + 1 % 6227020800 * x^13 - ...
173 | -- >>> lop cosL
174 | -- 1 % 1 - 1 % 2 * x^2 + 1 % 24 * x^4 - 1 % 720 * x^6 + 1 % 40320 * x^8 - 1 % 3628800 * x^10 + 1 % 479001600 * x^12 -  ...
175 | -- >>> lop expL
176 | -- 1 % 1 + x + 1 % 2 * x^2 + 1 % 6 * x^3 + 1 % 24 * x^4 + 1 % 120 * x^5 + 1 % 720 * x^6 + 1 % 5040 * x^7 + 1 % 40320 * ...
177 | 
178 | -- >>> lop (derivativeL sinL)  -- |== cosL|
179 | -- 1 % 1 - 1 % 2 * x^2 + 1 % 24 * x^4 - 1 % 720 * x^6 + 1 % 40320 * x^8 - 1 % 3628800 * x^10 + 1 % 479001600 * x^12 -  ...
180 | -- >>> lop (derivativeL cosL)  -- |== - sinL|
181 | -- (-1) % 1 * x + 1 % 6 * x^3 - 1 % 120 * x^5 + 1 % 5040 * x^7 - 1 % 362880 * x^9 + 1 % 39916800 * x^11 - 1 % 62270208 ...
182 | -- >>> lop (derivativeL expL)  -- |== expL|
183 | -- 1 % 1 + x + 1 % 2 * x^2 + 1 % 6 * x^3 + 1 % 24 * x^4 + 1 % 120 * x^5 + 1 % 720 * x^6 + 1 % 5040 * x^7 + 1 % 40320 * ...
184 | 
185 | -- >>> lop (2 * expL)
186 | -- 2 % 1 + 2 % 1 * x + x^2 + 1 % 3 * x^3 + 1 % 12 * x^4 + 1 % 60 * x^5 + 1 % 360 * x^6 + 1 % 2520 * x^7 + 1 % 20160 * x^8 + ...
187 | 
188 | -- >>> lop (sinL * cosL)
189 | -- x - 2 % 3 * x^3 + 2 % 15 * x^5 - 4 % 315 * x^7 + 2 % 2835 * x^9 - 4 % 155925 * x^11 + 4 % 6081075 * x^13 - 8 % 638512875 ...
190 | 
191 | -- TODO: multivariate power series
192 | -- Can I generalize Poly1 and PolyM?
193 | 
194 | {--------------------------------------------------------------------
195 |     Multivariate polynomials
196 | --------------------------------------------------------------------}
197 | 
198 | -- Polynomials with named "variables"
199 | newtype PolyM b = PolyM { unPolyM :: Map (Map Name N) b } deriving
200 |   (Additive, Semiring, Functor, Indexable (Map Name N) b, HasSingle (Map Name N) b)
201 | 
202 | instance (Show b, Ord b, Num b, D01 b) => Show (PolyM b) where
203 |   showsPrec d (PolyM m) =
204 |     showsPrec d (Terms (term <$> sortOn (M.keys . fst) (M.toDescList m)))
205 |    where
206 |      term (p,b) = Term b (Pows p)
207 | 
208 | -- instance (DetectableZero b, DetectableOne b, Show b) => Show (PolyM b) where
209 | --   showsPrec d (PolyM m) =
210 | --     showsPrec d (Terms (term <$> sortOn (M.keys . fst) (M.toDescList m)))
211 | --    where
212 | --      term (p,b) = Term b (Pows p)
213 | 
214 | -- TODO: improve the sorting criterion
215 | 
216 | -- varM :: Name -> PolyM Z
217 | varM :: Semiring b => Name -> PolyM b
218 | varM = single . single
219 | 
220 | -- >>> let p = varM "x" <+> varM "y" <+> varM "z" :: PolyM Z
221 | -- >>> p
222 | -- x + y + z
223 | -- 
224 | -- >>> p^2
225 | -- x^2 + 2 * x * y + 2 * x * z + y^2 + 2 * y * z + z^2
226 | -- 
227 | -- >>> p^3
228 | -- x^3 + 3 * x^2 * y + 3 * x * y^2 + 6 * x * y * z + 3 * x^2 * z + 3 * x * z^2 + y^3 + 3 * y^2 * z + 3 * y * z^2 + z^3
229 | 
230 | -- TODO: use the generalized Poly1 in place of PolyM. I'll have to tweak
231 | -- something, since the Poly1 Show instance wants to insert "x".
232 | 
233 | type PM b = Poly1 (Map (Map Name N) b)
234 | 
235 | -- >>> let p = single 1 <+> value 3 :: Poly1 [Z]
236 | -- >>> p
237 | -- 3 + x
238 | -- >>> p^3
239 | -- 27 + 27 * x + 9 * x^2 + x^3
240 | -- 
241 | -- >>> p^5
242 | -- 243 + 405 * x + 270 * x^2 + 90 * x^3 + 15 * x^4 + x^5
243 | --
244 | -- >>> poly1 p 10
245 | -- 13
246 | -- >>> poly1 (p^3) 10
247 | -- 2197
248 | -- >>> (poly1 p 10)^3
249 | -- 2197
250 | 
251 | {--------------------------------------------------------------------
252 |     Show utilities
253 | --------------------------------------------------------------------}
254 | 
255 | compose :: [a -> a] -> (a -> a)
256 | compose = foldr (.) id
257 | 
258 | showIter :: Show b => Int -> String -> (b -> Bool) -> Int -> [b] -> ShowS
259 | showIter inner op omit outer bs =
260 |   showParen (outer > inner) $
261 |   compose (intersperse (showString op) (showsPrec inner <$> filter (not . omit) bs))
262 | 
263 | -- TODO: check for empty bs and for singleton bs
264 | 
265 | showSum :: (Show b, DetectableZero b) => Int -> [b] -> ShowS
266 | showSum  = showIter 6 " + " isZero
267 | 
268 | showProd :: (Show b, DetectableOne b) => Int -> [b] -> ShowS
269 | showProd = showIter 7 " * " isOne
270 | 
271 | -- Cleverer showSum that uses subtraction for negative coefficients
272 | showSum' :: (Show x, Show b, Ord b, Num b, D01 b, D01 x) 
273 |          => Int -> [Term b x] -> ShowS
274 | showSum' p terms = go (filter (not . isZero) terms)
275 |  where
276 |    go [] = showString "0"  -- I don't think we'll get this case
277 |    go (b0:bs0) = showParen (p > 6) $
278 |                  showsPrec 6 b0 . compose (term <$> bs0)
279 |     where
280 |       term (Term b x) | b < 0 = showString (" - ") . showsPrec 6 (Term (-b) x)
281 |                       | otherwise = showString (" + ") . showsPrec 6 (Term b x)
282 | 
283 | 
284 | {--------------------------------------------------------------------
285 |     Syntactic representations
286 | --------------------------------------------------------------------}
287 | 
288 | newtype Name = Name String deriving (Eq,Ord)
289 | 
290 | instance DetectableZero Name where isZero = const False
291 | instance DetectableOne  Name where isOne  = const False
292 | 
293 | instance Show     Name where show (Name s) = s
294 | instance IsString Name where fromString = Name
295 | 
296 | data Pow b n = Pow b n  -- b * x^n
297 | 
298 | instance DetectableZero b => DetectableZero (Pow b n) where
299 |   isZero (Pow b _) = isZero b
300 | 
301 | instance (DetectableOne b, DetectableZero n) => DetectableOne (Pow b n) where
302 |   isOne (Pow b n) = isOne b || isZero n
303 | 
304 | instance (Show n, Show b, DetectableZero n, DetectableOne n, DetectableOne b)
305 |       => Show (Pow b n) where
306 |   showsPrec d p@(Pow b n)
307 |     | isOne p   = showString "1"
308 |     | isOne n   = showsPrec d b
309 |     | otherwise = showParen (d >= 8) $
310 |                   showsPrec 8 b . showString "^" . showsPrec 8 n
311 | 
312 | -- >>> Pow "z" 5 :: Pow Name N
313 | -- z^5
314 | 
315 | newtype Pows b = Pows (Map b N) -- deriving (Semiring,Monoid)
316 | 
317 | instance DetectableZero b => DetectableZero (Pows b) where
318 |   isZero = const False
319 | 
320 | instance DetectableOne (Pows b) where
321 |   isOne (Pows m) = all isZero m
322 | 
323 | -- TODO: try changing isZero for Map to be 'all isZero'. Might wedge on recursive examples.
324 | 
325 | instance (Show b, DetectableZero b, DetectableOne b) => Show (Pows b) where
326 |   showsPrec d p@(Pows m)
327 |     | isOne p   = showString "1"
328 |     | otherwise = showProd d (uncurry Pow <$> M.toList m)
329 | 
330 | -- >>> Pows (("x" +-> 2) <+> ("y" +-> 3)) :: Pows Name
331 | -- x^2 * y^3
332 | 
333 | data Term b x = Term b x  -- b * x
334 | 
335 | instance (DetectableZero b, DetectableZero x) => DetectableZero (Term b x) where
336 |   isZero (Term b x) = isZero b || isZero x
337 | 
338 | instance (Show b, Show x, DetectableOne x, DetectableZero b, DetectableOne b)
339 |       => Show (Term b x) where
340 |   showsPrec d (Term b x)
341 |     | isZero b || isOne x = showsPrec d b
342 |     | isOne  b  = showsPrec d x
343 |     | otherwise = showParen (d > 6) $
344 |                   showsPrec 6 b . showString " * " . showsPrec 6 x
345 | 
346 | -- >>> Term 7 (Pows (("x" +-> 2) <+> ("y" +-> 3))) :: Term Z (Pows Name)
347 | -- 7 * x^2 * y^3
348 | 
349 | data Terms b = Terms [b]
350 | 
351 | -- instance (Show b, DetectableZero b) => Show (Terms b) where
352 | --   showsPrec d (Terms bs) = showSum d bs
353 | 
354 | instance (Show b, Show x, Ord b, Num b, D01 b, D01 x)
355 |       => Show (Terms (Term b x)) where
356 |   showsPrec d (Terms bs) = showSum' d bs
357 | 


--------------------------------------------------------------------------------
/reviews-icfp2019.md:
--------------------------------------------------------------------------------
  1 | % Reviews for "Generalized Convolution and Efficient Language Recognition"
  2 | 
  3 | ICFP 2019 Paper #46 Reviews and Comments
  4 | ===========================================================================
  5 | Paper #46 Generalized Convolution and Efficient Language Recognition
  6 | 
  7 | 
  8 | Review #46A
  9 | ===========================================================================
 10 | 
 11 | Overall merit
 12 | -------------
 13 | C. Weak paper, though I will not fight strongly against it.
 14 | 
 15 | Reviewer expertise
 16 | ------------------
 17 | Y. I am knowledgeable in the area, though not an expert.
 18 | 
 19 | Paper summary
 20 | -------------
 21 | This paper remarks that convolution is a general operation that endows
 22 | semiring structure on the set of functions from any monoid to any
 23 | semiring.  Usually the word "convolution" evokes a domain monoid of
 24 | integers (giving rise to polynomial convolution) or of integer tuples
 25 | (giving rise to image convolution).  But when the domain monoid is
 26 | the monoid of lists (i.e., strings), the paper generalizes Brzozowski
 27 | derivatives from sets (i.e., when the range semiring is the semiring of
 28 | Booleans) to any other range semiring, and derives two implementations:
 29 | one that stores regular expressions and one that stores tries.
 30 | 
 31 | Comments for author
 32 | -------------------
 33 | The generalizations represented in Figures 1-4 are worthwhile and to my
 34 | knowledge novel.  But these contributions are buried among definitions
 35 | and derivations whose applications within and beyond the paper remain
 36 | unclear.
 37 | 
 38 | - For example, it's not clear how Sections 10-13 are novel or help
 39 |   to produce Figure 9 efficiently or at all.  The paper would be far
 40 |   more interesting if it were shown that a generic implementation of
 41 |   convolution can achieve performance competitive against existing
 42 |   specialized implementations for string matching and image blurring.
 43 | 
 44 | - To take another example, line 664 introduces "an alternative to
 45 |   repeated syntactic differentiation" but it's unclear what shared goal
 46 |   is achieved by both "repeated syntactic differentiation" and this
 47 |   "alternative", there is no demonstration of "the syntactic overhead",
 48 |   and it is unclear what "f" means in "a choice of f".
 49 |   *[Conal: "f" should be "h".]*
 50 | 
 51 | This paper lacks
 52 | 
 53 | - upfront signposting of dependencies and contributions
 54 | 
 55 |   Examples of roadmap descriptions that need to be preponed in the
 56 |   paper include the first sentence of Section 7, the first paragraph
 57 |   of Section 8, and the first paragraph of Section 10.  It is unclear
 58 |   where lines 611-640 and lines 991-993 are used.  The implementations
 59 |   in Figures 3 and 4 should be demonstrated in the style of lines
 60 |   1163-1172.
 61 | 
 62 | - evidence of computational practicality
 63 | 
 64 |   Phrases such as "quite efficient" (line 85) and "dramatic speed
 65 |   improvement" (line 92) are vague because the standards of comparison
 66 |   are unclear.  And performance comparisons are not interesting if they
 67 |   are only made against other implementations in the same work.  Hence,
 68 |   Figure 6 should compare against at least one existing implementation
 69 |   (such as Happy).  Measurements should show standard errors.  Both
 70 |   matching and non-matching strings should be tested.
 71 | 
 72 | - comparison with related work
 73 | 
 74 |   The first paragraph of Section 14 is not enough because there is no
 75 |   comparison.
 76 | 
 77 | The code shown in the paper does not seem trustworthy: infinite sums
 78 | are confused with finite sums (for example on line 1106), and the `Key`
 79 | of an `Indexable` type constructor is sometimes expressed using a
 80 | functional dependency (line 300) and sometimes expressed using a type
 81 | family (lines 614, 789).
 82 | 
 83 | Minor comments:
 84 | 
 85 | *   Line 75 "the second and seventh decades of the twentieth century": just
 86 |     name the decades or years (1910s? etc.) to avoid off-by-one confusion.
 87 | *   Lines 87-90: This (4th) bullet point is unclear.  What "key operations"?
 88 |     What "comonad operations"? What "various representations"?  Each bullet
 89 |     point should name the specific section of the paper that makes that
 90 |     contribution.
 91 |     *[Conal: Added detail: "Observation that Brzozowski's two key operations on languages (emptiness and differentiation) generalize to the comonad operations (*coreturn* and *cojoin*) of the standard function-from-monoid comonad and various representations of those functions (including generalized regular expressions)."]*
 92 | *   Line 171 "this instance" referent unclear (too far from line 160).
 93 |     *[Conal: "... the `Additive (a -> b)` instance above"]*
 94 | *   Line 222: Clarify whether these two equations are equivalent.
 95 | *   Line 243 "a additive" -> "an additive" *[Conal: fixed]*
 96 | *   Line 265: Isn't this instance missing some context such as `Semiring s =>`?
 97 |     *[Conal: fixed.]*
 98 |     And why not generalize `(a -> s)` to `(a -> s')`?
 99 |     *[Conal: Because the type of multiplication requires that its arguments have the same type.]*
100 | *   Line 329: $\sum$ is undefined, and what if $f$ maps an infinite number
101 |     of $a$s to nonzero $b$s?
102 | *   Lemma 6 seems to hold only for the instance on line 315.
103 |     *[Conal: Yes, because of the use of curry. Clarified by adding "For functions,".]*
104 | *   Lemma 7 seems unused in the rest of the paper.
105 |     *[Conal: Yes. It's part of the theory developed here, though unused in this paper.]*
106 | *   Line 339 `(->) a` needs to refer more explicitly to the instance on line 265.
107 |     *[Conal: Added "(given the LeftSemimodule s (a → s) instance in Section 2.5)".*]
108 | *   Line 340 "homomorphisms" needs an `instance LeftSemimodule s s`.
109 | *   Line 340 `single` seems like it should be `value`.
110 |     *[Conal: Indeed. Fixed.]*
111 | *   Line 340 "semiring homomorphism" between which instances?
112 | *   Lines 436 and 440 typeset `P` inconsistently (as $P$ and $\mathcal{P}$)
113 |     *[Conal: Fixed.]*
114 | *   Lines 460-461 "apply the same sort of reasoning as in Section 3 and then
115 |     generalize from Bool to an arbitrary semiring" needs more details.
116 | *   Line 489 "represented is" -> "represented as"
117 |     *[Conal: Fixed.]*
118 | *   Line 501 `M.null` -> `all isZero . M.elems`.
119 | *   Line 583 is only proven for finite p, so it is not proven.
120 | *   Line 742: Please parenthesize `c -> LTrie c b`
121 |     *[Conal: Fixed.]*
122 | *   Line 766 "sanity check" -> "smoke test"
123 | *   Line 781 "one more crucial tricks" -> "one more crucial trick"
124 |     *[Conal: fixed to "one or more crucial trick".]*
125 | *   Line 782 "might" and line 783 "might" are not informative.
126 | *   Figure 6:  All time measurements should be given in the same unit (μs
127 |     for example) (lines 771-772), and all decimal points in each column
128 |     should be aligned.  Whether the two occurrences of $\infty$ represent
129 |     nontermination in theory or running out of time in practice should be
130 |     ascertained (lines 780-781).
131 | *   Line 901 "no fmap" is unclear.
132 |     *[Conal: removed "and no fmap". I don't remember what I was trying to say there.]*
133 | *   Line ~~1047~~ 1062 "a the" -> "the"
134 |     *[Conal: fixed.]*
135 | *   Switching `Semiring` instances in Figure 8 is confusing.  Instead, just
136 |     define `<-` early and use it in Figure 1. 
137 | *   Theorem 16 is meaningless because the Fourier transform is not defined
138 |     for generic types `a` and `b`.
139 | *   Line 1118: How does this `Show` instance enumerate the support of z?
140 |     *[Conal: Via a type-check I elided from the paper.]*
141 | 
142 | 
143 | 
144 | Review #46B
145 | ===========================================================================
146 | 
147 | Overall merit
148 | -------------
149 | C. Weak paper, though I will not fight strongly against it.
150 | 
151 | Reviewer expertise
152 | ------------------
153 | Y. I am knowledgeable in the area, though not an expert.
154 | 
155 | Paper summary
156 | -------------
157 | This paper provides a formalization of semirings, convolutions, and
158 | Brzozowski's derivatives (hereinafter simply called "derivatives")
159 | using functional programming idioms (i.e., type classes, monads,
160 | etc.).
161 | 
162 | Comments for author
163 | -------------------
164 | Generally I like this kind of papers. Re-formulation from functional
165 | programming may bring new perspectives and moreover general, concise,
166 | and correct-by-construction implementations. However, for this
167 | particular paper, I unfortunately did not get favorable impression. 
168 | 
169 | First, the contributions around derivatives seem (at least, partially)
170 | known.  Derivatives for "weighted" regular expressions are studied in
171 | the following papers.
172 | 
173 | * Sylvain Lombardy, Jacques Sakarovitch:
174 |   Derivatives of rational expressions with multiplicity.
175 |   Theor. Comput. Sci. 332(1-3): 141-177 (2005)
176 | * Jean-Marc Champarnaud, Gerard Duchamp:
177 |   Derivatives of rational expressions and related theorems.
178 |   Theor. Comput. Sci. 313(1): 31-44 (2004)
179 | 
180 | Moreover, a connection between convolutions and derivatives can be found
181 | in the following paper.
182 | 
183 | * Jan J. M. M. Rutten:
184 |   Behavioural differential equations: a coinductive calculus of
185 |   streams, automata, and power series.
186 |   Theor. Comput. Sci. 308(1-3): 1-53 (2003)
187 | 
188 | These preceding studies are not reasons for rejecting the current
189 | paper if a new insight from a perspective from functional programming
190 | is presented.  However, such an insight is unclear. For example:
191 | "comonadic understanding of derivatives" sounds interesting, but so
192 | what?  Are comonad laws useful for reasoning about derivatives for
193 | generalized regular expressions, for example?
194 | 
195 | A potential benefit is that the formalism itself is a run-able
196 | implementation. However, for regular expression matching, even
197 | specialized efficient implementations are favorable, and the current
198 | evaluation is not convincing that the developed implementation is
199 | efficient. The implementation can do more than regular expression
200 | matching, but usefulness of the generality is not presented.
201 | 
202 | Another weakness of the current paper is its presentation. In my
203 | impression, the focus and the technical developments are somewhat
204 | unclear. I pick up a few relatively serious issues:
205 | - Section 3 says "This process of calculating instances from
206 |   homomorphisms is the central guiding principle of this paper", but I
207 |   could not find such a "guiding principle". Most discussions are the
208 |   converse direction, namely giving an instance and then showing it is
209 |   a homomorphism.
210 | - Although convolutions are the central topic of this paper, their
211 |   introduction in Section 4 (the first paragraph of p.10) is
212 |   unreasonable.  Why is the usual definition disappointing? Why are
213 |   "the same sort of reasoning" omitted?
214 | - Although convolutions are the central topic of this paper,
215 |   connection between convolution and derivatives is unclear.
216 |   Convolution is introduced as an operation on some kinds of functions
217 |   in Section 4, and after that, never mentioned until Section 10.
218 | - (Though I have known derivatives) I suspect that for those who do
219 |   not know derivatives Section 6 seems not understandable. The
220 |   author(s) should explain why such decomposition is considered here
221 |   and what at_e and D are.
222 | 
223 | From above-mentioned issues, I judged the current paper is premature
224 | for publication, even though it studies potentially interesting
225 | research topic.
226 | 
227 | 
228 | 
229 | Review #46C
230 | ===========================================================================
231 | 
232 | Overall merit
233 | -------------
234 | C. Weak paper, though I will not fight strongly against it.
235 | 
236 | Reviewer expertise
237 | ------------------
238 | Y. I am knowledgeable in the area, though not an expert.
239 | 
240 | Paper summary
241 | -------------
242 | This work presents a quite interesting and powerful generalization of regular expressions and the associated method of Brzozowski derivatives. The authors present regular expression concatenation (or more generally, formal language concatenation)  as a special case of convolution and propose a couple of general algebraic structures that can represent a wide range of use cases beyond regular expression matching.
243 | 
244 | Comments for author
245 | -------------------
246 | When I read the abstract and the introduction of the paper, I was rather thrilled and was eager to accept and support the paper. The authors have obviously invested a lot of time and thought into this work. I find the proposed generalization quite interesting and would love to see it published.
247 | 
248 | However, my main problem with the paper is that it is presented in a way that makes it really hard to understand what is going on. There is so much barely explained code in the paper, and very little guidance of the reader, that I got lost completely somewhere around p.13 and I stopped reading.
249 | 
250 | Below I'll give some more detailed hints of where I think the presentation is lacking. If the paper gets rejected, I strongly encourage the papers to continue this line of work and resubmit a revised version soon. If the paper gets accepted, I also suggest to revise the paper for readability.
251 | 
252 | Detailed comments:
253 | 
254 | In general, I thought that the paper lacks a proper structure that is easy to understand. In the first sections, it was never clear to me whether the authors summarize preliminaries or whether there are novel ideas in here. I was always waiting impatiently for the "meat" of the paper to begin. I think you should at least give the reader some kind of roadmap about how you present the material, especially since it takes a very long time before you return to the issues mentioned in the introduction.
255 | 
256 | l.479ff: Here I was rather puzzled that you suddenly talk about "testing for membership" but without demonstrating how one does test for membership. I would have appreciated if you had shown the full code here and illustrated which type class instances are derived and needed for that example to go through.
257 | 
258 | In section 6, I began to feel completely lost. All that code in Lemma 9 with no guidance about what it does.
259 | 
260 | With regard to the definition of D in l. 534 I wondered whether you are cheating in the sense that the Brzozowski algorithm gives me a concrete regular expression that represents the set of suffixes that can still follow, but in l. 534 it looks like you are merely creating a closure and nothing happens until the closure is applied to a concrete suffix.
261 | 
262 | l.570: I was rather confused that you merely drop the words "coreturn", "cojoin" etc. here without explaining what they mean and how exactly that correspondence works out.
263 | 


--------------------------------------------------------------------------------
/test/Benchmarks/with isOne.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # Group "star-a"
  3 | 
  4 | ```
  5 | 
  6 | benchmarking "a"/RegExp:Function
  7 | time                 448.9 ns   (442.9 ns .. 456.6 ns)
  8 |                      0.999 R²   (0.998 R² .. 1.000 R²)
  9 | mean                 454.3 ns   (449.1 ns .. 476.1 ns)
 10 | std dev              29.27 ns   (9.824 ns .. 68.71 ns)
 11 | variance introduced by outliers: 78% (severely inflated)
 12 | 
 13 | benchmarking "a"/RegExp:Map
 14 | time                 364.0 ns   (361.2 ns .. 367.4 ns)
 15 |                      1.000 R²   (0.999 R² .. 1.000 R²)
 16 | mean                 364.1 ns   (361.8 ns .. 366.6 ns)
 17 | std dev              8.325 ns   (6.594 ns .. 10.44 ns)
 18 | variance introduced by outliers: 31% (moderately inflated)
 19 | 
 20 | benchmarking "a"/RegExp:IntMap
 21 | time                 363.0 ns   (358.2 ns .. 368.2 ns)
 22 |                      0.999 R²   (0.999 R² .. 1.000 R²)
 23 | mean                 362.9 ns   (360.2 ns .. 366.3 ns)
 24 | std dev              11.06 ns   (8.428 ns .. 16.36 ns)
 25 | variance introduced by outliers: 44% (moderately inflated)
 26 | 
 27 | benchmarking "a"/Cofree:Map
 28 | time                 25.71 ns   (25.48 ns .. 25.97 ns)
 29 |                      0.999 R²   (0.999 R² .. 1.000 R²)
 30 | mean                 25.64 ns   (25.50 ns .. 25.84 ns)
 31 | std dev              554.5 ps   (431.9 ps .. 735.4 ps)
 32 | variance introduced by outliers: 33% (moderately inflated)
 33 | 
 34 | benchmarking "a"/Cofree:IntMap
 35 | time                 24.44 ns   (24.29 ns .. 24.60 ns)
 36 |                      1.000 R²   (1.000 R² .. 1.000 R²)
 37 | mean                 24.48 ns   (24.36 ns .. 24.60 ns)
 38 | std dev              414.3 ps   (336.1 ps .. 563.0 ps)
 39 | variance introduced by outliers: 23% (moderately inflated)
 40 | 
 41 | benchmarking a50/RegExp:Function
 42 | time                 14.65 μs   (14.55 μs .. 14.76 μs)
 43 |                      1.000 R²   (0.999 R² .. 1.000 R²)
 44 | mean                 14.61 μs   (14.51 μs .. 14.73 μs)
 45 | std dev              348.4 ns   (259.3 ns .. 480.2 ns)
 46 | variance introduced by outliers: 24% (moderately inflated)
 47 | 
 48 | benchmarking a50/RegExp:Map
 49 | time                 11.07 μs   (10.98 μs .. 11.16 μs)
 50 |                      0.999 R²   (0.999 R² .. 1.000 R²)
 51 | mean                 11.10 μs   (11.01 μs .. 11.21 μs)
 52 | std dev              329.8 ns   (252.6 ns .. 443.9 ns)
 53 | variance introduced by outliers: 34% (moderately inflated)
 54 | 
 55 | benchmarking a50/RegExp:IntMap
 56 | time                 10.83 μs   (10.67 μs .. 10.97 μs)
 57 |                      0.999 R²   (0.998 R² .. 0.999 R²)
 58 | mean                 10.71 μs   (10.62 μs .. 10.80 μs)
 59 | std dev              303.4 ns   (249.6 ns .. 357.6 ns)
 60 | variance introduced by outliers: 32% (moderately inflated)
 61 | 
 62 | benchmarking a50/Cofree:Map
 63 | time                 1.297 μs   (1.288 μs .. 1.308 μs)
 64 |                      0.999 R²   (0.999 R² .. 1.000 R²)
 65 | mean                 1.301 μs   (1.292 μs .. 1.312 μs)
 66 | std dev              33.40 ns   (27.52 ns .. 41.50 ns)
 67 | variance introduced by outliers: 33% (moderately inflated)
 68 | 
 69 | benchmarking a50/Cofree:IntMap
 70 | time                 1.230 μs   (1.218 μs .. 1.240 μs)
 71 |                      1.000 R²   (0.999 R² .. 1.000 R²)
 72 | mean                 1.226 μs   (1.219 μs .. 1.237 μs)
 73 | std dev              28.83 ns   (20.17 ns .. 40.29 ns)
 74 | variance introduced by outliers: 30% (moderately inflated)
 75 | 
 76 | ```
 77 | 
 78 | # Group "letters"
 79 | 
 80 | ```
 81 | 
 82 | benchmarking asdf-50/RegExp:Function
 83 | time                 4.091 ms   (4.051 ms .. 4.131 ms)
 84 |                      0.999 R²   (0.999 R² .. 1.000 R²)
 85 | mean                 4.013 ms   (3.985 ms .. 4.043 ms)
 86 | std dev              90.09 μs   (74.66 μs .. 117.6 μs)
 87 | 
 88 | benchmarking asdf-50/RegExp:Map
 89 | time                 3.297 ms   (3.263 ms .. 3.329 ms)
 90 |                      0.999 R²   (0.999 R² .. 1.000 R²)
 91 | mean                 3.326 ms   (3.304 ms .. 3.348 ms)
 92 | std dev              70.53 μs   (58.53 μs .. 92.42 μs)
 93 | 
 94 | benchmarking asdf-50/RegExp:IntMap
 95 | time                 2.840 ms   (2.813 ms .. 2.873 ms)
 96 |                      0.998 R²   (0.996 R² .. 0.999 R²)
 97 | mean                 2.936 ms   (2.894 ms .. 2.999 ms)
 98 | std dev              161.0 μs   (116.5 μs .. 217.3 μs)
 99 | variance introduced by outliers: 36% (moderately inflated)
100 | 
101 | benchmarking asdf-50/Cofree:Map
102 | time                 7.288 μs   (7.247 μs .. 7.331 μs)
103 |                      1.000 R²   (1.000 R² .. 1.000 R²)
104 | mean                 7.286 μs   (7.249 μs .. 7.331 μs)
105 | std dev              131.6 ns   (104.4 ns .. 160.1 ns)
106 | variance introduced by outliers: 17% (moderately inflated)
107 | 
108 | benchmarking asdf-50/Cofree:IntMap
109 | time                 6.775 μs   (6.741 μs .. 6.809 μs)
110 |                      1.000 R²   (1.000 R² .. 1.000 R²)
111 | mean                 6.787 μs   (6.753 μs .. 6.819 μs)
112 | std dev              112.8 ns   (95.03 ns .. 137.5 ns)
113 | variance introduced by outliers: 15% (moderately inflated)
114 | 
115 | ```
116 | 
117 | # Group "dyck"
118 | 
119 | ```
120 | 
121 | benchmarking "[]"/RegExp:Function
122 | time                 1.661 μs   (1.642 μs .. 1.678 μs)
123 |                      0.999 R²   (0.999 R² .. 1.000 R²)
124 | mean                 1.649 μs   (1.637 μs .. 1.662 μs)
125 | std dev              40.52 ns   (31.23 ns .. 52.59 ns)
126 | variance introduced by outliers: 31% (moderately inflated)
127 | 
128 | benchmarking "[]"/Cofree:Map
129 | time                 51.63 ns   (51.37 ns .. 51.94 ns)
130 |                      0.999 R²   (0.998 R² .. 1.000 R²)
131 | mean                 52.07 ns   (51.55 ns .. 53.30 ns)
132 | std dev              2.716 ns   (1.141 ns .. 5.270 ns)
133 | variance introduced by outliers: 73% (severely inflated)
134 | 
135 | benchmarking "[]"/Cofree:IntMap
136 | time                 51.72 ns   (51.33 ns .. 52.06 ns)
137 |                      1.000 R²   (0.999 R² .. 1.000 R²)
138 | mean                 51.76 ns   (51.39 ns .. 52.72 ns)
139 | std dev              1.908 ns   (757.1 ps .. 4.039 ns)
140 | variance introduced by outliers: 58% (severely inflated)
141 | 
142 | benchmarking "[[]]"/RegExp:Function
143 | time                 3.913 μs   (3.885 μs .. 3.950 μs)
144 |                      1.000 R²   (0.999 R² .. 1.000 R²)
145 | mean                 3.934 μs   (3.910 μs .. 3.960 μs)
146 | std dev              83.99 ns   (70.03 ns .. 100.5 ns)
147 | variance introduced by outliers: 23% (moderately inflated)
148 | 
149 | benchmarking "[[]]"/Cofree:Map
150 | time                 104.8 ns   (104.0 ns .. 105.6 ns)
151 |                      1.000 R²   (0.999 R² .. 1.000 R²)
152 | mean                 104.7 ns   (104.2 ns .. 105.5 ns)
153 | std dev              2.141 ns   (1.646 ns .. 2.729 ns)
154 | variance introduced by outliers: 28% (moderately inflated)
155 | 
156 | benchmarking "[[]]"/Cofree:IntMap
157 | time                 103.9 ns   (103.2 ns .. 104.6 ns)
158 |                      1.000 R²   (1.000 R² .. 1.000 R²)
159 | mean                 103.9 ns   (103.4 ns .. 104.6 ns)
160 | std dev              1.846 ns   (1.494 ns .. 2.460 ns)
161 | variance introduced by outliers: 23% (moderately inflated)
162 | 
163 | benchmarking "[[a]]"/RegExp:Function
164 | time                 2.951 μs   (2.941 μs .. 2.963 μs)
165 |                      1.000 R²   (1.000 R² .. 1.000 R²)
166 | mean                 2.937 μs   (2.926 μs .. 2.949 μs)
167 | std dev              38.86 ns   (31.23 ns .. 50.80 ns)
168 | variance introduced by outliers: 11% (moderately inflated)
169 | 
170 | benchmarking "[[a]]"/Cofree:Map
171 | time                 142.2 ns   (140.7 ns .. 143.9 ns)
172 |                      0.999 R²   (0.999 R² .. 1.000 R²)
173 | mean                 141.4 ns   (140.5 ns .. 142.5 ns)
174 | std dev              3.432 ns   (2.651 ns .. 5.189 ns)
175 | variance introduced by outliers: 35% (moderately inflated)
176 | 
177 | benchmarking "[[a]]"/Cofree:IntMap
178 | time                 128.1 ns   (127.6 ns .. 128.7 ns)
179 |                      1.000 R²   (0.999 R² .. 1.000 R²)
180 | mean                 128.8 ns   (128.0 ns .. 130.5 ns)
181 | std dev              3.726 ns   (2.232 ns .. 6.185 ns)
182 | variance introduced by outliers: 44% (moderately inflated)
183 | 
184 | benchmarking "[[]][]"/RegExp:Function
185 | time                 5.295 μs   (5.267 μs .. 5.318 μs)
186 |                      1.000 R²   (0.999 R² .. 1.000 R²)
187 | mean                 5.252 μs   (5.229 μs .. 5.275 μs)
188 | std dev              78.17 ns   (69.57 ns .. 89.95 ns)
189 | variance introduced by outliers: 12% (moderately inflated)
190 | 
191 | benchmarking "[[]][]"/Cofree:Map
192 | time                 161.0 ns   (159.9 ns .. 162.0 ns)
193 |                      1.000 R²   (1.000 R² .. 1.000 R²)
194 | mean                 160.5 ns   (159.8 ns .. 161.3 ns)
195 | std dev              2.586 ns   (2.240 ns .. 3.098 ns)
196 | variance introduced by outliers: 19% (moderately inflated)
197 | 
198 | benchmarking "[[]][]"/Cofree:IntMap
199 | time                 157.7 ns   (156.0 ns .. 159.7 ns)
200 |                      0.999 R²   (0.998 R² .. 0.999 R²)
201 | mean                 159.8 ns   (158.1 ns .. 162.3 ns)
202 | std dev              6.857 ns   (4.853 ns .. 9.427 ns)
203 | variance introduced by outliers: 63% (severely inflated)
204 | 
205 | ```
206 | 
207 | # Group "anbn"
208 | 
209 | ```
210 | 
211 | benchmarking ""/RegExp:Function
212 | time                 27.02 ns   (26.85 ns .. 27.20 ns)
213 |                      1.000 R²   (0.999 R² .. 1.000 R²)
214 | mean                 27.21 ns   (27.04 ns .. 27.41 ns)
215 | std dev              636.5 ps   (531.6 ps .. 781.0 ps)
216 | variance introduced by outliers: 36% (moderately inflated)
217 | 
218 | benchmarking ""/Cofree:Map
219 | time                 7.891 ns   (7.816 ns .. 7.950 ns)
220 |                      1.000 R²   (1.000 R² .. 1.000 R²)
221 | mean                 7.806 ns   (7.772 ns .. 7.852 ns)
222 | std dev              136.7 ps   (105.1 ps .. 195.5 ps)
223 | variance introduced by outliers: 26% (moderately inflated)
224 | 
225 | benchmarking ""/Cofree:IntMap
226 | time                 7.962 ns   (7.879 ns .. 8.048 ns)
227 |                      0.999 R²   (0.999 R² .. 1.000 R²)
228 | mean                 8.016 ns   (7.946 ns .. 8.104 ns)
229 | std dev              259.6 ps   (207.8 ps .. 337.5 ps)
230 | variance introduced by outliers: 55% (severely inflated)
231 | 
232 | benchmarking "ab"/RegExp:Function
233 | time                 1.473 μs   (1.465 μs .. 1.483 μs)
234 |                      1.000 R²   (0.999 R² .. 1.000 R²)
235 | mean                 1.483 μs   (1.473 μs .. 1.499 μs)
236 | std dev              44.05 ns   (31.24 ns .. 65.88 ns)
237 | variance introduced by outliers: 39% (moderately inflated)
238 | 
239 | benchmarking "ab"/Cofree:Map
240 | time                 51.86 ns   (51.60 ns .. 52.24 ns)
241 |                      1.000 R²   (1.000 R² .. 1.000 R²)
242 | mean                 51.71 ns   (51.48 ns .. 51.99 ns)
243 | std dev              830.9 ps   (644.1 ps .. 1.072 ns)
244 | variance introduced by outliers: 20% (moderately inflated)
245 | 
246 | benchmarking "ab"/Cofree:IntMap
247 | time                 52.55 ns   (51.82 ns .. 53.33 ns)
248 |                      0.999 R²   (0.999 R² .. 1.000 R²)
249 | mean                 52.11 ns   (51.81 ns .. 52.48 ns)
250 | std dev              1.208 ns   (920.3 ps .. 1.534 ns)
251 | variance introduced by outliers: 35% (moderately inflated)
252 | 
253 | benchmarking "aacbb"/RegExp:Function
254 | time                 2.682 μs   (2.653 μs .. 2.718 μs)
255 |                      0.999 R²   (0.997 R² .. 1.000 R²)
256 | mean                 2.658 μs   (2.638 μs .. 2.695 μs)
257 | std dev              89.17 ns   (50.82 ns .. 151.7 ns)
258 | variance introduced by outliers: 44% (moderately inflated)
259 | 
260 | benchmarking "aacbb"/Cofree:Map
261 | time                 141.0 ns   (140.0 ns .. 142.2 ns)
262 |                      1.000 R²   (0.999 R² .. 1.000 R²)
263 | mean                 142.0 ns   (141.2 ns .. 143.1 ns)
264 | std dev              3.433 ns   (2.370 ns .. 5.616 ns)
265 | variance introduced by outliers: 35% (moderately inflated)
266 | 
267 | benchmarking "aacbb"/Cofree:IntMap
268 | time                 130.9 ns   (130.2 ns .. 132.0 ns)
269 |                      0.998 R²   (0.996 R² .. 1.000 R²)
270 | mean                 134.0 ns   (132.1 ns .. 139.1 ns)
271 | std dev              9.891 ns   (4.344 ns .. 18.19 ns)
272 | variance introduced by outliers: 84% (severely inflated)
273 | 
274 | benchmarking "aaabbb"/RegExp:Function
275 | time                 4.607 μs   (4.564 μs .. 4.672 μs)
276 |                      0.999 R²   (0.998 R² .. 0.999 R²)
277 | mean                 4.749 μs   (4.693 μs .. 4.833 μs)
278 | std dev              233.4 ns   (173.3 ns .. 356.6 ns)
279 | variance introduced by outliers: 62% (severely inflated)
280 | 
281 | benchmarking "aaabbb"/Cofree:Map
282 | time                 162.7 ns   (159.1 ns .. 167.3 ns)
283 |                      0.997 R²   (0.995 R² .. 1.000 R²)
284 | mean                 160.3 ns   (159.2 ns .. 162.6 ns)
285 | std dev              5.071 ns   (3.128 ns .. 9.120 ns)
286 | variance introduced by outliers: 48% (moderately inflated)
287 | 
288 | benchmarking "aaabbb"/Cofree:IntMap
289 | time                 153.2 ns   (152.3 ns .. 154.4 ns)
290 |                      1.000 R²   (1.000 R² .. 1.000 R²)
291 | mean                 154.0 ns   (153.4 ns .. 155.3 ns)
292 | std dev              2.928 ns   (1.756 ns .. 4.910 ns)
293 | variance introduced by outliers: 25% (moderately inflated)
294 | 
295 | benchmarking "aaabbbb"/RegExp:Function
296 | time                 4.641 μs   (4.613 μs .. 4.669 μs)
297 |                      1.000 R²   (0.999 R² .. 1.000 R²)
298 | mean                 4.629 μs   (4.600 μs .. 4.662 μs)
299 | std dev              105.4 ns   (74.39 ns .. 141.8 ns)
300 | variance introduced by outliers: 25% (moderately inflated)
301 | 
302 | benchmarking "aaabbbb"/Cofree:Map
303 | time                 198.1 ns   (195.8 ns .. 201.3 ns)
304 |                      0.997 R²   (0.993 R² .. 0.999 R²)
305 | mean                 200.6 ns   (197.0 ns .. 206.6 ns)
306 | std dev              15.21 ns   (9.733 ns .. 22.60 ns)
307 | variance introduced by outliers: 84% (severely inflated)
308 | 
309 | benchmarking "aaabbbb"/Cofree:IntMap
310 | time                 179.6 ns   (178.4 ns .. 180.6 ns)
311 |                      1.000 R²   (1.000 R² .. 1.000 R²)
312 | mean                 178.7 ns   (177.8 ns .. 179.7 ns)
313 | std dev              3.004 ns   (2.585 ns .. 3.640 ns)
314 | variance introduced by outliers: 20% (moderately inflated)
315 | 
316 | benchmarking 30/RegExp:Function
317 | time                 302.3 μs   (296.2 μs .. 309.6 μs)
318 |                      0.997 R²   (0.996 R² .. 0.998 R²)
319 | mean                 309.5 μs   (304.8 μs .. 314.3 μs)
320 | std dev              17.20 μs   (13.48 μs .. 23.80 μs)
321 | variance introduced by outliers: 52% (severely inflated)
322 | 
323 | benchmarking 30/Cofree:Map
324 | time                 1.629 μs   (1.617 μs .. 1.642 μs)
325 |                      0.999 R²   (0.999 R² .. 1.000 R²)
326 | mean                 1.631 μs   (1.619 μs .. 1.651 μs)
327 | std dev              49.53 ns   (33.25 ns .. 93.67 ns)
328 | variance introduced by outliers: 40% (moderately inflated)
329 | 
330 | benchmarking 30/Cofree:IntMap
331 | time                 1.525 μs   (1.515 μs .. 1.535 μs)
332 |                      0.999 R²   (0.999 R² .. 1.000 R²)
333 | mean                 1.532 μs   (1.519 μs .. 1.574 μs)
334 | std dev              66.66 ns   (23.25 ns .. 153.9 ns)
335 | variance introduced by outliers: 58% (severely inflated)
336 | 
337 | ```
338 | 


--------------------------------------------------------------------------------
/test/Benchmarks/without isOne.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # Group "star-a"
  3 | 
  4 | ```
  5 | 
  6 | benchmarking "a"/RegExp:Function
  7 | time                 454.1 ns   (448.0 ns .. 460.9 ns)
  8 |                      0.997 R²   (0.994 R² .. 0.999 R²)
  9 | mean                 456.9 ns   (449.7 ns .. 471.4 ns)
 10 | std dev              34.15 ns   (23.58 ns .. 47.80 ns)
 11 | variance introduced by outliers: 83% (severely inflated)
 12 | 
 13 | benchmarking "a"/RegExp:Map
 14 | time                 353.3 ns   (350.3 ns .. 356.1 ns)
 15 |                      1.000 R²   (0.999 R² .. 1.000 R²)
 16 | mean                 351.3 ns   (349.4 ns .. 353.6 ns)
 17 | std dev              6.978 ns   (5.924 ns .. 8.273 ns)
 18 | variance introduced by outliers: 25% (moderately inflated)
 19 | 
 20 | benchmarking "a"/RegExp:IntMap
 21 | time                 358.6 ns   (356.6 ns .. 361.2 ns)
 22 |                      1.000 R²   (1.000 R² .. 1.000 R²)
 23 | mean                 358.5 ns   (356.5 ns .. 361.2 ns)
 24 | std dev              7.612 ns   (5.861 ns .. 10.54 ns)
 25 | variance introduced by outliers: 28% (moderately inflated)
 26 | 
 27 | benchmarking "a"/Cofree:Map
 28 | time                 27.29 ns   (27.15 ns .. 27.49 ns)
 29 |                      1.000 R²   (0.999 R² .. 1.000 R²)
 30 | mean                 27.54 ns   (27.39 ns .. 27.77 ns)
 31 | std dev              600.3 ps   (451.5 ps .. 920.6 ps)
 32 | variance introduced by outliers: 33% (moderately inflated)
 33 | 
 34 | benchmarking "a"/Cofree:IntMap
 35 | time                 24.81 ns   (24.70 ns .. 24.94 ns)
 36 |                      1.000 R²   (1.000 R² .. 1.000 R²)
 37 | mean                 24.98 ns   (24.83 ns .. 25.23 ns)
 38 | std dev              660.6 ps   (419.2 ps .. 1.017 ns)
 39 | variance introduced by outliers: 42% (moderately inflated)
 40 | 
 41 | benchmarking a50/RegExp:Function
 42 | time                 17.68 μs   (17.62 μs .. 17.77 μs)
 43 |                      1.000 R²   (0.999 R² .. 1.000 R²)
 44 | mean                 17.68 μs   (17.60 μs .. 17.79 μs)
 45 | std dev              293.4 ns   (230.1 ns .. 439.2 ns)
 46 | variance introduced by outliers: 13% (moderately inflated)
 47 | 
 48 | benchmarking a50/RegExp:Map
 49 | time                 13.28 μs   (13.07 μs .. 13.45 μs)
 50 |                      0.999 R²   (0.999 R² .. 1.000 R²)
 51 | mean                 13.12 μs   (13.05 μs .. 13.21 μs)
 52 | std dev              276.5 ns   (215.0 ns .. 341.7 ns)
 53 | variance introduced by outliers: 20% (moderately inflated)
 54 | 
 55 | benchmarking a50/RegExp:IntMap
 56 | time                 13.46 μs   (13.30 μs .. 13.62 μs)
 57 |                      0.999 R²   (0.998 R² .. 0.999 R²)
 58 | mean                 13.45 μs   (13.32 μs .. 13.63 μs)
 59 | std dev              512.3 ns   (407.4 ns .. 713.1 ns)
 60 | variance introduced by outliers: 46% (moderately inflated)
 61 | 
 62 | benchmarking a50/Cofree:Map
 63 | time                 1.329 μs   (1.307 μs .. 1.354 μs)
 64 |                      0.997 R²   (0.993 R² .. 0.999 R²)
 65 | mean                 1.329 μs   (1.307 μs .. 1.380 μs)
 66 | std dev              102.3 ns   (45.58 ns .. 165.0 ns)
 67 | variance introduced by outliers: 82% (severely inflated)
 68 | 
 69 | benchmarking a50/Cofree:IntMap
 70 | time                 1.283 μs   (1.261 μs .. 1.310 μs)
 71 |                      0.997 R²   (0.993 R² .. 0.999 R²)
 72 | mean                 1.306 μs   (1.287 μs .. 1.329 μs)
 73 | std dev              66.60 ns   (49.77 ns .. 109.4 ns)
 74 | variance introduced by outliers: 67% (severely inflated)
 75 | 
 76 | ```
 77 | 
 78 | # Group "letters"
 79 | 
 80 | ```
 81 | 
 82 | benchmarking asdf-50/RegExp:Function
 83 | time                 6.376 ms   (6.185 ms .. 6.566 ms)
 84 |                      0.993 R²   (0.988 R² .. 0.996 R²)
 85 | mean                 5.862 ms   (5.736 ms .. 5.997 ms)
 86 | std dev              393.9 μs   (337.9 μs .. 453.9 μs)
 87 | variance introduced by outliers: 40% (moderately inflated)
 88 | 
 89 | benchmarking asdf-50/RegExp:Map
 90 | time                 5.798 ms   (5.565 ms .. 5.993 ms)
 91 |                      0.990 R²   (0.984 R² .. 0.995 R²)
 92 | mean                 5.633 ms   (5.523 ms .. 5.750 ms)
 93 | std dev              346.6 μs   (292.9 μs .. 425.5 μs)
 94 | variance introduced by outliers: 37% (moderately inflated)
 95 | 
 96 | benchmarking asdf-50/RegExp:IntMap
 97 | time                 5.312 ms   (5.113 ms .. 5.477 ms)
 98 |                      0.992 R²   (0.987 R² .. 0.996 R²)
 99 | mean                 5.344 ms   (5.246 ms .. 5.416 ms)
100 | std dev              268.5 μs   (215.4 μs .. 335.5 μs)
101 | variance introduced by outliers: 29% (moderately inflated)
102 | 
103 | benchmarking asdf-50/Cofree:Map
104 | time                 13.78 μs   (13.49 μs .. 14.19 μs)
105 |                      0.993 R²   (0.987 R² .. 0.998 R²)
106 | mean                 14.37 μs   (14.12 μs .. 14.69 μs)
107 | std dev              972.3 ns   (776.8 ns .. 1.445 μs)
108 | variance introduced by outliers: 73% (severely inflated)
109 | 
110 | benchmarking asdf-50/Cofree:IntMap
111 | time                 12.71 μs   (12.48 μs .. 12.93 μs)
112 |                      0.998 R²   (0.997 R² .. 0.999 R²)
113 | mean                 12.74 μs   (12.60 μs .. 12.88 μs)
114 | std dev              500.0 ns   (415.7 ns .. 619.9 ns)
115 | variance introduced by outliers: 48% (moderately inflated)
116 | 
117 | ```
118 | 
119 | # Group "dyck"
120 | 
121 | ```
122 | 
123 | benchmarking "[]"/RegExp:Function
124 | time                 1.803 μs   (1.786 μs .. 1.820 μs)
125 |                      0.999 R²   (0.998 R² .. 0.999 R²)
126 | mean                 1.836 μs   (1.811 μs .. 1.868 μs)
127 | std dev              93.84 ns   (69.44 ns .. 143.5 ns)
128 | variance introduced by outliers: 66% (severely inflated)
129 | 
130 | benchmarking "[]"/Cofree:Map
131 | time                 55.05 ns   (54.82 ns .. 55.41 ns)
132 |                      0.999 R²   (0.998 R² .. 1.000 R²)
133 | mean                 55.95 ns   (55.42 ns .. 57.22 ns)
134 | std dev              2.558 ns   (1.370 ns .. 4.745 ns)
135 | variance introduced by outliers: 68% (severely inflated)
136 | 
137 | benchmarking "[]"/Cofree:IntMap
138 | time                 52.76 ns   (52.16 ns .. 53.53 ns)
139 |                      0.999 R²   (0.998 R² .. 1.000 R²)
140 | mean                 52.60 ns   (52.24 ns .. 53.11 ns)
141 | std dev              1.534 ns   (1.090 ns .. 2.243 ns)
142 | variance introduced by outliers: 46% (moderately inflated)
143 | 
144 | benchmarking "[[]]"/RegExp:Function
145 | time                 4.375 μs   (4.334 μs .. 4.421 μs)
146 |                      0.999 R²   (0.999 R² .. 0.999 R²)
147 | mean                 4.408 μs   (4.354 μs .. 4.466 μs)
148 | std dev              189.6 ns   (154.5 ns .. 232.5 ns)
149 | variance introduced by outliers: 55% (severely inflated)
150 | 
151 | benchmarking "[[]]"/Cofree:Map
152 | time                 111.0 ns   (109.8 ns .. 112.5 ns)
153 |                      0.999 R²   (0.998 R² .. 0.999 R²)
154 | mean                 110.8 ns   (109.8 ns .. 112.2 ns)
155 | std dev              3.976 ns   (3.116 ns .. 5.386 ns)
156 | variance introduced by outliers: 55% (severely inflated)
157 | 
158 | benchmarking "[[]]"/Cofree:IntMap
159 | time                 109.1 ns   (107.5 ns .. 111.0 ns)
160 |                      0.998 R²   (0.997 R² .. 0.999 R²)
161 | mean                 109.2 ns   (107.9 ns .. 110.8 ns)
162 | std dev              4.858 ns   (3.944 ns .. 6.490 ns)
163 | variance introduced by outliers: 66% (severely inflated)
164 | 
165 | benchmarking "[[a]]"/RegExp:Function
166 | time                 3.263 μs   (3.204 μs .. 3.342 μs)
167 |                      0.996 R²   (0.992 R² .. 0.999 R²)
168 | mean                 3.332 μs   (3.286 μs .. 3.417 μs)
169 | std dev              201.5 ns   (135.5 ns .. 307.8 ns)
170 | variance introduced by outliers: 72% (severely inflated)
171 | 
172 | benchmarking "[[a]]"/Cofree:Map
173 | time                 149.5 ns   (148.2 ns .. 150.9 ns)
174 |                      0.999 R²   (0.999 R² .. 1.000 R²)
175 | mean                 152.1 ns   (150.5 ns .. 155.5 ns)
176 | std dev              7.544 ns   (4.165 ns .. 13.81 ns)
177 | variance introduced by outliers: 70% (severely inflated)
178 | 
179 | benchmarking "[[a]]"/Cofree:IntMap
180 | time                 128.6 ns   (128.0 ns .. 129.3 ns)
181 |                      1.000 R²   (1.000 R² .. 1.000 R²)
182 | mean                 128.5 ns   (127.8 ns .. 129.3 ns)
183 | std dev              2.776 ns   (2.103 ns .. 4.072 ns)
184 | variance introduced by outliers: 30% (moderately inflated)
185 | 
186 | benchmarking "[[]][]"/RegExp:Function
187 | time                 6.002 μs   (5.929 μs .. 6.076 μs)
188 |                      0.999 R²   (0.998 R² .. 1.000 R²)
189 | mean                 5.946 μs   (5.905 μs .. 5.995 μs)
190 | std dev              154.5 ns   (122.9 ns .. 206.8 ns)
191 | variance introduced by outliers: 30% (moderately inflated)
192 | 
193 | benchmarking "[[]][]"/Cofree:Map
194 | time                 160.0 ns   (159.5 ns .. 160.4 ns)
195 |                      1.000 R²   (1.000 R² .. 1.000 R²)
196 | mean                 160.0 ns   (159.5 ns .. 160.5 ns)
197 | std dev              1.831 ns   (1.554 ns .. 2.244 ns)
198 | variance introduced by outliers: 11% (moderately inflated)
199 | 
200 | benchmarking "[[]][]"/Cofree:IntMap
201 | time                 154.9 ns   (154.0 ns .. 155.8 ns)
202 |                      1.000 R²   (1.000 R² .. 1.000 R²)
203 | mean                 155.3 ns   (154.5 ns .. 156.4 ns)
204 | std dev              3.068 ns   (2.517 ns .. 3.785 ns)
205 | variance introduced by outliers: 26% (moderately inflated)
206 | 
207 | ```
208 | 
209 | # Group "anbn"
210 | 
211 | ```
212 | 
213 | benchmarking ""/RegExp:Function
214 | time                 26.72 ns   (26.59 ns .. 26.89 ns)
215 |                      1.000 R²   (1.000 R² .. 1.000 R²)
216 | mean                 26.81 ns   (26.66 ns .. 27.02 ns)
217 | std dev              590.0 ps   (451.8 ps .. 790.3 ps)
218 | variance introduced by outliers: 33% (moderately inflated)
219 | 
220 | benchmarking ""/Cofree:Map
221 | time                 7.965 ns   (7.933 ns .. 8.004 ns)
222 |                      1.000 R²   (1.000 R² .. 1.000 R²)
223 | mean                 7.959 ns   (7.929 ns .. 7.999 ns)
224 | std dev              116.0 ps   (82.25 ps .. 164.0 ps)
225 | variance introduced by outliers: 19% (moderately inflated)
226 | 
227 | benchmarking ""/Cofree:IntMap
228 | time                 7.957 ns   (7.927 ns .. 7.990 ns)
229 |                      1.000 R²   (1.000 R² .. 1.000 R²)
230 | mean                 7.957 ns   (7.928 ns .. 7.988 ns)
231 | std dev              99.33 ps   (77.89 ps .. 144.3 ps)
232 | variance introduced by outliers: 15% (moderately inflated)
233 | 
234 | benchmarking "ab"/RegExp:Function
235 | time                 1.582 μs   (1.567 μs .. 1.598 μs)
236 |                      0.999 R²   (0.999 R² .. 1.000 R²)
237 | mean                 1.586 μs   (1.576 μs .. 1.599 μs)
238 | std dev              37.65 ns   (31.25 ns .. 48.00 ns)
239 | variance introduced by outliers: 29% (moderately inflated)
240 | 
241 | benchmarking "ab"/Cofree:Map
242 | time                 54.44 ns   (53.99 ns .. 54.98 ns)
243 |                      1.000 R²   (0.999 R² .. 1.000 R²)
244 | mean                 54.26 ns   (53.98 ns .. 54.76 ns)
245 | std dev              1.198 ns   (825.2 ps .. 1.869 ns)
246 | variance introduced by outliers: 33% (moderately inflated)
247 | 
248 | benchmarking "ab"/Cofree:IntMap
249 | time                 51.55 ns   (51.26 ns .. 51.84 ns)
250 |                      1.000 R²   (1.000 R² .. 1.000 R²)
251 | mean                 51.37 ns   (51.18 ns .. 51.66 ns)
252 | std dev              784.8 ps   (594.7 ps .. 1.036 ns)
253 | variance introduced by outliers: 19% (moderately inflated)
254 | 
255 | benchmarking "aacbb"/RegExp:Function
256 | time                 2.923 μs   (2.898 μs .. 2.950 μs)
257 |                      0.999 R²   (0.998 R² .. 0.999 R²)
258 | mean                 3.013 μs   (2.963 μs .. 3.105 μs)
259 | std dev              216.8 ns   (126.2 ns .. 348.2 ns)
260 | variance introduced by outliers: 79% (severely inflated)
261 | 
262 | benchmarking "aacbb"/Cofree:Map
263 | time                 146.7 ns   (146.0 ns .. 147.7 ns)
264 |                      1.000 R²   (0.999 R² .. 1.000 R²)
265 | mean                 147.0 ns   (146.2 ns .. 148.2 ns)
266 | std dev              3.158 ns   (2.604 ns .. 3.906 ns)
267 | variance introduced by outliers: 30% (moderately inflated)
268 | 
269 | benchmarking "aacbb"/Cofree:IntMap
270 | time                 129.9 ns   (129.3 ns .. 130.8 ns)
271 |                      1.000 R²   (1.000 R² .. 1.000 R²)
272 | mean                 130.3 ns   (129.8 ns .. 131.3 ns)
273 | std dev              2.427 ns   (1.766 ns .. 3.724 ns)
274 | variance introduced by outliers: 24% (moderately inflated)
275 | 
276 | benchmarking "aaabbb"/RegExp:Function
277 | time                 5.046 μs   (4.987 μs .. 5.101 μs)
278 |                      0.999 R²   (0.999 R² .. 1.000 R²)
279 | mean                 5.062 μs   (5.018 μs .. 5.115 μs)
280 | std dev              164.2 ns   (127.5 ns .. 201.8 ns)
281 | variance introduced by outliers: 41% (moderately inflated)
282 | 
283 | benchmarking "aaabbb"/Cofree:Map
284 | time                 165.2 ns   (163.2 ns .. 167.3 ns)
285 |                      0.999 R²   (0.999 R² .. 1.000 R²)
286 | mean                 163.6 ns   (162.6 ns .. 165.0 ns)
287 | std dev              3.739 ns   (2.844 ns .. 5.088 ns)
288 | variance introduced by outliers: 32% (moderately inflated)
289 | 
290 | benchmarking "aaabbb"/Cofree:IntMap
291 | time                 153.8 ns   (152.4 ns .. 154.9 ns)
292 |                      1.000 R²   (0.999 R² .. 1.000 R²)
293 | mean                 154.3 ns   (153.2 ns .. 155.3 ns)
294 | std dev              3.608 ns   (3.067 ns .. 4.458 ns)
295 | variance introduced by outliers: 33% (moderately inflated)
296 | 
297 | benchmarking "aaabbbb"/RegExp:Function
298 | time                 5.050 μs   (5.014 μs .. 5.100 μs)
299 |                      0.999 R²   (0.999 R² .. 1.000 R²)
300 | mean                 5.088 μs   (5.043 μs .. 5.151 μs)
301 | std dev              172.8 ns   (125.7 ns .. 267.3 ns)
302 | variance introduced by outliers: 43% (moderately inflated)
303 | 
304 | benchmarking "aaabbbb"/Cofree:Map
305 | time                 209.2 ns   (204.0 ns .. 212.8 ns)
306 |                      0.998 R²   (0.997 R² .. 0.998 R²)
307 | mean                 205.0 ns   (202.7 ns .. 207.5 ns)
308 | std dev              8.619 ns   (7.269 ns .. 9.856 ns)
309 | variance introduced by outliers: 62% (severely inflated)
310 | 
311 | benchmarking "aaabbbb"/Cofree:IntMap
312 | time                 180.5 ns   (179.3 ns .. 182.0 ns)
313 |                      1.000 R²   (0.999 R² .. 1.000 R²)
314 | mean                 180.5 ns   (179.5 ns .. 181.9 ns)
315 | std dev              4.205 ns   (3.242 ns .. 5.345 ns)
316 | variance introduced by outliers: 33% (moderately inflated)
317 | 
318 | benchmarking 30/RegExp:Function
319 | time                 297.1 μs   (294.8 μs .. 299.8 μs)
320 |                      0.999 R²   (0.999 R² .. 1.000 R²)
321 | mean                 300.4 μs   (298.6 μs .. 302.5 μs)
322 | std dev              6.556 μs   (5.417 μs .. 8.177 μs)
323 | variance introduced by outliers: 14% (moderately inflated)
324 | 
325 | benchmarking 30/Cofree:Map
326 | time                 1.661 μs   (1.640 μs .. 1.679 μs)
327 |                      0.999 R²   (0.999 R² .. 1.000 R²)
328 | mean                 1.641 μs   (1.629 μs .. 1.655 μs)
329 | std dev              44.37 ns   (32.67 ns .. 64.99 ns)
330 | variance introduced by outliers: 35% (moderately inflated)
331 | 
332 | benchmarking 30/Cofree:IntMap
333 | time                 1.538 μs   (1.528 μs .. 1.551 μs)
334 |                      1.000 R²   (0.999 R² .. 1.000 R²)
335 | mean                 1.540 μs   (1.530 μs .. 1.558 μs)
336 | std dev              42.94 ns   (27.63 ns .. 71.28 ns)
337 | variance introduced by outliers: 37% (moderately inflated)
338 | 
339 | ```
340 | 


--------------------------------------------------------------------------------
/src/Semi.hs:
--------------------------------------------------------------------------------
  1 | {-# OPTIONS_GHC -Wno-unused-imports #-} -- TEMP
  2 | 
  3 | {-# OPTIONS_GHC -Wno-orphans #-}
  4 | 
  5 | -- | Commutative monoids, semirings, and semimodules
  6 | 
  7 | module Semi where
  8 | 
  9 | import Prelude hiding (sum,product,(^))
 10 | 
 11 | import Control.Applicative (liftA2)
 12 | import Control.Arrow (first)
 13 | import GHC.Natural (Natural)
 14 | import Data.Functor.Identity (Identity(..))
 15 | import Data.Maybe (fromMaybe,isNothing)
 16 | import GHC.Exts (Coercible,coerce,Constraint)
 17 | import Control.DeepSeq (NFData)
 18 | import GHC.TypeLits (KnownNat)
 19 | 
 20 | import Data.Map (Map)
 21 | import qualified Data.Map as M
 22 | import Data.IntMap.Lazy (IntMap)
 23 | import qualified Data.IntMap.Lazy as IntMap
 24 | import Data.Set (Set)
 25 | import qualified Data.Set as S
 26 | import Data.MemoTrie
 27 | 
 28 | import Data.Finite (Finite)
 29 | import Data.Vector.Sized (Vector)
 30 | import qualified Data.Vector.Sized as V
 31 | 
 32 | import Misc
 33 | -- import Constrained
 34 | 
 35 | #include "GenInstances.inc"
 36 | 
 37 | {--------------------------------------------------------------------
 38 |     Classes
 39 | --------------------------------------------------------------------}
 40 | 
 41 | -- Keyed functors. Useful for memoization in RegExp and maybe elsewhere.
 42 | type family Key (h :: * -> *) :: *
 43 | 
 44 | -- Taken from Data.Key
 45 | class Functor f => Keyed f where
 46 |   mapWithKey :: (Key f -> a -> b) -> f a -> f b
 47 | 
 48 | -- Inspired by Indexable from Data.Key in the keys library.
 49 | class Indexable a b x | x -> a b where
 50 |   infixl 9 !
 51 |   (!) :: x -> a -> b
 52 | 
 53 | -- | Enumerate non-zero values
 54 | class Indexable a b x => Listable a b x where toList :: x -> [(a,b)]
 55 | 
 56 | -- TODO: rename toList to "nonZeros" or some such. Introduce early in the paper,
 57 | -- and use for representations like Map.
 58 | 
 59 | -- TODO: Laws: (!) must be natural; h must presere additivity, and !)| is an
 60 | -- Additive homomorphism.
 61 | 
 62 | -- | Commutative monoid
 63 | class Additive b where
 64 |   zero :: b
 65 |   (<+>) :: b -> b -> b
 66 |   infixr 6 <+>
 67 | 
 68 | class {- Additive b => -} DetectableZero b where
 69 |   isZero :: b -> Bool
 70 | 
 71 | class Additive b => Semiring b where
 72 |   one :: b
 73 |   (<.>) :: b -> b -> b
 74 |   infixr 7 <.>
 75 | 
 76 | fromBool :: Semiring b => Bool -> b
 77 | fromBool False = zero
 78 | fromBool True  = one
 79 | 
 80 | class {- Semiring b => -} DetectableOne b where
 81 |   isOne :: b -> Bool
 82 | 
 83 | class Semiring b => StarSemiring b  where
 84 |   star :: b -> b
 85 |   plus :: b -> b
 86 |   star x = one <+> plus x
 87 |   plus x = x <.> star x
 88 |   {-# INLINE star #-}
 89 |   {-# INLINE plus #-}
 90 | 
 91 | class {- (Semiring s, Additive b) => -} LeftSemimodule s b | b -> s where
 92 |   scale :: s -> b -> b
 93 |   -- default scale :: (Semiring b, s ~ b) => s -> b -> b  -- experimental
 94 |   -- scale = (<.>)
 95 |   default scale :: (Semiring s, Functor f, b ~ f s) => s -> b -> b  -- experimental
 96 |   scale s = fmap (s <.>)
 97 |   {-# INLINE scale #-}
 98 | 
 99 | -- TODO: Add the Semiring superclass, and remove redundant constraints
100 | -- elsewhere. Search for occurrences of LeftSemimodule.
101 | 
102 | instance Semiring s => LeftSemimodule s (a -> s) where s `scale` f = \ a -> s <.> f a
103 | 
104 | -- | 'scale' optimized for zero or one scalar
105 | infixr 7 .>
106 | (.>) :: (Additive b, LeftSemimodule s b, DetectableZero s, DetectableOne s) => s -> b -> b
107 | s .> b | isZero s  = zero
108 |        | isOne  s  = b
109 |        | otherwise = s `scale` b
110 | {-# INLINE (.>) #-}
111 | 
112 | #if 0
113 | type Additive1     = Con1 Additive
114 | type Semiring1     = Con1 Semiring
115 | type StarSemiring1 = Con1 StarSemiring
116 | 
117 | type DetectableZero1 = Con1 DetectableZero
118 | type DetectableOne1  = Con1 DetectableOne
119 | #endif
120 | 
121 | {--------------------------------------------------------------------
122 |     Singletons
123 | --------------------------------------------------------------------}
124 | 
125 | class Indexable a b x => HasSingle a b x where
126 |   infixr 2 +->, *->
127 |   (+->) :: a -> b -> x
128 |   (*->) :: Set a -> b -> x
129 |   a +-> b = S.singleton a *-> b
130 |   default (*->) :: Additive x => Set a -> b -> x
131 |   as *-> b = sum [a +-> b | a <- S.elems as]
132 |   {-# MINIMAL (+->) | (*->) #-}
133 | 
134 | single :: (HasSingle a b x, Semiring b) => a -> x
135 | single a = a +-> one
136 | 
137 | singles :: (HasSingle a b x, Semiring b) => Set a -> x
138 | singles as = as *-> one
139 | 
140 | value :: (HasSingle a b x, Monoid a) => b -> x
141 | value b = mempty +-> b
142 | 
143 | -- instance HasSingle a Bool [a] where
144 | --   a +-> b = if b then [a] else []
145 | 
146 | -- instance HasSingle a Bool (Set a) where
147 | --   a +-> b = if b then S.singleton a else S.empty
148 | 
149 | {--------------------------------------------------------------------
150 |     Instances
151 | --------------------------------------------------------------------}
152 | 
153 | instance Additive Bool where
154 |   zero = False
155 |   (<+>) = (||)
156 | 
157 | instance DetectableZero Bool where isZero = not
158 | 
159 | instance Semiring Bool where
160 |   one = True
161 |   (<.>) = (&&)
162 | 
163 | instance DetectableOne Bool where isOne = id
164 | 
165 | instance StarSemiring Bool where star = const True
166 | 
167 | Nums(Integer)
168 | Nums(Natural)
169 | Nums(Int)
170 | Nums(Float)
171 | Nums(Double)
172 | Nums(Rational)
173 | -- etc
174 | 
175 | -- Experiment
176 | instance StarSemiring (Natural) where
177 |   star 0 = 1
178 |   star _ = 987654321
179 | 
180 | -- ApplSemi((->) a)  -- use monoid semiring instead for now
181 | -- etc
182 | 
183 | -- ApplMono([])
184 | -- ApplMono(Set)
185 | -- etc
186 | 
187 | type instance Key ((->) a) = a
188 | 
189 | instance Keyed ((->) a) where
190 |   mapWithKey h f = \ a -> h a (f a)
191 | 
192 | instance Indexable a b (a -> b) where
193 |   f ! k = f k
194 | 
195 | instance (Eq a, Additive b) => HasSingle a b (a -> b) where
196 |   a +-> b = \ a' -> if a' == a then b else zero
197 |   -- as *-> b = \ a' -> if a' `S.member` as then b else zero
198 | 
199 | instance Additive b => Additive (a -> b) where
200 |   zero = pure zero
201 |   (<+>) = liftA2 (<+>)
202 |   -- zero = \ _ -> zero
203 |   -- f <+> g  = \ a -> f a <+> g a
204 | 
205 | type D01 b = (DetectableZero b, DetectableOne b)
206 | 
207 | -- Short-circuiting <.>
208 | infixr 7 <..>
209 | (<..>) :: (Semiring b, DetectableZero b, DetectableOne b) => b -> b -> b
210 | x <..> y | isZero x  = zero
211 |          | isOne  x  = y
212 |          | otherwise = x <.> y
213 | 
214 | instance (Monoid a, Eq a, Splittable a, Semiring b, D01 b) => Semiring (a -> b) where
215 |   one = single mempty
216 |   f <.> g = \ w -> sum [f u <..> g v | (u,v) <- splits w]
217 | 
218 | instance (Monoid a, Eq a, Splittable a, Semiring b, D01 b) => StarSemiring (a -> b)
219 | 
220 | instance Ord a => Additive (Set a) where
221 |   zero = S.empty
222 |   (<+>) = S.union
223 | 
224 | instance (Ord a, Additive b) => Additive (Map a b) where
225 |   zero = M.empty
226 |   (<+>) = M.unionWith (<+>)
227 | 
228 | instance DetectableZero (Map a b) where isZero = M.null
229 | 
230 | #if 1
231 | -- Experiment for Poly
232 | instance DetectableOne (Map a b) where isOne = const False
233 | #else
234 | instance (Monoid a, Ord a, Additive b, DetectableOne b) => DetectableOne (Map a b) where
235 |   isOne m = isOne (m!mempty) && isZero (M.delete mempty m)
236 | #endif
237 | 
238 | -- FunctorSemimodule(Map a)
239 | 
240 | instance Semiring b => LeftSemimodule b (Map a b) where scale b = fmap (b <.>)
241 | 
242 | instance (Ord a, Monoid a, Semiring b) => Semiring (Map a b) where
243 |   one = mempty +-> one
244 |   p <.> q = sum [u <> v +-> p!u <.> q!v | u <- M.keys p, v <- M.keys q]
245 | 
246 | type instance Key (Map a) = a
247 | 
248 | instance Keyed (Map a) where mapWithKey = M.mapWithKey
249 | 
250 | instance (Ord a, Additive b) => Indexable a b (Map a b) where
251 |   m ! a = M.findWithDefault zero a m
252 | 
253 | instance (Ord a, Additive b) => Listable a b (Map a b) where toList = M.toList
254 | 
255 | instance (Ord a, Additive b) => HasSingle a b (Map a b) where
256 |   (+->) = M.singleton
257 |   as *-> b = M.fromList [(a,b) | a <- S.elems as]
258 | 
259 | -- type instance Key (Set a) = a
260 | 
261 | -- instance Keyed (Set a) where mapWithKey = M.mapWithKey
262 | 
263 | instance Ord a => Indexable a Bool (Set a) where
264 |   m ! a = a `S.member` m
265 | 
266 | instance Ord a => Listable a Bool (Set a) where
267 |   toList = fmap (,True) . S.toList
268 | 
269 | instance Ord a => HasSingle a Bool (Set a) where
270 |   _ +-> False = S.empty
271 |   a +-> True  = S.singleton a
272 |   _  *-> False = S.empty
273 |   as *-> True  = as
274 | 
275 | -- newtype Identity b = Identity b
276 | 
277 | type instance Key Identity = ()
278 | 
279 | instance Keyed Identity where mapWithKey h = fmap (h ())
280 | 
281 | instance Indexable () b (Identity b) where Identity a ! () = a
282 | 
283 | instance Listable () b (Identity b) where toList (Identity b) = [((),b)]
284 | 
285 | instance Additive b => HasSingle () b (Identity b) where
286 |   -- us *-> b = if S.null us then zero else Identity b
287 |   () +-> b = Identity b
288 | 
289 | deriving instance Additive b         => Additive (Identity b)
290 | deriving instance DetectableZero b   => DetectableZero (Identity b)
291 | deriving instance DetectableOne b    => DetectableOne (Identity b)
292 | deriving instance LeftSemimodule s b => LeftSemimodule s (Identity b)
293 | deriving instance Semiring b         => Semiring (Identity b)
294 | 
295 | -- newtype Id b = Id b deriving 
296 | --  (Functor, Additive, DetectableZero, DetectableOne, LeftSemimodule s, Semiring)
297 | 
298 | type instance Key Maybe = ()
299 | 
300 | instance Keyed Maybe where mapWithKey h = fmap (h ())
301 | 
302 | instance Additive b => Indexable () b (Maybe b) where
303 |   -- Nothing ! () = zero
304 |   -- Just b  ! () = b
305 |   mb ! () = fromMaybe zero mb
306 | 
307 | instance Additive b => Listable () b (Maybe b) where
308 |   toList Nothing  = []
309 |   toList (Just b) = [((),b)]
310 | 
311 | instance (DetectableZero b, Additive b) => HasSingle () b (Maybe b) where 
312 |   () +-> b | isZero b  = Nothing
313 |            | otherwise = Just b
314 | 
315 | instance Additive b => Additive (Maybe b) where
316 |   zero = Nothing
317 |   Nothing <+> v = v
318 |   u <+> Nothing = u
319 |   Just a <+> Just b = Just (a <+> b)
320 | 
321 | instance Semiring b => Semiring (Maybe b) where
322 |   one = Just one
323 |   Nothing <.> _ = zero
324 |   _ <.> Nothing = zero
325 |   Just a <.> Just b = Just (a <.> b)
326 | 
327 | instance DetectableZero (Maybe b) where
328 |   isZero = isNothing
329 |   -- We could also check Just b for b==0
330 | 
331 | instance DetectableOne b => DetectableOne (Maybe b) where
332 |   isOne (Just b) | isOne b = True
333 |   isOne _ = False
334 | 
335 | {--------------------------------------------------------------------
336 |     Sum and product monoids
337 | --------------------------------------------------------------------}
338 | 
339 | -- semiring-num defines 'add' and 'mul' via foldl', but I think I want foldr
340 | -- instead.
341 | 
342 | newtype Sum a = Sum a deriving
343 |   (Eq,Ord,NFData,Num,Real,Integral,Additive,DetectableZero,Semiring,DetectableOne,StarSemiring)
344 | 
345 | instance Show a => Show (Sum a) where
346 |   showsPrec d (Sum a) = showsPrec d a
347 | 
348 | getSum :: Sum a -> a
349 | getSum (Sum a) = a
350 | 
351 | instance Additive a => Semigroup (Sum a) where
352 |   Sum a <> Sum b = Sum (a <+> b)
353 | 
354 | instance Additive a => Monoid (Sum a) where
355 |   mempty = Sum zero
356 | 
357 | sum :: (Foldable f, Additive a) => f a -> a
358 | sum = getSum . foldMap Sum
359 | 
360 | missing :: String -> String -> z
361 | missing ty op = error ("No " ++ op ++ " method for " ++ ty)
362 | 
363 | noSum :: String -> z
364 | noSum = missing "Sum" "(*)"
365 | 
366 | instance Enum a => Enum (Sum a) where
367 |   toEnum = Sum . toEnum
368 |   fromEnum = fromEnum . getSum
369 | 
370 | newtype Product a = Product a deriving (Eq,Show)
371 | 
372 | getProduct :: Product a -> a
373 | getProduct (Product a) = a
374 | 
375 | instance Semiring a => Semigroup (Product a) where
376 |   Product a <> Product b = Product (a <.> b)
377 | 
378 | instance Semiring a => Monoid (Product a) where
379 |   mempty = Product one
380 | 
381 | product :: (Foldable f, Semiring a) => f a -> a
382 | product = getProduct . foldMap Product
383 | 
384 | type N = Sum Natural
385 | 
386 | type Z = Sum Integer
387 | 
388 | -- Experiment
389 | class HasPow x n b | x n -> b where
390 |   infixr 8 ^#
391 |   (^#) :: x -> n -> b
392 | 
393 | instance Semiring b => HasPow b N b where
394 |   (^#) = (^)
395 | 
396 | instance (HasPow x n b, Additive x, Ord k, Additive n, Semiring b)
397 |       => HasPow (Map k x) (Map k n) b where
398 |   bs ^# ns = product [b ^# ns!i | (i,b) <- toList bs]
399 | 
400 | 
401 | infixr 8 ^
402 | (^), pow :: Semiring a => a -> N -> a
403 | pow = (^)  -- useful for the paper
404 | #if 0
405 | a ^ n = product (replicate (fromIntegral n) a)
406 | #else
407 | -- Adapted from (^) in the GHC Prelude
408 | x0 ^ y0 | y0 == 0   = one
409 |         | otherwise = f x0 y0
410 |     where -- f : x0 ^ y0 = x ^ y
411 |           f x y | even y    = f (x <.> x) (y `quot` 2)
412 |                 | y == 1    = x
413 |                 | otherwise = g (x <.> x) (y `quot` 2) x         -- See Note [Half of y - 1]
414 |           -- g : x0 ^ y0 = (x ^ y) <.> z
415 |           g x y z | even y = g (x <.> x) (y `quot` 2) z
416 |                   | y == 1 = x <.> z
417 |                   | otherwise = g (x <.> x) (y `quot` 2) (x <.> z) -- See Note [Half of y - 1]
418 | 
419 | {- Note [Half of y - 1]
420 |    ~~~~~~~~~~~~~~~~~~~~~
421 |    Since y is guaranteed to be odd and positive here,
422 |    half of y - 1 can be computed as y `quot` 2, optimising subtraction away.
423 | -}
424 | 
425 | #endif
426 | 
427 | instance Splittable N where
428 |   isEmpty n = n == 0
429 |   splits n = [(i, n-i) | i <- [0 .. n]]
430 | 
431 | {--------------------------------------------------------------------
432 |     Misc
433 | --------------------------------------------------------------------}
434 | 
435 | instance DetectableZero () where isZero () = True
436 | instance DetectableOne  () where isOne  () = True
437 | 
438 | type instance Key [] = N
439 | 
440 | instance Keyed [] where mapWithKey h = zipWith h [0 ..]
441 | 
442 | instance Additive b => Indexable N b [b] where
443 |   [] ! _ = zero
444 |   (b : _ ) ! 0 = b
445 |   (_ : bs) ! n = bs ! (n-1)
446 | 
447 | instance Additive b => Listable N b [b] where
448 |   toList = zip [0 ..]
449 | 
450 | instance (Additive b, DetectableZero b) => HasSingle N b [b] where
451 |   0 +-> b | isZero b = zero
452 |           | otherwise = [b] 
453 |   n +-> b = zero : (n-1 +-> b)
454 |   -- TODO: efficient *->
455 | 
456 | -- Should I really be using up lists here instead of saving them?
457 | -- Maybe wrap in a newtype
458 | 
459 | -- [a] as a denotation of a <-- N
460 | 
461 | -- See Cofree
462 | instance Additive b => Additive [b] where
463 |   zero = []
464 |   [] <+> bs = bs
465 |   as <+> [] = as
466 |   (a : as) <+> (b : bs) = a <+> b : as <+> bs
467 | 
468 | instance (Semiring b, D01 b) => Semiring [b] where
469 |   one = one : zero
470 |   [] <.> _ = []  -- 0 * q == 0
471 |   (a : dp) <.> q = a .> q <+> (zero : dp <.> q)
472 | 
473 | instance DetectableZero [b] where isZero = null
474 | 
475 | instance DetectableOne b => DetectableOne [b] where
476 |   isOne [b] = isOne b
477 |   isOne _ = False
478 | 
479 | instance Semiring b => LeftSemimodule b [b] where
480 |   scale s = fmap (s <.>)
481 | 
482 | instance (Semiring b, Num b, DetectableZero b, DetectableOne b)
483 |       => Num [b] where
484 |   fromInteger = value . fromInteger
485 |   negate = ((-1) .>)
486 |   (+) = (<+>)
487 |   (*) = (<.>)
488 |   abs = fmap abs
489 |   signum = fmap signum
490 | 
491 | 
492 | -- TODO: generalize to other Integral or Enum types and add to Semi
493 | newtype CharMap b = CharMap (IntMap b) deriving Functor
494 | 
495 | type instance Key CharMap = Char
496 | 
497 | instance Keyed CharMap where
498 |   mapWithKey h (CharMap m) = CharMap (IntMap.mapWithKey (h . toEnum) m)
499 | 
500 | instance Additive b => Indexable Char b (CharMap b) where
501 |   CharMap m ! a = IntMap.findWithDefault zero (fromEnum a) m
502 | 
503 | instance Additive b => Listable Char b (CharMap b) where
504 |   toList (CharMap m) = first toEnum <$> IntMap.toList m
505 | 
506 | instance Additive b => HasSingle Char b (CharMap b) where
507 |   a +-> b = CharMap (IntMap.singleton (fromEnum a) b)
508 |   -- TODO: efficient *->
509 | 
510 | instance Additive b => Additive (CharMap b) where
511 |   zero = CharMap IntMap.empty
512 |   CharMap u <+> CharMap v = CharMap (IntMap.unionWith (<+>) u v)
513 | 
514 | instance Additive b => DetectableZero (CharMap b) where isZero (CharMap m) = IntMap.null m
515 | 
516 | type instance Key ((:->:) a) = a
517 | 
518 | instance HasTrie a => Indexable a b (a :->: b) where
519 |   (!) = untrie
520 | 
521 | instance (HasTrie a, Eq a, Additive b) => HasSingle a b (a :->: b) where
522 |   a +-> b = trie (a +-> b)
523 |   -- TODO: efficient *->
524 | 
525 | instance (HasTrie a, Additive b) => Additive (a :->: b) where
526 |   zero = trie zero
527 |   u <+> v = trie (untrie u <+> untrie v)
528 | 
529 | -- False negatives are okay. Only used for optimization
530 | instance (HasTrie a, Additive b) => DetectableZero (a :->: b) where isZero _ = False
531 | -- instance Additive b => DetectableOne  (a :->: b) where isOne  _ = False
532 | 
533 | 
534 | {--------------------------------------------------------------------
535 |     Vectors
536 | --------------------------------------------------------------------}
537 | 
538 | -- instance Splittable (Finite n) where
539 | --   splits n = 
540 | 
541 | type Fin n = Sum (Finite n)
542 | 
543 | -- Maybe Monoid won't cut it here, since Finite n isn't properly additive.
544 | 
545 | type instance Key (Vector n) = Fin n
546 | 
547 | instance Indexable (Fin n) b (Vector n b) where
548 |   v ! Sum i = v `V.index` i
549 | 
550 | instance (KnownNat n, DetectableZero b) => Listable (Fin n) b (Vector n b) where
551 |   toList v = filter (not . isZero . snd) ([0 ..] `zip` V.toList v)
552 | 
553 | instance (KnownNat n, Additive b) => HasSingle (Fin n) b (Vector n b) where
554 |   Sum i +-> b = V.generate (\ j -> if j == i then b else zero)
555 |   -- TODO: efficient *->
556 | 
557 | instance (KnownNat n, Additive b) => Additive (Vector n b) where
558 |   zero = pure zero
559 |   (<+>) = liftA2 (<+>)
560 | 
561 | instance DetectableZero b => DetectableZero (Vector n b) where
562 |   isZero = all isZero
563 | 
564 | -- instance (KnownNat n, Semiring b) => Semiring (Vector n b) where
565 | --   one = 0 +-> one
566 | 
567 | -- instance DetectableOne b => DetectableOne (Vector n b) where
568 | --   isOne = 
569 | 


--------------------------------------------------------------------------------
/todo.md:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: To-do items for the paper and code
  3 | substMap: [("<+>","+"),("<.>","∗"),(".>","·"),("+->","↦"),("<--","←")]
  4 | ...
  5 | 
  6 | ## To-do items for the paper and code
  7 | 
  8 | ### Major
  9 | 
 10 | *   Conclusions and Future work
 11 | *   Why homomorphisms matter.
 12 | *   Comonad proof
 13 | *   See all of the issues flagged in the non-anonymous draft.
 14 | *   Regular expressions:
 15 |     *   Optimizations. [done]
 16 |     *   Try partially applying `(!)`.
 17 |     *   Try implementing `(!)` by converting to `Cofree` (via `regexp`), and mention how it went.
 18 |         I expect the speed to be nearly indistinguishable from starting with `Cofree`.
 19 | *   Maybe include the implementation in the supplementary material.
 20 |     *   Tweak `Makefile` to generate a tarball with the extended paper and source.
 21 |     *   Remove author & copyright info from package.yaml, say via sed.
 22 |     *   Check that the README instructions work in isolation.
 23 |         Include how to get `stack`.
 24 |     *   I got all the way to a working tar ball but then noticed that it contained my user name in file ownership.
 25 |         There's a way around, but I'm not confident that the committee members will be able to access the files, including the extended pdf.
 26 |         Drop it.
 27 | *   Polynomials:
 28 |     *   Univariate and multivariate. [done]
 29 |     *   Power series: represent via list instead of stream. [done]
 30 |     *   Prettier negative coefficients. [done]
 31 |     *   Correctness theorem and proof for polynomial integration and differentiation.
 32 |         See commented-out start.
 33 | *   Image convolution example
 34 | *   Reduce page count in the ICFP submission. Some strategies:
 35 |     *   Remove polynomials (and image convolution)
 36 |     *   Move more proofs to the appendix.
 37 |     *   Drop `Splittable` instance examples.
 38 |     *   Drop the explicit enumeration of homomorphism equations in Lemma 10 ({atEps [c] -> b}) and maybe elsewhere.
 39 |     *   Drop the re-formulation of `deriv` between Lemmas 11 and 12.
 40 |     *   Rewrite some of my explicit lemmas and theorems as an in-line statement with a proof link.
 41 |         Keep the more formal style in the extended version.
 42 |         Make this change when I don't need to refer to a lemma/theorem other than from another proof.
 43 |     *   Remove homomorphism examples.
 44 |     *   Drop the whole section "Calculating Instances from Homomorphisms".
 45 |     *   More side-by-side code or derivations.
 46 |     *   Drop some intermediate steps, e.g.,
 47 |         *   `poly2`
 48 |         *   The first generalized `poly`
 49 |     *   Lots of small changes for succinctness or at least fewer lines.
 50 |     *   Drop details for finite maps, leaving a short verbal description and pointer to extended version.
 51 |     *   Tighten some spacing environments.
 52 | *   Move finite maps inside of another section.
 53 | 
 54 | 
 55 | ### Misc
 56 | 
 57 | *   Reference to Brent's monoid paper.
 58 | *   More references to [*Fun with Semirings: A functional pearl on the abuse of linear algebra*].
 59 | *   Probabilistic computation.
 60 | *   Reconsider title after writing the introduction and related work sections.
 61 | *   Wording improvements:
 62 |     *   "*Uncovering* the method's essence"
 63 | *   Sets are to languages as binary relations are to what?
 64 |     Currying gets us to `String -> String -> Bool` (for sets).
 65 |     Is there anything interesting and/or useful here?
 66 |     Note that a (generalized) trie from pairs of strings is going to look like a curried trie anyway.
 67 | *   Super-memoization.
 68 |     See notes from 2018-12-02 and 2018-12-05.
 69 | *   Possibly some items in the Future Work section.
 70 | *   Consider unhiding page numbers in bib.bib.
 71 | *   A variant of `pow` that's syntactically quieter than having to write (and generate) "`(wrap (pow ...))`".
 72 |     Instead, just "`(ppow ...)`", with `ppow`'s `%format` directive dropping the parentheses.
 73 |     Better: use "`@@`" as a fake infix to help parsing.
 74 |     Still better: Parenthesize second argument of summations (and similar), so lhs2tex can parse correctly.
 75 |     Drop those parens in formatting, and sometimes add a second set.
 76 | *   Maybe drop `HasSingle` and define `single` via `fmap` and `one`.
 77 | *   Derive a semiring for lists based on a homomorphism from `[a]` to `a <-- Nat`.
 78 | *   From recognition to "parsing", i.e., generating rich representations.
 79 | *   Learn about tree grammars, and see how they fit with parsing with derivatives.
 80 | *   Try using `TMap` from [total-map](https://github.com/conal/total-map) in place of `Map` from containers, including the `Applicative` and `Monad` instances.
 81 |     I may have to add some operations.
 82 | *   Consider the following intuition.
 83 |     The result of `fmap h (F f)` is moving all of the values in `f` to new locations according to `h`, summing all values that get moved to the same place.
 84 |     Similarly for `liftA2`.
 85 |     Graphics has a similar issue!
 86 |     Spatial transformations may be one-to-many, especially if non-projective.
 87 | *   [Free semirings](http://hackage.haskell.org/package/semiring-num-1.6.0.1/docs/Data-Semiring-Free.html)
 88 | *   Generalize lists to end with a value, where the usual lists end with unit.
 89 |     Then monadic bind generalizes appending, i.e., `(++) = (>>)`.
 90 |     Now generalize from the unary/sequential nature of lists, and we get monadic bind as "substitution".
 91 |     I think the generalization here is to the free monad induced by some functor.
 92 |     For `[a]`, that functor is `(a :*)`.
 93 | *   Polynomials and perhaps integer multiplication.
 94 | *   Implement convolution (really `liftA2 add`) on statically sized arrays.
 95 |     I think I'll want an interface for enumerating all values of a type.
 96 |     The finite-typelits library has useful operations:
 97 |     ``` haskell
 98 |     finites :: KnownNat n => [Finite n]
 99 |     add :: Finite n -> Finite m -> Finite (n + m)
100 |     ```
101 | 
102 |     Try 1D and 2D examples.
103 | *   Use semiring-num instead of my own classes for `Semiring`, `ClosedSemiring`, and `DetectableZero`. 
104 |     Consider uses for the other instances defined there.
105 | *   Define `sum` and `product` at their first use.
106 |     Consider renaming as in semiring-num.
107 | *   Tropical semirings?
108 | *   Generalize to other tries.
109 | *   Counting and closure and infinity.
110 | *   Probability distribution semiring:
111 |     *   Uses?
112 |     *   Closure
113 | *   Understand and apply [this MathOverflow answer](https://math.stackexchange.com/a/1651127) on convolution and Day convolution.
114 | *   Track down references for convolution over functions with arbitrary *monoidal* domains.
115 |     Also "convolutional algebras".
116 |     See, e.g., "[group algebra](https://www.encyclopediaofmath.org/index.php/Group_algebra)" and "[convolution algebra](https://ncatlab.org/nlab/show/convolution+algebra)".
117 | 
118 | ### Some references
119 | 
120 | *   Brzozowski derivatives:
121 |     *   [Wikipedia page on the *Brzozowski derivative*](https://en.wikipedia.org/wiki/Brzozowski_derivative)
122 |     *   [*Derivatives of Regular Expressions*]
123 |     *   [*Rewriting Extended Regular Expressions*]
124 |     *   [*Some Properties of Brzozowski Derivatives of Regular Expressions*]
125 |     *   [*Derivatives of Regular Expressions and an Application*]
126 |     *   [*Regular-expression derivatives reexamined*]
127 |     *   [*Derivatives for Enhanced Regular Expressions*]
128 |     *   [*Regular expression sub-matching using partial derivatives*]
129 |     *   [*Testing Extended Regular Language Membership Incrementally by Rewriting*]
130 |     *   [*Yacc is dead*]
131 |     *   [*Yacc is dead: An update*]
132 | *   Semirings:
133 |     *   [*Some Recent Applications of Semiring Theory*]
134 |     *   [*Fun with Semirings: A functional pearl on the abuse of linear algebra*]
135 |     *   [*Linear Algebra Over Semirings*]
136 | *   On semirings and parsing:
137 |     *   [*Regenerate: a language generator for extended regular expressions*]
138 |     *   [*A Play on Regular Expressions*] (also targets arbitrary semirings)
139 |     *   [*Product Rules and Distributive Laws*] ("We give a categorical perspective on various product rules, including Brzozowski’s product rule ...")
140 |     *   [*Parsing Inside-Out*]
141 |     *   [*Semiring Parsing*]
142 |     *   [*Algebraic Foundation of Statistical Parsing: Semiring Parsing*]
143 |     *   [*Goodman: Semiring Parsing*]
144 | *   Comonad references:
145 |     *   [Monads from Comonads](http://comonad.com/reader/2011/monads-from-comonads/) (blog post by Ed Kmett, 2011)
146 |     *   [*Monads from Comonads, Comonads from Monads*]
147 |     *   [*Should I use a Monad or a Comonad?*]
148 |     *   [*Moore for Less*] ([reddit discussion](https://www.reddit.com/r/haskell/comments/37lqxf/edward_kmett_moore_for_less/))
149 |     *   [*Radix Sort, Trie Trees, And Maps From Representable Functors*]
150 | *   Polynomials and power series:
151 |     *   [*In Praise of Sequence (Co-)Algebra and its implementation in Haskell*]
152 |     *   Doug McIlroy's work
153 | *   Other:
154 |     *   [*Convolution as a Unifying Concept: Applications in Separation Logic, Interval Calculi, and Concurrency*]
155 |     *   [*Introduction to weighted automata theory*]
156 |     *   [*Weighted Automata*]
157 |     *   [*Quantifiers on languages and codensity monads*]
158 |     *   [What is a coalgebra intuitively?](https://mathoverflow.net/questions/76509/what-is-a-coalgebra-intuitively)
159 |     *   [*Bases as coalgebras*](https://arxiv.org/pdf/1309.0844.pdf)
160 |     *   [*The Dual of Substitution is Redecoration*]
161 |     *   [*Higher Dimensional Trees, Algebraically*]
162 | 
163 | ## Did
164 | 
165 | *   Spell out the list semiring, or at least `(<.>)`
166 | *   Check spacing with summations etc in the ICFP version.
167 |     I made need some version-specific tweaks to `%format` definitions.
168 | *   Benchmarking results, including recursively defined languages.
169 | *   Acknowledge semiring-num, at least in related work.
170 | *   Related work. Lots of references below.
171 | *   Introduction, including contributions.
172 | *   The comonad connection (in progress).
173 | *   Arithmetic on infinite series.
174 | *   Lots of needed references (`\\needcite`).
175 | *   Benchmarking at least of regexp vs tries.
176 |     Last I checked, my regexp implementations fail to terminate on $a^n n^bn$.
177 | *   Convolution commutes with currying and with uncurrying.
178 |     So do addition, zero, and one, but `single` is different.
179 |     See 2019-01-28 notes.
180 | *   Generalize decomposition-based semirings, and streamline the special cases.
181 | *   Consider again removing the types that behave like `a -> b` instead of `b <-- a`.
182 |     I don't know how I could then get the functor/applicative/monad instances I want, since the type parameters would be in the wrong order.
183 |     One possibility is to save that discussion and the parallel types for the extended/journal version of the paper.
184 |     Idea: Gloss over the distinction through most of the paper, giving just the convolution-style semiring instances, noting that we've lost compelling semiring instances and promising that we'll get them back later.
185 |     Later, when I want FAM instances, I'll have to introduce `newtype` wrappers in order to swap type parameters.
186 |     Or hopefully just a single `newtype` adapter for a variety of types homomorphic to `b <-- a`.
187 |     At that point, we can restore the pointwise semiring instances for |a -> b|, inherit them via deriving, and add `Semiring` and `StarSemiring`.
188 | *   Use the conference style files.
189 | *   Factor `Additive` out of `Semiring`, and drop the `Monoid` requirement for `Additive (b :<-- a)` and `Applicative ((:<--) a)`.
190 |     I'll have to return to defining my own classes. Tip my hat to semiring-num.
191 | *   Remove a bunch of unused code, first moving to `Other`.
192 | *   Summation (etc) notation style:
193 |     *   Consider moving the condition to the body where it becomes multiplication:
194 | 
195 |         ``` haskell
196 |           liftA2 h (Pred f) (Pred g) = Pred (\ w -> or (u,v) (f u && g v && h u v == w))
197 |         ``` 
198 | 
199 |         Note that `w` appears only in the third conjunct.
200 |     *   Generalizing from predicates to flipped functions, 
201 | 
202 |         ``` haskell
203 |           liftA2 h (F f) (F g) = F (\ w -> sum (u,v) (f u * g v * single (h u v) w))
204 |         ```
205 | 
206 |     *   Simplify further
207 | 
208 |         ``` haskell
209 |           liftA2 h (F f) (F g)
210 |         = F (\ w -> sum (u,v) (f u * g v * single (h u v) w))
211 |         = sum (u,v) (\ w -> f u * g v * single (h u v) w)
212 |         = sum (u,v) ((f u * g v) .> (\ w -> single (h u v) w))
213 |         = sum (u,v) ((f u * g v) .> single (h u v))
214 |         ```
215 | 
216 |     *   Then simplify the "standard FunApp" proof.
217 |     *   Introduce notation "`a +-> b = b .> single a`".
218 |         Then `liftA2 h (F f) (F g) = sum (u,v) (f u * g v +-> h u v)`. 
219 | 
220 | 
221 | [*Differentiation of higher-order types*]: http://conal.net/blog/posts/differentiation-of-higher-order-types/ "blog post"
222 | 
223 | [*Another angle on zippers*]: http://conal.net/blog/posts/another-angle-on-zippers/ "blog post"
224 | 
225 | [*Derivatives of Regular Expressions*]: http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.98.4378 "paper by Janusz Brzozowski (1964"
226 | 
227 | [*Rewriting Extended Regular Expressions*]: http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.54.7335 "paper by by Valentin Antimirov and Peter Mosses (1994)"
228 | 
229 | [*Some Properties of Brzozowski Derivatives of Regular Expressions*]: https://arxiv.org/abs/1407.5902 "paper by N.Murugesan and O.V.Shanmuga Sundaram (2014)"
230 | 
231 | [*Derivatives of Regular Expressions and an Application*]: https://www.researchgate.net/publication/221350925_Derivatives_of_Regular_Expressions_and_an_Application "paper by Haiming ChenHaiming ChenSheng and YuSheng Yu (2012)"
232 | 
233 | [*Regular-expression derivatives reexamined*]: http://www.ccs.neu.edu/home/turon/re-deriv.pdf "paper by Scott Owens, John Reppy, and Aaron Turon (2009)"
234 | 
235 | [*Derivatives for Enhanced Regular Expressions*]: https://arxiv.org/abs/1605.00817 "paper by Peter Thiemann (2016)"
236 | 
237 | [*Regular expression sub-matching using partial derivatives*]: http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.605.6379 "by Martin Sulzmann, Kenny Zhuo, and Ming Lu (2012)"
238 | 
239 | [*Testing Extended Regular Language Membership Incrementally by Rewriting*]: https://www.semanticscholar.org/paper/Testing-Extended-Regular-Language-Membership-by-Rosu-Viswanathan/90fdd53e5b29705967c3cc21c050463ded1b514d "paper by Grigore Rosu and Mahesh Viswanathan (2003)"
240 | 
241 | [*Yacc is dead*]: https://arxiv.org/abs/1010.5023 "paper by Matthew Might and David Darais (2010)"
242 | 
243 | [*Yacc is dead: An update*]: http://matt.might.net/articles/parsing-with-derivatives/ "blog post by Matt Might ()"
244 | 
245 | [*DFINITY Technology Overview Series, Consensus System*]: https://arxiv.org/abs/1805.04548 "paper by Timo Hanke, Mahnush Movahedi, and Dominic Williams (2018)"
246 | 
247 | [*Compiling to categories*]: http://conal.net/papers/compiling-to-categories "paper by Conal Elliott (2017)"
248 | 
249 | [*The simple essence of automatic differentiation*]: http://conal.net/papers/essence-of-ad "paper by Conal Elliott (2018)"
250 | 
251 | [*Generic functional parallel algorithms: Scan and FFT*]: http://conal.net/papers/generic-parallel-functional "paper by Conal Elliott (2017)"
252 | 
253 | [*Parsing Inside-Out*]: https://arxiv.org/abs/cmp-lg/9805007 "doctoral thesis by Joshua Goodman (1998)"
254 | 
255 | [*Semiring Parsing*]: http://www.aclweb.org/anthology/J99-4004 "Paper by Joshua Goodman (1999)"
256 | 
257 | [*Algebraic Foundation of Statistical Parsing: Semiring Parsing*]: https://pdfs.semanticscholar.org/7938/c9b56de70eb641d946353b9c0fa255f48b4f.pdf "PhD Depth Examination Report by Yudong Liu (2004)"
258 | 
259 | [*Goodman: Semiring Parsing*]: https://kevinbinz.com/2014/11/16/goodman-semiring-parsing/ "blog post by Kevin Binz (2014)"
260 | 
261 | [*Monads from Comonads, Comonads from Monads*]: http://www.cs.ox.ac.uk/ralf.hinze/WG2.8/28/slides/Comonad.pdf "paper by Ralf Hinze (2011?)"
262 | 
263 | [*Should I use a Monad or a Comonad?*]: https://www.semanticscholar.org/paper/Should-I-use-a-Monad-or-a-Comonad-%3F-Orchard/bec621991dd3e8b1e118fdd0a1d7b5819471a964 "paper by Dominic A. Orchard (2012)"
264 | 
265 | [`Data.Functor.Day`]: https://hackage.haskell.org/package/kan-extensions/docs/Data-Functor-Day.html "Haskell source module by Edward Kmett (2014--2016)"
266 | 
267 | [*Comonads and Day Convolution*]: https://blog.functorial.com/posts/2016-08-08-Comonad-And-Day-Convolution.html "blog post by Phil Freeman (2016)"
268 | 
269 | [*Quantifiers on languages and codensity monads*]: https://arxiv.org/abs/1702.08841 "paper by Mai Gehrke, Daniela Petrisan, and Luca Reggio
270 | (2018)"
271 | 
272 | [*Towards a Coalgebraic Chomsky Hierarchy*]: https://arxiv.org/abs/1401.5277v3 "paper by Sergey Goncharov, Stefan Milius, Alexandra Silva (2014)"
273 | 
274 | [*The monads of classical algebra are seldom weakly cartesian*]: https://link.springer.com/article/10.1007/s40062-013-0063-2 "paper by Maria Manuel Clementino, Dirk Hofmann, and George Janelidze (2013)"
275 | 
276 | [*Fun with Semirings: A functional pearl on the abuse of linear algebra*]: http://stedolan.net/research/semirings.pdf "paper by Stephen Dolan (2013)"
277 | 
278 | [*The Dual of Substitution is Redecoration*]: http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.16.9369 "paper by Tarmo Uustalu and Varmo Vene (2002)"
279 | 
280 | [*Higher Dimensional Trees, Algebraically*]: https://www.semanticscholar.org/paper/Higher-Dimensional-Trees%2C-Algebraically-Ghani-Kurz/3b650d5ee01ac35c721c5bd51e4859aebe3880e2 "paper by Neil Ghani, Alexander Kurz (2007)"
281 | 
282 | [*Convolution as a Unifying Concept: Applications in Separation Logic, Interval Calculi, and Concurrency*]: https://dl.acm.org/citation.cfm?id=2874773 "paper by Brijesh Dongol, Ian J. Hayes, and Georg Struth (2016)"
283 | 
284 | [*Introduction to weighted automata theory*]: https://perso.telecom-paristech.fr/jsaka/CONF/Files/IWAT.pdf "Presentation by Jacques Sakarovitch (year?)"
285 | 
286 | [*Weighted Automata*]: https://www.semanticscholar.org/paper/Weighted-automata-Droste-Kuske/f8d5980f23814e1d69a737f1f178d4a2565f7c22 "paper by Manfred Droste and Dietrich Kuske (2012)"
287 | 
288 | [*Some Recent Applications of Semiring Theory*]: http://moonstone.math.ncku.edu.tw/Conferences/BeidarConference/golantalk.pdf "Paper by Jonathan S. Golan (2005)"
289 | 
290 | [*Linear Algebra Over Semirings*]: https://www.research.manchester.ac.uk/portal/files/54562608/FULL_TEXT.PDF "PhD thesis by David Wilding (2014)"
291 | 
292 | [*Regenerate: a language generator for extended regular expressions*]: https://hal.archives-ouvertes.fr/hal-01788827/document "Paper by Gabriel Radanne and Peter Thiemann (2018)"
293 | 
294 | [*A Play on Regular Expressions*]: https://sebfisch.github.io/haskell-regexp/regexp-play.pdf "Paper by Sebastian Fischer, Frank Huch, and
295 | Thomas Wilke (2010)"
296 | 
297 | [*Product Rules and Distributive Laws*]: https://www.mimuw.edu.pl/~jwinter/articles/cmcs16.pdf "Paper by Joost Winter"
298 | 
299 | [*Moore for Less*]: https://www.schoolofhaskell.com/user/edwardk/moore/for-less "Blog post by Edward Kmett (2015)"
300 | 
301 | [*Radix Sort, Trie Trees, And Maps From Representable Functors*]: https://chrispenner.ca/posts/representable-discrimination "blog post by Chris Penner (2017)"
302 | 
303 | [*In Praise of Sequence (Co-)Algebra and its implementation in Haskell*]: https://arxiv.org/abs/1812.05878 "paper by Kieran Clenaghan (2018)"
304 | 


--------------------------------------------------------------------------------
/was1.lhs:
--------------------------------------------------------------------------------
  1 | % -*- latex -*-
  2 | 
  3 | %% While editing/previewing, use 12pt and tiny margin.
  4 | \documentclass[twoside]{article}  % fleqn, 
  5 | \usepackage[margin=0.12in]{geometry}  % 0.12in, 0.9in, 1in
  6 | 
  7 | %% \geometry{paperheight=9in} % for 2-up on big monitor, larger text
  8 | 
  9 | %% \documentclass{article}
 10 | %% \usepackage{fullpage}
 11 | 
 12 | %% Temporary title
 13 | \def\tit{Efficient parsing and generalized convolution}
 14 | 
 15 | \author{Conal Elliott}
 16 | 
 17 | \usepackage{fancyhdr}
 18 | \pagestyle{fancy}
 19 | \fancyhf{}
 20 | \fancyhead[LO]\tit
 21 | \fancyhead[RE]{%
 22 | Conal Elliott
 23 | }
 24 | \fancyhead[LE,RO]{\thepage}
 25 | % \rnc{\headrulewidth}{0pt}
 26 | 
 27 | %include polycode.fmt
 28 | %include forall.fmt
 29 | %include greek.fmt
 30 | %include formatting.fmt
 31 | 
 32 | \input{macros}
 33 | 
 34 | \calculationcomments
 35 | 
 36 | \usepackage[square]{natbib}
 37 | \bibliographystyle{plainnat}
 38 | 
 39 | %% % author-date form
 40 | %% \usepackage[]{natbib}
 41 | %% \bibpunct();A{},
 42 | \let\cite=\citep
 43 | 
 44 | \title\tit
 45 | \date{Early draft of \today}
 46 | 
 47 | \setlength{\blanklineskip}{2ex} % blank lines in code environment
 48 | 
 49 | \nc\proofLabel[1]{\label{proof:#1}}
 50 | %if short
 51 | \nc\proofRef[1]{proof in \citep[Appendix C]{Elliott-2018-convolution-extended}}
 52 | %else
 53 | \nc\proofRef[1]{Appendix \ref{proof:#1}}
 54 | %endif
 55 | \nc\provedIn[1]{\textnormal{See \proofRef{#1}}}
 56 | 
 57 | \nc\set[1]{\{\,#1\,\}}
 58 | \nc\Pow{\mathcal{P}}
 59 | \nc\mempty{\varepsilon}
 60 | \nc\closure[1]{#1^{\ast}}
 61 | \nc\mappend{\diamond}
 62 | \nc\cat{\mathop{}}
 63 | \nc\single\overline
 64 | \nc\union{\cup}
 65 | \nc\bigunion{\bigcup}
 66 | \nc\has[1]{\mathop{\delta_{#1}}}
 67 | \nc\derivOp{\mathcal{D}}
 68 | \nc\derivsOp{\derivOp^{\ast}}
 69 | \nc\deriv[1]{\mathop{\derivOp_{#1}}}
 70 | \nc\derivs[1]{\mathop{\derivsOp_{#1}}}
 71 | %% \nc\conv{\ast}
 72 | \nc\conv{*}
 73 | \nc\zero{\mathbf{0}}
 74 | \nc\one{\mathbf{1}}
 75 | %% \nc\zero{0}
 76 | %% \nc\one{1}
 77 | \nc\hasEps{\mathop{\Varid{has}_{\mempty}}}
 78 | \nc\id{\mathop{\Varid{id}}}
 79 | \nc\ite[3]{\text{if}\ #1\ \text{then}\ #2\ \text{else}\ #3}
 80 | \newcommand\iteB[3]{
 81 | \begin{cases}
 82 | #2 & \text{if $#1$} \\
 83 | #3 & \text{otherwise}
 84 | \end{cases}}
 85 | \nc\lis{\mathop{\Varid{list}}}
 86 | \nc\liftA{\mathop{\Varid{liftA}}}
 87 | \nc\cons{\mathit{:}}
 88 | 
 89 | \DeclareMathOperator{\true}{true}
 90 | \DeclareMathOperator{\false}{false}
 91 | 
 92 | \begin{document}
 93 | 
 94 | \maketitle
 95 | 
 96 | %% \begin{abstract}
 97 | %% ...
 98 | %% \end{abstract}
 99 | 
100 | \sectionl{Introduction}
101 | 
102 | %format <+> = "+"
103 | %format <.> = "\conv"
104 | %% %format zero = 0
105 | %% %format one = 1
106 | %format zero = "\zero"
107 | %format one = "\one"
108 | 
109 | \sectionl{Languages}
110 | 
111 | \note{Summarize/review languages as sets, including singleton, union, concatenation, and star/closure.}
112 | 
113 | A \emph{language} is a set of strings, where a string is a sequence of values of some given type (``symbols'' from an ``alphabet'').
114 | Languages are commonly built up via a few simple operations:\notefoot{I may want to parametrize by a monoid instead of an alphabet.}
115 | \begin{itemize}
116 | \item The \emph{empty} language $\emptyset = \set{}$.
117 | \item For a string $s$, the \emph{singleton} language $\single s = \set{s}$.
118 | \item For two languages $P$ and $Q$, the \emph{union} $P \union Q = \set{s \mid s \in P \lor s \in Q}$.
119 | \item For two languages $P$ and $Q$, the element-wise \emph{concatenation} $P \cat Q = \set{p \mappend q \mid p \in P \land q \in Q}$, where ``$\mappend$'' denotes string concatenation.
120 | \item For a language $P$, the \emph{closure} $\closure P = \bigunion_{n \ge 0} P^n $, where $P^n$ is $P$ concatenated with itself $n$ times\out{ (and $P^0 = \single{\mempty}$)}.
121 | \end{itemize}
122 | %if False
123 | \out{Note that $\closure P$ can also be given a recursive specification: $\closure P = \mempty \union P \cat \closure P$.{Syntactically, we'll take concatenation (``$\cat$'') to bind more tightly than union (``$\union$''), so the RHS of this definition is equivalent to $\mempty \union (P \cat \closure P)$}
124 | %endif
125 | These operations suffice to describe all \emph{regular} languages.
126 | The language specifications (language-denoting \emph{expressions} rather than languages themselves) finitely constructed from these operations are called \emph{regular expressions}.
127 | %(If we allow \emph{recursive} definitions, we get \emph{context-free} languages.)
128 | 
129 | Some observations:
130 | \begin{itemize}
131 | \item Union is associative, with $\emptyset$ as its identity.\notefoot{Maybe state equations for this observations and the next two.}
132 | \item Element-Wise concatenation is associative and commutative, with $\single \mempty$ as its identity, where $\mempty$ is the empty string.
133 | \item Left- and right-concatenation distribute over union.
134 | \item The empty language annihilates under concatenation, i.e., $P \cat \emptyset = \emptyset \cat Q = \emptyset$.
135 | \item The $\closure P$ operation satisfies the equation $\closure P = \mempty \union (P \cat \closure P)$.
136 | \end{itemize}
137 | These observations are the defining properties of a \emph{star semiring} (also called a \emph{closed semiring}) \needcite{}.
138 | \figrefdef{classes}{Abstract interface for languages (and later generalizations)}{
139 | \begin{code}
140 | class Semiring a where
141 |   infixl 7 <.>
142 |   infixl 6 <+>
143 |   zero    , one    :: a
144 |   (<+>)   , (<.>)  :: a -> a -> a
145 | 
146 | sum, product :: (Foldable f, Semiring a) => f a -> a
147 | sum      = foldr (<+>)  zero
148 | product  = foldr (<.>)  one
149 | 
150 | class Semiring a => ClosedSemiring a where
151 |   closure :: a -> a
152 |   closure p = q where q = one <+> p <.> q  -- default
153 | 
154 | class HasSingle a x where
155 |   single :: x -> a
156 | 
157 | instance Semiring Bool where
158 |   zero    = False
159 |   one     = True
160 |   (<+>)   = (||)
161 |   (<.>)   = (&&)
162 | 
163 | instance ClosedSemiring Bool where
164 |   closure _ = one
165 | \end{code}
166 | \vspace{-4ex}
167 | } shows Haskell classes for representations of languages (and later generalizations), combining the star semiring vocabulary with an operation for singletons.
168 | The singleton-forming operation must satisfy the following properties:
169 | \begin{align*}
170 | \single \mempty &= \one \\
171 | \single {u \mappend v} &= \single u \conv \single v
172 | \end{align*}
173 | i.e., |single| is a monoid homomorphism (targeting the product monoid).
174 | As an example other than numbers and languages, \figref{classes} includes the closed semiring of boolean values.
175 | 
176 | %format Set = "\mathcal P"
177 | %format emptyset = "\emptyset"
178 | %format single (s) = "\single{"s"}"
179 | %format set (e) = "\set{"e"}"
180 | %format bigunion (lim) (body) = "\bigunion_{" lim "}{" body "}"
181 | %format pow a (b) = a "^{" b "}"
182 | 
183 | Languages fulfill this combined interface as described above and again in the pseudocode in \figrefdef{set as language}{Sets as a ``language''}{
184 | \begin{code}
185 | instance Monoid s => Semiring (Set s) where
186 |   zero  = emptyset
187 |   one   = single mempty
188 |   p  <+>  q = set (s | s `elem` p || s `elem` q)
189 |   p  <.>  q = set (u <> v | u `elem` p && v `elem` q)
190 | 
191 | instance ClosedSemiring (Set s)  -- default |closure|
192 | instance HasSingle (Set s) s where single s = set s
193 | \end{code}
194 | \vspace{-4ex}
195 | %%  closure p = bigunion (n >= 0) (pow p n)
196 | }, which generalizes from strings to any monoid.\footnote{The |Monoid| class defines $\mappend$ and $\mempty$.}
197 | 
198 | \note{On second thought, postpone generalization from lists to monoids later.}
199 | 
200 | \sectionl{Matching}
201 | 
202 | Now consider how we can computably \emph{match} a string for membership in a language described in the vocabulary given in the previous section.
203 | The set-based language definition does not lead directly to effective string matching, because the sets may be infinite.
204 | We can get around this difficulty easily enough by a change of representation.
205 | Sets are isomorphic to membership predicates.
206 | \begin{code}
207 | newtype Pred s = Pred (s -> Bool)
208 | 
209 | setPred :: Set a -> Pred a
210 | setPred as = Pred (\ a -> a `elem` as)
211 | 
212 | predSet :: Pred a -> Set a
213 | predSet (Pred f) = set (a | f a)
214 | \end{code}
215 | It's easy to show that |setPred . predSet == id| and |predSet . setPred == id|.
216 | % See 2018-12-10 notes.
217 | 
218 | This isomorphism suggests a simple specification for effective matching, namely the requirement that |setPred| (or |predSet|) is a \emph{homomorphism} with respect to the vocabulary of \figref{classes}.
219 | (This style of specification has proved useful for a range of problems \cite{Elliott-2009-tcm, Elliott-2018-ad-icfp}.)
220 | \begin{theorem}[\provedIn{theorem:Pred}]\thmlabel{Pred}
221 | Given the definitions in \figrefdef{Pred}{Predicates as a language (specified by homomorphicity of |predSet|/|setPred|)}{
222 | \begin{code}
223 | instance Semiring (Pred [c]) where
224 |   zero = Pred (const False)
225 |   one = Pred null
226 |   Pred f  <+>  Pred g = Pred (\ x -> f x || g x)
227 |   Pred f  <.>  Pred g = Pred (\ x -> or [ f u && g v | (u,v) <- splits x ] )
228 | 
229 | instance ClosedSemiring (Pred [c])  -- default |closure|
230 | 
231 | instance Eq s => HasSingle (Pred s) s where
232 |   single s = Pred (== s)
233 | 
234 | -- All ways of splitting a given list (inverting |(<>)|).
235 | splits :: [a] -> [([a],[a])]
236 | splits []       = [([],[])]
237 | splits (a:as')  = ([],a:as') : [((a:l),r) | (l,r) <- splits as']
238 | \end{code}
239 | \vspace{-4ex}
240 | }, |setPred| and |predSet| are homomorphisms with respect to each instantiated class.
241 | \end{theorem}
242 | 
243 | \note{Try some examples, including |star| and even the classic non-regular language $a^n b^n$ or \href{https://en.wikipedia.org/wiki/Dyck_language}{the Dyck language}.}
244 | 
245 | \sectionl{List of Successes}
246 | 
247 | Although the predicate-based language implementation in \secref{Matching} is effective, it is terribly inefficient, due to the backtracking search involved in the definitions of |(<.>)| and |splits| in \figref{Pred}.
248 | An alternative technique commonly used in monadic parsing involves matching a language against \emph{prefixes} of a given string, yielding a corresponding ``residual'' suffix for each successful match \cite{Wadler-85-successes, HuttonMeijer-98-parsing}.
249 | If there is some way to match an \emph{entire} given string (i.e., if any matching residual is empty), then that string is in the language.
250 | As with |Pred|, we can package this technique in a new data type with an interpretation function that relates it to an already understood language representation:
251 | \begin{code}
252 | newtype Resid c = Resid ([c] -> [[c]])
253 | 
254 | residPred :: Resid c -> Pred [c]
255 | residPred (Resid f) = Pred (any null . f)
256 | \end{code}
257 | \begin{theorem}[\provedIn{theorem:Resid}]\thmlabel{Resid}
258 | Given the definitions in \figrefdef{Resid}{List-of-successes as a language (specified by homomorphicity of |residPred|)}{
259 | \begin{code}
260 | instance Semiring (Resid c) where
261 |   zero = Resid (const [])
262 |   one = Resid (\ s -> [s])
263 |   Resid f  <+>  Resid g = Resid (\ s -> f s <> g s)
264 |   Resid f  <.>  Resid g = Resid (\ s -> [s'' | s' <- f s, s'' <- g s'])
265 | 
266 | -- Equivalent definition in monadic style
267 | instance Semiring (Resid c) where
268 |   zero = Resid (fail "no match")
269 |   one = Resid return
270 |   Resid f  <+>  Resid g = Resid (liftA2 (<>) f g)
271 |   Resid f  <.>  Resid g = Resid (f >=> g)
272 | 
273 | instance ClosedSemiring (Resid c)
274 | 
275 | instance Eq c => HasSingle (Resid c) [c] where
276 |   single x = Resid (maybeToList . stripPrefix x)
277 | 
278 | -- From |Data.List|
279 | stripPrefix :: Eq a => [a] -> [a] -> Maybe [a]
280 | stripPrefix []      ys               = Just ys
281 | stripPrefix (x:xs)  (y:ys) | x == y  = stripPrefix xs ys
282 | stripPrefix _ _                      = Nothing
283 | \end{code}
284 | \vspace{-4ex}
285 | }, |residPred| is a homomorphism with respect to each instantiated class.
286 | \end{theorem}
287 | 
288 | \sectionl{Regular Expressions}
289 | 
290 | %format :<+> = "\mathbin{:\!\!+}"
291 | %format :<.> = "\mathbin{:\!\!\conv}"
292 | 
293 | Regular expressions are widely used as a syntactic description of regular languages and can be represented as an algebraic data type:
294 | \begin{code}
295 | infixl 6  :<+>
296 | infixl 7  :<.>
297 | 
298 | data RegExp c  =  Char c
299 |                |  Zero
300 |                |  One
301 |                |  RegExp c  :<+>  RegExp c
302 |                |  RegExp c  :<.>  RegExp c
303 |                |  Closure (RegExp c)
304 |   deriving Show
305 | \end{code}
306 | We can convert regular expressions  to \emph{any} closed semiring with singletons:
307 | \begin{code}
308 | regexp :: (ClosedSemiring a, HasSingle a [c]) => RegExp c -> a
309 | regexp (Char c)      = single [c]
310 | regexp Zero          = zero
311 | regexp One           = one
312 | regexp (u  :<+>  v)  = regexp u  <+>  regexp v
313 | regexp (u  :<.>  v)  = regexp u  <.>  regexp v
314 | regexp (Closure u)   = closure (regexp u)
315 | \end{code}
316 | 
317 | \begin{theorem}[\provedIn{theorem:RegExp}]\thmlabel{RegExp}
318 | Given the definitions in \figrefdef{RegExp}{Regular expressions as a language (specified by homomorphicity of |regexp|)}{
319 | \begin{code}
320 | instance Eq c => Semiring (RegExp c) where
321 |   zero  = Zero
322 |   one   = One
323 |   Zero <+> b = b
324 |   a <+> Zero = a
325 |   a :<.> c <+> b :<.> d | a == b = a <.> (c <+> d)
326 |   a :<.> c <+> b :<.> d | c == d = (a <+> b) <.> c
327 |   a <+> b = a :<+> b
328 |   Zero <.> _ = Zero
329 |   _ <.> Zero = Zero
330 |   One <.> b = b
331 |   a <.> One = a
332 |   a <.> b = a :<.> b
333 | 
334 | instance ClosedSemiring (RegExp c) where
335 |   closure Zero  = one
336 |   closure e     = Closure e
337 | 
338 | instance HasSingle (RegExp c) [c] where
339 |   single s = foldr (\ c e -> Char c <.> e) one s
340 | \end{code}
341 | \vspace{-4ex}
342 | }, |regexp| is a homomorphism with respect to each instantiated class.\notefoot{The |HasSingle| instance can use any |Foldable| in place of |[]|. 
343 | One could also define balanced folding of sums and products via two monoid wrappers, probably a good idea.}
344 | (Note that the semiring laws allow optimization.\footnote{For idempotent semirings, one could also optimize |closure One| to |one|, but later interpretations will need a different value.})
345 | 
346 | \end{theorem}
347 | 
348 | 
349 | \sectionl{Derivatives of Languages}
350 | 
351 | The language matching algorithms embodied in the |Pred| and |Resid| types (defined in \secreftwo{Matching}{List of Successes}) both perform backtracking.
352 | We can do much better.
353 | A classic solution is to add token lookahead, as in LR, LL, and LALR parsers \needcite{}.
354 | While such parser generators typically have relatively complex implementations and look a fixed number of tokens ahead, Janusz Brzozowski discovered a simple and efficient technique that looks arbitrarily far ahead and eliminates all backtracking.
355 | He applied this technique only to regular languages and expressed it as a transformation that he termed ``derivatives of regular expressions'' \citep{Brzozowski64} \note{additional references}.
356 | Much more recently \citet{Might2010YaccID} extended the technique from regular to \emph{context-free} languages as a transformation on context-free grammars.
357 | 
358 | %format deriv (c) = "\derivOp_{"c"}"
359 | %format derivs (s) = "\derivsOp_{"s"}"
360 | 
361 | %% %format deriv (c) = "\deriv{"c"}"
362 | %% %format derivs (s) = "\derivs{"s"}"
363 | 
364 | \begin{definition} \deflabel{derivs}
365 | The \emph{derivative} $\derivs u p$ of a language $p$ with respect to a string $u$ is the set of $u$-suffixes of strings in $p$, i.e.,
366 | $$ \derivs u p = \set{ v \mid u \mappend v \in p } $$
367 | \end{definition}
368 | \begin{lemma}\lemlabel{derivs-member}
369 | For a string $s$ and language $p$,
370 | $$ s \in p \iff \mempty \in \derivs s p .$$
371 | Proof: immediate from \defref{derivs}.
372 | \end{lemma}
373 | The practical value of \lemref{derivs-member} is that |derivs s p| and |mempty|-containment can be computed easily and efficiently, thanks to \lemrefs{derivs-monoid}{hasEps} below.
374 | \begin{lemma}[\provedIn{lemma:derivs-monoid}]\lemlabel{derivs-monoid}
375 | |derivs| satisfies the following properties:
376 | \begin{align*}
377 | \derivs\mempty p &= p \\
378 | \derivs{u \mappend v} p &= \derivs v (\derivs u p)
379 | \end{align*}
380 | Equivalently,
381 | \begin{align*}
382 | \derivs\mempty &= \id \\
383 | \derivs{u \mappend v} &= \derivs v \circ \derivs u
384 | \end{align*}
385 | where $\id$ is the identity function.\footnote{In other words, |derivs| is a contravariant monoid homomorphism (targeting the monoid of endo-functions).}
386 | \end{lemma}
387 | 
388 | %% %format hasEps = "\hasEps"
389 | %format hasEps = "\Varid{has_{\mempty}}"
390 | 
391 | \begin{definition}
392 | The derivative $\deriv c p$ of a language $p$ with respect to a single value (``symbol'') $c$ is the derivative of $p$ with respect to the singleton sequence $[c]$, i.e. $$\deriv c p = \derivs{[c]} p.$$
393 | Equivalently, $\deriv c p = \set{v \mid c:v \in p}$, where ``$c:v$'' is the result of prepending $c$ to the sequence $v$ (so that $c:v = [c] \mappend v$).
394 | \end{definition}
395 | \begin{lemma}[\citet{Brzozowski64}, Theorem 3.1]\lemlabel{deriv}
396 | The $\derivOp$ operation has the following properties:\footnote{The fourth property can be written more directly as follows:
397 | $$\deriv c (p \conv q) = (\ite{\mempty \in p}{\deriv c q}0) + \deriv c p \conv q $$
398 | or even
399 | $$\deriv c (p \conv q) = \iteB{\mempty \in p}{\deriv c q + \deriv c p \conv q}{\deriv c p \conv q}. $$}
400 | \begin{align*}
401 | \deriv c \zero        &= \zero \\
402 | \deriv c \one         &= \zero \\
403 | \deriv c (p + q)      &= \deriv c p + \deriv c q \\
404 | \deriv c (p \conv q)  &= \delta\, p \conv \deriv c q + \deriv c p \conv q \\
405 | \deriv c (\closure p) &= \deriv c (p \conv \closure p) \\
406 | \end{align*}
407 | where $\delta\,p$ is the set containing just the empty string $\mempty$ if $\mempty \in p$ and otherwise the empty set itself:\notefoot{Consider eliminating |delta| in favor of just using |hasEps|.}
408 | $$ \delta\,p = \iteB{\mempty \in p}{\one}{\zero} . $$
409 | \end{lemma}
410 | All that remains now is to see how to test whether $\mempty \in p$ for a language $p$.
411 | \begin{lemma}[\provedIn{lemma:hasEps}]\lemlabel{hasEps}
412 | The following properties hold:\notefoot{Move this definition to after \defref{derivs} and \lemref{derivs-member}, which motivate |hasEps|.}
413 | $$ \mempty \not\in \zero $$
414 | $$ \mempty \in \one $$
415 | $$ \mempty \in p + q \iff \mempty \in p \lor \mempty \in q $$
416 | $$ \mempty \in p \conv q \iff \mempty \in p \land \mempty \in q $$
417 | $$ \mempty \in \closure p $$
418 | Recalling the nature of the closed-semiring of booleans from \figref{classes}, and defining $\hasEps p = \mempty \in p$, these properties amount to saying that $\hasEps$ is a closed-semiring homomorphism, i.e.,
419 | \begin{align*}
420 | \hasEps \zero        &= \zero \\
421 | \hasEps \one         &= \one \\
422 | \hasEps (p + q)      &= \hasEps p + \hasEps q \\
423 | \hasEps (p \conv q)  &= \hasEps p \conv \hasEps q \\
424 | \hasEps (\closure p) &= \closure{(\hasEps p)}\\
425 | \end{align*}
426 | \end{lemma}
427 | 
428 | %% \noindent
429 | %% With this new vocabulary, \lemrefthree{derivs-member}{derivs-monoid}{deriv} can be interpreted much more broadly than languages as sets of sequences.
430 | 
431 | Let's now package up these new operations as another abstract interface for language representations to implement. \lemrefs{derivs-member}{hasEps} can then be interpreted much more broadly than languages as sets of sequences.
432 | \begin{code}
433 | class HasDecomp a c | a -> c where
434 |   hasEps :: a -> Bool
435 |   deriv :: c -> a -> a
436 | 
437 | instance Eq a => HasDecomp (Set a) a where
438 |   hasEps p = [] `elem` p
439 |   deriv c p = set (cs | c : cs `elem` p)
440 | 
441 | derivs :: HasDecomp a c => [c] -> a -> a
442 | derivs s p = foldl (flip deriv) p s
443 | \end{code}
444 | As with |Semiring|, |ClosedSemiring|, and |HasSingle|, we can calculate instances of |HasDecomp|, as shown in \figref{HasDecomp}.
445 | \begin{theorem}[\provedIn{theorem:HasDecomp}]\thmlabel{HasDecomp}
446 | Given the definitions in \figrefdef{HasDecomp}{Decomposition of language representations (specified by homomorphicity)}{
447 | \begin{code}
448 | instance HasDecomp (Pred [c]) c where
449 |   hasEps   (Pred f) = f []
450 |   deriv c  (Pred f) = Pred (f . (c :))
451 | 
452 | instance HasDecomp (Resid s) s where
453 |   hasEps   (Resid f) = any null (f [])
454 |   deriv c  (Resid f) = Resid (f . (c :))
455 | \end{code}
456 | \begin{code}
457 | instance Eq c => HasDecomp (RegExp c) c where
458 |   hasEps (Char _)      = zero
459 |   hasEps Zero          = zero
460 |   hasEps One           = one
461 |   hasEps (p  :<+>  q)  = hasEps p  <+>  hasEps q
462 |   hasEps (p  :<.>  q)  = hasEps p  <.>  hasEps q
463 |   hasEps (Closure p)   = closure (hasEps p)
464 |   
465 |   deriv c (Char c')     = if c == c' then one else zero
466 |   deriv _ Zero          = zero
467 |   deriv _ One           = zero
468 |   deriv c (p  :<+>  q)  = deriv c p <+> deriv c q
469 |   deriv c (p  :<.>  q)  = delta p <.> deriv c q  <+>  deriv c p <.> q
470 |   deriv c (Closure p)   = deriv c (p <.> Closure p)
471 | \end{code}
472 | %%   deriv c (Char c')     = if c == c' then one else zero
473 | %%   deriv c (Char c')  | c == c'    = one
474 | %%                      | otherwise  = zero
475 | \vspace{-4ex}
476 | }, |predSet|, |residPred|, and |regexp| are |HasDecomp| homomorphisms.
477 | \end{theorem}
478 | 
479 | Taken together, \lemrefs{derivs-member}{hasEps} give us an effective test for ``language'' membership, assuming that the language is expressed via |Semiring|, |ClosedSemiring|, and |HasSingle| and assuming that the language representation supports |HasDecomp|:
480 | \begin{code}
481 | accept :: HasDecomp a c => a -> [c] -> Bool
482 | accept p s = hasEps (derivs s p)
483 | \end{code}
484 | \note{Show some examples.}
485 | 
486 | \sectionl{Tries}
487 | 
488 | The definition of |accept| works for every language representation that implements the |HasDecomp| methods.
489 | A natural alternative representation is thus an implementation of those two methods, as shown in \figref{Decomp}.
490 | \begin{theorem}[\provedIn{theorem:Decomp}]\thmlabel{Decomp}
491 | Given the definitions in \figrefdef{Decomp}{Language representation as |Decomp| methods}{
492 | %format :<: = "\mathrel{\Varid{:\!\blacktriangleleft}}"
493 | %format LazyPat = "\mathit{\sim}\!\!"
494 | \begin{code}
495 | data Decomp c = Bool :<: (c -> Decomp c)
496 | 
497 | inDecomp :: Decomp c -> [c] -> Bool
498 | inDecomp (e  :<:  _   ) []      = e
499 | inDecomp (_  :<:  ds  ) (c:cs)  = inDecomp (ds c) cs
500 | 
501 | decompPred :: Decomp c -> Pred [c]
502 | decompPred = Pred . inDecomp
503 | 
504 | instance Semiring (Decomp c) where
505 |   zero  = False  :<: const zero
506 |   one   = True   :<: const zero
507 |   (a :<: ps')  <+>  (b :<: qs') = (a  ||  b) :<: liftA2 (<+>)  ps' qs'
508 |   (a :<: ps')  <.>  (b :<: qs') = (a  &&  b) :<: liftA2 h      ps' qs'
509 |    where
510 |      h p' q' = (if a then b :<: qs' else zero) <+> p' <.> q
511 | 
512 | instance ClosedSemiring (Decomp c)
513 | 
514 | instance Eq c => HasSingle (Decomp c) [c] where
515 |   single s = product (map symbol s)
516 |    where symbol c = False :<: (\ c' -> if c'==c then one else zero)
517 | 
518 | instance HasDecomp (Decomp c) c where
519 |   hasEps   (a :<: _    )  = a
520 |   deriv c  (_ :<: ds   )  = ds c
521 | \end{code}
522 | \vspace{-4ex}
523 | }, |decompPred| is a homomorphism with respect to each instantiated class.
524 | \end{theorem}
525 | 
526 | %format :| = "\mathrel{\Varid{:\!\!\triangleleft}}"
527 | %format `mat` = !
528 | %format mat = (!)
529 | 
530 | Although the |Decomp| representation caches |hasEps|, |deriv c| will be recomputed due to the use of a function in the |Decomp| representation.
531 | To further improve performance, we can \emph{memoize} these functions, e.g., with a generalized trie \needcite{} or a finite map.
532 | Given the sparseness of typical languages, the latter choice seems preferable as a naturally sparse representation, interpreting missing entries as $\zero$ (the empty language).
533 | The resulting representation is exactly a trie \needcite{}, and |accept| for |Trie| is the usual membership test for tries.
534 | Another route to ``derivative''-based language recognition was hiding in the standard notion of tries all along!
535 | \begin{theorem}[\provedIn{theorem:Trie}]\thmlabel{Trie}
536 | Given the definitions in \figrefdef{Trie}{Tries as language representation}{
537 | \begin{code}
538 | data Trie c = Bool :| Map c (Trie c)
539 | 
540 | inTrie :: Ord c => Trie c -> [c] -> Bool
541 | inTrie (e   :|  _   ) []      = e
542 | inTrie (_   :|  ds  ) (c:cs)  = inTrie (ds `mat` c) cs
543 | 
544 | mat :: (Ord c, Semiring a) => Map c a -> c -> a
545 | m `mat` c = findWithDefault zero c m
546 | 
547 | triePred :: Ord c => Trie c -> Pred [c]
548 | triePred = Pred . inTrie
549 | 
550 | instance Ord c => Semiring (Trie c) where
551 |   zero = False  :|   empty
552 |   one  = True   :|   empty
553 |   (a :| ps')  <+>   (b :| qs') = (a || b) :| unionWith (<+>) ps'  qs'
554 |   (a :| ps')  <.>   (b :| qs') = (a && b) :| unionWith (<+>) us   vs
555 |    where
556 |      us = fmap (<.> NOP (b :| qs')) ps'
557 |      vs = if a then qs' else empty
558 | 
559 | instance Ord c => ClosedSemiring (Trie c) where
560 |   closure (_ :| ds) = q where q = True :| fmap (<.> NOP q) ds
561 | 
562 | instance Ord c => HasSingle (Trie c) [c] where
563 |   single s = product (map symbol s)
564 |    where symbol c = False :| singleton c one
565 | 
566 | instance Ord c => HasDecomp (Trie c) c where
567 |   hasEps (a :| _) = a
568 |   deriv c (_ :| ds) = ds `mat` c
569 | \end{code}
570 | \vspace{-4ex}
571 | }, |triePred| is a homomorphism with respect to each instantiated class.%
572 | \notefoot{Briefly describe the operations used from |Data.Map|: |empty|, |unionWith|, |singleton|, and |findWithDefault|.}
573 | \end{theorem}
574 | 
575 | \note{Examples, and maybe timing comparisons. Motivate the lazy pattern. Mention sharing work by memoizing the functions of characters.}
576 | 
577 | \sectionl{Beyond Booleans}
578 | 
579 | We began in with the question of language specification (\secref{Languages}) and recognition/matching (\secref{Matching}) and
580 | then moved on to regular expressions and language ``derivatives'' (\secreftwo{Regular Expressions}{Derivatives of Languages}).
581 | These derivatives turn out to arise in the classic \emph{trie} construction, yielding a simple and efficient means of language recognition (\secref{Tries}).
582 | Let's now \emph{generalize} the notion of languages along with the constructions introduced above for efficient recognition.
583 | 
584 | %format `suchThat` = "\mid"
585 | As pointed out in \secref{Matching}, sets and predicates are isomorphic notions.
586 | For any set |s|, we can construct the membership predicate, |\ x -> x `elem` s|.
587 | Conversely, for any predicate |p|, we can construct a corresponding set of values, |set (x || p x)|.
588 | Moreover, these two conversions are inverses.
589 | 
590 | A predicate over some type |a| is just a function from |a| to |Bool|, so the predicate perspective naturally suggests generalizing the result type beyond booleans.
591 | Examining the operations defined in \figref{Pred}, we can see that the needed operations on |Bool| are |False|, |True|, |(||||)|, and |(&&)|.
592 | As shown in \figref{classes}, those operations correspond to |zero|, |one|, |(+)|, and |(*)| operations for |Bool|.
593 | It therefore seems likely that we can generalize from |Bool| to \emph{any} semiring, and indeed we can do just that.
594 | We'll need to generalize |hasEps| as well to return a semiring value rather than just |Bool|:
595 | %format atEps = "\Varid{at_{\mempty}}"
596 | \begin{code}
597 | class HasDecomp a c s | a -> c s where
598 |   atEps  :: a -> s
599 |   deriv  :: c -> a -> a
600 | 
601 | derivs :: HasDecomp a c s => [c] -> a -> a
602 | derivs s p = foldl (flip deriv) p s
603 | 
604 | accept :: HasDecomp a c s => a -> [c] -> s
605 | accept p s = atEps (derivs s p)
606 | \end{code}
607 | \figrefdef{FunTo}{Semiring-generalized predicates}{
608 | \begin{code}
609 | newtype FunTo s a = FunTo (a -> s)
610 | 
611 | instance Semiring s => Semiring (FunTo s [c]) where
612 |   zero = FunTo (const zero)
613 |   one = FunTo (boolVal . null)
614 |   FunTo f   <+>   FunTo g = FunTo (\ w -> f w <+> g w)
615 |   FunTo f   <.>   FunTo g = FunTo (\ w -> sum [ f u <.> g v | (u,v) <- splits w ] )
616 | 
617 | boolVal :: Semiring s => Bool -> s
618 | boolVal False  = zero
619 | boolVal True   = one
620 | 
621 | instance Semiring s => ClosedSemiring (FunTo s [c])
622 | 
623 | instance (Semiring s, Eq b) => HasSingle (FunTo s b) b where
624 |   single x = FunTo (boolVal . (== x))
625 | 
626 | instance HasDecomp (FunTo s [c]) c s where
627 |   atEps    (FunTo f) = f []
628 |   deriv c  (FunTo f) = FunTo (f . (c :))
629 | \end{code}
630 | \vspace{-4ex}
631 | } shows the generalization of |Pred|, resulting from replacing boolean operations by their semiring generalizations in \figref{Pred}.
632 | In particular, omitting |FunTo| wrappers, the definition of |(*)| is equivalent to the following:
633 | \begin{align}\label{def:convolution}
634 | (f * g)\,w = \hspace{-1.5ex}\sum_{\substack{u,v \\ u \mappend v = w}}\hspace{-1ex} f\,u * g\,v
635 | \end{align}
636 | 
637 | The other language representations also generalize easily as well.
638 | \figrefdef{RegExpFun}{Semiring-generalized regular expressions}{
639 | \begin{code}
640 | infixl 6 :<+>
641 | infixl 7 :<.>
642 | 
643 | data RegExp c s  =  Char c
644 |                  |  Value s
645 |                  |  RegExp c s  :<+>  RegExp c s
646 |                  |  RegExp c s  :<.>  RegExp c s
647 |                  |  Closure (RegExp c s)
648 | 
649 | instance Semiring s => Semiring (RegExp c s) where
650 |   zero  = Value zero
651 |   one   = Value one
652 |   (<+>)  = (:<+>)
653 |   (<.>)  = (:<.>)
654 | 
655 | instance Semiring s => ClosedSemiring (RegExp c s) where
656 |   closure = Closure
657 | 
658 | instance (Functor f, Foldable f, Semiring s) => HasSingle (RegExp c s) (f c) where
659 |   single w = product (fmap Char w)
660 | 
661 | instance (Eq c, ClosedSemiring s) => HasDecomp (RegExp c s) c s where
662 |   atEps (Char _)        = zero
663 |   atEps (Value s)       = s
664 |   atEps (p  :<+>  q)    = atEps p <+> atEps q
665 |   atEps (p  :<.>  q)    = atEps p <.> atEps q
666 |   atEps (Closure p)     = closure (atEps p)
667 | 
668 |   deriv c (Char c')    = boolVal (c == c')
669 |   deriv _ (Value _)    = zero
670 |   deriv c (p :<+> q)   = deriv c p <+> deriv c q
671 |   deriv c (p :<.> q)   = Value (atEps p) <.> deriv c q <+> deriv c p <.> q
672 |   deriv c (Closure p)  = deriv c (p <.> Closure p)
673 | 
674 | -- Interpret a regular expression
675 | regexp :: (ClosedSemiring s, HasSingle s [c]) => RegExp c s -> s
676 | regexp (Char c)       = single [c]
677 | regexp (Value s)      = s
678 | regexp (u  :<+>   v)  = regexp u  <+>  regexp v
679 | regexp (u  :<.>   v)  = regexp u  <.>  regexp v
680 | regexp (Closure u)    = closure (regexp u)
681 | \end{code}
682 | \vspace{-4ex}
683 | } generalizes regular expressions from \figref{RegExp}.
684 | The only representation change is to replace the |Zero| and |One| constructors by a single general |Value| constructor taking a semiring value.
685 | Note for |deriv c (p :<.> q)| that |Value (atEps p)| replaces |delta p| and denotes (via |accept|) the function that maps |mempty| to |atEps p| and all other sequences to |zero|.
686 | \begin{theorem}\thmlabel{RegExpFun}
687 | Given the definitions in \figref{RegExpFun}, |regexp| is a homomorphism with respect to each instantiated class.
688 | \end{theorem}
689 | \noindent
690 | The proof is a straightforward adaptation of \proofRef{theorem:RegExp}.
691 | 
692 | %format scaleT = scale"\!_"T
693 | The |Decomp| and |Trie| types from \figreftwo{Decomp}{Trie} also generalize without difficulty, with the latter generalization shown in \figrefdef{TrieFun}{Function-generalized list tries}{
694 | \begin{code}
695 | infix 1 :|
696 | data Trie c s = s :| Map c (Trie c s) deriving Show
697 | 
698 | scaleT :: (Ord c, DetectableZero s) => s -> Trie c s -> Trie c s
699 | scaleT s _ | isZero s = zero
700 | scaleT s (e :| ts) = (s <.> e) :| fmap (scaleT s) ts
701 | 
702 | inTrie :: (Ord c, DetectableZero s) => Trie c s -> [c] -> s
703 | inTrie (e :| _ ) [] = e
704 | inTrie (_ :| ds) (c:cs) = inTrie (ds `mat` c) cs
705 | 
706 | mat :: (Ord c, Semiring a) => Map c a -> c -> a
707 | m `mat` c = findWithDefault zero c m
708 | 
709 | trieFunTo :: (Ord c, DetectableZero s) => Trie c s -> FunTo s [c]
710 | trieFunTo = FunTo . inTrie
711 | 
712 | instance (Ord c, DetectableZero s) => Semiring (Trie c s) where
713 |   zero = zero :| empty
714 |   one  = one  :| empty
715 |   (a :| ps') <+> (b :| qs') = (a <+> b) :| unionWith (<+>) ps' qs'
716 |   (a :| ps') <.> q@(b :| qs') =
717 |     (a <.> b) :| unionWith (<+>) (fmap (<.> q) ps') (fmap (scaleT a) qs')
718 | 
719 | instance (Ord c, DetectableZero s) => ClosedSemiring (Trie c s) where
720 |   closure (_ :| ds) = q where q = one :| fmap (<.> q) ds
721 | 
722 | instance (Ord c, DetectableZero s) => HasSingle (Trie c s) [c] where
723 |   single w = product (map symbol w)
724 |    where
725 |      symbol c = zero :| singleton c one
726 | 
727 | instance (Ord c, DetectableZero s) => HasDecomp (Trie c s) c s where
728 |   atEps (a :| _) = a
729 |   deriv c (_ :| ds) = ds `mat` c
730 | \end{code}
731 | \vspace{-4ex}
732 | }, where |scaleT s t| multiplies all of the semiring values in the trie |t| by the value |s|, with |scaleT zero t == zero|.
733 | \citet{Hinze2000GGT} generalize tries from denoting sets to functions in this same way.\notefoot{To do: explore generalizing to tries over other key types.}
734 | \begin{theorem}\thmlabel{TrieFun}
735 | Given the definitions in \figref{TrieFun}, |trieFunTo| is a homomorphism with respect to each instantiated class.
736 | \end{theorem}
737 | \noindent
738 | The proof is a straightforward adaptation of \proofRef{theorem:Trie}.\notefoot{On second thought, work out the proofs for this generalized versions, and then specialize for \thmref{Trie}.}
739 | 
740 | \workingHere
741 | 
742 | \sectionl{Convolution}
743 | 
744 | \note{Show that |(*)| corresponds to generalized convolution.}
745 | 
746 | \sectionl{Beyond Convolution}
747 | 
748 | \note{The free semimodule monad.}
749 | 
750 | \sectionl{More Variations}
751 | 
752 | \note{Variations: counting, probability distributions, temporal/spatial convolution.}
753 | 
754 | \sectionl{What else?}
755 | 
756 | \begin{itemize}
757 | \item Other applications:
758 | \begin{itemize}
759 |   \item Univariate and multivariate polynomials.
760 |   \item Convolution: discrete and continuous, one- and multi-dimensional, dense and sparse.
761 |   \item 2D parsing?
762 | \end{itemize}
763 | \end{itemize}
764 | 
765 | 
766 | % % % % % % % % % % % % % % % % % % % % % % % % % % % % % % % % % % % % % % %
767 | 
768 | \appendix
769 | 
770 | \sectionl{Proofs}
771 | 
772 | \subsection{\thmref{Pred}}\proofLabel{theorem:Pred}
773 | 
774 | \subsection{\thmref{Resid}}\proofLabel{theorem:Resid}
775 | 
776 | \subsection{\thmref{RegExp}}\proofLabel{theorem:RegExp}
777 | 
778 | \subsection{\lemref{derivs-monoid}}\proofLabel{lemma:derivs-monoid}
779 | 
780 | \begin{code}
781 |     derivs mempty p
782 | ==  set ( w | mempty <> w `elem` p )  -- definition of |derivs mempty|
783 | ==  set ( w | w `elem` p )            -- monoid law
784 | ==  p                                 -- set notation
785 | \end{code}
786 | 
787 | \begin{code}
788 |     derivs (u <> v) p
789 | ==  set ( w | (u <> v) <> w `elem` p )          -- definition of |derivs (u<>v)|
790 | ==  set ( w | u <> (v <> w) `elem` p )          -- monoid law
791 | ==  set ( w | v <> w `elem` derivs u p )        -- definition of |derivs u|
792 | ==  set ( w | w `elem` derivs v (derivs u p) )  -- definition of |derivs v|
793 | ==  derivs v (derivs u p)
794 | \end{code}
795 | 
796 | \subsection{\lemref{hasEps}}\proofLabel{lemma:hasEps}
797 | 
798 | \subsection{\thmref{HasDecomp}}\proofLabel{theorem:HasDecomp}
799 | 
800 | \subsection{\thmref{Decomp}}\proofLabel{theorem:Decomp}
801 | 
802 | \subsection{\thmref{Trie}}\proofLabel{theorem:Trie}
803 | 
804 | \bibliography{bib}
805 | 
806 | \end{document}
807 | 
808 | 


--------------------------------------------------------------------------------