├── test ├── gold │ ├── anbn-ab.txt │ ├── anbn-ba.txt │ ├── anbn-eps.txt │ ├── as-a.txt │ ├── as-eps.txt │ ├── dyck-a.txt │ ├── dyck-b.txt │ ├── dyck-c.txt │ ├── dyck-d.txt │ ├── dyck-e.txt │ ├── pp-pi.txt │ ├── pp-pig.txt │ ├── pp-ping.txt │ ├── pp-pink.txt │ ├── pps-pig.txt │ ├── pps-q.txt │ ├── anbn-aaabbb.txt │ ├── anbn-aaabbbb.txt │ ├── anbn-aabb.txt │ ├── anbn-aacbb.txt │ ├── asas-100.txt │ ├── asbas-aabaaa.txt │ ├── asbas-aabba.txt │ ├── asbs-aabbba.txt │ ├── asbs-aabbbb.txt │ ├── letters as0df.txt │ ├── letters asdf.txt │ ├── pps-pigpig.txt │ ├── pps-pigping.txt │ ├── letters asdf 40.txt │ └── pps-pinkpigpinkpigpig.txt ├── stats │ ├── anbn.txt │ ├── dyck.txt │ ├── fishy.txt │ ├── asas.txt │ ├── star-a.txt │ ├── letters.txt │ ├── asbs.txt │ └── asbas.txt ├── do-bench1 ├── wizard.jpg ├── stats-tweak.el ├── do-bench ├── do-benches ├── ImageTest.hs ├── Benchmark.hs ├── GoldTests.hs └── Benchmarks │ ├── opt regexp.md │ ├── no-opt regexp.md │ ├── with isOne.md │ └── without isOne.md ├── errata.md ├── .gitignore ├── stack.yaml ├── src ├── Language.hs ├── MMap.hs ├── Stream.hs ├── GenInstances.inc ├── Examples.hs ├── Misc.hs ├── ShareMap.hs ├── Constrained.hs ├── RegExp.hs ├── Cofree.hs ├── Poly.hs └── Semi.hs ├── Changes.md ├── formatting.fmt ├── sections.tex ├── package.yaml ├── Makefile ├── readme.md ├── notes.md ├── macros.tex ├── reviews-icfp2019.md ├── todo.md └── was1.lhs /test/gold/anbn-ab.txt: -------------------------------------------------------------------------------- 1 | 1 -------------------------------------------------------------------------------- /test/gold/anbn-ba.txt: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /test/gold/anbn-eps.txt: -------------------------------------------------------------------------------- 1 | 1 -------------------------------------------------------------------------------- /test/gold/as-a.txt: -------------------------------------------------------------------------------- 1 | 1 -------------------------------------------------------------------------------- /test/gold/as-eps.txt: -------------------------------------------------------------------------------- 1 | 1 -------------------------------------------------------------------------------- /test/gold/dyck-a.txt: -------------------------------------------------------------------------------- 1 | 1 -------------------------------------------------------------------------------- /test/gold/dyck-b.txt: -------------------------------------------------------------------------------- 1 | 1 -------------------------------------------------------------------------------- /test/gold/dyck-c.txt: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /test/gold/dyck-d.txt: -------------------------------------------------------------------------------- 1 | 1 -------------------------------------------------------------------------------- /test/gold/dyck-e.txt: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /test/gold/pp-pi.txt: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /test/gold/pp-pig.txt: -------------------------------------------------------------------------------- 1 | 1 -------------------------------------------------------------------------------- /test/gold/pp-ping.txt: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /test/gold/pp-pink.txt: -------------------------------------------------------------------------------- 1 | 1 -------------------------------------------------------------------------------- /test/gold/pps-pig.txt: -------------------------------------------------------------------------------- 1 | 1 -------------------------------------------------------------------------------- /test/gold/pps-q.txt: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /test/gold/anbn-aaabbb.txt: -------------------------------------------------------------------------------- 1 | 1 -------------------------------------------------------------------------------- /test/gold/anbn-aaabbbb.txt: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /test/gold/anbn-aabb.txt: -------------------------------------------------------------------------------- 1 | 1 -------------------------------------------------------------------------------- /test/gold/anbn-aacbb.txt: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /test/gold/asas-100.txt: -------------------------------------------------------------------------------- 1 | 101 -------------------------------------------------------------------------------- /test/gold/asbas-aabaaa.txt: -------------------------------------------------------------------------------- 1 | 1 -------------------------------------------------------------------------------- /test/gold/asbas-aabba.txt: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /test/gold/asbs-aabbba.txt: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /test/gold/asbs-aabbbb.txt: -------------------------------------------------------------------------------- 1 | 1 -------------------------------------------------------------------------------- /test/gold/letters as0df.txt: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /test/gold/letters asdf.txt: -------------------------------------------------------------------------------- 1 | 1 -------------------------------------------------------------------------------- /test/gold/pps-pigpig.txt: -------------------------------------------------------------------------------- 1 | 1 -------------------------------------------------------------------------------- /test/gold/pps-pigping.txt: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /test/gold/letters asdf 40.txt: -------------------------------------------------------------------------------- 1 | 1 -------------------------------------------------------------------------------- /test/gold/pps-pinkpigpinkpigpig.txt: -------------------------------------------------------------------------------- 1 | 1 -------------------------------------------------------------------------------- /test/stats/anbn.txt: -------------------------------------------------------------------------------- 1 | \stat{| anbn |}{1.293 ms}{\hang}{12.12 ms}{2.770 $\mu{}$s}{} 2 | -------------------------------------------------------------------------------- /test/do-bench1: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | echo $1 : $2 4 | ./do-bench $1 "$2" > stats/$1.txt 5 | -------------------------------------------------------------------------------- /test/stats/dyck.txt: -------------------------------------------------------------------------------- 1 | \stat{| dyck |}{254.9 $\mu{}$s}{\hang}{24.77 ms}{3.062 $\mu{}$s}{} 2 | -------------------------------------------------------------------------------- /test/stats/fishy.txt: -------------------------------------------------------------------------------- 1 | \stat{| fishy |}{1.276 ms}{2.528 ms}{29.73 ms}{4.233 $\mu{}$s}{} 2 | -------------------------------------------------------------------------------- /test/wizard.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conal/convolution-paper/HEAD/test/wizard.jpg -------------------------------------------------------------------------------- /test/stats/asas.txt: -------------------------------------------------------------------------------- 1 | \stat{| star a1 <.> star a1 |}{2.818 ms}{1.274 ms}{601.6 ms}{2.619 $\mu{}$s}{} 2 | -------------------------------------------------------------------------------- /test/stats/star-a.txt: -------------------------------------------------------------------------------- 1 | \stat{| star a1 |}{30.56 $\mu{}$s}{22.45 $\mu{}$s}{5.258 ms}{2.624 $\mu{}$s}{} 2 | -------------------------------------------------------------------------------- /test/stats/letters.txt: -------------------------------------------------------------------------------- 1 | \stat{| star letter |}{690.4 $\mu{}$s}{690.9 $\mu{}$s}{10.89 ms}{3.574 $\mu{}$s}{} 2 | -------------------------------------------------------------------------------- /test/stats/asbs.txt: -------------------------------------------------------------------------------- 1 | \stat{| star a1 <.> star b1 |}{52.26 $\mu{}$s}{36.59 $\mu{}$s}{14.40 ms}{2.789 $\mu{}$s}{} 2 | -------------------------------------------------------------------------------- /test/stats/asbas.txt: -------------------------------------------------------------------------------- 1 | \stat{| star a1 <.> b1 <.> star a1 |}{56.53 $\mu{}$s}{49.21 $\mu{}$s}{14.58 ms}{2.798 $\mu{}$s}{} 2 | -------------------------------------------------------------------------------- /errata.md: -------------------------------------------------------------------------------- 1 | ## Errata 2 | 3 | * Version of March 25, 2019: 4 | * Figure 5, first word should be "class" (Ed Kmett). 5 | 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.cabal 2 | test/Benchmarks/*.html 3 | wizard-*.png 4 | supplemental-material 5 | convolution*.tex 6 | *.zip 7 | 8 | supplemental.tar 9 | -------------------------------------------------------------------------------- /stack.yaml: -------------------------------------------------------------------------------- 1 | resolver: lts-13.4 # ghc-8.6.3 2 | 3 | # Some other choices: 4 | 5 | # resolver: lts-12.19 # ghc-8.4.4 6 | # resolver: lts-12.4 # ghc-8.4.3 7 | # resolver: lts-11.12 # ghc-8.2.2 8 | # resolver: lts-9.20 # ghc-8.0.2 9 | -------------------------------------------------------------------------------- /test/stats-tweak.el: -------------------------------------------------------------------------------- 1 | (defun stats-tweak () 2 | "Tweak running times for the non-terminating examples" 3 | (interactive) 4 | (beginning-of-buffer) 5 | (forward-sexp 3) 6 | (kill-sexp 2) 7 | (insert "{\\hang}{\\hang}") 8 | ) 9 | -------------------------------------------------------------------------------- /test/do-bench: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | stack bench --benchmark-arguments "$1" >& o.tmp 4 | 5 | echo -n "\\stat{|" $2 "|}{" 6 | grep Group o.tmp | cut -c10-100 | sed 's/"/}{/' | tr -d '\n' 7 | # grep time o.tmp | cut -c22-30 | sed 's/ /}{/g' | sed 's/μ/$\\mu{}$/g' | tr -d '\n' 8 | grep time o.tmp | cut -c22-29 | tr '\n' "#" | sed 's/#/}{/g' | sed 's/μ/$\\mu{}$/g' | tr -d '\n' 9 | echo "}" 10 | -------------------------------------------------------------------------------- /test/do-benches: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | mkdir -p stats 4 | 5 | ./do-bench1 star-a 'star a1' 6 | ./do-bench1 letters 'star letter' 7 | ./do-bench1 fishy 'fishy' 8 | ./do-bench1 asas 'star a1 <.> star a1' 9 | ./do-bench1 asbs 'star a1 <.> star b1' 10 | ./do-bench1 asbas 'star a1 <.> b1 <.> star a1' 11 | ./do-bench1 dyck 'dyck' 12 | ./do-bench1 anbn 'anbn' 13 | 14 | # # Rerender the paper 15 | # (cd .. ; make) 16 | -------------------------------------------------------------------------------- /src/Language.hs: -------------------------------------------------------------------------------- 1 | -- {-# OPTIONS_GHC -Wno-unused-imports #-} -- TEMP 2 | 3 | -- | Generalized "languages", which is mostly Semiring & friends 4 | 5 | #NOTUSED 6 | 7 | module Language where 8 | 9 | import Semi 10 | 11 | oneBool :: Additive x => (a -> x) -> a -> Bool -> x 12 | oneBool _ _ False = zero 13 | oneBool f a True = f a 14 | 15 | equal' :: (Eq a, Additive b) => a -> b -> a -> b 16 | equal' a b a' = if a == a' then b else zero 17 | 18 | equal :: (Eq a, Semiring b) => a -> a -> b 19 | equal a = equal' a one 20 | 21 | -- >>> splits (4 :: N) 22 | -- [(Sum 0,Sum 4),(Sum 1,Sum 3),(Sum 2,Sum 2),(Sum 3,Sum 1),(Sum 4,Sum 0)] 23 | 24 | {-------------------------------------------------------------------- 25 | Temporary hack 26 | --------------------------------------------------------------------} 27 | 28 | -- allVals :: [c] 29 | -- allVals = error "allVals not defined" 30 | -------------------------------------------------------------------------------- /Changes.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Description of changes 3 | ... 4 | 5 | 6 | ## Submitted to ICFP (March 1, 2019) 7 | 8 | * Submitted version: [Git commit](https://github.com/conal/convolution-paper/commit/65c5fd3a08172b19e35eddda9f07c56fd2fc2f48) and [PDF](https://github.com/conal/convolution-paper/blob/master/icfp19-paper46-submitted.pdf). 9 | 10 | ## Updates 11 | 12 | ### March 9, 2019 13 | 14 | * Errata noted and fixed: 15 | * In Figure 4, replace `Indexable` instances first two lines with the following line 16 | 17 | instance Indexable c (Cofree h b) (h (Cofree h b)) => Indexable [c] b (Cofree h b) where 18 | 19 | * The definition of `lift2` in Section 11 shouldn't have "`\ w ->". 20 | * Section 11 (Beyond Convolution): 21 | * In Figure 8 ("Functor and Applicative classes and some instances"): Added *Functor* and *Applicative* instances for *(→) a*, aside the instances for *(←) b*. 22 | * In Figure 9 ("The a → b and b ← a semirings"): Added 𝒫' type and *Semiring* instance. 23 | * Added function preimages example. 24 | 25 | -------------------------------------------------------------------------------- /src/MMap.hs: -------------------------------------------------------------------------------- 1 | 2 | {-# OPTIONS_GHC -Wall #-} 3 | {-# OPTIONS_GHC -Wno-unused-imports #-} -- TEMP 4 | 5 | -- | Finite maps with a more sensible Monoid instance 6 | 7 | module MMap where 8 | 9 | import qualified Data.Map as M 10 | 11 | -- For now, but maybe use MMap in place of Map throughout 12 | import Semi 13 | 14 | newtype Map k v = M (M.Map k v) deriving 15 | ( Eq,Ord,Show,Functor,Foldable,Additive,Semiring 16 | , DetectableZero, DetectableOne 17 | , Indexable k v,Listable k v, HasSingle k v ) 18 | 19 | instance (Ord k, Semigroup v) => Semigroup (Map k v) where 20 | M u <> M v = M (M.unionWith (<>) u v) 21 | 22 | instance (Ord k, Monoid v) => Monoid (Map k v) where 23 | mempty = M mempty 24 | mappend = (<>) 25 | 26 | toDescList :: Map k v -> [(k,v)] 27 | toDescList (M m) = M.toDescList m 28 | 29 | toAscList :: Map k v -> [(k,v)] 30 | toAscList (M m) = M.toAscList m 31 | 32 | toList :: Map k v -> [(k,v)] 33 | toList (M m) = M.toList m 34 | 35 | fromList :: Ord k => [(k,v)] -> Map k v 36 | fromList ps = M (M.fromList ps) 37 | 38 | keys :: Map k v -> [k] 39 | keys (M m) = M.keys m 40 | -------------------------------------------------------------------------------- /src/Stream.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -Wno-unused-imports #-} -- Examples 2 | 3 | -- | Streams (infinite lists) 4 | 5 | module Stream where 6 | 7 | import Control.Applicative (Applicative(..)) 8 | import Data.Functor.Identity (Identity(..)) 9 | 10 | import Constrained 11 | import Semi 12 | 13 | #include "GenInstances.inc" 14 | 15 | infixr 1 :< 16 | data Stream b = b :< Stream b 17 | 18 | instance Additive b => Additive (Stream b) where 19 | zero = zero :< zero 20 | (a :< dp) <+> (b :< dq) = a <+> b :< dp <+> dq 21 | 22 | instance Semiring b => LeftSemimodule b (Stream b) where 23 | scale s = go where go (b :< dp) = s <.> b :< go dp 24 | 25 | instance (Additive b, DetectableZero b) => DetectableZero (Stream b) where 26 | isZero (a :< dp) = isZero a && isZero dp 27 | 28 | instance (Semiring b, DetectableZero b) => Semiring (Stream b) where 29 | one = one :< zero 30 | (a :< dp) <.> q = a .> q <+> (zero :< dp <.> q) 31 | 32 | instance (Additive b, DetectableZero b, DetectableOne b) => DetectableOne (Stream b) where 33 | isOne (a :< dp) = isOne a && isZero dp 34 | 35 | instance (StarSemiring b, DetectableZero b) => StarSemiring (Stream b) where 36 | star (a :< dp) = q where q = star a .> (one :< dp <.> q) 37 | 38 | instance Additive b => HasSingle N b (Stream b) where 39 | w +-> b = foldN (zero :<) (b :< zero) w 40 | 41 | foldN :: (b -> b) -> b -> N -> b 42 | foldN h e 0 = e 43 | foldN h e n = foldN h (h e) (n-1) 44 | 45 | instance Indexable N b (Stream b) where 46 | (b :< bs) ! n = if n == 0 then b else bs ! (n-1) 47 | -------------------------------------------------------------------------------- /src/GenInstances.inc: -------------------------------------------------------------------------------- 1 | -- CPP macros for generating instances of Semiring, etc 2 | 3 | -- Instances for numeric types 4 | #define Nums(t) \ 5 | instance Additive (t) where { (<+>) = (+) ; zero = 0 } ; \ 6 | instance DetectableZero (t) where { isZero = (== 0)} ; \ 7 | instance DetectableOne (t) where { isOne = (== 1)} ; \ 8 | instance Semiring (t) where { (<.>) = (*) ; one = 1 } 9 | 10 | #define FunctorSemimodule(f) \ 11 | instance (Semiring zz, Additive ((f) zz)) => LeftSemimodule zz ((f) zz) 12 | 13 | #define FunctorStar(f) \ 14 | instance (Semiring ((f) qq), Ok (f) qq, StarSemiring qq) => StarSemiring ((f) qq) where \ 15 | { star = fmapC star; plus = fmapC plus } 16 | 17 | #define NullZero(f) \ 18 | instance Additive ((f) qq) => DetectableZero ((f) qq) where \ 19 | { isZero = null } ; \ 20 | 21 | -- Additive, LeftSemimodule, Semiring from Applicative 22 | #define ApplSemi(f) \ 23 | instance Additive zz => Additive ((f) zz) where \ 24 | { (<+>) = liftA2C (<+>) ; zero = pureC zero } ; \ 25 | FunctorSemimodule(f) ; \ 26 | instance Semiring zz => Semiring ((f) zz) where \ 27 | { (<.>) = liftA2C (<.>) ; one = pureC one } ; \ 28 | FunctorStar(f) 29 | 30 | -- TODO: Maybe rely on Pointed and Zip instead of Applicative here, considering 31 | -- these definitions. 32 | 33 | -- Additive from Applicative+Monoid+Foldable 34 | #define ApplMono(f) \ 35 | instance Monoid ((f) qq) => Additive ((f) qq) where \ 36 | { zero = mempty ; (<+>) = (<>) } ; \ 37 | NullZero(f) ; \ 38 | instance (ApplicativeC f, Ok f qq, Monoid qq) => Semiring ((f) qq) where \ 39 | { one = pureC mempty ; (<.>) = liftA2C (<>) } ; \ 40 | FunctorStar(f) 41 | -------------------------------------------------------------------------------- /src/Examples.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -Wno-unused-imports #-} -- TEMP 2 | 3 | -- | Some polymorphic language examples 4 | 5 | module Examples where 6 | 7 | import Prelude hiding (sum,product) 8 | 9 | import Data.Set (Set) 10 | import qualified Data.Set as S 11 | 12 | import Data.Char (toUpper) 13 | 14 | import Semi 15 | 16 | a1, b1, pink, pig :: (HasSingle String b x, Semiring b) => x 17 | a1 = single "a" 18 | b1 = single "b" 19 | pink = single "pink" 20 | pig = single "pig" 21 | 22 | pp :: (HasSingle String b x, Additive x, Semiring b) => x 23 | pp = pink <+> pig 24 | 25 | as, ass, pps, asbs, asbas, asas, fishy :: (HasSingle String b x, StarSemiring x, StarSemiring b) => x 26 | as = star a1 27 | ass = star as 28 | pps = star pp 29 | asbs = star a1 <.> star b1 30 | asbas = star a1 <.> b1 <.> star a1 31 | asas = star a1 <.> star a1 32 | fishy = star letter <.> single "fish" <.> star letter 33 | 34 | anbn :: (HasSingle String b x, Semiring x, Semiring b) => x 35 | anbn = one <+> a1 <.> anbn <.> b1 36 | 37 | singleChar :: (Ord c, HasSingle [c] b x, Semiring b) => [c] -> x 38 | singleChar cs = singles (S.fromList [[c] | c <- cs]) 39 | 40 | letter :: (HasSingle String b x, Semiring x, Semiring b) => x 41 | letter = singleChar ['a' .. 'z'] 42 | -- letter = singles (S.fromList [[c] | c <- ['a' .. 'z']]) -- Should be much more efficient for SharedMap 43 | -- letter = sum [single [c] | c <- ['a' .. 'z']] 44 | 45 | -- Balanced brackets 46 | dyck :: (HasSingle String b x, StarSemiring x, Semiring b) => x 47 | dyck = star (single "[" <.> dyck <.> single "]") 48 | 49 | -- Will dyck get repeatedly reconstructed, considering polymorphism? 50 | 51 | -- TODO: try other formulations, including an explicit local recursion and star. 52 | 53 | starL :: Semiring b => b -> b 54 | starL b = one <+> starL b <.> b 55 | 56 | starR :: Semiring b => b -> b 57 | starR b = one <+> b <.> starR b 58 | -------------------------------------------------------------------------------- /src/Misc.hs: -------------------------------------------------------------------------------- 1 | -- {-# OPTIONS_GHC -Wno-unused-imports #-} -- TEMP 2 | 3 | -- | Miscellany 4 | 5 | module Misc where 6 | 7 | import GHC.TypeLits (KnownNat,natVal) 8 | import Data.Typeable (Proxy(..)) 9 | import GHC.Exts (Constraint) 10 | 11 | infixl 7 :* 12 | infixl 6 :+ 13 | 14 | type (:*) = (,) 15 | type (:+) = Either 16 | 17 | type Unop a = a -> a 18 | 19 | bool :: a -> a -> Bool -> a 20 | bool t e b = if b then t else e 21 | 22 | -- | Handy universal constraint alias 23 | class (forall u. con u => con (h u)) => Con1 con h 24 | instance (forall u. con u => con (h u)) => Con1 con h 25 | 26 | -- | Handy universal constraint alias 27 | class (forall u v. con u v => con (h u) (h v)) => Con2 con h 28 | instance (forall u v. con u v => con (h u) (h v)) => Con2 con h 29 | 30 | cats :: Monoid a => Int -> a -> a 31 | cats n a = mconcat (replicate n a) 32 | 33 | nat :: forall n. KnownNat n => Integer 34 | nat = natVal (Proxy @n) 35 | {-# INLINE nat #-} 36 | 37 | int :: forall n. KnownNat n => Int 38 | int = fromIntegral (nat @n) 39 | {-# INLINE int #-} 40 | 41 | type ConF con f = (forall a. con a => con (f a) :: Constraint) 42 | 43 | -- type ShowF f = (forall a. Show a => Show (f a) :: Constraint) 44 | -- type ShowF f = ConF Show f 45 | 46 | {-------------------------------------------------------------------- 47 | Invertible monoids 48 | --------------------------------------------------------------------} 49 | 50 | class Monoid t => Splittable t where 51 | -- Whether equal to 'mempty' 52 | isEmpty :: t -> Bool 53 | -- | The inverse of 'mappend' 54 | splits :: t -> [(t,t)] 55 | 56 | instance Splittable [a] where 57 | isEmpty = null 58 | splits [] = [([],[])] 59 | splits (a:as) = ([],a:as) : [((a:l),r) | (l,r) <- splits as] 60 | 61 | -- Equivalently, 62 | 63 | -- splits as@(a:as') = ([],as) : map (first (a:)) (splits as') 64 | 65 | -- splits' as = ([],as) : go as 66 | -- where 67 | -- go [] = [] 68 | -- go (a:as') = [((a:l),r) | (l,r) <- splits' as'] 69 | -------------------------------------------------------------------------------- /formatting.fmt: -------------------------------------------------------------------------------- 1 | % -*- text -*- 2 | %% Misc lhs2TeX directives 3 | 4 | %format family = "\mathbf{family}" 5 | 6 | %% spaces (in 18ths of a quad): \, = 3, \: = 4, \; = 5, \! = -3 7 | 8 | %format liftA2 9 | 10 | %format =~ = " \simeq " 11 | 12 | %format NOP = "{}" 13 | 14 | %% optional double-dollar spelling, to avoid $ confusing emacs latex-mode. 15 | %format <$$> = <$> 16 | %format <#> = <$> 17 | 18 | %format <$> = "\mathbin{<\!\!\!\$\!\!\!>}" 19 | %format <*> = "\mathbin{<\!\!\!*\!\!\!>}" 20 | 21 | %% \usepackage{amssymb} 22 | 23 | %% hack: add missing space, e.g., before "{" in data type decl 24 | %format SPC = "\ " 25 | %format SPC2 = SPC SPC 26 | %format SPC4 = SPC2 SPC2 27 | %format SPC8 = SPC4 SPC4 28 | 29 | %format mempty = "\varepsilon" 30 | %format <> = " \diamond " 31 | %format `mappend` = " \diamond " 32 | %format mappend = "(\diamond)" 33 | 34 | %format :* = " \times " 35 | %format :+ = " + " 36 | 37 | %% %format *^ = *"\!\hat{}" 38 | %% %format *^ = "\cdot" 39 | 40 | %% %format <.> = "<\!\!\!\cdot\!\!\!>" 41 | 42 | %% %format @ = "{\ @}" 43 | %% %format @@ = "@" 44 | 45 | %format <=> = "\Longleftrightarrow" 46 | 47 | %format ldq = "\dq\!\!" 48 | %format rdq = "\!\!\dq" 49 | 50 | %format (qq (op)) = ldq op rdq 51 | 52 | %% %format not = "\Varid{not}" 53 | 54 | %format ~ = "\mathbin{\sim}" 55 | 56 | %format BACKex = "\hspace{-1ex}" 57 | %format BACKquad = "\hspace{-2.2ex}" 58 | 59 | %format == = = 60 | %format /= = "\not=" 61 | 62 | % Got from Andres L: 63 | \newcommand{\calculationcomments}{% 64 | \let\onelinecomment=\onelinecommentchars 65 | \def\commentbegin{\ \{ }% 66 | \def\commentend{\}}% 67 | } 68 | \calculationcomments 69 | 70 | %% %format * = "\times" 71 | 72 | %format >=> = "\mathbin{>\!\!=\!\!\!>}" 73 | 74 | %format ... = "\ldots" 75 | %format ...^ = "\cdots" 76 | 77 | %% Add a thin space after lambda 78 | %format \ = "\lambda\, " 79 | 80 | %% Treat exists like forall, with following period showing as ".", not "\circ" 81 | %format exists(x) = "\exists " x "\hsforall " 82 | %% Add a space following the period used by forall and exists 83 | %format period_ = ".\:" 84 | -------------------------------------------------------------------------------- /sections.tex: -------------------------------------------------------------------------------- 1 | %% For handy insertion 2 | 3 | \secref{Introduction} 4 | \secref{Monoids, Semirings and Semimodules} 5 | \secref{Monoids} 6 | \secref{Additive Monoids} 7 | \secref{Semirings} 8 | \secref{Star Semirings} 9 | \secref{Semimodules} 10 | \secref{Function-like Types and Singletons} 11 | \secref{Calculating Instances from Homomorphisms} 12 | \secref{Languages and the Monoid Semiring} 13 | \secref{Finite maps} 14 | \secref{Decomposing Functions from Lists} 15 | \secref{Regular Expressions} 16 | \secref{Tries} 17 | \secref{Convolution} 18 | \secref{Beyond Convolution} 19 | \secref{The Free Semimodule Monad} 20 | \secref{More Applications} 21 | \secref{Polynomials} 22 | \secref{Miscellaneous Notes} 23 | \secref{Proofs} 24 | 25 | \deflabel{monoid homomorphism} 26 | \deflabel{additive monoid homomorphism} 27 | \thmlabel{curry additive} 28 | \deflabel{semiring homomorphism} 29 | \thmlabel{curry semiring} 30 | \deflabel{star semiring homomorphism} 31 | \lemlabel{affine over semiring} 32 | \deflabel{left semimodule homomorphism} 33 | \lemlabel{affine over semimodule} 34 | \lemlabel{decomp +->} 35 | \thmlabel{Semiring (b <-- a)} 36 | \lemlabel{decomp (b <-- [c])} 37 | \lemlabel{atEps b <-- [c]} 38 | \lemlabel{deriv b <-- [c]} 39 | \thmlabel{semiring decomp b <-- [c]} 40 | \thmlabel{semiring decomp generalized} 41 | \thmlabel{RegExpFun} 42 | \thmlabel{LTrie} 43 | \thmlabel{Fourier} 44 | \lemlabel{deriv b <-- [c]} specializes for |b <-- N| as follows: 45 | \thmlabel{Stream} 46 | \thmlabel{standard FunApp} 47 | \thmlabel{poly fun} 48 | \prooflabel{theorem:curry additive} 49 | \prooflabel{lemma:decomp +->} 50 | \prooflabel{theorem:Semiring (b <-- a)} 51 | \prooflabel{lemma:decomp (b <-- [c])} 52 | \prooflabel{lemma:atEps b <-- [c]} 53 | \prooflabel{lemma:deriv b <-- [c]} 54 | \lemlabel{deriv +->} 55 | \prooflabel{theorem:semiring decomp b <-- [c]} 56 | \lemlabel{atEps and deriv via (<:)} 57 | \prooflabel{theorem:LTrie} 58 | \prooflabel{theorem:Fourier} 59 | \prooflabel{theorem:decomp (b <-- N)} 60 | \prooflabel{lemma:deriv (b <-- N)} 61 | \lemlabel{deriv +-> Nat} 62 | \prooflabel{theorem:standard FunApp} 63 | \prooflabel{theorem:poly fun} 64 | \lemlabel{poly +->} 65 | 66 | \figrefdef{<--}{The monoid semiring} 67 | \figrefdef{Map}{Finite maps} 68 | \figrefdef{RegExp}{Semiring-generalized regular expressions} 69 | \figrefdef{LTrie}{Tries as |[c] -> b| and as |[c] -> b|} 70 | \figrefdef{Stream}{Streams} 71 | \figrefdef{FunApp}{|Functor| and |Applicative| classes and some instances} 72 | -------------------------------------------------------------------------------- /test/ImageTest.hs: -------------------------------------------------------------------------------- 1 | 2 | {-# OPTIONS_GHC -Wall #-} 3 | {-# OPTIONS_GHC -Wno-unused-imports #-} -- TEMP 4 | 5 | -- | 6 | 7 | module Main where 8 | 9 | import qualified Data.Vector as V 10 | import Codec.Picture (convertRGB8, readImage, savePngImage) 11 | import Codec.Picture.Types 12 | ( DynamicImage(ImageYF), Image(..), PixelF , dynamicMap 13 | , pixelAt, promoteImage , extractLumaPlane ) 14 | import Data.Vector.Storable (convert) 15 | 16 | import Semi hiding ((^)) 17 | 18 | main :: IO () 19 | main = do convolve "original" ident "wizard" 20 | convolve "blur" (boxBlur 5) "wizard" 21 | convolve "sharpen" sharpen "wizard" 22 | convolve "edge-detect" edgy "wizard" 23 | 24 | convolve :: String -> Arr Double -> FilePath -> IO () 25 | convolve opName kernel origName = 26 | do img <- readArr (origName ++ ".jpg") 27 | saveArr (origName ++ "-" ++ opName ++ ".png") (img <.> kernel) 28 | 29 | {-------------------------------------------------------------------- 30 | Kernels 31 | --------------------------------------------------------------------} 32 | 33 | ident :: Arr Double 34 | ident = [[1]] 35 | 36 | boxBlur :: Int -> Arr Double 37 | boxBlur n = (fmap.fmap) (/ fromIntegral (n*n)) ((replicate n . replicate n) 1) 38 | 39 | sharpen :: Arr Double 40 | sharpen = [[ 0,-1, 0] 41 | ,[-1, 5,-1] 42 | ,[ 0,-1, 0]] 43 | 44 | edgy :: Arr Double 45 | edgy = [[-1,-1,-1] 46 | ,[-1, 8,-1] 47 | ,[-1,-1,-1]] 48 | 49 | {-------------------------------------------------------------------- 50 | Conversion between [[b]] and DynamicImage (JuicyPixels) 51 | --------------------------------------------------------------------} 52 | 53 | -- TODO: use statically sized vectors. 54 | 55 | -- | A 2D array represented as a list of lists 56 | type Arr b = [[b]] 57 | 58 | gen :: Int -> (Int -> a) -> [a] 59 | gen dim f = f <$> [0 .. dim-1] 60 | 61 | imgToArr :: Fractional b => DynamicImage -> Arr b 62 | imgToArr im = 63 | gen height $ \ y -> 64 | gen width $ \ x -> 65 | realToFrac $ pixelAt dat x y 66 | where 67 | width = dynamicMap imageWidth im 68 | height = dynamicMap imageHeight im 69 | dat = (promoteImage . extractLumaPlane . convertRGB8) im :: Image PixelF 70 | 71 | -- Assume arr is rectangular and nonempty. 72 | arrToImg :: Real a => Arr a -> DynamicImage 73 | arrToImg arr = ImageYF $ Image 74 | { imageWidth = length (head arr) 75 | , imageHeight = length arr 76 | , imageData = (convert . V.fromList . map realToFrac . concat) arr 77 | } 78 | 79 | readArr :: FilePath -> IO (Arr Double) 80 | readArr path = 81 | fmap (either (error ("couldn't read " ++ path)) imgToArr) (readImage path) 82 | 83 | -- For now just PNG 84 | saveArr :: FilePath -> Arr Double -> IO () 85 | saveArr path arr = savePngImage path (arrToImg arr) 86 | -------------------------------------------------------------------------------- /test/Benchmark.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -Wno-unused-imports #-} -- TEMP 2 | 3 | -- | Benchmarks 4 | 5 | module Main where 6 | 7 | import Control.DeepSeq (NFData) 8 | import Data.Map (Map) 9 | import System.Directory (createDirectoryIfMissing) 10 | import Criterion.Main 11 | import Criterion.Types (Config(..),Verbosity(..)) 12 | 13 | import Misc (cats) 14 | import Semi 15 | import RegExp 16 | import Cofree 17 | import ShareMap 18 | 19 | import Examples 20 | 21 | main :: IO () 22 | main = 23 | defaultMainWith config [ 24 | bgroup "" [] 25 | 26 | , group "star-a" (star a1) [] $ 27 | replicate 100 'a' 28 | 29 | , group "letters" (star letter) [] $ 30 | cats 25 "asdf" 31 | 32 | , group "fishy" (star letter <.> single "fish" <.> star letter) [] $ 33 | take 48 (cycle az) ++ "fish" ++ take 48 (cycle az) 34 | 35 | , group "asas" (star a1 <.> star a1) [] $ 36 | replicate 50 'a' ++ replicate 50 'a' 37 | 38 | , group "asbs" (star a1 <.> star b1) [] $ 39 | replicate 50 'a' ++ replicate 50 'b' 40 | 41 | , group "asbas" (star a1 <.> b1 <.> star a1) [] $ 42 | replicate 50 'a' ++ "b" ++ replicate 49 'a' 43 | 44 | -- With O = N, the dyck examples don't work for RegExp:Function, while anbn 45 | -- is okay. 46 | , group "dyck" dyck ["RegExp:Map","RegExp:IntMap"] $ 47 | "[[[[[[]][[[[]]]][[[[]]]][[[]]][]][[[[]]]]]][]][[[]]][[[[]]]][[[[[[[]]][[[[]]]][[[]]][]][[[[]]]]]][]]" 48 | 49 | , group "anbn" anbn ["RegExp:Map","RegExp:IntMap"] $ 50 | replicate 50 'a' ++ replicate 50 'b' 51 | 52 | ] 53 | where 54 | config = defaultConfig 55 | { 56 | timeLimit = 1 -- 5 57 | , reportFile = Just "test/stats.html" 58 | } 59 | 60 | type Ok x b = (HasSingle String b x, StarSemiring x, StarSemiring b, NFData b) 61 | 62 | group :: String -> (forall x b. Ok x b => x) -> [String] -> String -> Benchmark 63 | group groupName example omit str = 64 | bgroup groupName 65 | [ bgroup "" [] 66 | 67 | -- , style @(RegExp ((->) Char) O) "RegExp:Function" 68 | -- , style @(RegExp (Map Char) O) "RegExp:Map" 69 | -- -- , style @(RegExp CharMap O) "RegExp:IntMap" 70 | 71 | -- , style @(Cofree ((->) Char) O) "Cofree:Function" 72 | , style @(Cofree (Map Char) O) "Cofree:Map" 73 | -- , style @(Cofree CharMap O) "Cofree:IntMap" 74 | , style @(Cofree (ShareMap Char) O) "Cofree:ShareMap" 75 | 76 | ] 77 | where 78 | style :: forall x b. Ok x b => String -> Benchmark 79 | style s | s `elem` omit = bench s (whnf id ()) 80 | -- bgroup "" [] 81 | | otherwise = bench s (nf (example @x @b !) str) 82 | 83 | 84 | type O = Bool -- N 85 | 86 | -- TODO: Generate the style name from the type via TypeRep. 87 | 88 | az :: String 89 | az = ['a'..'z'] 90 | -------------------------------------------------------------------------------- /package.yaml: -------------------------------------------------------------------------------- 1 | name: paper 2 | version: 0.1.0.0 3 | github: "conal/convolution-paper" 4 | license: BSD3 5 | author: "Conal Elliott" 6 | maintainer: "conal@conal.net" 7 | copyright: "2018 Conal Elliott" 8 | 9 | synopsis: Language recognition in the spirit of Brzowski's derivatives 10 | category: language, math 11 | 12 | description: Please see the README on GitHub at 13 | 14 | default-extensions: 15 | - AllowAmbiguousTypes 16 | - CPP 17 | - ConstraintKinds 18 | - DefaultSignatures 19 | - DeriveFoldable 20 | - DeriveFunctor 21 | - FlexibleContexts 22 | - FlexibleInstances 23 | - FunctionalDependencies 24 | - GADTs 25 | - GeneralizedNewtypeDeriving 26 | - LambdaCase 27 | - MultiParamTypeClasses 28 | - OverloadedStrings # for Poly, since dante doesn't pick up 29 | - PatternSynonyms 30 | - QuantifiedConstraints 31 | - RankNTypes 32 | - StandaloneDeriving 33 | - TupleSections 34 | - TypeApplications 35 | - TypeFamilies 36 | - TypeOperators 37 | - UndecidableInstances 38 | - ViewPatterns 39 | - ScopedTypeVariables 40 | 41 | ghc-options: 42 | -Wall 43 | # -O2 # Use when benchmarking 44 | 45 | cpp-options: -DEXAMPLES 46 | 47 | library: 48 | dependencies: 49 | - base >= 4.7 && < 5 50 | - ghc-prim 51 | - deepseq 52 | - data-inttrie 53 | - MemoTrie 54 | - containers 55 | - multiset 56 | - finite-typelits 57 | - vector-sized 58 | other-modules: [] 59 | include-dirs: src 60 | install-includes: GenInstances.inc 61 | source-dirs: src 62 | exposed-modules: 63 | - Misc 64 | - Constrained 65 | - Semi 66 | - RegExp 67 | - Cofree 68 | - MMap 69 | - ShareMap 70 | - Poly 71 | - Examples 72 | 73 | tests: 74 | test: 75 | source-dirs: test 76 | main: GoldTests.hs 77 | other-modules: [] 78 | dependencies: 79 | - base 80 | - Cabal >= 1.24.0.0 81 | - containers 82 | - bytestring 83 | - tasty 84 | - tasty-golden 85 | - paper 86 | 87 | benchmarks: 88 | bench: 89 | ghc-options: 90 | -O2 91 | source-dirs: test 92 | main: Benchmark.hs 93 | other-modules: [] 94 | dependencies: 95 | - base 96 | - Cabal >= 1.24.0.0 97 | - containers 98 | - criterion 99 | - directory 100 | - deepseq 101 | - MemoTrie 102 | - paper 103 | 104 | # stack bench 105 | 106 | executables: 107 | image-test: 108 | ghc-options: 109 | -O2 110 | source-dirs: test 111 | main: ImageTest.hs 112 | other-modules: [] 113 | dependencies: 114 | - base 115 | - Cabal >= 1.24.0.0 116 | - JuicyPixels 117 | - vector 118 | - paper 119 | 120 | # In the test directory: 121 | # stack build :image-test && stack exec image-test 122 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | paper = convolution 2 | 3 | # # breaks arXiv upload 4 | # outdir=out 5 | outdir=. 6 | 7 | conf = $(outdir)/$(paper)-conf 8 | long = $(outdir)/$(paper)-long 9 | long-anon = $(outdir)/$(paper)-long-anon 10 | 11 | pdf: $(long).pdf 12 | pdf: $(conf).pdf 13 | pdf: $(long-anon).pdf 14 | 15 | see: $(long).see 16 | # see: $(conf).see 17 | # see: $(long-anon).see 18 | 19 | long: $(long).pdf 20 | conf: $(conf).pdf 21 | long-anon: $(long-anon).pdf 22 | 23 | all: conf 24 | all: long 25 | # all: long-anon 26 | 27 | all.see: conf.see long.see long-anon.see 28 | 29 | 30 | # # Doesn't work 31 | # .PRECIOUS: $(outdir)/%.tex $(outdir)/%.pdf 32 | 33 | .PRECIOUS: out/convolution.tex out/convolution.pdf 34 | .PRECIOUS: out/was1.tex out/was1.pdf 35 | .PRECIOUS: out/was2.tex out/was2.pdf 36 | .PRECIOUS: out/was3.tex out/was3.pdf 37 | .PRECIOUS: $(conf).tex $(conf).pdf 38 | .PRECIOUS: $(long).tex $(long).pdf 39 | .PRECIOUS: $(long-anon).tex $(long-anon).pdf 40 | 41 | latex=latexmk -pdf 42 | latex+= -outdir=${outdir} 43 | # latex+= -synctex=1 44 | latex+= -halt-on-error 45 | # # Preview continuously 46 | # latex+= -pvc 47 | 48 | stats = $(wildcard test/stats/*.txt) 49 | stats: $(stats) 50 | 51 | see-stats: 52 | echo $(stats) 53 | 54 | %.pdf: %.tex macros.tex bib.bib Makefile 55 | $(latex) $*.tex 56 | touch $@ 57 | 58 | # The previous rule always ran. Passing "-d" (debug) to make revealed that the 59 | # PDFs were not getting updated by latexmk, so their prerequisites stay newer. 60 | # Workaround: touch the PDF. 61 | 62 | texdeps = formatting.fmt Makefile $(stats) 63 | 64 | $(conf).tex: $(paper).lhs $(texdeps) 65 | lhs2tex --set=conf --set=anonymous -o $*.tex $(paper).lhs 66 | 67 | $(long).tex: $(paper).lhs $(texdeps) 68 | lhs2tex --set=long -o $*.tex $(paper).lhs 69 | 70 | # --set=draft 71 | 72 | $(long-anon).tex: $(paper).lhs $(texdeps) 73 | lhs2tex --set=long --set=anonymous -o $*.tex $(paper).lhs 74 | 75 | showpdf=skim 76 | 77 | %.see: %.pdf 78 | ${showpdf} $< 79 | 80 | conf.see: $(conf).see 81 | long.see: $(long).see 82 | long-anon.see: $(long-anon).see 83 | 84 | pics = $(wildcard test/*.png) 85 | 86 | long.zip: $(long).tex $(long).bbl macros.tex $(pics) 87 | zip $@ $^ 88 | 89 | SHELL = bash 90 | 91 | # clean: 92 | # rm -f ${outdir}/* 93 | 94 | clean: 95 | rm -f {$(conf),$(long),$(long-anon)}.{tex,pdf,aux,nav,snm,ptb,log,out,toc,bbl,blg,fdb_latexmk,fls} 96 | 97 | 98 | TAGS: *.tex *.lhs *.bib src/*.hs src/*.inc 99 | etags $^ 100 | 101 | supp = supplemental-material 102 | $(supp): readme.md stack.yaml package.yaml src/*.hs src/*.inc test/*.hs test/gold/*.txt test/wizard.jpg out/convolution-long-anon.pdf 103 | mkdir -p $(supp)/{src,test} 104 | grep -vi conal package.yaml > $(supp)/package.yaml 105 | cp -p stack.yaml readme.md out/convolution-long-anon.pdf $(supp) 106 | cp -p src/*.hs src/*.inc $(supp)/src 107 | cp -p test/*.hs test/wizard.jpg $(supp)/test 108 | cp -rp test/gold $(supp)/test 109 | 110 | # Supplemental tarball in progress 111 | tar: supplemental.tar 112 | 113 | supplemental.tar: $(supp) 114 | tar -cvf $@ $(supp) 115 | 116 | web: web-token 117 | 118 | web-token: $(long).pdf 119 | scp $< conal@conal.net:/home/conal/domains/conal/htdocs/papers/$(paper) 120 | touch web-token 121 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | ## Generalized Convolution and Efficient Language Recognition 2 | 3 | This repo contains an unpublished [paper](http://conal.net/papers/convolution/) along with the source code appearing in the paper. 4 | 5 | 6 | ### Abstract 7 | 8 | *Convolution* is a broadly useful operation with applications including signal processing, machine learning, probability, optics, polynomial multiplication, and efficient parsing. Usually, however, this operation is understood and implemented in more specialized forms, hiding commonalities and limiting usefulness. This paper formulates convolution in the common algebraic framework of semirings and semimodules and populates that framework with various representation types. One of those types is the grand abstract template and itself generalizes to the free semimodule monad. Other representations serve varied uses and performance trade-offs, with implementations calculated from simple and regular specifications. 9 | 10 | Of particular interest is Brzozowski's method for regular expression matching. Uncovering the method's essence frees it from syntactic manipulations, while generalizing from boolean to weighted membership (such as multisets and probability distributions) and from sets to *n*-ary relations. The classic *trie* data structure then provides an elegant and efficient alternative to syntax. Pleasantly, polynomial arithmetic requires no additional implementation effort, works correctly with a variety of representations, and handles multivariate polynomials and power series with ease. Image convolution also falls out as a special case. 11 | 12 | Pleasantly, polynomial arithmetic requires no additional implementation effort, works correctly with a variety of representations, and handles multivariate polynomials and power series with ease. 13 | Image convolution also falls out as a special case. 14 | 15 | 16 | ### Haskell source code 17 | 18 | You can find the source code for the paper's functionality and examples in the `src` directory. 19 | 20 | To try out the Haskell implementation, make sure you have [`stack`](https://docs.haskellstack.org/en/stable/README/) installed, and then 21 | 22 | * Compile: `stack build` 23 | * Gold tests: `stack test`. 24 | These gold tests check that all of the representations yield the same answer. 25 | * Benchmarks: `stack bench`. 26 | This one takes a while and generates stats.html as well as a lot of textual statistics. 27 | You can make it go faster (at the cost of less accurate measurements) by lowing `timeLimit` in test/Benchmark.hs. 28 | The examples (`anbn` and `dyck`) that don't terminate with `RegExp (Map Char)` appear to have running times of about 3 ns but really are skipped. 29 | * To run the image convolution examples: 30 | 31 | (cd test; stack build :image-test && stack exec image-test) 32 | 33 | 34 | Some of the modules contain comments like the following (in src/Poly.hs): 35 | 36 | ``` haskell 37 | -- >>> let p = single 1 <+> value 3 :: Poly1 Z 38 | -- >>> p 39 | -- x + 3 40 | -- 41 | -- >>> p^3 42 | -- x^3 + 9 * x^2 + 27 * x + 27 43 | -- 44 | -- >>> p^5 45 | -- x^5 + 15 * x^4 + 90 * x^3 + 270 * x^2 + 405 * x + 243 46 | ``` 47 | 48 | If you use Emacs and have [dante](https://github.com/jyp/dante) installed, you can run these examples in place via `dante-eval-block` (`C-c "`). 49 | 50 | 51 | -------------------------------------------------------------------------------- /test/GoldTests.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -Wno-unused-binds -Wno-unused-imports #-} -- TEMP 2 | 3 | module Main where 4 | 5 | import Prelude hiding (sum,product,(^)) 6 | 7 | import Data.Map.Lazy (Map) 8 | import qualified Data.Map.Lazy as Map 9 | 10 | import Data.IntMap.Lazy (IntMap) 11 | 12 | import Data.ByteString.Lazy.Char8 (pack) 13 | import Data.Semigroup ((<>)) 14 | import Test.Tasty (defaultMain, TestTree, testGroup) 15 | import Test.Tasty.Golden 16 | 17 | -- import Data.MemoTrie 18 | 19 | import Misc (cats) 20 | import Semi 21 | import RegExp (RegExp) 22 | import Cofree (Cofree) 23 | import ShareMap (ShareMap) 24 | 25 | import Examples 26 | 27 | main :: IO () 28 | main = do 29 | defaultMain basicTests 30 | 31 | basicTests :: TestTree 32 | basicTests = testGroup "Various representations" 33 | [ testGroup "" [] 34 | 35 | -- , tests @(String -> N) "Function" 36 | 37 | -- , tests @(RegExp ((->) Char) N) "RegExpFun" 38 | -- , tests @(RegExp (Map Char) N) "RegExpMap" 39 | -- , tests @(RegExp CharMap N) "RegExpIntMap" 40 | -- , tests @(RegExp (ShareMap Char) N) "RegExpShareMap" 41 | 42 | -- , tests @(Cofree ((->) Char) N) "CofreeFun" 43 | , tests @(Cofree (Map Char) N) "CofreeMap" 44 | -- , tests @(Cofree CharMap N) "CofreeIntMap" 45 | , tests @(Cofree (ShareMap Char) N) "CofreeShareMap" 46 | 47 | ] 48 | 49 | -- TODO: some tests with s other than Bool. 50 | 51 | -- tests' :: forall x. Semiring x => String -> TestTree 52 | -- tests' = undefined 53 | 54 | tests :: forall x b. 55 | (HasSingle String b x, StarSemiring x, StarSemiring b, Show b) 56 | => String -> TestTree 57 | tests group = testGroup group 58 | [ testGroup "" [] 59 | 60 | , gold "as-eps" $ as # "" 61 | , gold "as-a" $ as # "a" 62 | 63 | , gold "pp-pi" $ pp # "pi" 64 | , gold "pp-pig" $ pp # "pig" 65 | , gold "pp-pig" $ pp # "pig" 66 | , gold "pp-pink" $ pp # "pink" 67 | , gold "pp-ping" $ pp # "ping" 68 | 69 | , gold "pps-q" $ pps # "q" 70 | , gold "pps-pig" $ pps # "pig" 71 | , gold "pps-pigpig" $ pps # "pigpig" 72 | , gold "pps-pigping" $ pps # "pigping" 73 | , gold "pps-pinkpigpinkpigpig" $ pps # "pinkpigpinkpigpig" 74 | 75 | , gold "letters as0df" $ star letter # "as0df" 76 | , gold "letters asdf" $ star letter # "asdf" 77 | , gold "letters asdf 40" $ star letter # cats 40 "asdf" 78 | 79 | , groupNot ["RegExpMap","RegExpIntMap"] $ 80 | testGroup "anbn" 81 | [ gold "anbn-eps" $ anbn # "" 82 | , gold "anbn-ab" $ anbn # "ab" 83 | , gold "anbn-ba" $ anbn # "ba" 84 | , gold "anbn-aabb" $ anbn # "aabb" 85 | , gold "anbn-aacbb" $ anbn # "aacbb" 86 | , gold "anbn-aaabbb" $ anbn # "aaabbb" 87 | , gold "anbn-aaabbbb" $ anbn # "aaabbbb" 88 | ] 89 | 90 | , gold "asbs-aabbbb" $ asbs # "aabbbb" 91 | , gold "asbs-aabbba" $ asbs # "aabbba" 92 | 93 | , gold "asbas-aabaaa" $ asbas # "aabaaa" 94 | , gold "asbas-aabba" $ asbas # "aabba" 95 | 96 | , gold "asas-100" $ asas # replicate 100 'a' 97 | 98 | , groupNot ["RegExpFun","RegExpMap","RegExpIntMap"] $ 99 | testGroup "dyck" 100 | [ gold "dyck-a" $ dyck # "[]" 101 | , gold "dyck-b" $ dyck # "[[]]" 102 | , gold "dyck-c" $ dyck # "[[a]]" 103 | , gold "dyck-d" $ dyck # "[[]][]" 104 | , gold "dyck-e" $ dyck # "[[]][[]" 105 | ] 106 | 107 | ] 108 | where 109 | infixl 2 # 110 | (#) :: x -> String -> b 111 | (#) = (!) 112 | groupNot :: [String] -> TestTree -> TestTree 113 | groupNot gs | group `elem` gs = const (testGroup "" []) 114 | | otherwise = id 115 | gold :: Show z => String -> z -> TestTree 116 | gold nm = goldenVsString nm ("test/gold/" <> nm <> ".txt") 117 | . pure . pack . show 118 | -------------------------------------------------------------------------------- /src/ShareMap.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -Wno-unused-imports #-} -- TEMP 2 | 3 | -- | Finite maps with sharing (for non-injectivity) 4 | 5 | module ShareMap where 6 | 7 | import Control.Arrow (second,(***)) 8 | import Data.Foldable (foldl') 9 | import Data.Maybe (fromJust) 10 | import Data.Functor.Classes (Show1(..),Show2(..)) 11 | 12 | import Data.Set (Set) 13 | import qualified Data.Set as S 14 | import Data.Map (Map) 15 | import qualified Data.Map as M 16 | 17 | import Semi 18 | 19 | -- Point each key to a canonical representative, and point each representative 20 | -- to the equivalence class and the map value. 21 | data ShareMap k v = SM (Map k k) (Map k (Set k, v)) deriving (Show) 22 | 23 | -- instance Show2 ShareMap where 24 | -- liftShowsPrec2 _ _ _ _ = showsPrec 25 | 26 | instance Ord k => Semigroup (ShareMap k v) where 27 | SM rep m <> SM rep' m' = SM (M.union rep rep') (M.union m m') 28 | 29 | instance Ord k => Monoid (ShareMap k v) where 30 | mempty = SM M.empty M.empty 31 | mappend = (<>) 32 | 33 | -- I could instead newtype-wrap a pair, and derive Semigroup and Monoid 34 | 35 | empty :: ShareMap k v 36 | empty = SM M.empty M.empty 37 | 38 | type instance Key (ShareMap k) = k 39 | 40 | instance (Ord k, Additive v) => Indexable k v (ShareMap k v) where 41 | SM reps m ! k = case M.lookup k reps of 42 | Nothing -> zero 43 | Just k' -> (snd <$> m) ! k' -- split second map 44 | 45 | -- TODO: move (*->) into HasSingle with defaults. 46 | instance (Ord k, Additive v) => HasSingle k v (ShareMap k v) where 47 | ks *-> v = case S.minView ks of 48 | Nothing -> empty 49 | Just (k,_) -> SM (M.fromDistinctAscList ((,k) <$> S.toAscList ks)) 50 | (M.singleton k (ks,v)) 51 | 52 | instance Functor (ShareMap k) where 53 | fmap f (SM reps m) = SM reps (fmap (second f) m) 54 | 55 | -- Addition is trickier. See 2019-03-{05,06,09} journal notes. 56 | 57 | type Chunk k v = (Set k, v) 58 | type Chunks k v = [Chunk k v] -- disjoint k subsets 59 | 60 | addChunks :: (Ord k, Additive v) => Chunks k v -> Chunks k v -> Chunks k v 61 | addChunks p p' = 62 | [ (ks `S.difference` support', x ) | (ks ,x ) <- p ] 63 | ++ [ (ks' `S.difference` support , x') | (ks',x') <- p'] 64 | ++ [ (ks `S.intersection` ks', x <+> x') | (ks,x) <- p, (ks',x') <- p' ] 65 | where 66 | support = chunksSupport p 67 | support' = chunksSupport p' 68 | 69 | chunksSupport :: Ord k => Chunks k v -> Set k 70 | chunksSupport = S.unions . map fst 71 | 72 | -- TODO: pull support out of the keys of ShareMap' Map k k. Or even leave it there. 73 | 74 | -- data ShareMap k v = SM (Map k k) (Map k (Set k, v)) deriving Show 75 | 76 | -- Build a ShareMap from disjoint chunks. 77 | shareMap :: forall k v. (Ord k, Additive v) => Chunks k v -> ShareMap k v 78 | shareMap (filter (not . null . fst) -> chunks) = foldMap h (chunks `zip` maxes) 79 | where 80 | maxes :: [k] 81 | maxes = (fromJust . S.lookupMax . fst) <$> chunks 82 | h :: (Chunk k v, k) -> ShareMap k v 83 | h ((ks,v),maxk) = 84 | SM (M.fromList ((,maxk) <$> S.elems ks)) (M.singleton maxk (ks,v)) 85 | 86 | instance (Ord k, Additive v) => Additive (ShareMap k v) where 87 | zero = SM M.empty M.empty 88 | SM _ (M.elems -> p) <+> SM _ (M.elems -> q) = shareMap (p `addChunks` q) 89 | 90 | -- >>> let a2 = 'a' +-> 2 :: ShareMap Char Z 91 | -- >>> a2 92 | -- SM (fromList [('a','a')]) (fromList [('a',(fromList "a",2))]) 93 | -- >>> let b3 = 'b' +-> 3 :: ShareMap Char Z 94 | -- >>> b3 95 | -- SM (fromList [('b','b')]) (fromList [('b',(fromList "b",3))]) 96 | -- >>> a2 <+> b3 97 | -- SM (fromList [('a','a'),('b','b')]) (fromList [('a',(fromList "a",2)),('b',(fromList "b",3))]) 98 | -- >>> a2 <+> a2 99 | -- SM (fromList [('a','a')]) (fromList [('a',(fromList "a",4))]) 100 | -- >>> b3 <+> b3 101 | -- SM (fromList [('b','b')]) (fromList [('b',(fromList "b",6))]) 102 | -- 103 | -- >>> (a2 <+> b3) <+> b3 104 | -- SM (fromList [('a','a'),('b','b')]) (fromList [('a',(fromList "a",2)),('b',(fromList "b",6))]) 105 | -- 106 | -- >>> let a2c = [(S.fromList "a",2)] :: Chunks Char Z 107 | -- >>> let b3c = [(S.fromList "b",3)] :: Chunks Char Z 108 | -- >>> (a2c,b3c) 109 | -- ([(fromList "a",2)],[(fromList "b",3)]) 110 | -- >>> addChunks a2c b3c 111 | -- [(fromList "a",2),(fromList "b",3),(fromList "",5)] 112 | -- >>> S.fromList "abcdef" *-> 3 :: ShareMap Char Z 113 | -- SM (fromList [('a','a'),('b','a'),('c','a'),('d','a'),('e','a'),('f','a')]) (fromList [('a',(fromList "abcdef",3))]) 114 | -------------------------------------------------------------------------------- /src/Constrained.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -Wno-unused-imports #-} -- TEMP 2 | 3 | -- | Some constrained classes 4 | 5 | module Constrained where 6 | 7 | import Prelude hiding (sum) 8 | 9 | import Control.Applicative (liftA2) 10 | import Control.Monad (join) 11 | import GHC.Types (Constraint) 12 | import Data.Set (Set) 13 | import qualified Data.Set as S 14 | import Data.MultiSet (MultiSet) 15 | import qualified Data.MultiSet as MS 16 | import Data.Map (Map) 17 | import qualified Data.Map as M 18 | 19 | -- I'd like to make Constrained more primitive than Semi, so I may need to 20 | -- shuffle some things. 21 | import Semi 22 | 23 | import Misc ((:*)) 24 | 25 | type Ok2 f a b = (Ok f a, Ok f b) 26 | type Ok3 f a b c = (Ok2 f a b, Ok f c) 27 | type Ok4 f a b c d = (Ok2 f a b, Ok2 f c d) 28 | 29 | class FunctorC f where 30 | type Ok f a :: Constraint 31 | type Ok f a = () 32 | fmapC :: Ok2 f a b => (a -> b) -> f a -> f b 33 | default fmapC :: Functor f => (a -> b) -> f a -> f b 34 | fmapC = fmap 35 | 36 | class FunctorC f => ApplicativeC f where 37 | pureC :: Ok f a => a -> f a 38 | default pureC :: Applicative f => a -> f a 39 | pureC = pure 40 | liftA2C :: Ok3 f a b c => (a -> b -> c) -> f a -> f b -> f c 41 | default liftA2C :: Applicative f => (a -> b -> c) -> f a -> f b -> f c 42 | liftA2C = liftA2 43 | 44 | infixl 1 >>== 45 | class ApplicativeC f => MonadC f where 46 | joinC :: Ok f a => f (f a) -> f a 47 | default joinC :: Monad f => f (f a) -> f a 48 | joinC = join 49 | (>>==) :: Ok2 f a b => f a -> (a -> f b) -> f b 50 | default (>>==) :: Monad f => f a -> (a -> f b) -> f b 51 | (>>==) = (>>=) 52 | 53 | bindViaJoin :: (MonadC f, Ok3 f a b (f b)) => f a -> (a -> f b) -> f b 54 | bindViaJoin as f = joinC (fmapC f as) 55 | 56 | joinViaBind :: (MonadC f, Ok2 f a (f a)) => f (f a) -> f a 57 | joinViaBind q = q >>== id 58 | 59 | class FunctorC f => MonoidalC f where 60 | unitC :: Ok f () => f () 61 | crossC :: Ok2 f a b => f a -> f b -> f (a :* b) 62 | 63 | pureViaUnit :: Ok2 f () a => MonoidalC f => a -> f a 64 | pureViaUnit a = fmapC (const a) unitC 65 | 66 | unitViaPure :: Ok f () => ApplicativeC f => f () 67 | unitViaPure = pureC () 68 | 69 | liftA2ViaCross :: (MonoidalC f, Ok4 f a b (a :* b) c) => (a -> b -> c) -> f a -> f b -> f c 70 | liftA2ViaCross h as bs = fmapC (uncurry h) (as `crossC` bs) 71 | 72 | crossViaLiftA2 :: (ApplicativeC f, Ok3 f a b (a :* b)) => f a -> f b -> f (a :* b) 73 | crossViaLiftA2 = liftA2C (,) 74 | 75 | instance FunctorC ((->) a) 76 | instance ApplicativeC ((->) a) 77 | instance MonadC ((->) a) 78 | 79 | instance FunctorC [] 80 | instance ApplicativeC [] 81 | instance MonadC [] 82 | 83 | -- etc 84 | 85 | instance MonoidalC [] where 86 | unitC = unitViaPure 87 | crossC = crossViaLiftA2 88 | 89 | instance FunctorC Set where 90 | type Ok Set a = Ord a 91 | fmapC = S.map 92 | 93 | instance MonoidalC Set where 94 | unitC = unitViaPure 95 | crossC = S.cartesianProduct 96 | 97 | instance ApplicativeC Set where 98 | pureC = S.singleton 99 | liftA2C = liftA2ViaCross 100 | 101 | instance MonadC Set where 102 | joinC = S.unions . S.elems 103 | (>>==) = bindViaJoin 104 | 105 | instance FunctorC MultiSet where 106 | type Ok MultiSet a = Ord a 107 | fmapC = MS.map 108 | 109 | instance MonoidalC MultiSet where 110 | unitC = unitViaPure 111 | crossC = crossViaLiftA2 112 | -- as `crossC` bs = 113 | -- MS.fromOccurList 114 | -- [((a,b),m*n) | (a,m) <- MS.toOccurList as, (b,n) <- MS.toOccurList bs] 115 | 116 | -- Maybe use the explicit crossC but with `fromDistinctAscOccurList`, since the 117 | -- list is ordered and distinct. 118 | 119 | instance ApplicativeC MultiSet where 120 | pureC = MS.singleton 121 | liftA2C h as bs = 122 | MS.fromOccurList 123 | [(h a b,m*n) | (a,m) <- MS.toOccurList as, (b,n) <- MS.toOccurList bs] 124 | -- liftA2C = liftA2ViaCross 125 | 126 | instance MonadC MultiSet where 127 | joinC = MS.join 128 | (>>==) = bindViaJoin 129 | 130 | -- newtype Pred s = Pred (s -> Bool) 131 | 132 | -- Can we give a FunctorC instance for Pred? I guess we'd have to sum over the 133 | -- preimage of the function being mapped. 134 | 135 | newtype Map' b a = M { unM :: Map a b } 136 | deriving (Show, Additive, DetectableZero, LeftSemimodule b) 137 | 138 | instance Additive b => FunctorC (Map' b) where 139 | type Ok (Map' b) a = Ord a 140 | fmapC h (M p) = M (sum [h a +-> p ! a | a <- M.keys p]) 141 | 142 | instance Semiring b => ApplicativeC (Map' b) where 143 | pureC a = M (single a) 144 | liftA2C h (M p) (M q) = M (sum [h a b +-> p!a <.> q!b | a <- M.keys p, b <- M.keys q]) 145 | 146 | instance (Semiring b, DetectableZero b, DetectableOne b) => MonadC (Map' b) where 147 | -- F f >>= h = bigSum a f a .> h a 148 | M m >>== h = sum [m!a .> h a | a <- M.keys m] 149 | -- joinC is more demanding on b and Map'. Maybe eliminate it altogether. 150 | -- I could give bindViaJoin an explicit join function as argument. 151 | joinC = error "joinC on Map' b not yet implemented" 152 | 153 | #if 0 154 | 155 | M f :: Map' b a 156 | f :: Map a b 157 | h :: (a -> Map' b c) 158 | h a :: Map' b c 159 | 160 | #endif 161 | 162 | #if 0 163 | 164 | instance FunctorC Map where 165 | type Ok Map a = Ord a 166 | fmapC = M.map 167 | 168 | instance MonoidalC Map where 169 | unitC = unitViaPure 170 | crossC = crossViaLiftA2 171 | -- as `crossC` bs = 172 | -- M.fromOccurList 173 | -- [((a,b),m*n) | (a,m) <- M.toOccurList as, (b,n) <- M.toOccurList bs] 174 | 175 | -- Maybe use the explicit crossC but with `fromDistinctAscOccurList`, since the 176 | -- list is ordered and distinct. 177 | 178 | instance ApplicativeC Map where 179 | pureC = M.singleton 180 | liftA2C h as bs = 181 | M.fromOccurList 182 | [(h a b,m*n) | (a,m) <- M.toOccurList as, (b,n) <- M.toOccurList bs] 183 | -- liftA2C = liftA2ViaCross 184 | 185 | instance MonadC Map where 186 | joinC = MS.join 187 | (>>==) = bindViaJoin 188 | 189 | #endif 190 | -------------------------------------------------------------------------------- /src/RegExp.hs: -------------------------------------------------------------------------------- 1 | #ifdef EXAMPLES 2 | {-# OPTIONS_GHC -Wno-unused-imports #-} -- Examples 3 | #endif 4 | 5 | -- | Regular expressions 6 | 7 | module RegExp where 8 | 9 | import Prelude hiding (sum,product) 10 | 11 | import Data.Map (Map,keys) 12 | import Data.MemoTrie ((:->:)) 13 | 14 | import Semi 15 | 16 | #ifdef EXAMPLES 17 | import Examples 18 | #endif 19 | 20 | infixl 6 :<+> 21 | infixl 7 :<.> 22 | 23 | -- | Regular expression 24 | data RegExp h b = Char (Key h) 25 | | Value b 26 | | RegExp h b :<+> RegExp h b 27 | | RegExp h b :<.> RegExp h b 28 | | Star (RegExp h b) 29 | deriving Functor 30 | 31 | deriving instance (Show (Key h), Show b) => Show (RegExp h b) 32 | deriving instance (Eq (Key h), Eq b) => Eq (RegExp h b) 33 | 34 | #define OPTIMIZE 35 | 36 | #ifdef OPTIMIZE 37 | 38 | type D0 b = DetectableZero b 39 | type D1 b = DetectableOne b 40 | 41 | instance DetectableZero b => DetectableZero (RegExp h b) where 42 | isZero (Value b) = isZero b 43 | isZero _ = False 44 | 45 | instance (DetectableZero b, DetectableOne b) => DetectableOne (RegExp h b) where 46 | isOne (Value b) = isOne b 47 | isOne _ = False 48 | 49 | #else 50 | 51 | type D0 b = (() ~ ()) 52 | type D1 b = (() ~ ()) 53 | 54 | #endif 55 | 56 | instance (D0 b, Additive b) => Additive (RegExp h b) where 57 | zero = Value zero 58 | #ifdef OPTIMIZE 59 | p <+> q | isZero p = q 60 | | isZero q = p 61 | | otherwise = p :<+> q 62 | #else 63 | (<+>) = (:<+>) 64 | #endif 65 | 66 | instance (Semiring b, D0 b, D1 b) => LeftSemimodule b (RegExp h b) where 67 | #if 1 68 | scale b = fmap (b <.>) 69 | #elif 1 70 | b `scale` e = Value b <.> e 71 | #else 72 | scale b = go 73 | where 74 | go (Char c) = Char c 75 | go (Value b') = Value (b <.> b') 76 | go (u :<+> v) = go u <+> go v 77 | go (u :<.> v) = go u <.> go v 78 | go (Star u) = star (go u) 79 | #endif 80 | 81 | instance (D0 b, D1 b, Semiring b) => Semiring (RegExp h b) where 82 | one = Value one 83 | #ifdef OPTIMIZE 84 | p <.> q | isZero p = zero 85 | | isOne p = q 86 | -- | isZero q = zero 87 | -- | isOne q = p 88 | | otherwise = p :<.> q 89 | #else 90 | (<.>) = (:<.>) 91 | #endif 92 | 93 | instance (D0 b, D1 b, Semiring b) => StarSemiring (RegExp h b) where 94 | #if 0 95 | -- Slightly slower in all benchmarks I tested 96 | star p | isZero p = one 97 | | otherwise = Star p 98 | #else 99 | star = Star 100 | #endif 101 | 102 | type FR h b = ( Additive (h (RegExp h b)), HasSingle (Key h) (RegExp h b) (h (RegExp h b)) 103 | , Functor h, DetectableZero b, DetectableOne b ) 104 | 105 | instance (FR h b, StarSemiring b, c ~ Key h, Eq c) 106 | => Indexable [c] b (RegExp h b) where 107 | e ! w = atEps (foldl ((!) . deriv) e w) 108 | -- (!) e = atEps . foldl ((!) . deriv) e 109 | 110 | instance (FR h b, StarSemiring b, c ~ Key h, Eq c) 111 | => HasSingle [c] b (RegExp h b) where 112 | w +-> b = b .> product (map Char w) 113 | 114 | atEps :: StarSemiring b => RegExp h b -> b 115 | atEps (Char _) = zero 116 | atEps (Value b) = b 117 | atEps (p :<+> q) = atEps p <+> atEps q 118 | atEps (p :<.> q) = atEps p <.> atEps q 119 | atEps (Star p) = star (atEps p) 120 | 121 | deriv :: (FR h b, StarSemiring b) 122 | => RegExp h b -> h (RegExp h b) 123 | deriv (Char c) = single c 124 | deriv (Value _) = zero 125 | deriv (p :<+> q) = deriv p <+> deriv q 126 | deriv (p :<.> q) = fmap (<.> q) (deriv p) <+> fmap (atEps p .>) (deriv q) 127 | -- fmap (atEps p .>) (deriv q) <+> fmap (<.> q) (deriv p) 128 | deriv (Star p) = fmap (\ d -> star (atEps p) .> d <.> Star p) (deriv p) 129 | 130 | -- | Interpret a regular expression 131 | regexp :: (StarSemiring x, HasSingle [Key h] b x, Semiring b) => RegExp h b -> x 132 | regexp (Char c) = single [c] 133 | regexp (Value b) = value b 134 | regexp (u :<+> v) = regexp u <+> regexp v 135 | regexp (u :<.> v) = regexp u <.> regexp v 136 | regexp (Star u) = star (regexp u) 137 | 138 | 139 | -- Alternatively, use regexp to convert to Cofree, and then use (!). 140 | 141 | #ifdef EXAMPLES 142 | 143 | {-------------------------------------------------------------------- 144 | Examples 145 | --------------------------------------------------------------------} 146 | 147 | type L = RegExp ((->) Char) Bool 148 | -- type L = RegExp (Map Char) Bool 149 | 150 | star1 :: Semiring b => b -> b 151 | star1 b = one <+> b <.> star1 b 152 | 153 | star2 :: L -> L 154 | star2 b = one <+> b <.> star2 b 155 | 156 | star3 :: L -> L 157 | star3 b = Value True <+> b <.> star3 b 158 | 159 | x1 :: L 160 | x1 = star1 (single "a") 161 | 162 | -- Non-recursive examples are tidier with OPTIMIZE 163 | 164 | -- >>> pig :: L 165 | -- Char 'p' :<.> (Char 'i' :<.> Char 'g') 166 | -- >>> pink :: L 167 | -- Char 'p' :<.> (Char 'i' :<.> (Char 'n' :<.> Char 'k')) 168 | -- >>> pp :: L 169 | -- Char 'p' :<.> (Char 'i' :<.> (Char 'n' :<.> Char 'k')) :<+> Char 'p' :<.> (Char 'i' :<.> Char 'g') 170 | 171 | -- >>> pig :: L' 172 | -- C (Char 'p' :<.> (Char 'i' :<.> Char 'g')) 173 | -- >>> pink :: L' 174 | -- C (Char 'p' :<.> (Char 'i' :<.> (Char 'n' :<.> Char 'k'))) 175 | -- >>> pp :: L' 176 | -- C (Char 'p' :<.> (Char 'i' :<.> (Char 'n' :<.> Char 'k')) :<+> Char 'p' :<.> (Char 'i' :<.> Char 'g')) 177 | 178 | -- >>> pig :: L' 179 | -- C ((Char 'p' :<.> (Char 'i' :<.> (Char 'g' :<.> Value True))) :<.> Value True) 180 | -- >>> pink :: L' 181 | -- C ((Char 'p' :<.> (Char 'i' :<.> (Char 'n' :<.> (Char 'k' :<.> Value True)))) :<.> Value True) 182 | -- >>> pp :: L' 183 | -- C ((Char 'p' :<.> (Char 'i' :<.> (Char 'n' :<.> (Char 'k' :<.> Value True)))) :<.> Value True :<+> (Char 'p' :<.> (Char 'i' :<.> (Char 'g' :<.> Value True))) :<.> Value True) 184 | -- >>> (anbn :: L') ! "" 185 | -- True 186 | -- >>> deriv (anbn :: L') 187 | 188 | -- The following examples wedge. I think they worked when we used functions 189 | -- instead of maps. 190 | 191 | -- >>> (anbn :: L') ! "a" 192 | -- False 193 | -- >>> (anbn :: L') ! "ab" 194 | -- True 195 | -- >>> (anbn :: L') ! "aabb" 196 | -- True 197 | -- >>> (anbn :: L') ! "aaaaabbbbb" 198 | -- True 199 | 200 | #endif 201 | 202 | 203 | #if 0 204 | #endif 205 | -------------------------------------------------------------------------------- /test/Benchmarks/opt regexp.md: -------------------------------------------------------------------------------- 1 | 2 | # Group "star-a" 3 | 4 | ``` 5 | 6 | benchmarking "a"/RegExp:Function 7 | time 442.4 ns (439.5 ns .. 445.7 ns) 8 | 0.999 R² (0.999 R² .. 1.000 R²) 9 | mean 445.6 ns (441.9 ns .. 453.1 ns) 10 | std dev 12.18 ns (6.401 ns .. 18.36 ns) 11 | variance introduced by outliers: 36% (moderately inflated) 12 | 13 | benchmarking "a"/RegExp:Map 14 | time 355.3 ns (352.7 ns .. 359.3 ns) 15 | 0.999 R² (0.998 R² .. 1.000 R²) 16 | mean 355.7 ns (353.3 ns .. 362.4 ns) 17 | std dev 10.04 ns (4.439 ns .. 17.40 ns) 18 | variance introduced by outliers: 38% (moderately inflated) 19 | 20 | benchmarking "a"/RegExp:IntMap 21 | time 352.6 ns (350.9 ns .. 354.6 ns) 22 | 1.000 R² (1.000 R² .. 1.000 R²) 23 | mean 352.8 ns (351.3 ns .. 354.6 ns) 24 | std dev 4.295 ns (3.512 ns .. 5.377 ns) 25 | variance introduced by outliers: 10% (moderately inflated) 26 | 27 | benchmarking a50/RegExp:Function 28 | time 15.00 μs (14.89 μs .. 15.13 μs) 29 | 0.999 R² (0.999 R² .. 1.000 R²) 30 | mean 15.06 μs (14.96 μs .. 15.34 μs) 31 | std dev 377.3 ns (172.8 ns .. 715.1 ns) 32 | variance introduced by outliers: 24% (moderately inflated) 33 | 34 | benchmarking a50/RegExp:Map 35 | time 10.59 μs (10.54 μs .. 10.63 μs) 36 | 1.000 R² (1.000 R² .. 1.000 R²) 37 | mean 10.62 μs (10.58 μs .. 10.67 μs) 38 | std dev 114.1 ns (84.24 ns .. 153.1 ns) 39 | 40 | benchmarking a50/RegExp:IntMap 41 | time 10.27 μs (10.21 μs .. 10.33 μs) 42 | 1.000 R² (0.999 R² .. 1.000 R²) 43 | mean 10.29 μs (10.23 μs .. 10.39 μs) 44 | std dev 195.7 ns (131.9 ns .. 276.0 ns) 45 | variance introduced by outliers: 16% (moderately inflated) 46 | 47 | ``` 48 | 49 | # Group "letters" 50 | 51 | ``` 52 | 53 | benchmarking asdf-50/RegExp:Function 54 | time 3.994 ms (3.951 ms .. 4.040 ms) 55 | 0.999 R² (0.998 R² .. 1.000 R²) 56 | mean 4.023 ms (3.990 ms .. 4.055 ms) 57 | std dev 81.76 μs (61.86 μs .. 108.6 μs) 58 | 59 | benchmarking asdf-50/RegExp:Map 60 | time 3.231 ms (3.190 ms .. 3.273 ms) 61 | 0.999 R² (0.998 R² .. 0.999 R²) 62 | mean 3.287 ms (3.259 ms .. 3.329 ms) 63 | std dev 86.04 μs (57.24 μs .. 129.6 μs) 64 | 65 | benchmarking asdf-50/RegExp:IntMap 66 | time 3.215 ms (3.043 ms .. 3.354 ms) 67 | 0.985 R² (0.976 R² .. 0.992 R²) 68 | mean 3.006 ms (2.939 ms .. 3.099 ms) 69 | std dev 203.4 μs (172.1 μs .. 259.5 μs) 70 | variance introduced by outliers: 37% (moderately inflated) 71 | 72 | ``` 73 | 74 | # Group "dyck" 75 | 76 | ``` 77 | 78 | benchmarking "[]"/RegExp:Function 79 | time 1.824 μs (1.793 μs .. 1.858 μs) 80 | 0.996 R² (0.993 R² .. 0.998 R²) 81 | mean 1.863 μs (1.823 μs .. 1.917 μs) 82 | std dev 129.6 ns (85.15 ns .. 219.4 ns) 83 | variance introduced by outliers: 77% (severely inflated) 84 | 85 | benchmarking "[[]]"/RegExp:Function 86 | time 4.037 μs (4.008 μs .. 4.076 μs) 87 | 0.999 R² (0.999 R² .. 1.000 R²) 88 | mean 4.055 μs (4.023 μs .. 4.097 μs) 89 | std dev 94.03 ns (69.75 ns .. 125.5 ns) 90 | variance introduced by outliers: 24% (moderately inflated) 91 | 92 | benchmarking "[[a]]"/RegExp:Function 93 | time 3.054 μs (3.018 μs .. 3.095 μs) 94 | 0.999 R² (0.998 R² .. 1.000 R²) 95 | mean 3.059 μs (3.025 μs .. 3.102 μs) 96 | std dev 92.42 ns (67.31 ns .. 126.8 ns) 97 | variance introduced by outliers: 36% (moderately inflated) 98 | 99 | benchmarking "[[]][]"/RegExp:Function 100 | time 5.524 μs (5.461 μs .. 5.599 μs) 101 | 0.999 R² (0.998 R² .. 1.000 R²) 102 | mean 5.509 μs (5.468 μs .. 5.567 μs) 103 | std dev 125.9 ns (98.77 ns .. 165.2 ns) 104 | variance introduced by outliers: 23% (moderately inflated) 105 | 106 | ``` 107 | 108 | # Group "anbn" 109 | 110 | ``` 111 | 112 | benchmarking ""/RegExp:Function 113 | time 27.07 ns (26.89 ns .. 27.25 ns) 114 | 1.000 R² (1.000 R² .. 1.000 R²) 115 | mean 27.02 ns (26.88 ns .. 27.22 ns) 116 | std dev 397.1 ps (275.1 ps .. 561.2 ps) 117 | variance introduced by outliers: 17% (moderately inflated) 118 | 119 | benchmarking "ab"/RegExp:Function 120 | time 1.486 μs (1.475 μs .. 1.498 μs) 121 | 1.000 R² (0.999 R² .. 1.000 R²) 122 | mean 1.485 μs (1.473 μs .. 1.500 μs) 123 | std dev 31.58 ns (21.79 ns .. 48.42 ns) 124 | variance introduced by outliers: 23% (moderately inflated) 125 | 126 | benchmarking "aacbb"/RegExp:Function 127 | time 2.666 μs (2.645 μs .. 2.684 μs) 128 | 1.000 R² (1.000 R² .. 1.000 R²) 129 | mean 2.654 μs (2.641 μs .. 2.674 μs) 130 | std dev 37.57 ns (27.44 ns .. 50.11 ns) 131 | variance introduced by outliers: 11% (moderately inflated) 132 | 133 | benchmarking "aaabbb"/RegExp:Function 134 | time 4.693 μs (4.667 μs .. 4.717 μs) 135 | 1.000 R² (0.999 R² .. 1.000 R²) 136 | mean 4.685 μs (4.662 μs .. 4.707 μs) 137 | std dev 60.54 ns (48.42 ns .. 83.94 ns) 138 | 139 | benchmarking "aaabbbb"/RegExp:Function 140 | time 4.780 μs (4.739 μs .. 4.822 μs) 141 | 0.999 R² (0.999 R² .. 1.000 R²) 142 | mean 4.762 μs (4.724 μs .. 4.824 μs) 143 | std dev 122.1 ns (84.35 ns .. 193.5 ns) 144 | variance introduced by outliers: 28% (moderately inflated) 145 | 146 | benchmarking 30/RegExp:Function 147 | time 278.9 μs (277.7 μs .. 280.5 μs) 148 | 0.999 R² (0.998 R² .. 1.000 R²) 149 | mean 283.1 μs (280.7 μs .. 291.4 μs) 150 | std dev 10.39 μs (4.486 μs .. 20.61 μs) 151 | variance introduced by outliers: 27% (moderately inflated) 152 | 153 | ``` 154 | -------------------------------------------------------------------------------- /test/Benchmarks/no-opt regexp.md: -------------------------------------------------------------------------------- 1 | 2 | # Group "star-a" 3 | 4 | ``` 5 | 6 | benchmarking "a"/RegExp:Function 7 | time 368.5 ns (365.8 ns .. 371.5 ns) 8 | 0.999 R² (0.999 R² .. 1.000 R²) 9 | mean 369.7 ns (366.8 ns .. 374.1 ns) 10 | std dev 8.669 ns (6.324 ns .. 12.29 ns) 11 | variance introduced by outliers: 30% (moderately inflated) 12 | 13 | benchmarking "a"/RegExp:Map 14 | time 353.3 ns (349.8 ns .. 357.2 ns) 15 | 0.999 R² (0.999 R² .. 1.000 R²) 16 | mean 352.4 ns (350.0 ns .. 355.5 ns) 17 | std dev 6.608 ns (4.544 ns .. 9.254 ns) 18 | variance introduced by outliers: 21% (moderately inflated) 19 | 20 | benchmarking "a"/RegExp:IntMap 21 | time 355.2 ns (353.0 ns .. 357.5 ns) 22 | 1.000 R² (1.000 R² .. 1.000 R²) 23 | mean 356.1 ns (354.0 ns .. 358.0 ns) 24 | std dev 5.292 ns (4.270 ns .. 6.328 ns) 25 | variance introduced by outliers: 15% (moderately inflated) 26 | 27 | benchmarking a50/RegExp:Function 28 | time 3.266 ms (3.223 ms .. 3.323 ms) 29 | 0.998 R² (0.996 R² .. 0.999 R²) 30 | mean 3.299 ms (3.270 ms .. 3.338 ms) 31 | std dev 89.00 μs (66.92 μs .. 131.0 μs) 32 | 33 | benchmarking a50/RegExp:Map 34 | time 17.97 μs (17.82 μs .. 18.14 μs) 35 | 0.999 R² (0.999 R² .. 1.000 R²) 36 | mean 18.02 μs (17.87 μs .. 18.20 μs) 37 | std dev 389.9 ns (313.0 ns .. 482.6 ns) 38 | variance introduced by outliers: 19% (moderately inflated) 39 | 40 | benchmarking a50/RegExp:IntMap 41 | time 18.62 μs (18.18 μs .. 19.10 μs) 42 | 0.997 R² (0.996 R² .. 0.999 R²) 43 | mean 18.52 μs (18.30 μs .. 18.90 μs) 44 | std dev 695.1 ns (462.1 ns .. 994.4 ns) 45 | variance introduced by outliers: 41% (moderately inflated) 46 | 47 | ``` 48 | 49 | # Group "letters" 50 | 51 | ``` 52 | 53 | benchmarking asdf-50/RegExp:Function 54 | time 22.67 s (22.00 s .. 24.17 s) 55 | 0.999 R² (0.999 R² .. 1.000 R²) 56 | mean 22.39 s (22.21 s .. 22.57 s) 57 | std dev 227.9 ms (99.00 ms .. 287.7 ms) 58 | variance introduced by outliers: 19% (moderately inflated) 59 | 60 | benchmarking asdf-50/RegExp:Map 61 | time 3.334 ms (3.267 ms .. 3.405 ms) 62 | 0.998 R² (0.996 R² .. 0.999 R²) 63 | mean 3.466 ms (3.423 ms .. 3.527 ms) 64 | std dev 127.9 μs (87.80 μs .. 181.3 μs) 65 | variance introduced by outliers: 15% (moderately inflated) 66 | 67 | benchmarking asdf-50/RegExp:IntMap 68 | time 3.114 ms (3.016 ms .. 3.233 ms) 69 | 0.996 R² (0.993 R² .. 0.999 R²) 70 | mean 3.135 ms (3.099 ms .. 3.170 ms) 71 | std dev 91.81 μs (74.63 μs .. 116.6 μs) 72 | variance introduced by outliers: 11% (moderately inflated) 73 | 74 | ``` 75 | 76 | # Group "dyck" 77 | 78 | ``` 79 | 80 | benchmarking "[]"/RegExp:Function 81 | time 2.430 μs (2.410 μs .. 2.450 μs) 82 | 0.999 R² (0.999 R² .. 1.000 R²) 83 | mean 2.440 μs (2.422 μs .. 2.461 μs) 84 | std dev 51.17 ns (40.01 ns .. 68.95 ns) 85 | variance introduced by outliers: 22% (moderately inflated) 86 | 87 | benchmarking "[[]]"/RegExp:Function 88 | time 10.70 μs (10.48 μs .. 10.88 μs) 89 | 0.998 R² (0.998 R² .. 0.999 R²) 90 | mean 10.56 μs (10.46 μs .. 10.71 μs) 91 | std dev 305.8 ns (233.4 ns .. 442.1 ns) 92 | variance introduced by outliers: 31% (moderately inflated) 93 | 94 | benchmarking "[[a]]"/RegExp:Function 95 | time 17.56 μs (17.47 μs .. 17.67 μs) 96 | 1.000 R² (0.999 R² .. 1.000 R²) 97 | mean 17.71 μs (17.59 μs .. 17.87 μs) 98 | std dev 330.3 ns (253.1 ns .. 448.6 ns) 99 | variance introduced by outliers: 15% (moderately inflated) 100 | 101 | benchmarking "[[]][]"/RegExp:Function 102 | time 27.58 μs (27.02 μs .. 28.02 μs) 103 | 0.999 R² (0.998 R² .. 0.999 R²) 104 | mean 27.26 μs (27.03 μs .. 27.60 μs) 105 | std dev 723.2 ns (530.5 ns .. 940.6 ns) 106 | variance introduced by outliers: 24% (moderately inflated) 107 | 108 | ``` 109 | 110 | # Group "anbn" 111 | 112 | ``` 113 | 114 | benchmarking ""/RegExp:Function 115 | time 28.50 ns (28.24 ns .. 28.87 ns) 116 | 0.999 R² (0.999 R² .. 0.999 R²) 117 | mean 28.72 ns (28.40 ns .. 29.03 ns) 118 | std dev 787.8 ps (681.4 ps .. 974.4 ps) 119 | variance introduced by outliers: 42% (moderately inflated) 120 | 121 | benchmarking "ab"/RegExp:Function 122 | time 2.294 μs (2.264 μs .. 2.336 μs) 123 | 0.998 R² (0.997 R² .. 0.999 R²) 124 | mean 2.315 μs (2.283 μs .. 2.368 μs) 125 | std dev 102.2 ns (68.07 ns .. 136.0 ns) 126 | variance introduced by outliers: 56% (severely inflated) 127 | 128 | benchmarking "aacbb"/RegExp:Function 129 | time 15.73 μs (15.45 μs .. 16.05 μs) 130 | 0.998 R² (0.997 R² .. 0.999 R²) 131 | mean 15.80 μs (15.55 μs .. 16.05 μs) 132 | std dev 610.3 ns (513.3 ns .. 727.4 ns) 133 | variance introduced by outliers: 43% (moderately inflated) 134 | 135 | benchmarking "aaabbb"/RegExp:Function 136 | time 24.06 μs (23.92 μs .. 24.23 μs) 137 | 0.999 R² (0.998 R² .. 1.000 R²) 138 | mean 24.28 μs (24.02 μs .. 24.63 μs) 139 | std dev 757.8 ns (497.2 ns .. 1.004 μs) 140 | variance introduced by outliers: 31% (moderately inflated) 141 | 142 | benchmarking "aaabbbb"/RegExp:Function 143 | time 36.65 μs (36.48 μs .. 36.87 μs) 144 | 1.000 R² (0.999 R² .. 1.000 R²) 145 | mean 36.85 μs (36.59 μs .. 37.26 μs) 146 | std dev 802.4 ns (506.3 ns .. 1.238 μs) 147 | variance introduced by outliers: 18% (moderately inflated) 148 | 149 | benchmarking 30/RegExp:Function 150 | time 96.66 ms (94.61 ms .. 99.47 ms) 151 | 0.999 R² (0.998 R² .. 1.000 R²) 152 | mean 92.75 ms (90.01 ms .. 94.29 ms) 153 | std dev 2.748 ms (1.469 ms .. 3.980 ms) 154 | variance introduced by outliers: 14% (moderately inflated) 155 | 156 | ``` 157 | -------------------------------------------------------------------------------- /notes.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Notes for a paper about generalized convolution 3 | substMap: [("<+>","+"),("<.>","·")] 4 | ... 5 | 6 | [*Derivatives of Regular Expressions*]: http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.98.4378 "paper by Janusz Brzozowski (1964" 7 | 8 | \newcommand\set[1]{\{\,#1\,\}} 9 | \newcommand\Pow{\mathcal{P}} 10 | \newcommand\setop[1]{\mathbin{\hat{#1}}} 11 | \newcommand\eps{\varepsilon} 12 | \newcommand\closure[1]{#1^{\ast}} 13 | \newcommand\mappend{\diamond} 14 | \newcommand\cat{\cdot} 15 | \newcommand\single\overline 16 | \newcommand\union{+} 17 | \newcommand\bigunion{\sum} 18 | \newcommand\has[2]{\delta_{#1}\,#2} 19 | \newcommand\del[1]{\has\eps{#1}} 20 | \newcommand\consl[2]{\single{[#1]} \cat #2} 21 | \newcommand\conslp[2]{\consl{#1}{(#2)}} 22 | \newcommand\lquot{\setminus} 23 | 24 | ## Paper outline 25 | 26 | ### Contributions 27 | 28 | Generalize and unify: 29 | 30 | * Parsing by "derivatives" for regular languages and beyond, generalized beyond *sets* of strings. 31 | Some other possibilities: multisets, fuzzy sets, parsing probability distributions. 32 | Can we get semantic analysis cheaply? 33 | * Includes probabilistic computing. 34 | * Maybe generalize parsing beyond strings to trees and other shapes. 35 | Perhaps 2D parsing? 36 | * Polynomial arithmetic (addition, multiplication, and I hope composition), generalized to multivariate case. 37 | * Convolution: higher dimensions, continuous and discrete, and other shaped spaces. 38 | * Super-memoization. 39 | 40 | ### Languages 41 | 42 | * Identify the vocabulary of a "language" (singleton plus semiring). 43 | * Define where a language is set of strings. 44 | * Note the semiring interface. 45 | 46 | ### Matching 47 | 48 | * The set-based language definition doesn't give an implementation, because the sets may be infinite. 49 | * Change to a predicate, and specify the new method definitions via homomorphism equations. 50 | Easy to solve, and gets an effective implementation (thanks to laziness). 51 | * Rephrase in terms of string predicates/recognizers, where $s \lquot p$ becomes $p \circ (s\,\mappend)$, which specializes to $p \circ (c:)$ when $s=[c]$. 52 | 53 | ### List of successes 54 | 55 | ### Efficient matching 56 | 57 | * First decomposition law: $p = \bigcup\limits_{s \in p} \single s$. 58 | * Second decomposition law: $p = \bigcup\limits_s \has s p$, where 59 | $\has s p = 60 | \begin{cases} 61 | \single s & \text{if $s \in p$} \\ 62 | \emptyset & \text{otherwise} 63 | \end{cases}$. 64 | Specialize to empty strings: $\del p = \has \eps p$. 65 | * So far we can accommodate any monoid. 66 | Now focus on sequences. 67 | * "Derivative": $c \lquot p = \set {s \mid c:s \in p}$. 68 | * Third decomposition law [Brzozowski, 1964, Theorem 4.4]: 69 | $p = \del p \union \bigcup\limits_{c\,\in\,A} \single c \cat (c \lquot p)$. 70 | Holds for all languages, not just regular. 71 | * Maybe same for a free representation (regular expressions), though trivial. 72 | * Review (string) tries. 73 | Note the appearance of $p \eps$ and $p \circ (c:)$. 74 | Define the homomorphism equations, which are easy to solve, via trie isomorphism. 75 | Simplifying yields a simple and efficient implementation. 76 | 77 | ### Generalizing 78 | 79 | * Semirings. 80 | * Convolution. 81 | * Beyond convolution: the free semimodule monad. 82 | * Variations: counting, probability distributions, temporal/spatial convolution. 83 | 84 | ### Other applications 85 | 86 | * Univariate and multivariate polynomials. 87 | * Convolution: discrete and continuous, one- and multi-dimensional, dense and sparse. 88 | * 2D parsing? 89 | 90 | ## Miscellaneous notes 91 | 92 | * Summarize/review languages as sets, including singleton, union, concatenation, and star/closure. 93 | Survey some representations for parsing, including a naive one as predicates (requiring nondeterministic splitting). 94 | For regular languages specified in this vocabulary, the classic technique for efficient parsing is to generate a finite state machine. 95 | Another technique is Brzozowski's "derivatives of regular expressions", extended much more recently to context-free languages. 96 | Maybe revisit Brzozowski's technique; alternatively just mention, and compare in related work. 97 | Calculate a generalized variant from a simple specification. 98 | Key is a known but not widely used monadic structure, namely that of *free semimodules*. 99 | 100 | * Once I have a restricted `Applicative` instance, I can make language itself be a monoid in a perfectly standard way, with `mempty = pure mempty` and `mappend = liftA2 mappend`. 101 | Likewise, temporal and (multidimensional) spatial convolution is simply `liftA2 (+)`, which is a standard definition for `(+)` on applicatives. 102 | We can give full instances for numeric classes in this style. 103 | * For multivariate polynomials, I was thinking of using maps from exponent tuples. 104 | Alternatively, replace tuples by statically sized vectors. 105 | More generally, use a representable functor or even zippable. 106 | I guess anything "summable", i.e., a monoid. 107 | Perhaps whatever simplifies parsing and unparsing. 108 | * What symbol to use for `mappend`? 109 | Some candidates: \\cdot, \\diamond, \\ast, \\circledast, \\APLstar, several dingbats choices (\\ding), \\Snowflake. 110 | * The `Comonad` interface captures Brzozowski's two main operations: contains-empty and derivative 111 | ([notes](11-25#Parsing-with-derivatives-(A))). 112 | * There's also a `Monad` for functions that's unlike the usual one and seems to support convolution 113 | ([notes](11-25#Parsing-with-derivatives-(A))). 114 | Is this monad known? 115 | Maybe it corresponds to the vector space monad in one of Dan Piponi blog posts. 116 | Yes; it's the "free vector space monad", and more generally the "free semimodule monad". 117 | * I think the monad structure is more essential than the comonad structure. 118 | ([notes](12-02#Parsing-with-derivatives-(A))). 119 | * Generalize and use theorem 4.4 from [*Derivatives of Regular Expressions*] 120 | ([notes](11-25#Parsing-with-derivatives-(B/))). 121 | * Is Day convolution helpful? 122 | * Can I define languages monadically and get efficient convolutional parsing? 123 | Must values then be in a semiring? 124 | * Multivariate polynomials and power series. 125 | Rational? Streams? 126 | * Come up with another data type besides lists having a useful binary operation. 127 | Needn't be monoidal. 128 | Trees? 129 | * Discuss the function-of-monoid perspective explored in my journal notes for 2018-12-29 and 2018-12-30. 130 | I like its duality with the *Comonad* instance for the same type. 131 | 132 | ## Super-memoization 133 | 134 | I suspect that my take on Brzozowski's technique is just one example of a much more general technique akin to memoization but in which we get partial sharing of work across calls to a function with *different* arguments (unlike regular memoization). 135 | -------------------------------------------------------------------------------- /macros.tex: -------------------------------------------------------------------------------- 1 | \newcommand\nc\newcommand 2 | \nc\rnc\renewcommand 3 | 4 | %% https://www.conference-publishing.com/Help.php 5 | \usepackage[utf8]{inputenc} 6 | \usepackage[T1]{fontenc} 7 | \usepackage{microtype} 8 | 9 | \usepackage{epsfig} 10 | \usepackage{latexsym} 11 | \usepackage{amsmath} 12 | \usepackage{amssymb} 13 | \usepackage{color} 14 | 15 | \usepackage{subcaption} 16 | 17 | \usepackage[us,12hr]{datetime} 18 | \usepackage{setspace} 19 | 20 | % bottom prevents floats (figures) below footnotes, while hang & flushmargin 21 | % avoids indentation. 22 | \usepackage[bottom,hang,flushmargin]{footmisc} 23 | 24 | \nc\out[1]{} 25 | 26 | %% \nc\noteOut[2]{\note{#1}\out{#2}} 27 | 28 | %% To redefine for a non-draft 29 | \nc\indraft[1]{#1} 30 | 31 | \nc\note[1]{\indraft{\textcolor{red}{#1}}} 32 | 33 | \nc\notefoot[1]{\note{\footnote{\note{#1}}}} 34 | 35 | \nc\todo[1]{\note{To do: #1}} 36 | 37 | \nc\eqnlabel[1]{\label{equation:#1}} 38 | \nc\eqnref[1]{Equation~\ref{equation:#1}} 39 | \nc\eqnreftwo[2]{Equations~\ref{equation:#1} and \ref{equation:#2}} 40 | 41 | \nc\figlabel[1]{\label{fig:#1}} 42 | \nc\figref[1]{Figure~\ref{fig:#1}} 43 | \nc\figreftwo[2]{Figures~\ref{fig:#1} and \ref{fig:#2}} 44 | 45 | \nc\seclabel[1]{\label{sec:#1}} 46 | \nc\secref[1]{Section~\ref{sec:#1}} 47 | \nc\secreftwo[2]{Sections~\ref{sec:#1} and~\ref{sec:#2}} 48 | \nc\secrefs[2]{Sections \ref{sec:#1} through \ref{sec:#2}} 49 | 50 | \nc\appref[1]{Appendix~\ref{sec:#1}} 51 | 52 | %% The name \secdef is already taken 53 | \nc\sectiondef[1]{\section{#1}\seclabel{#1}} 54 | \nc\subsectiondef[1]{\subsection{#1}\seclabel{#1}} 55 | \nc\subsubsectiondef[1]{\subsubsection{#1}\seclabel{#1}} 56 | 57 | \nc\needcite{\note{[ref]}} 58 | 59 | % \nc\myurl\texttt 60 | 61 | %% For acmlarge, I have to put my figures side by side 62 | 63 | %% http://tex.stackexchange.com/questions/5769/two-figures-side-by-side 64 | 65 | \setlength{\fboxsep}{1ex} 66 | \setlength{\fboxrule}{0.05ex} % gray or dotted might work better 67 | 68 | %% \figone{fraction}{label}{caption}{content} 69 | \nc\figoneW[4]{ 70 | \fbox{% 71 | \begin{minipage}{#1\linewidth} 72 | \centering 73 | \setlength\mathindent{0ex} 74 | #4 75 | \vspace*{-5ex} 76 | \captionof{figure}{#3} 77 | \label{fig:#2} 78 | \end{minipage} 79 | } 80 | } 81 | \nc\figone{\figoneW{\stdWidth}} 82 | 83 | %% %% \figo{fraction}{content} 84 | %% \nc\figoW[2]{ 85 | %% \fbox{ 86 | %% \begin{minipage}{#1\linewidth} 87 | %% \centering 88 | %% \setlength\mathindent{0ex} 89 | %% #2 90 | %% \end{minipage} 91 | %% } 92 | %% } 93 | 94 | \nc\figo[1]{ 95 | \begin{figure} 96 | \centering 97 | #1 98 | \end{figure} 99 | } 100 | 101 | \nc\figp[2]{\begin{figure}\centering #1 \hspace{-2ex} #2\end{figure}} 102 | 103 | % Arguments: env, label, caption, body 104 | \nc\figdefG[4]{\begin{#1}[tbp] 105 | \begin{center} 106 | #4 107 | \end{center} 108 | \caption{#3} 109 | \figlabel{#2} 110 | \end{#1}} 111 | 112 | % Arguments: label, caption, body 113 | \nc\figdef{\figdefG{figure}} 114 | \nc\figdefwide{\figdefG{figure*}} 115 | 116 | % Arguments: label, caption, body 117 | \nc\figrefdef[3]{\figref{#1}\figdef{#1}{#2}{#3}} 118 | 119 | \nc\figrefdefwide[3]{\figref{#1}\figdefwide{#1}{#2}{#3}} 120 | 121 | %% %% \circdefW{frac}{label/file}{caption}{work}{depth} 122 | %% \nc\circdefW[5]{\figoneW{#1}{#2}{#3 \stats{#4}{#5}}{\incpic{#2}}} 123 | %% \nc\circdef{\circdefW{\stdWidth}} 124 | 125 | \nc\stdWidth{0.46} 126 | 127 | %% \figpair{frac1}{frac2}{label1}{caption1}{content1}{label2}{caption2}{content2} 128 | \nc\figpairW[8]{ 129 | \begin{figure} 130 | \centering 131 | \figoneW{#1}{#3}{#4}{#5} 132 | %\hspace{.05\linewidth} 133 | %\hfill 134 | \figoneW{#2}{#6}{#7}{#8} 135 | \end{figure} 136 | } 137 | %% \figpair{label1}{caption1}{content1}{label2}{caption2}{content2} 138 | \nc\figpair{\figpairW{\stdWidth}{\stdWidth}} 139 | 140 | \nc\incpic[1]{\includegraphics[width=\linewidth]{figures/#1}} 141 | 142 | %% \incpicW{frac}{file} 143 | \nc\incpicW[2]{\includegraphics[width=#1\linewidth]{figures/#2}} 144 | 145 | %%%%%% 146 | 147 | %% \nc\symTwo[1]{\mathbin{#1\!\!\!#1}} 148 | %% \nc\symThree[1]{\mathbin{#1\!\!\!#1\!\!\!#1}} 149 | 150 | \setlength\mathindent{4ex} 151 | \nc\db[1]{\llbracket#1\rrbracket} 152 | 153 | \nc\smalltriangleup{\triangle} 154 | \nc\smalltriangledown{\triangledown} 155 | 156 | %% Double quote symbol 157 | \nc\dq{\text{\tt\char34}} 158 | %% Quoted haskell string with formatted content 159 | \nc\hquoted[1]{\dq\!#1\!\dq} 160 | 161 | \nc\sectionl[1]{\section{#1}\seclabel{#1}} 162 | \nc\subsectionl[1]{\subsection{#1}\seclabel{#1}} 163 | 164 | \nc\workingHere{ 165 | \vspace{1ex} 166 | \begin{center} 167 | \setlength{\fboxsep}{3ex} 168 | \setlength{\fboxrule}{4pt} 169 | \huge\textcolor{red}{\framebox{Working here}} 170 | \end{center} 171 | \vspace{1ex} 172 | } 173 | 174 | \usepackage{amsthm} 175 | %% %% Roman font, and drop vertical spacing before & after. 176 | \theoremstyle{definition} % remark 177 | 178 | \newtheoremstyle{plainstyle} 179 | {\topsep} % Space above 180 | {\topsep} % Space below 181 | {} % Body font 182 | {} % Indent amount 183 | {\bfseries} % Theorem head font 184 | {.} % Punctuation after theorem head 185 | {.5em} % Space after theorem head 186 | {} % Theorem head spec (can be left empty, meaning `normal') 187 | 188 | %% \theoremstyle{plainstyle} 189 | 190 | \newtheorem{definition}{Definition}%[section] 191 | \nc\deflabel[1]{\label{definition:#1}} 192 | \nc\defref[1]{Definition \ref{definition:#1}} 193 | \nc\defreftwo[2]{Definitions \ref{definition:#1} and \ref{definition:#2}} 194 | \nc\defrefs[2]{Definitions \ref{definition:#1} through \ref{definition:#2}} 195 | 196 | \newtheorem{theorem}{Theorem}%[section] 197 | \nc\thmlabel[1]{\label{theorem:#1}} 198 | \nc\thmref[1]{Theorem \ref{theorem:#1}} 199 | \nc\thmreftwo[2]{Theorems \ref{theorem:#1} and \ref{theorem:#2}} 200 | \nc\thmrefs[2]{Theorems \ref{theorem:#1} through \ref{theorem:#2}} 201 | 202 | \newtheorem{corollary}{Corollary}[theorem] 203 | \nc\corlabel[1]{\label{corollary:#1}} 204 | \nc\corref[1]{Corollary \ref{corollary:#1}} 205 | \nc\correftwo[2]{Corollaries \ref{corollary:#1} and \ref{corollary:#2}} 206 | \nc\correfs[2]{Corollaries \ref{corollary:#1} through \ref{corollary:#2}} 207 | 208 | \newtheorem{lemma}[theorem]{Lemma} 209 | \nc\lemlabel[1]{\label{lemma:#1}} 210 | \nc\lemref[1]{Lemma \ref{lemma:#1}} 211 | \nc\lemreftwo[2]{Lemmas \ref{lemma:#1} and \ref{lemma:#2}} 212 | \nc\lemrefthree[3]{Lemmas \ref{lemma:#1}, \ref{lemma:#2}, and \ref{lemma:#3}} 213 | \nc\lemrefs[2]{Lemmas \ref{lemma:#1} through \ref{lemma:#2}} 214 | 215 | \newtheorem{exercise}[theorem]{Exercise} 216 | \nc\exclabel[1]{\label{exercise:#1}} 217 | \nc\excref[1]{Exercise \ref{exercise:#1}} 218 | \nc\excreftwo[2]{Exercises \ref{exercise:#1} and \ref{exercise:#2}} 219 | \nc\excrefs[2]{Exercises \ref{exercise:#1} through \ref{exercise:#2}} 220 | 221 | \definecolor{codesep}{gray}{0.85} 222 | \nc\codesep[1]{ 223 | \begin{minipage}[b]{0ex} 224 | \color{codesep}{\rule[1ex]{0.8pt}{#1}} 225 | \end{minipage}} 226 | 227 | \nc\twocol[4]{ 228 | \\ 229 | \begin{minipage}[c]{#1\textwidth} 230 | #2 231 | \vspace{-2ex} 232 | \end{minipage} 233 | \begin{minipage}[c]{#3\textwidth} % \mathindent1em 234 | #4 235 | \vspace{-2ex} 236 | \end{minipage} 237 | \\ 238 | } 239 | 240 | %% For multiple footnotes at a point. Adapted to recognize \notefoot as well 241 | %% as \footnote. See https://tex.stackexchange.com/a/71347, 242 | \let\oldFootnote\footnote 243 | \nc\nextToken\relax 244 | \rnc\footnote[1]{% 245 | \oldFootnote{#1}\futurelet\nextToken\isFootnote} 246 | \nc\footcomma[1]{\ifx#1\nextToken\textsuperscript{,}\fi} 247 | \nc\isFootnote{% 248 | \footcomma\footnote 249 | \footcomma\notefoot 250 | } 251 | 252 | %% https://tex.stackexchange.com/questions/336854/typesetting-a-wider-bar-sign 253 | \DeclareFontFamily{U}{mathx}{\hyphenchar\font45} 254 | \DeclareFontShape{U}{mathx}{m}{n}{ 255 | <5> <6> <7> <8> <9> <10> 256 | <10.95> <12> <14.4> <17.28> <20.74> <24.88> 257 | mathx10 258 | }{} 259 | \DeclareSymbolFont{mathx}{U}{mathx}{m}{n} 260 | \DeclareFontSubstitution{U}{mathx}{m}{n} 261 | \DeclareMathAccent{\widebar}{0}{mathx}{"73} % " 262 | 263 | \usepackage{footnotebackref} 264 | \usepackage{hyperref} 265 | 266 | -------------------------------------------------------------------------------- /src/Cofree.hs: -------------------------------------------------------------------------------- 1 | #ifdef EXAMPLES 2 | {-# OPTIONS_GHC -Wno-unused-imports #-} -- Examples 3 | #endif 4 | 5 | -- | List tries using Convo 6 | 7 | module Cofree where 8 | 9 | import Prelude hiding (sum,product) 10 | 11 | import Data.Functor.Classes (Show1(..),showsPrec1) 12 | import GHC.Exts (coerce) 13 | import Data.Map (Map) 14 | import qualified Data.Map as M 15 | 16 | import Misc 17 | import Constrained 18 | import Semi 19 | 20 | #ifdef EXAMPLES 21 | import Examples 22 | import ShareMap (ShareMap) 23 | import Data.Set (Set) 24 | import qualified Data.Set as S 25 | #endif 26 | 27 | -- #include "GenInstances.inc" 28 | 29 | -- TODO: maybe rename Cofree to "Cofree". I'd use Ed's Cofree from the "free" library, 30 | -- but he defined Key (Cofree f) = Seq (Key f), and I want [Key f]. Oh well. 31 | 32 | -- Move elsewhere 33 | 34 | infix 1 <: 35 | (<:) :: b -> (c -> ([c] -> b)) -> ([c] -> b) 36 | b <: h = \ case { [] -> b ; c:cs -> h c cs } 37 | 38 | -- -- Experiment 39 | -- infix 1 <# 40 | -- (<#) :: (Indexable ([c] -> b) h, Key h ~ c) 41 | -- => b -> h ([c] -> b) -> ([c] -> b) 42 | -- b <# h = \ case { [] -> b ; c:cs -> (h ! c) cs } 43 | 44 | -- | List trie, denoting '[c] -> b' 45 | infix 1 :< 46 | data Cofree h b = b :< h (Cofree h b) deriving Functor 47 | 48 | #if 0 49 | -- Swiped from Control.Comonad.Cofree 50 | instance (Show1 f, Show a) => Show (Cofree f a) where showsPrec = showsPrec1 51 | instance Show1 f => Show1 (Cofree f) where 52 | liftShowsPrec sp sl = go 53 | where 54 | goList = liftShowList sp sl 55 | go d (a :< ds) = showParen (d > 5) $ 56 | sp 6 a . showString " :< " . liftShowsPrec go goList 5 ds 57 | #else 58 | instance (ConF Show h, Show b) => Show (Cofree h b) where 59 | showsPrec p (a :< ds) = showParen (p > 5) $ 60 | showsPrec 1 a . showString " :< " . showsPrec 1 ds 61 | #endif 62 | 63 | 64 | -- instance Functor h => Functor (Cofree h) where 65 | -- fmap f = go where go (a :< dp) = f a :< fmap go dp 66 | -- -- fmap f (a :< dp) = f a :< (fmap.fmap) f dp 67 | -- -- fmap f (a :< dp) = f a :< fmap (fmap f) dp 68 | 69 | -- TODO: I probably want FunctorC h, and inherit Ok. 70 | instance Functor h => FunctorC (Cofree h) 71 | 72 | instance Indexable c (Cofree h b) (h (Cofree h b)) => Indexable [c] b (Cofree h b) where 73 | -- (b :< _ ) ! [] = b 74 | -- (_ :< ts) ! (k:ks) = ts ! k ! ks 75 | -- (b :< dp) ! w = case w of { [] -> b ; c:cs -> dp ! c ! cs } 76 | -- (!) (b :< dp) = b <: (!) (fmap (!) dp) 77 | (!) (b :< dp) = b <: (!) . (!) dp 78 | 79 | instance Listable c (Cofree h b) (h (Cofree h b)) => Listable [c] b (Cofree h b) where 80 | toList = go [] 81 | where 82 | go cs (a :< dp) = 83 | (cs,a) : concatMap (\ (c,t) -> go (c:cs) t) (toList dp) 84 | 85 | instance (Additive (h (Cofree h b)), Additive b) => Additive (Cofree h b) where 86 | zero = zero :< zero 87 | (a :< dp) <+> (b :< dq) = a <+> b :< dp <+> dq 88 | 89 | -- FunctorSemimodule(Cofree h) 90 | 91 | -- instance (Functor h, Semiring b) => LeftSemimodule b (Cofree h b) where scale s = fmap (s <.>) 92 | 93 | -- instance (Functor h, Semiring b) => LeftSemimodule b (Cofree h b) where 94 | -- s `scale` (b :< dp) = s <.> b :< fmap (s `scale`) dp 95 | 96 | instance (Functor h, Semiring b) => LeftSemimodule b (Cofree h b) where 97 | scale s = fmap (s <.>) 98 | -- scale s = go where go (b :< dp) = s <.> b :< fmap go dp 99 | 100 | instance (Functor h, Additive (h (Cofree h b)), Semiring b, DetectableZero b, DetectableOne b) => Semiring (Cofree h b) where 101 | one = one :< zero 102 | (a :< dp) <.> q = a .> q <+> (zero :< fmap (<.> q) dp) 103 | 104 | instance (Functor h, Additive (h (Cofree h b)), StarSemiring b, DetectableZero b, DetectableOne b) => StarSemiring (Cofree h b) where 105 | star (a :< dp) = q where q = star a .> (one :< fmap (<.> q) dp) 106 | 107 | instance ( HasSingle c (Cofree h b) (h (Cofree h b)), Additive (h (Cofree h b)) 108 | , Ord c, Semiring b ) 109 | => HasSingle [c] b (Cofree h b) where 110 | w +-> b = foldr (\ c t -> zero :< c +-> t) (b :< zero) w 111 | #if 0 112 | ws *-> b = fromBool eps 113 | <+> zero :< sum [ c +-> ws' *-> b | (c,ws') <- M.toList ders] 114 | where 115 | (eps,ders) = unconsSet ws 116 | #else 117 | ws *-> b = 118 | fromBool eps <+> 119 | zero :< sum [ prefixes *-> suffix +-> b | (suffix,prefixes) <- M.toList ders'] 120 | where 121 | (eps,ders') = unconsSet' ws 122 | #endif 123 | 124 | -- preimageM :: (Ord a, Ord b) => Map a b -> Map b (Set a) 125 | -- preimageM m = sum [M.singleton b (S.singleton a) | (a,b) <- M.toList m] 126 | 127 | -- unconsSet :: Ord c => Set [c] -> Bool :* Map c (Set [c]) 128 | 129 | -- unconsSet' :: Ord c => Set [c] -> Bool :* Map [c] (Set c) 130 | 131 | 132 | instance (Additive (h (Cofree h b)), DetectableZero (h (Cofree h b)), DetectableZero b) 133 | => DetectableZero (Cofree h b) where 134 | isZero (a :< dp) = isZero a && isZero dp 135 | 136 | instance (Functor h, Additive (h (Cofree h b)), DetectableZero b, DetectableZero (h (Cofree h b)), DetectableOne b) 137 | => DetectableOne (Cofree h b) where 138 | isOne (a :< dp) = isOne a && isZero dp 139 | 140 | -- | Trim to a finite depth, for examination. 141 | trim :: (Functor h, Additive (h (Cofree h b)), Additive b, DetectableZero b) => Int -> Cofree h b -> Cofree h b 142 | trim 0 _ = zero 143 | trim n (c :< ts) = c :< fmap (trim (n-1)) ts 144 | 145 | -- To remove 146 | unconsSet :: Ord c => Set [c] -> Bool :* Map c (Set [c]) 147 | unconsSet s = 148 | ( [] `S.member` s 149 | , sum [M.singleton c (S.singleton cs) | (c:cs) <- S.toList s] 150 | ) 151 | 152 | -- >>> unconsSet (S.fromList ["a","b","c","d"]) 153 | -- (False,fromList [('a',fromList [""]),('b',fromList [""]),('c',fromList [""]),('d',fromList [""])]) 154 | 155 | -- >>> unconsSet (S.fromList ["act","art","cat","car","","cart"]) 156 | -- (True,fromList [('a',fromList ["ct","rt"]),('c',fromList ["ar","art","at"])]) 157 | 158 | preimageM :: (Ord a, Ord b) => Map a b -> Map b (Set a) 159 | preimageM m = sum [M.singleton b (S.singleton a) | (a,b) <- M.toList m] 160 | 161 | -- Similar to Brzozowski's derivative-based decomposition, but with inverted 162 | -- derivatives. 163 | unconsSet' :: Ord c => Set [c] -> Bool :* Map [c] (Set c) 164 | #if 0 165 | unconsSet' s = ( [] `S.member` s 166 | , sum [M.singleton cs (S.singleton c) | (c:cs) <- S.toList s] ) 167 | #else 168 | unconsSet' s = ( [] `S.member` s 169 | , preimageM (M.fromList [(c,cs) | c:cs <- S.toList s]) ) 170 | #endif 171 | 172 | -- >>> preimageM 173 | 174 | 175 | #ifdef EXAMPLES 176 | 177 | {-------------------------------------------------------------------- 178 | Examples 179 | --------------------------------------------------------------------} 180 | 181 | type L = Cofree (Map Char) Bool 182 | 183 | type LS = Cofree (ShareMap Char) Bool 184 | 185 | -- >>> singleChar "abcd" :: LS 186 | -- False :< SM (fromList [('a','d'),('b','d'),('c','d'),('d','d')]) (fromList [('d',(fromList "abcd",True :< SM (fromList []) (fromList [])))]) 187 | 188 | -- >>> letter :: LS 189 | -- False :< SM (fromList [('a','z'),('b','z'),('c','z'),('d','z'),('e','z'),('f','z'),('g','z'),('h','z'),('i','z'),('j','z'),('k','z'),('l','z'),('m','z'),('n','z'),('o','z'),('p','z'),('q','z'),('r','z'),('s','z'),('t','z'),('u','z'),('v','z'),('w','z'),('x','z'),('y','z'),('z','z')]) (fromList [('z',(fromList "abcdefghijklmnopqrstuvwxyz",True :< SM (fromList []) (fromList [])))]) 190 | 191 | -- >>> pig :: LS 192 | -- False :< SM (fromList [('p','p')]) (fromList [('p',(fromList "p",False :< SM (fromList [('i','i')]) (fromList [('i',(fromList "i",False :< SM (fromList [('g','g')]) (fromList [('g',(fromList "g",True :< SM (fromList []) (fromList [])))])))])))]) 193 | -- >>> pink :: LS 194 | -- False :< SM (fromList [('p','p')]) (fromList [('p',(fromList "p",False :< SM (fromList [('i','i')]) (fromList [('i',(fromList "i",False :< SM (fromList [('n','n')]) (fromList [('n',(fromList "n",False :< SM (fromList [('k','k')]) (fromList [('k',(fromList "k",True :< SM (fromList []) (fromList [])))])))])))])))]) 195 | -- >>> pp :: LS 196 | -- False :< SM (fromList [('p','p')]) (fromList [('p',(fromList "p",False :< SM (fromList [('i','i')]) (fromList [('i',(fromList "i",False :< SM (fromList [('g','g'),('n','n')]) (fromList [('g',(fromList "g",True :< SM (fromList []) (fromList []))),('n',(fromList "n",False :< SM (fromList [('k','k')]) (fromList [('k',(fromList "k",True :< SM (fromList []) (fromList [])))])))])))])))]) 197 | 198 | -- >>> pig :: L 199 | -- False :< [('p',False :< [('i',False :< [('g',True :< [])])])] 200 | -- >>> pink :: L 201 | -- False :< [('p',False :< [('i',False :< [('n',False :< [('k',True :< [])])])])] 202 | -- >>> pp :: L 203 | -- False :< [('p',False :< [('i',False :< [('g',True :< []),('n',False :< [('k',True :< [])])])])] 204 | 205 | -- >>> pig :: L 206 | -- False :< [('p',False :< [('i',False :< [('g',True :< [])])])] 207 | -- >>> pink :: L 208 | -- False :< [('p',False :< [('i',False :< [('n',False :< [('k',True :< [])])])])] 209 | -- >>> pp :: L 210 | -- False :< [('p',False :< [('i',False :< [('g',True :< []),('n',False :< [('k',True :< [])])])])] 211 | 212 | -- >>> trimT 3 as :: L 213 | -- True :< [('a',True :< [('a',True :< [])])] 214 | -- >>> trimT 3 ass :: L 215 | -- True :< [('a',True :< [('a',True :< [])])] 216 | 217 | -- >>> trimT 7 anbn :: L 218 | -- True :< [('a',False :< [('a',False :< [('a',False :< [('b',False :< [('b',False :< [('b',True :< [])])])]),('b',False :< [('b',True :< [])])]),('b',True :< [])])] 219 | 220 | -- >>> (pig :: L) ! "" 221 | -- False 222 | -- >>> (pig :: L) ! "pi" 223 | -- False 224 | -- >>> (pig :: L) ! "pig" 225 | -- True 226 | -- >>> (pig :: L) ! "piggy" 227 | -- False 228 | 229 | -- >>> (anbn :: L) ! "" 230 | -- True 231 | -- >>> (anbn :: L) ! "a" 232 | -- False 233 | -- >>> (anbn :: L) ! "ab" 234 | -- True 235 | -- >>> (anbn :: L) ! "aabb" 236 | -- True 237 | -- >>> (anbn :: L) ! "aaaaabbbbb" 238 | -- True 239 | 240 | #endif 241 | -------------------------------------------------------------------------------- /src/Poly.hs: -------------------------------------------------------------------------------- 1 | -- {-# OPTIONS_GHC -Wno-unused-imports #-} -- TEMP 2 | 3 | -- | Playing with polynomials 4 | 5 | module Poly where 6 | 7 | import Prelude hiding ((^),sum) 8 | 9 | import Data.List (intersperse,sortOn) -- intercalate, 10 | import GHC.Exts(IsString(..)) 11 | import Data.Functor.Identity (Identity(..)) 12 | 13 | -- import Misc ((:*)) 14 | import Semi 15 | import Cofree (Cofree(..)) 16 | 17 | import MMap (Map) -- like Data.Map but better Monoid instance 18 | import qualified MMap as M 19 | 20 | {-------------------------------------------------------------------- 21 | Univariate polynomials 22 | --------------------------------------------------------------------} 23 | 24 | newtype Poly1 z = Poly1 z deriving 25 | ( Additive, Semiring, Indexable n b, Listable n b, HasSingle n b 26 | , Num ) -- , Functor 27 | 28 | instance ( Show b, Ord b, Num b, D01 b, Listable n b z 29 | , Show n, DetectableZero n, DetectableOne n ) 30 | => Show (Poly1 z) where 31 | showsPrec d (Poly1 m) = showsPrec d (Terms (term <$> toList m)) 32 | where 33 | term (i,b) = Term b (Pow (Name "x") i) 34 | 35 | -- instance ( DetectableZero b, DetectableOne b, Show b, Listable n b z 36 | -- , Show n, DetectableZero n, DetectableOne n ) 37 | -- => Show (Poly1 z) where 38 | -- showsPrec d (Poly1 m) = showsPrec d (Terms (term <$> toList m)) 39 | -- where 40 | -- term (i,b) = Term b (Pow (Name "x") i) 41 | 42 | type P1 b = Poly1 (Map N b) 43 | 44 | poly1 :: (Listable N b z, Semiring b) => Poly1 z -> b -> b 45 | poly1 (Poly1 m) z = sum [b <.> z^i | (i,b) <- toList m] 46 | 47 | -- >>> let p = single 1 <+> value 3 :: Poly1 (Map N Z) 48 | -- >>> p 49 | -- 3 + x 50 | -- 51 | -- >>> p^3 52 | -- 27 + 27 * x + 9 * x^2 + x^3 53 | -- 54 | -- >>> p^4 55 | -- 81 + 108 * x + 54 * x^2 + 12 * x^3 + x^4 56 | -- 57 | -- >>> p^5 58 | -- 243 + 405 * x + 270 * x^2 + 90 * x^3 + 15 * x^4 + x^5 59 | -- 60 | -- >>> p^9 61 | -- 19683 + 59049 * x + 78732 * x^2 + 61236 * x^3 + 30618 * x^4 + 10206 * x^5 + 2268 * x^6 + 324 * x^7 + 27 * x^8 + x^9 62 | -- 63 | -- >>> poly1 p 10 64 | -- 13 65 | -- >>> poly1 (p^3) 10 66 | -- 2197 67 | -- >>> (poly1 p 10)^3 68 | -- 2197 69 | -- 70 | -- >>> poly1 (p^5) 17 == (poly1 p 17)^5 71 | -- True 72 | 73 | -- >>> let p = 1 <+> value 3 :: Poly1 [Z] 74 | -- >>> p 75 | -- 3 + x 76 | -- >>> p^3 77 | -- 27 + 27 * x + 9 * x^2 + x^3 78 | -- 79 | -- >>> p^4 80 | -- 27 + 27 * x + 9 * x^2 + x^3 81 | -- 82 | -- >>> p^5 83 | -- 243 + 405 * x + 270 * x^2 + 90 * x^3 + 15 * x^4 + x^5 84 | -- 85 | -- >>> p^9 86 | -- 19683 + 59049 * x + 78732 * x^2 + 61236 * x^3 + 30618 * x^4 + 10206 * x^5 + 2268 * x^6 + 324 * x^7 + 27 * x^8 + x^9 87 | -- 88 | -- >>> poly1 p 10 89 | -- 13 90 | -- >>> poly1 (p^3) 10 91 | -- 2197 92 | -- >>> (poly1 p 10)^3 93 | -- 2197 94 | -- 95 | -- >>> poly1 (p^5) 17 == (poly1 p 17)^5 96 | -- True 97 | 98 | -- >>> let p = single 1 <+> value 3 :: Poly1 (Cofree Maybe Z) 99 | -- >>> p 100 | -- 3 + x 101 | -- >>> p^3 102 | -- 27 + 27 * x + 9 * x^2 + x^3 103 | -- 104 | -- >>> p^5 105 | -- 243 + 405 * x + 270 * x^2 + 90 * x^3 + 15 * x^4 + x^5 106 | -- 107 | -- >>> poly1 p 10 108 | -- 13 109 | -- >>> poly1 (p^3) 10 110 | -- 2197 111 | -- >>> (poly1 p 10)^3 112 | -- 2197 113 | 114 | -- poly :: (Listable n b x, HasPow x n b, Semiring b) => Poly1 x -> x -> b 115 | 116 | poly :: (Listable n b x, Semiring b, HasPow z n b) => Poly1 x -> z -> b 117 | poly (Poly1 m) z = sum [b <.> z^#i | (i,b) <- toList m] 118 | 119 | -- >>> take 50 $ show (single 1 :: Cofree Identity Z) 120 | -- "0 :< Identity (1 :< Identity (0 :< Identity (0 :< " 121 | 122 | type PS1 b = Poly1 (Cofree Identity b) -- streams 123 | 124 | type PL1 b = Poly1 (Cofree Maybe b) -- nonempty lists 125 | 126 | -- >>> let p = single 1 <+> value 3 :: PL1 Z 127 | -- >>> p 128 | -- 3 + x 129 | -- >>> p^3 130 | -- 27 + 27 * x + 9 * x^[(),()] + x^[(),(),()] 131 | 132 | -- >>> single 3 :: Cofree Maybe Z 133 | -- 0 :< Just (0 :< Just (0 :< Just (1 :< Nothing))) 134 | 135 | -- >>> p 136 | -- :1088:2: error: Variable not in scope: p 137 | -- 138 | -- >>> p^3 139 | -- :1089:2: error: Variable not in scope: p 140 | -- 141 | -- >>> p^5 142 | -- :1090:2: error: Variable not in scope: p 143 | 144 | -- type PolyL b = Poly1 [b] 145 | 146 | -- As in Doug McIlroy's "The Music of Streams" 147 | integralL :: Fractional b => Poly1 [b] -> Poly1 [b] 148 | -- integralL (Poly1 []) = Poly1 [] -- Breaks ODE termination. 149 | integralL (Poly1 bs0) = Poly1 (0 : go 1 bs0) 150 | where 151 | go _ [] = [] 152 | go n (b : d) = b/n : go (n+1) d 153 | 154 | derivativeL :: (Additive b, Fractional b) => Poly1 [b] -> Poly1 [b] 155 | derivativeL (Poly1 []) = zero 156 | derivativeL (Poly1 (_ : bs0)) = Poly1 (go 1 bs0) 157 | where 158 | go _ [] = [] 159 | go n (b : bs) = n * b : go (n+1) bs 160 | 161 | -- integralL generalizes beyond Maybe, but derivativeL doesn't. TODO: fix. 162 | 163 | sinL, cosL, expL :: Poly1 [Rational] 164 | sinL = integralL cosL 165 | cosL = 1 - integralL sinL 166 | expL = 1 + integralL expL 167 | 168 | lop :: Show a => a -> IO () 169 | lop = putStrLn . (++ " ...") . take 115 . show 170 | 171 | -- >>> lop sinL 172 | -- x - 1 % 6 * x^3 + 1 % 120 * x^5 - 1 % 5040 * x^7 + 1 % 362880 * x^9 - 1 % 39916800 * x^11 + 1 % 6227020800 * x^13 - ... 173 | -- >>> lop cosL 174 | -- 1 % 1 - 1 % 2 * x^2 + 1 % 24 * x^4 - 1 % 720 * x^6 + 1 % 40320 * x^8 - 1 % 3628800 * x^10 + 1 % 479001600 * x^12 - ... 175 | -- >>> lop expL 176 | -- 1 % 1 + x + 1 % 2 * x^2 + 1 % 6 * x^3 + 1 % 24 * x^4 + 1 % 120 * x^5 + 1 % 720 * x^6 + 1 % 5040 * x^7 + 1 % 40320 * ... 177 | 178 | -- >>> lop (derivativeL sinL) -- |== cosL| 179 | -- 1 % 1 - 1 % 2 * x^2 + 1 % 24 * x^4 - 1 % 720 * x^6 + 1 % 40320 * x^8 - 1 % 3628800 * x^10 + 1 % 479001600 * x^12 - ... 180 | -- >>> lop (derivativeL cosL) -- |== - sinL| 181 | -- (-1) % 1 * x + 1 % 6 * x^3 - 1 % 120 * x^5 + 1 % 5040 * x^7 - 1 % 362880 * x^9 + 1 % 39916800 * x^11 - 1 % 62270208 ... 182 | -- >>> lop (derivativeL expL) -- |== expL| 183 | -- 1 % 1 + x + 1 % 2 * x^2 + 1 % 6 * x^3 + 1 % 24 * x^4 + 1 % 120 * x^5 + 1 % 720 * x^6 + 1 % 5040 * x^7 + 1 % 40320 * ... 184 | 185 | -- >>> lop (2 * expL) 186 | -- 2 % 1 + 2 % 1 * x + x^2 + 1 % 3 * x^3 + 1 % 12 * x^4 + 1 % 60 * x^5 + 1 % 360 * x^6 + 1 % 2520 * x^7 + 1 % 20160 * x^8 + ... 187 | 188 | -- >>> lop (sinL * cosL) 189 | -- x - 2 % 3 * x^3 + 2 % 15 * x^5 - 4 % 315 * x^7 + 2 % 2835 * x^9 - 4 % 155925 * x^11 + 4 % 6081075 * x^13 - 8 % 638512875 ... 190 | 191 | -- TODO: multivariate power series 192 | -- Can I generalize Poly1 and PolyM? 193 | 194 | {-------------------------------------------------------------------- 195 | Multivariate polynomials 196 | --------------------------------------------------------------------} 197 | 198 | -- Polynomials with named "variables" 199 | newtype PolyM b = PolyM { unPolyM :: Map (Map Name N) b } deriving 200 | (Additive, Semiring, Functor, Indexable (Map Name N) b, HasSingle (Map Name N) b) 201 | 202 | instance (Show b, Ord b, Num b, D01 b) => Show (PolyM b) where 203 | showsPrec d (PolyM m) = 204 | showsPrec d (Terms (term <$> sortOn (M.keys . fst) (M.toDescList m))) 205 | where 206 | term (p,b) = Term b (Pows p) 207 | 208 | -- instance (DetectableZero b, DetectableOne b, Show b) => Show (PolyM b) where 209 | -- showsPrec d (PolyM m) = 210 | -- showsPrec d (Terms (term <$> sortOn (M.keys . fst) (M.toDescList m))) 211 | -- where 212 | -- term (p,b) = Term b (Pows p) 213 | 214 | -- TODO: improve the sorting criterion 215 | 216 | -- varM :: Name -> PolyM Z 217 | varM :: Semiring b => Name -> PolyM b 218 | varM = single . single 219 | 220 | -- >>> let p = varM "x" <+> varM "y" <+> varM "z" :: PolyM Z 221 | -- >>> p 222 | -- x + y + z 223 | -- 224 | -- >>> p^2 225 | -- x^2 + 2 * x * y + 2 * x * z + y^2 + 2 * y * z + z^2 226 | -- 227 | -- >>> p^3 228 | -- x^3 + 3 * x^2 * y + 3 * x * y^2 + 6 * x * y * z + 3 * x^2 * z + 3 * x * z^2 + y^3 + 3 * y^2 * z + 3 * y * z^2 + z^3 229 | 230 | -- TODO: use the generalized Poly1 in place of PolyM. I'll have to tweak 231 | -- something, since the Poly1 Show instance wants to insert "x". 232 | 233 | type PM b = Poly1 (Map (Map Name N) b) 234 | 235 | -- >>> let p = single 1 <+> value 3 :: Poly1 [Z] 236 | -- >>> p 237 | -- 3 + x 238 | -- >>> p^3 239 | -- 27 + 27 * x + 9 * x^2 + x^3 240 | -- 241 | -- >>> p^5 242 | -- 243 + 405 * x + 270 * x^2 + 90 * x^3 + 15 * x^4 + x^5 243 | -- 244 | -- >>> poly1 p 10 245 | -- 13 246 | -- >>> poly1 (p^3) 10 247 | -- 2197 248 | -- >>> (poly1 p 10)^3 249 | -- 2197 250 | 251 | {-------------------------------------------------------------------- 252 | Show utilities 253 | --------------------------------------------------------------------} 254 | 255 | compose :: [a -> a] -> (a -> a) 256 | compose = foldr (.) id 257 | 258 | showIter :: Show b => Int -> String -> (b -> Bool) -> Int -> [b] -> ShowS 259 | showIter inner op omit outer bs = 260 | showParen (outer > inner) $ 261 | compose (intersperse (showString op) (showsPrec inner <$> filter (not . omit) bs)) 262 | 263 | -- TODO: check for empty bs and for singleton bs 264 | 265 | showSum :: (Show b, DetectableZero b) => Int -> [b] -> ShowS 266 | showSum = showIter 6 " + " isZero 267 | 268 | showProd :: (Show b, DetectableOne b) => Int -> [b] -> ShowS 269 | showProd = showIter 7 " * " isOne 270 | 271 | -- Cleverer showSum that uses subtraction for negative coefficients 272 | showSum' :: (Show x, Show b, Ord b, Num b, D01 b, D01 x) 273 | => Int -> [Term b x] -> ShowS 274 | showSum' p terms = go (filter (not . isZero) terms) 275 | where 276 | go [] = showString "0" -- I don't think we'll get this case 277 | go (b0:bs0) = showParen (p > 6) $ 278 | showsPrec 6 b0 . compose (term <$> bs0) 279 | where 280 | term (Term b x) | b < 0 = showString (" - ") . showsPrec 6 (Term (-b) x) 281 | | otherwise = showString (" + ") . showsPrec 6 (Term b x) 282 | 283 | 284 | {-------------------------------------------------------------------- 285 | Syntactic representations 286 | --------------------------------------------------------------------} 287 | 288 | newtype Name = Name String deriving (Eq,Ord) 289 | 290 | instance DetectableZero Name where isZero = const False 291 | instance DetectableOne Name where isOne = const False 292 | 293 | instance Show Name where show (Name s) = s 294 | instance IsString Name where fromString = Name 295 | 296 | data Pow b n = Pow b n -- b * x^n 297 | 298 | instance DetectableZero b => DetectableZero (Pow b n) where 299 | isZero (Pow b _) = isZero b 300 | 301 | instance (DetectableOne b, DetectableZero n) => DetectableOne (Pow b n) where 302 | isOne (Pow b n) = isOne b || isZero n 303 | 304 | instance (Show n, Show b, DetectableZero n, DetectableOne n, DetectableOne b) 305 | => Show (Pow b n) where 306 | showsPrec d p@(Pow b n) 307 | | isOne p = showString "1" 308 | | isOne n = showsPrec d b 309 | | otherwise = showParen (d >= 8) $ 310 | showsPrec 8 b . showString "^" . showsPrec 8 n 311 | 312 | -- >>> Pow "z" 5 :: Pow Name N 313 | -- z^5 314 | 315 | newtype Pows b = Pows (Map b N) -- deriving (Semiring,Monoid) 316 | 317 | instance DetectableZero b => DetectableZero (Pows b) where 318 | isZero = const False 319 | 320 | instance DetectableOne (Pows b) where 321 | isOne (Pows m) = all isZero m 322 | 323 | -- TODO: try changing isZero for Map to be 'all isZero'. Might wedge on recursive examples. 324 | 325 | instance (Show b, DetectableZero b, DetectableOne b) => Show (Pows b) where 326 | showsPrec d p@(Pows m) 327 | | isOne p = showString "1" 328 | | otherwise = showProd d (uncurry Pow <$> M.toList m) 329 | 330 | -- >>> Pows (("x" +-> 2) <+> ("y" +-> 3)) :: Pows Name 331 | -- x^2 * y^3 332 | 333 | data Term b x = Term b x -- b * x 334 | 335 | instance (DetectableZero b, DetectableZero x) => DetectableZero (Term b x) where 336 | isZero (Term b x) = isZero b || isZero x 337 | 338 | instance (Show b, Show x, DetectableOne x, DetectableZero b, DetectableOne b) 339 | => Show (Term b x) where 340 | showsPrec d (Term b x) 341 | | isZero b || isOne x = showsPrec d b 342 | | isOne b = showsPrec d x 343 | | otherwise = showParen (d > 6) $ 344 | showsPrec 6 b . showString " * " . showsPrec 6 x 345 | 346 | -- >>> Term 7 (Pows (("x" +-> 2) <+> ("y" +-> 3))) :: Term Z (Pows Name) 347 | -- 7 * x^2 * y^3 348 | 349 | data Terms b = Terms [b] 350 | 351 | -- instance (Show b, DetectableZero b) => Show (Terms b) where 352 | -- showsPrec d (Terms bs) = showSum d bs 353 | 354 | instance (Show b, Show x, Ord b, Num b, D01 b, D01 x) 355 | => Show (Terms (Term b x)) where 356 | showsPrec d (Terms bs) = showSum' d bs 357 | -------------------------------------------------------------------------------- /reviews-icfp2019.md: -------------------------------------------------------------------------------- 1 | % Reviews for "Generalized Convolution and Efficient Language Recognition" 2 | 3 | ICFP 2019 Paper #46 Reviews and Comments 4 | =========================================================================== 5 | Paper #46 Generalized Convolution and Efficient Language Recognition 6 | 7 | 8 | Review #46A 9 | =========================================================================== 10 | 11 | Overall merit 12 | ------------- 13 | C. Weak paper, though I will not fight strongly against it. 14 | 15 | Reviewer expertise 16 | ------------------ 17 | Y. I am knowledgeable in the area, though not an expert. 18 | 19 | Paper summary 20 | ------------- 21 | This paper remarks that convolution is a general operation that endows 22 | semiring structure on the set of functions from any monoid to any 23 | semiring. Usually the word "convolution" evokes a domain monoid of 24 | integers (giving rise to polynomial convolution) or of integer tuples 25 | (giving rise to image convolution). But when the domain monoid is 26 | the monoid of lists (i.e., strings), the paper generalizes Brzozowski 27 | derivatives from sets (i.e., when the range semiring is the semiring of 28 | Booleans) to any other range semiring, and derives two implementations: 29 | one that stores regular expressions and one that stores tries. 30 | 31 | Comments for author 32 | ------------------- 33 | The generalizations represented in Figures 1-4 are worthwhile and to my 34 | knowledge novel. But these contributions are buried among definitions 35 | and derivations whose applications within and beyond the paper remain 36 | unclear. 37 | 38 | - For example, it's not clear how Sections 10-13 are novel or help 39 | to produce Figure 9 efficiently or at all. The paper would be far 40 | more interesting if it were shown that a generic implementation of 41 | convolution can achieve performance competitive against existing 42 | specialized implementations for string matching and image blurring. 43 | 44 | - To take another example, line 664 introduces "an alternative to 45 | repeated syntactic differentiation" but it's unclear what shared goal 46 | is achieved by both "repeated syntactic differentiation" and this 47 | "alternative", there is no demonstration of "the syntactic overhead", 48 | and it is unclear what "f" means in "a choice of f". 49 | *[Conal: "f" should be "h".]* 50 | 51 | This paper lacks 52 | 53 | - upfront signposting of dependencies and contributions 54 | 55 | Examples of roadmap descriptions that need to be preponed in the 56 | paper include the first sentence of Section 7, the first paragraph 57 | of Section 8, and the first paragraph of Section 10. It is unclear 58 | where lines 611-640 and lines 991-993 are used. The implementations 59 | in Figures 3 and 4 should be demonstrated in the style of lines 60 | 1163-1172. 61 | 62 | - evidence of computational practicality 63 | 64 | Phrases such as "quite efficient" (line 85) and "dramatic speed 65 | improvement" (line 92) are vague because the standards of comparison 66 | are unclear. And performance comparisons are not interesting if they 67 | are only made against other implementations in the same work. Hence, 68 | Figure 6 should compare against at least one existing implementation 69 | (such as Happy). Measurements should show standard errors. Both 70 | matching and non-matching strings should be tested. 71 | 72 | - comparison with related work 73 | 74 | The first paragraph of Section 14 is not enough because there is no 75 | comparison. 76 | 77 | The code shown in the paper does not seem trustworthy: infinite sums 78 | are confused with finite sums (for example on line 1106), and the `Key` 79 | of an `Indexable` type constructor is sometimes expressed using a 80 | functional dependency (line 300) and sometimes expressed using a type 81 | family (lines 614, 789). 82 | 83 | Minor comments: 84 | 85 | * Line 75 "the second and seventh decades of the twentieth century": just 86 | name the decades or years (1910s? etc.) to avoid off-by-one confusion. 87 | * Lines 87-90: This (4th) bullet point is unclear. What "key operations"? 88 | What "comonad operations"? What "various representations"? Each bullet 89 | point should name the specific section of the paper that makes that 90 | contribution. 91 | *[Conal: Added detail: "Observation that Brzozowski's two key operations on languages (emptiness and differentiation) generalize to the comonad operations (*coreturn* and *cojoin*) of the standard function-from-monoid comonad and various representations of those functions (including generalized regular expressions)."]* 92 | * Line 171 "this instance" referent unclear (too far from line 160). 93 | *[Conal: "... the `Additive (a -> b)` instance above"]* 94 | * Line 222: Clarify whether these two equations are equivalent. 95 | * Line 243 "a additive" -> "an additive" *[Conal: fixed]* 96 | * Line 265: Isn't this instance missing some context such as `Semiring s =>`? 97 | *[Conal: fixed.]* 98 | And why not generalize `(a -> s)` to `(a -> s')`? 99 | *[Conal: Because the type of multiplication requires that its arguments have the same type.]* 100 | * Line 329: $\sum$ is undefined, and what if $f$ maps an infinite number 101 | of $a$s to nonzero $b$s? 102 | * Lemma 6 seems to hold only for the instance on line 315. 103 | *[Conal: Yes, because of the use of curry. Clarified by adding "For functions,".]* 104 | * Lemma 7 seems unused in the rest of the paper. 105 | *[Conal: Yes. It's part of the theory developed here, though unused in this paper.]* 106 | * Line 339 `(->) a` needs to refer more explicitly to the instance on line 265. 107 | *[Conal: Added "(given the LeftSemimodule s (a → s) instance in Section 2.5)".*] 108 | * Line 340 "homomorphisms" needs an `instance LeftSemimodule s s`. 109 | * Line 340 `single` seems like it should be `value`. 110 | *[Conal: Indeed. Fixed.]* 111 | * Line 340 "semiring homomorphism" between which instances? 112 | * Lines 436 and 440 typeset `P` inconsistently (as $P$ and $\mathcal{P}$) 113 | *[Conal: Fixed.]* 114 | * Lines 460-461 "apply the same sort of reasoning as in Section 3 and then 115 | generalize from Bool to an arbitrary semiring" needs more details. 116 | * Line 489 "represented is" -> "represented as" 117 | *[Conal: Fixed.]* 118 | * Line 501 `M.null` -> `all isZero . M.elems`. 119 | * Line 583 is only proven for finite p, so it is not proven. 120 | * Line 742: Please parenthesize `c -> LTrie c b` 121 | *[Conal: Fixed.]* 122 | * Line 766 "sanity check" -> "smoke test" 123 | * Line 781 "one more crucial tricks" -> "one more crucial trick" 124 | *[Conal: fixed to "one or more crucial trick".]* 125 | * Line 782 "might" and line 783 "might" are not informative. 126 | * Figure 6: All time measurements should be given in the same unit (μs 127 | for example) (lines 771-772), and all decimal points in each column 128 | should be aligned. Whether the two occurrences of $\infty$ represent 129 | nontermination in theory or running out of time in practice should be 130 | ascertained (lines 780-781). 131 | * Line 901 "no fmap" is unclear. 132 | *[Conal: removed "and no fmap". I don't remember what I was trying to say there.]* 133 | * Line ~~1047~~ 1062 "a the" -> "the" 134 | *[Conal: fixed.]* 135 | * Switching `Semiring` instances in Figure 8 is confusing. Instead, just 136 | define `<-` early and use it in Figure 1. 137 | * Theorem 16 is meaningless because the Fourier transform is not defined 138 | for generic types `a` and `b`. 139 | * Line 1118: How does this `Show` instance enumerate the support of z? 140 | *[Conal: Via a type-check I elided from the paper.]* 141 | 142 | 143 | 144 | Review #46B 145 | =========================================================================== 146 | 147 | Overall merit 148 | ------------- 149 | C. Weak paper, though I will not fight strongly against it. 150 | 151 | Reviewer expertise 152 | ------------------ 153 | Y. I am knowledgeable in the area, though not an expert. 154 | 155 | Paper summary 156 | ------------- 157 | This paper provides a formalization of semirings, convolutions, and 158 | Brzozowski's derivatives (hereinafter simply called "derivatives") 159 | using functional programming idioms (i.e., type classes, monads, 160 | etc.). 161 | 162 | Comments for author 163 | ------------------- 164 | Generally I like this kind of papers. Re-formulation from functional 165 | programming may bring new perspectives and moreover general, concise, 166 | and correct-by-construction implementations. However, for this 167 | particular paper, I unfortunately did not get favorable impression. 168 | 169 | First, the contributions around derivatives seem (at least, partially) 170 | known. Derivatives for "weighted" regular expressions are studied in 171 | the following papers. 172 | 173 | * Sylvain Lombardy, Jacques Sakarovitch: 174 | Derivatives of rational expressions with multiplicity. 175 | Theor. Comput. Sci. 332(1-3): 141-177 (2005) 176 | * Jean-Marc Champarnaud, Gerard Duchamp: 177 | Derivatives of rational expressions and related theorems. 178 | Theor. Comput. Sci. 313(1): 31-44 (2004) 179 | 180 | Moreover, a connection between convolutions and derivatives can be found 181 | in the following paper. 182 | 183 | * Jan J. M. M. Rutten: 184 | Behavioural differential equations: a coinductive calculus of 185 | streams, automata, and power series. 186 | Theor. Comput. Sci. 308(1-3): 1-53 (2003) 187 | 188 | These preceding studies are not reasons for rejecting the current 189 | paper if a new insight from a perspective from functional programming 190 | is presented. However, such an insight is unclear. For example: 191 | "comonadic understanding of derivatives" sounds interesting, but so 192 | what? Are comonad laws useful for reasoning about derivatives for 193 | generalized regular expressions, for example? 194 | 195 | A potential benefit is that the formalism itself is a run-able 196 | implementation. However, for regular expression matching, even 197 | specialized efficient implementations are favorable, and the current 198 | evaluation is not convincing that the developed implementation is 199 | efficient. The implementation can do more than regular expression 200 | matching, but usefulness of the generality is not presented. 201 | 202 | Another weakness of the current paper is its presentation. In my 203 | impression, the focus and the technical developments are somewhat 204 | unclear. I pick up a few relatively serious issues: 205 | - Section 3 says "This process of calculating instances from 206 | homomorphisms is the central guiding principle of this paper", but I 207 | could not find such a "guiding principle". Most discussions are the 208 | converse direction, namely giving an instance and then showing it is 209 | a homomorphism. 210 | - Although convolutions are the central topic of this paper, their 211 | introduction in Section 4 (the first paragraph of p.10) is 212 | unreasonable. Why is the usual definition disappointing? Why are 213 | "the same sort of reasoning" omitted? 214 | - Although convolutions are the central topic of this paper, 215 | connection between convolution and derivatives is unclear. 216 | Convolution is introduced as an operation on some kinds of functions 217 | in Section 4, and after that, never mentioned until Section 10. 218 | - (Though I have known derivatives) I suspect that for those who do 219 | not know derivatives Section 6 seems not understandable. The 220 | author(s) should explain why such decomposition is considered here 221 | and what at_e and D are. 222 | 223 | From above-mentioned issues, I judged the current paper is premature 224 | for publication, even though it studies potentially interesting 225 | research topic. 226 | 227 | 228 | 229 | Review #46C 230 | =========================================================================== 231 | 232 | Overall merit 233 | ------------- 234 | C. Weak paper, though I will not fight strongly against it. 235 | 236 | Reviewer expertise 237 | ------------------ 238 | Y. I am knowledgeable in the area, though not an expert. 239 | 240 | Paper summary 241 | ------------- 242 | This work presents a quite interesting and powerful generalization of regular expressions and the associated method of Brzozowski derivatives. The authors present regular expression concatenation (or more generally, formal language concatenation) as a special case of convolution and propose a couple of general algebraic structures that can represent a wide range of use cases beyond regular expression matching. 243 | 244 | Comments for author 245 | ------------------- 246 | When I read the abstract and the introduction of the paper, I was rather thrilled and was eager to accept and support the paper. The authors have obviously invested a lot of time and thought into this work. I find the proposed generalization quite interesting and would love to see it published. 247 | 248 | However, my main problem with the paper is that it is presented in a way that makes it really hard to understand what is going on. There is so much barely explained code in the paper, and very little guidance of the reader, that I got lost completely somewhere around p.13 and I stopped reading. 249 | 250 | Below I'll give some more detailed hints of where I think the presentation is lacking. If the paper gets rejected, I strongly encourage the papers to continue this line of work and resubmit a revised version soon. If the paper gets accepted, I also suggest to revise the paper for readability. 251 | 252 | Detailed comments: 253 | 254 | In general, I thought that the paper lacks a proper structure that is easy to understand. In the first sections, it was never clear to me whether the authors summarize preliminaries or whether there are novel ideas in here. I was always waiting impatiently for the "meat" of the paper to begin. I think you should at least give the reader some kind of roadmap about how you present the material, especially since it takes a very long time before you return to the issues mentioned in the introduction. 255 | 256 | l.479ff: Here I was rather puzzled that you suddenly talk about "testing for membership" but without demonstrating how one does test for membership. I would have appreciated if you had shown the full code here and illustrated which type class instances are derived and needed for that example to go through. 257 | 258 | In section 6, I began to feel completely lost. All that code in Lemma 9 with no guidance about what it does. 259 | 260 | With regard to the definition of D in l. 534 I wondered whether you are cheating in the sense that the Brzozowski algorithm gives me a concrete regular expression that represents the set of suffixes that can still follow, but in l. 534 it looks like you are merely creating a closure and nothing happens until the closure is applied to a concrete suffix. 261 | 262 | l.570: I was rather confused that you merely drop the words "coreturn", "cojoin" etc. here without explaining what they mean and how exactly that correspondence works out. 263 | -------------------------------------------------------------------------------- /test/Benchmarks/with isOne.md: -------------------------------------------------------------------------------- 1 | 2 | # Group "star-a" 3 | 4 | ``` 5 | 6 | benchmarking "a"/RegExp:Function 7 | time 448.9 ns (442.9 ns .. 456.6 ns) 8 | 0.999 R² (0.998 R² .. 1.000 R²) 9 | mean 454.3 ns (449.1 ns .. 476.1 ns) 10 | std dev 29.27 ns (9.824 ns .. 68.71 ns) 11 | variance introduced by outliers: 78% (severely inflated) 12 | 13 | benchmarking "a"/RegExp:Map 14 | time 364.0 ns (361.2 ns .. 367.4 ns) 15 | 1.000 R² (0.999 R² .. 1.000 R²) 16 | mean 364.1 ns (361.8 ns .. 366.6 ns) 17 | std dev 8.325 ns (6.594 ns .. 10.44 ns) 18 | variance introduced by outliers: 31% (moderately inflated) 19 | 20 | benchmarking "a"/RegExp:IntMap 21 | time 363.0 ns (358.2 ns .. 368.2 ns) 22 | 0.999 R² (0.999 R² .. 1.000 R²) 23 | mean 362.9 ns (360.2 ns .. 366.3 ns) 24 | std dev 11.06 ns (8.428 ns .. 16.36 ns) 25 | variance introduced by outliers: 44% (moderately inflated) 26 | 27 | benchmarking "a"/Cofree:Map 28 | time 25.71 ns (25.48 ns .. 25.97 ns) 29 | 0.999 R² (0.999 R² .. 1.000 R²) 30 | mean 25.64 ns (25.50 ns .. 25.84 ns) 31 | std dev 554.5 ps (431.9 ps .. 735.4 ps) 32 | variance introduced by outliers: 33% (moderately inflated) 33 | 34 | benchmarking "a"/Cofree:IntMap 35 | time 24.44 ns (24.29 ns .. 24.60 ns) 36 | 1.000 R² (1.000 R² .. 1.000 R²) 37 | mean 24.48 ns (24.36 ns .. 24.60 ns) 38 | std dev 414.3 ps (336.1 ps .. 563.0 ps) 39 | variance introduced by outliers: 23% (moderately inflated) 40 | 41 | benchmarking a50/RegExp:Function 42 | time 14.65 μs (14.55 μs .. 14.76 μs) 43 | 1.000 R² (0.999 R² .. 1.000 R²) 44 | mean 14.61 μs (14.51 μs .. 14.73 μs) 45 | std dev 348.4 ns (259.3 ns .. 480.2 ns) 46 | variance introduced by outliers: 24% (moderately inflated) 47 | 48 | benchmarking a50/RegExp:Map 49 | time 11.07 μs (10.98 μs .. 11.16 μs) 50 | 0.999 R² (0.999 R² .. 1.000 R²) 51 | mean 11.10 μs (11.01 μs .. 11.21 μs) 52 | std dev 329.8 ns (252.6 ns .. 443.9 ns) 53 | variance introduced by outliers: 34% (moderately inflated) 54 | 55 | benchmarking a50/RegExp:IntMap 56 | time 10.83 μs (10.67 μs .. 10.97 μs) 57 | 0.999 R² (0.998 R² .. 0.999 R²) 58 | mean 10.71 μs (10.62 μs .. 10.80 μs) 59 | std dev 303.4 ns (249.6 ns .. 357.6 ns) 60 | variance introduced by outliers: 32% (moderately inflated) 61 | 62 | benchmarking a50/Cofree:Map 63 | time 1.297 μs (1.288 μs .. 1.308 μs) 64 | 0.999 R² (0.999 R² .. 1.000 R²) 65 | mean 1.301 μs (1.292 μs .. 1.312 μs) 66 | std dev 33.40 ns (27.52 ns .. 41.50 ns) 67 | variance introduced by outliers: 33% (moderately inflated) 68 | 69 | benchmarking a50/Cofree:IntMap 70 | time 1.230 μs (1.218 μs .. 1.240 μs) 71 | 1.000 R² (0.999 R² .. 1.000 R²) 72 | mean 1.226 μs (1.219 μs .. 1.237 μs) 73 | std dev 28.83 ns (20.17 ns .. 40.29 ns) 74 | variance introduced by outliers: 30% (moderately inflated) 75 | 76 | ``` 77 | 78 | # Group "letters" 79 | 80 | ``` 81 | 82 | benchmarking asdf-50/RegExp:Function 83 | time 4.091 ms (4.051 ms .. 4.131 ms) 84 | 0.999 R² (0.999 R² .. 1.000 R²) 85 | mean 4.013 ms (3.985 ms .. 4.043 ms) 86 | std dev 90.09 μs (74.66 μs .. 117.6 μs) 87 | 88 | benchmarking asdf-50/RegExp:Map 89 | time 3.297 ms (3.263 ms .. 3.329 ms) 90 | 0.999 R² (0.999 R² .. 1.000 R²) 91 | mean 3.326 ms (3.304 ms .. 3.348 ms) 92 | std dev 70.53 μs (58.53 μs .. 92.42 μs) 93 | 94 | benchmarking asdf-50/RegExp:IntMap 95 | time 2.840 ms (2.813 ms .. 2.873 ms) 96 | 0.998 R² (0.996 R² .. 0.999 R²) 97 | mean 2.936 ms (2.894 ms .. 2.999 ms) 98 | std dev 161.0 μs (116.5 μs .. 217.3 μs) 99 | variance introduced by outliers: 36% (moderately inflated) 100 | 101 | benchmarking asdf-50/Cofree:Map 102 | time 7.288 μs (7.247 μs .. 7.331 μs) 103 | 1.000 R² (1.000 R² .. 1.000 R²) 104 | mean 7.286 μs (7.249 μs .. 7.331 μs) 105 | std dev 131.6 ns (104.4 ns .. 160.1 ns) 106 | variance introduced by outliers: 17% (moderately inflated) 107 | 108 | benchmarking asdf-50/Cofree:IntMap 109 | time 6.775 μs (6.741 μs .. 6.809 μs) 110 | 1.000 R² (1.000 R² .. 1.000 R²) 111 | mean 6.787 μs (6.753 μs .. 6.819 μs) 112 | std dev 112.8 ns (95.03 ns .. 137.5 ns) 113 | variance introduced by outliers: 15% (moderately inflated) 114 | 115 | ``` 116 | 117 | # Group "dyck" 118 | 119 | ``` 120 | 121 | benchmarking "[]"/RegExp:Function 122 | time 1.661 μs (1.642 μs .. 1.678 μs) 123 | 0.999 R² (0.999 R² .. 1.000 R²) 124 | mean 1.649 μs (1.637 μs .. 1.662 μs) 125 | std dev 40.52 ns (31.23 ns .. 52.59 ns) 126 | variance introduced by outliers: 31% (moderately inflated) 127 | 128 | benchmarking "[]"/Cofree:Map 129 | time 51.63 ns (51.37 ns .. 51.94 ns) 130 | 0.999 R² (0.998 R² .. 1.000 R²) 131 | mean 52.07 ns (51.55 ns .. 53.30 ns) 132 | std dev 2.716 ns (1.141 ns .. 5.270 ns) 133 | variance introduced by outliers: 73% (severely inflated) 134 | 135 | benchmarking "[]"/Cofree:IntMap 136 | time 51.72 ns (51.33 ns .. 52.06 ns) 137 | 1.000 R² (0.999 R² .. 1.000 R²) 138 | mean 51.76 ns (51.39 ns .. 52.72 ns) 139 | std dev 1.908 ns (757.1 ps .. 4.039 ns) 140 | variance introduced by outliers: 58% (severely inflated) 141 | 142 | benchmarking "[[]]"/RegExp:Function 143 | time 3.913 μs (3.885 μs .. 3.950 μs) 144 | 1.000 R² (0.999 R² .. 1.000 R²) 145 | mean 3.934 μs (3.910 μs .. 3.960 μs) 146 | std dev 83.99 ns (70.03 ns .. 100.5 ns) 147 | variance introduced by outliers: 23% (moderately inflated) 148 | 149 | benchmarking "[[]]"/Cofree:Map 150 | time 104.8 ns (104.0 ns .. 105.6 ns) 151 | 1.000 R² (0.999 R² .. 1.000 R²) 152 | mean 104.7 ns (104.2 ns .. 105.5 ns) 153 | std dev 2.141 ns (1.646 ns .. 2.729 ns) 154 | variance introduced by outliers: 28% (moderately inflated) 155 | 156 | benchmarking "[[]]"/Cofree:IntMap 157 | time 103.9 ns (103.2 ns .. 104.6 ns) 158 | 1.000 R² (1.000 R² .. 1.000 R²) 159 | mean 103.9 ns (103.4 ns .. 104.6 ns) 160 | std dev 1.846 ns (1.494 ns .. 2.460 ns) 161 | variance introduced by outliers: 23% (moderately inflated) 162 | 163 | benchmarking "[[a]]"/RegExp:Function 164 | time 2.951 μs (2.941 μs .. 2.963 μs) 165 | 1.000 R² (1.000 R² .. 1.000 R²) 166 | mean 2.937 μs (2.926 μs .. 2.949 μs) 167 | std dev 38.86 ns (31.23 ns .. 50.80 ns) 168 | variance introduced by outliers: 11% (moderately inflated) 169 | 170 | benchmarking "[[a]]"/Cofree:Map 171 | time 142.2 ns (140.7 ns .. 143.9 ns) 172 | 0.999 R² (0.999 R² .. 1.000 R²) 173 | mean 141.4 ns (140.5 ns .. 142.5 ns) 174 | std dev 3.432 ns (2.651 ns .. 5.189 ns) 175 | variance introduced by outliers: 35% (moderately inflated) 176 | 177 | benchmarking "[[a]]"/Cofree:IntMap 178 | time 128.1 ns (127.6 ns .. 128.7 ns) 179 | 1.000 R² (0.999 R² .. 1.000 R²) 180 | mean 128.8 ns (128.0 ns .. 130.5 ns) 181 | std dev 3.726 ns (2.232 ns .. 6.185 ns) 182 | variance introduced by outliers: 44% (moderately inflated) 183 | 184 | benchmarking "[[]][]"/RegExp:Function 185 | time 5.295 μs (5.267 μs .. 5.318 μs) 186 | 1.000 R² (0.999 R² .. 1.000 R²) 187 | mean 5.252 μs (5.229 μs .. 5.275 μs) 188 | std dev 78.17 ns (69.57 ns .. 89.95 ns) 189 | variance introduced by outliers: 12% (moderately inflated) 190 | 191 | benchmarking "[[]][]"/Cofree:Map 192 | time 161.0 ns (159.9 ns .. 162.0 ns) 193 | 1.000 R² (1.000 R² .. 1.000 R²) 194 | mean 160.5 ns (159.8 ns .. 161.3 ns) 195 | std dev 2.586 ns (2.240 ns .. 3.098 ns) 196 | variance introduced by outliers: 19% (moderately inflated) 197 | 198 | benchmarking "[[]][]"/Cofree:IntMap 199 | time 157.7 ns (156.0 ns .. 159.7 ns) 200 | 0.999 R² (0.998 R² .. 0.999 R²) 201 | mean 159.8 ns (158.1 ns .. 162.3 ns) 202 | std dev 6.857 ns (4.853 ns .. 9.427 ns) 203 | variance introduced by outliers: 63% (severely inflated) 204 | 205 | ``` 206 | 207 | # Group "anbn" 208 | 209 | ``` 210 | 211 | benchmarking ""/RegExp:Function 212 | time 27.02 ns (26.85 ns .. 27.20 ns) 213 | 1.000 R² (0.999 R² .. 1.000 R²) 214 | mean 27.21 ns (27.04 ns .. 27.41 ns) 215 | std dev 636.5 ps (531.6 ps .. 781.0 ps) 216 | variance introduced by outliers: 36% (moderately inflated) 217 | 218 | benchmarking ""/Cofree:Map 219 | time 7.891 ns (7.816 ns .. 7.950 ns) 220 | 1.000 R² (1.000 R² .. 1.000 R²) 221 | mean 7.806 ns (7.772 ns .. 7.852 ns) 222 | std dev 136.7 ps (105.1 ps .. 195.5 ps) 223 | variance introduced by outliers: 26% (moderately inflated) 224 | 225 | benchmarking ""/Cofree:IntMap 226 | time 7.962 ns (7.879 ns .. 8.048 ns) 227 | 0.999 R² (0.999 R² .. 1.000 R²) 228 | mean 8.016 ns (7.946 ns .. 8.104 ns) 229 | std dev 259.6 ps (207.8 ps .. 337.5 ps) 230 | variance introduced by outliers: 55% (severely inflated) 231 | 232 | benchmarking "ab"/RegExp:Function 233 | time 1.473 μs (1.465 μs .. 1.483 μs) 234 | 1.000 R² (0.999 R² .. 1.000 R²) 235 | mean 1.483 μs (1.473 μs .. 1.499 μs) 236 | std dev 44.05 ns (31.24 ns .. 65.88 ns) 237 | variance introduced by outliers: 39% (moderately inflated) 238 | 239 | benchmarking "ab"/Cofree:Map 240 | time 51.86 ns (51.60 ns .. 52.24 ns) 241 | 1.000 R² (1.000 R² .. 1.000 R²) 242 | mean 51.71 ns (51.48 ns .. 51.99 ns) 243 | std dev 830.9 ps (644.1 ps .. 1.072 ns) 244 | variance introduced by outliers: 20% (moderately inflated) 245 | 246 | benchmarking "ab"/Cofree:IntMap 247 | time 52.55 ns (51.82 ns .. 53.33 ns) 248 | 0.999 R² (0.999 R² .. 1.000 R²) 249 | mean 52.11 ns (51.81 ns .. 52.48 ns) 250 | std dev 1.208 ns (920.3 ps .. 1.534 ns) 251 | variance introduced by outliers: 35% (moderately inflated) 252 | 253 | benchmarking "aacbb"/RegExp:Function 254 | time 2.682 μs (2.653 μs .. 2.718 μs) 255 | 0.999 R² (0.997 R² .. 1.000 R²) 256 | mean 2.658 μs (2.638 μs .. 2.695 μs) 257 | std dev 89.17 ns (50.82 ns .. 151.7 ns) 258 | variance introduced by outliers: 44% (moderately inflated) 259 | 260 | benchmarking "aacbb"/Cofree:Map 261 | time 141.0 ns (140.0 ns .. 142.2 ns) 262 | 1.000 R² (0.999 R² .. 1.000 R²) 263 | mean 142.0 ns (141.2 ns .. 143.1 ns) 264 | std dev 3.433 ns (2.370 ns .. 5.616 ns) 265 | variance introduced by outliers: 35% (moderately inflated) 266 | 267 | benchmarking "aacbb"/Cofree:IntMap 268 | time 130.9 ns (130.2 ns .. 132.0 ns) 269 | 0.998 R² (0.996 R² .. 1.000 R²) 270 | mean 134.0 ns (132.1 ns .. 139.1 ns) 271 | std dev 9.891 ns (4.344 ns .. 18.19 ns) 272 | variance introduced by outliers: 84% (severely inflated) 273 | 274 | benchmarking "aaabbb"/RegExp:Function 275 | time 4.607 μs (4.564 μs .. 4.672 μs) 276 | 0.999 R² (0.998 R² .. 0.999 R²) 277 | mean 4.749 μs (4.693 μs .. 4.833 μs) 278 | std dev 233.4 ns (173.3 ns .. 356.6 ns) 279 | variance introduced by outliers: 62% (severely inflated) 280 | 281 | benchmarking "aaabbb"/Cofree:Map 282 | time 162.7 ns (159.1 ns .. 167.3 ns) 283 | 0.997 R² (0.995 R² .. 1.000 R²) 284 | mean 160.3 ns (159.2 ns .. 162.6 ns) 285 | std dev 5.071 ns (3.128 ns .. 9.120 ns) 286 | variance introduced by outliers: 48% (moderately inflated) 287 | 288 | benchmarking "aaabbb"/Cofree:IntMap 289 | time 153.2 ns (152.3 ns .. 154.4 ns) 290 | 1.000 R² (1.000 R² .. 1.000 R²) 291 | mean 154.0 ns (153.4 ns .. 155.3 ns) 292 | std dev 2.928 ns (1.756 ns .. 4.910 ns) 293 | variance introduced by outliers: 25% (moderately inflated) 294 | 295 | benchmarking "aaabbbb"/RegExp:Function 296 | time 4.641 μs (4.613 μs .. 4.669 μs) 297 | 1.000 R² (0.999 R² .. 1.000 R²) 298 | mean 4.629 μs (4.600 μs .. 4.662 μs) 299 | std dev 105.4 ns (74.39 ns .. 141.8 ns) 300 | variance introduced by outliers: 25% (moderately inflated) 301 | 302 | benchmarking "aaabbbb"/Cofree:Map 303 | time 198.1 ns (195.8 ns .. 201.3 ns) 304 | 0.997 R² (0.993 R² .. 0.999 R²) 305 | mean 200.6 ns (197.0 ns .. 206.6 ns) 306 | std dev 15.21 ns (9.733 ns .. 22.60 ns) 307 | variance introduced by outliers: 84% (severely inflated) 308 | 309 | benchmarking "aaabbbb"/Cofree:IntMap 310 | time 179.6 ns (178.4 ns .. 180.6 ns) 311 | 1.000 R² (1.000 R² .. 1.000 R²) 312 | mean 178.7 ns (177.8 ns .. 179.7 ns) 313 | std dev 3.004 ns (2.585 ns .. 3.640 ns) 314 | variance introduced by outliers: 20% (moderately inflated) 315 | 316 | benchmarking 30/RegExp:Function 317 | time 302.3 μs (296.2 μs .. 309.6 μs) 318 | 0.997 R² (0.996 R² .. 0.998 R²) 319 | mean 309.5 μs (304.8 μs .. 314.3 μs) 320 | std dev 17.20 μs (13.48 μs .. 23.80 μs) 321 | variance introduced by outliers: 52% (severely inflated) 322 | 323 | benchmarking 30/Cofree:Map 324 | time 1.629 μs (1.617 μs .. 1.642 μs) 325 | 0.999 R² (0.999 R² .. 1.000 R²) 326 | mean 1.631 μs (1.619 μs .. 1.651 μs) 327 | std dev 49.53 ns (33.25 ns .. 93.67 ns) 328 | variance introduced by outliers: 40% (moderately inflated) 329 | 330 | benchmarking 30/Cofree:IntMap 331 | time 1.525 μs (1.515 μs .. 1.535 μs) 332 | 0.999 R² (0.999 R² .. 1.000 R²) 333 | mean 1.532 μs (1.519 μs .. 1.574 μs) 334 | std dev 66.66 ns (23.25 ns .. 153.9 ns) 335 | variance introduced by outliers: 58% (severely inflated) 336 | 337 | ``` 338 | -------------------------------------------------------------------------------- /test/Benchmarks/without isOne.md: -------------------------------------------------------------------------------- 1 | 2 | # Group "star-a" 3 | 4 | ``` 5 | 6 | benchmarking "a"/RegExp:Function 7 | time 454.1 ns (448.0 ns .. 460.9 ns) 8 | 0.997 R² (0.994 R² .. 0.999 R²) 9 | mean 456.9 ns (449.7 ns .. 471.4 ns) 10 | std dev 34.15 ns (23.58 ns .. 47.80 ns) 11 | variance introduced by outliers: 83% (severely inflated) 12 | 13 | benchmarking "a"/RegExp:Map 14 | time 353.3 ns (350.3 ns .. 356.1 ns) 15 | 1.000 R² (0.999 R² .. 1.000 R²) 16 | mean 351.3 ns (349.4 ns .. 353.6 ns) 17 | std dev 6.978 ns (5.924 ns .. 8.273 ns) 18 | variance introduced by outliers: 25% (moderately inflated) 19 | 20 | benchmarking "a"/RegExp:IntMap 21 | time 358.6 ns (356.6 ns .. 361.2 ns) 22 | 1.000 R² (1.000 R² .. 1.000 R²) 23 | mean 358.5 ns (356.5 ns .. 361.2 ns) 24 | std dev 7.612 ns (5.861 ns .. 10.54 ns) 25 | variance introduced by outliers: 28% (moderately inflated) 26 | 27 | benchmarking "a"/Cofree:Map 28 | time 27.29 ns (27.15 ns .. 27.49 ns) 29 | 1.000 R² (0.999 R² .. 1.000 R²) 30 | mean 27.54 ns (27.39 ns .. 27.77 ns) 31 | std dev 600.3 ps (451.5 ps .. 920.6 ps) 32 | variance introduced by outliers: 33% (moderately inflated) 33 | 34 | benchmarking "a"/Cofree:IntMap 35 | time 24.81 ns (24.70 ns .. 24.94 ns) 36 | 1.000 R² (1.000 R² .. 1.000 R²) 37 | mean 24.98 ns (24.83 ns .. 25.23 ns) 38 | std dev 660.6 ps (419.2 ps .. 1.017 ns) 39 | variance introduced by outliers: 42% (moderately inflated) 40 | 41 | benchmarking a50/RegExp:Function 42 | time 17.68 μs (17.62 μs .. 17.77 μs) 43 | 1.000 R² (0.999 R² .. 1.000 R²) 44 | mean 17.68 μs (17.60 μs .. 17.79 μs) 45 | std dev 293.4 ns (230.1 ns .. 439.2 ns) 46 | variance introduced by outliers: 13% (moderately inflated) 47 | 48 | benchmarking a50/RegExp:Map 49 | time 13.28 μs (13.07 μs .. 13.45 μs) 50 | 0.999 R² (0.999 R² .. 1.000 R²) 51 | mean 13.12 μs (13.05 μs .. 13.21 μs) 52 | std dev 276.5 ns (215.0 ns .. 341.7 ns) 53 | variance introduced by outliers: 20% (moderately inflated) 54 | 55 | benchmarking a50/RegExp:IntMap 56 | time 13.46 μs (13.30 μs .. 13.62 μs) 57 | 0.999 R² (0.998 R² .. 0.999 R²) 58 | mean 13.45 μs (13.32 μs .. 13.63 μs) 59 | std dev 512.3 ns (407.4 ns .. 713.1 ns) 60 | variance introduced by outliers: 46% (moderately inflated) 61 | 62 | benchmarking a50/Cofree:Map 63 | time 1.329 μs (1.307 μs .. 1.354 μs) 64 | 0.997 R² (0.993 R² .. 0.999 R²) 65 | mean 1.329 μs (1.307 μs .. 1.380 μs) 66 | std dev 102.3 ns (45.58 ns .. 165.0 ns) 67 | variance introduced by outliers: 82% (severely inflated) 68 | 69 | benchmarking a50/Cofree:IntMap 70 | time 1.283 μs (1.261 μs .. 1.310 μs) 71 | 0.997 R² (0.993 R² .. 0.999 R²) 72 | mean 1.306 μs (1.287 μs .. 1.329 μs) 73 | std dev 66.60 ns (49.77 ns .. 109.4 ns) 74 | variance introduced by outliers: 67% (severely inflated) 75 | 76 | ``` 77 | 78 | # Group "letters" 79 | 80 | ``` 81 | 82 | benchmarking asdf-50/RegExp:Function 83 | time 6.376 ms (6.185 ms .. 6.566 ms) 84 | 0.993 R² (0.988 R² .. 0.996 R²) 85 | mean 5.862 ms (5.736 ms .. 5.997 ms) 86 | std dev 393.9 μs (337.9 μs .. 453.9 μs) 87 | variance introduced by outliers: 40% (moderately inflated) 88 | 89 | benchmarking asdf-50/RegExp:Map 90 | time 5.798 ms (5.565 ms .. 5.993 ms) 91 | 0.990 R² (0.984 R² .. 0.995 R²) 92 | mean 5.633 ms (5.523 ms .. 5.750 ms) 93 | std dev 346.6 μs (292.9 μs .. 425.5 μs) 94 | variance introduced by outliers: 37% (moderately inflated) 95 | 96 | benchmarking asdf-50/RegExp:IntMap 97 | time 5.312 ms (5.113 ms .. 5.477 ms) 98 | 0.992 R² (0.987 R² .. 0.996 R²) 99 | mean 5.344 ms (5.246 ms .. 5.416 ms) 100 | std dev 268.5 μs (215.4 μs .. 335.5 μs) 101 | variance introduced by outliers: 29% (moderately inflated) 102 | 103 | benchmarking asdf-50/Cofree:Map 104 | time 13.78 μs (13.49 μs .. 14.19 μs) 105 | 0.993 R² (0.987 R² .. 0.998 R²) 106 | mean 14.37 μs (14.12 μs .. 14.69 μs) 107 | std dev 972.3 ns (776.8 ns .. 1.445 μs) 108 | variance introduced by outliers: 73% (severely inflated) 109 | 110 | benchmarking asdf-50/Cofree:IntMap 111 | time 12.71 μs (12.48 μs .. 12.93 μs) 112 | 0.998 R² (0.997 R² .. 0.999 R²) 113 | mean 12.74 μs (12.60 μs .. 12.88 μs) 114 | std dev 500.0 ns (415.7 ns .. 619.9 ns) 115 | variance introduced by outliers: 48% (moderately inflated) 116 | 117 | ``` 118 | 119 | # Group "dyck" 120 | 121 | ``` 122 | 123 | benchmarking "[]"/RegExp:Function 124 | time 1.803 μs (1.786 μs .. 1.820 μs) 125 | 0.999 R² (0.998 R² .. 0.999 R²) 126 | mean 1.836 μs (1.811 μs .. 1.868 μs) 127 | std dev 93.84 ns (69.44 ns .. 143.5 ns) 128 | variance introduced by outliers: 66% (severely inflated) 129 | 130 | benchmarking "[]"/Cofree:Map 131 | time 55.05 ns (54.82 ns .. 55.41 ns) 132 | 0.999 R² (0.998 R² .. 1.000 R²) 133 | mean 55.95 ns (55.42 ns .. 57.22 ns) 134 | std dev 2.558 ns (1.370 ns .. 4.745 ns) 135 | variance introduced by outliers: 68% (severely inflated) 136 | 137 | benchmarking "[]"/Cofree:IntMap 138 | time 52.76 ns (52.16 ns .. 53.53 ns) 139 | 0.999 R² (0.998 R² .. 1.000 R²) 140 | mean 52.60 ns (52.24 ns .. 53.11 ns) 141 | std dev 1.534 ns (1.090 ns .. 2.243 ns) 142 | variance introduced by outliers: 46% (moderately inflated) 143 | 144 | benchmarking "[[]]"/RegExp:Function 145 | time 4.375 μs (4.334 μs .. 4.421 μs) 146 | 0.999 R² (0.999 R² .. 0.999 R²) 147 | mean 4.408 μs (4.354 μs .. 4.466 μs) 148 | std dev 189.6 ns (154.5 ns .. 232.5 ns) 149 | variance introduced by outliers: 55% (severely inflated) 150 | 151 | benchmarking "[[]]"/Cofree:Map 152 | time 111.0 ns (109.8 ns .. 112.5 ns) 153 | 0.999 R² (0.998 R² .. 0.999 R²) 154 | mean 110.8 ns (109.8 ns .. 112.2 ns) 155 | std dev 3.976 ns (3.116 ns .. 5.386 ns) 156 | variance introduced by outliers: 55% (severely inflated) 157 | 158 | benchmarking "[[]]"/Cofree:IntMap 159 | time 109.1 ns (107.5 ns .. 111.0 ns) 160 | 0.998 R² (0.997 R² .. 0.999 R²) 161 | mean 109.2 ns (107.9 ns .. 110.8 ns) 162 | std dev 4.858 ns (3.944 ns .. 6.490 ns) 163 | variance introduced by outliers: 66% (severely inflated) 164 | 165 | benchmarking "[[a]]"/RegExp:Function 166 | time 3.263 μs (3.204 μs .. 3.342 μs) 167 | 0.996 R² (0.992 R² .. 0.999 R²) 168 | mean 3.332 μs (3.286 μs .. 3.417 μs) 169 | std dev 201.5 ns (135.5 ns .. 307.8 ns) 170 | variance introduced by outliers: 72% (severely inflated) 171 | 172 | benchmarking "[[a]]"/Cofree:Map 173 | time 149.5 ns (148.2 ns .. 150.9 ns) 174 | 0.999 R² (0.999 R² .. 1.000 R²) 175 | mean 152.1 ns (150.5 ns .. 155.5 ns) 176 | std dev 7.544 ns (4.165 ns .. 13.81 ns) 177 | variance introduced by outliers: 70% (severely inflated) 178 | 179 | benchmarking "[[a]]"/Cofree:IntMap 180 | time 128.6 ns (128.0 ns .. 129.3 ns) 181 | 1.000 R² (1.000 R² .. 1.000 R²) 182 | mean 128.5 ns (127.8 ns .. 129.3 ns) 183 | std dev 2.776 ns (2.103 ns .. 4.072 ns) 184 | variance introduced by outliers: 30% (moderately inflated) 185 | 186 | benchmarking "[[]][]"/RegExp:Function 187 | time 6.002 μs (5.929 μs .. 6.076 μs) 188 | 0.999 R² (0.998 R² .. 1.000 R²) 189 | mean 5.946 μs (5.905 μs .. 5.995 μs) 190 | std dev 154.5 ns (122.9 ns .. 206.8 ns) 191 | variance introduced by outliers: 30% (moderately inflated) 192 | 193 | benchmarking "[[]][]"/Cofree:Map 194 | time 160.0 ns (159.5 ns .. 160.4 ns) 195 | 1.000 R² (1.000 R² .. 1.000 R²) 196 | mean 160.0 ns (159.5 ns .. 160.5 ns) 197 | std dev 1.831 ns (1.554 ns .. 2.244 ns) 198 | variance introduced by outliers: 11% (moderately inflated) 199 | 200 | benchmarking "[[]][]"/Cofree:IntMap 201 | time 154.9 ns (154.0 ns .. 155.8 ns) 202 | 1.000 R² (1.000 R² .. 1.000 R²) 203 | mean 155.3 ns (154.5 ns .. 156.4 ns) 204 | std dev 3.068 ns (2.517 ns .. 3.785 ns) 205 | variance introduced by outliers: 26% (moderately inflated) 206 | 207 | ``` 208 | 209 | # Group "anbn" 210 | 211 | ``` 212 | 213 | benchmarking ""/RegExp:Function 214 | time 26.72 ns (26.59 ns .. 26.89 ns) 215 | 1.000 R² (1.000 R² .. 1.000 R²) 216 | mean 26.81 ns (26.66 ns .. 27.02 ns) 217 | std dev 590.0 ps (451.8 ps .. 790.3 ps) 218 | variance introduced by outliers: 33% (moderately inflated) 219 | 220 | benchmarking ""/Cofree:Map 221 | time 7.965 ns (7.933 ns .. 8.004 ns) 222 | 1.000 R² (1.000 R² .. 1.000 R²) 223 | mean 7.959 ns (7.929 ns .. 7.999 ns) 224 | std dev 116.0 ps (82.25 ps .. 164.0 ps) 225 | variance introduced by outliers: 19% (moderately inflated) 226 | 227 | benchmarking ""/Cofree:IntMap 228 | time 7.957 ns (7.927 ns .. 7.990 ns) 229 | 1.000 R² (1.000 R² .. 1.000 R²) 230 | mean 7.957 ns (7.928 ns .. 7.988 ns) 231 | std dev 99.33 ps (77.89 ps .. 144.3 ps) 232 | variance introduced by outliers: 15% (moderately inflated) 233 | 234 | benchmarking "ab"/RegExp:Function 235 | time 1.582 μs (1.567 μs .. 1.598 μs) 236 | 0.999 R² (0.999 R² .. 1.000 R²) 237 | mean 1.586 μs (1.576 μs .. 1.599 μs) 238 | std dev 37.65 ns (31.25 ns .. 48.00 ns) 239 | variance introduced by outliers: 29% (moderately inflated) 240 | 241 | benchmarking "ab"/Cofree:Map 242 | time 54.44 ns (53.99 ns .. 54.98 ns) 243 | 1.000 R² (0.999 R² .. 1.000 R²) 244 | mean 54.26 ns (53.98 ns .. 54.76 ns) 245 | std dev 1.198 ns (825.2 ps .. 1.869 ns) 246 | variance introduced by outliers: 33% (moderately inflated) 247 | 248 | benchmarking "ab"/Cofree:IntMap 249 | time 51.55 ns (51.26 ns .. 51.84 ns) 250 | 1.000 R² (1.000 R² .. 1.000 R²) 251 | mean 51.37 ns (51.18 ns .. 51.66 ns) 252 | std dev 784.8 ps (594.7 ps .. 1.036 ns) 253 | variance introduced by outliers: 19% (moderately inflated) 254 | 255 | benchmarking "aacbb"/RegExp:Function 256 | time 2.923 μs (2.898 μs .. 2.950 μs) 257 | 0.999 R² (0.998 R² .. 0.999 R²) 258 | mean 3.013 μs (2.963 μs .. 3.105 μs) 259 | std dev 216.8 ns (126.2 ns .. 348.2 ns) 260 | variance introduced by outliers: 79% (severely inflated) 261 | 262 | benchmarking "aacbb"/Cofree:Map 263 | time 146.7 ns (146.0 ns .. 147.7 ns) 264 | 1.000 R² (0.999 R² .. 1.000 R²) 265 | mean 147.0 ns (146.2 ns .. 148.2 ns) 266 | std dev 3.158 ns (2.604 ns .. 3.906 ns) 267 | variance introduced by outliers: 30% (moderately inflated) 268 | 269 | benchmarking "aacbb"/Cofree:IntMap 270 | time 129.9 ns (129.3 ns .. 130.8 ns) 271 | 1.000 R² (1.000 R² .. 1.000 R²) 272 | mean 130.3 ns (129.8 ns .. 131.3 ns) 273 | std dev 2.427 ns (1.766 ns .. 3.724 ns) 274 | variance introduced by outliers: 24% (moderately inflated) 275 | 276 | benchmarking "aaabbb"/RegExp:Function 277 | time 5.046 μs (4.987 μs .. 5.101 μs) 278 | 0.999 R² (0.999 R² .. 1.000 R²) 279 | mean 5.062 μs (5.018 μs .. 5.115 μs) 280 | std dev 164.2 ns (127.5 ns .. 201.8 ns) 281 | variance introduced by outliers: 41% (moderately inflated) 282 | 283 | benchmarking "aaabbb"/Cofree:Map 284 | time 165.2 ns (163.2 ns .. 167.3 ns) 285 | 0.999 R² (0.999 R² .. 1.000 R²) 286 | mean 163.6 ns (162.6 ns .. 165.0 ns) 287 | std dev 3.739 ns (2.844 ns .. 5.088 ns) 288 | variance introduced by outliers: 32% (moderately inflated) 289 | 290 | benchmarking "aaabbb"/Cofree:IntMap 291 | time 153.8 ns (152.4 ns .. 154.9 ns) 292 | 1.000 R² (0.999 R² .. 1.000 R²) 293 | mean 154.3 ns (153.2 ns .. 155.3 ns) 294 | std dev 3.608 ns (3.067 ns .. 4.458 ns) 295 | variance introduced by outliers: 33% (moderately inflated) 296 | 297 | benchmarking "aaabbbb"/RegExp:Function 298 | time 5.050 μs (5.014 μs .. 5.100 μs) 299 | 0.999 R² (0.999 R² .. 1.000 R²) 300 | mean 5.088 μs (5.043 μs .. 5.151 μs) 301 | std dev 172.8 ns (125.7 ns .. 267.3 ns) 302 | variance introduced by outliers: 43% (moderately inflated) 303 | 304 | benchmarking "aaabbbb"/Cofree:Map 305 | time 209.2 ns (204.0 ns .. 212.8 ns) 306 | 0.998 R² (0.997 R² .. 0.998 R²) 307 | mean 205.0 ns (202.7 ns .. 207.5 ns) 308 | std dev 8.619 ns (7.269 ns .. 9.856 ns) 309 | variance introduced by outliers: 62% (severely inflated) 310 | 311 | benchmarking "aaabbbb"/Cofree:IntMap 312 | time 180.5 ns (179.3 ns .. 182.0 ns) 313 | 1.000 R² (0.999 R² .. 1.000 R²) 314 | mean 180.5 ns (179.5 ns .. 181.9 ns) 315 | std dev 4.205 ns (3.242 ns .. 5.345 ns) 316 | variance introduced by outliers: 33% (moderately inflated) 317 | 318 | benchmarking 30/RegExp:Function 319 | time 297.1 μs (294.8 μs .. 299.8 μs) 320 | 0.999 R² (0.999 R² .. 1.000 R²) 321 | mean 300.4 μs (298.6 μs .. 302.5 μs) 322 | std dev 6.556 μs (5.417 μs .. 8.177 μs) 323 | variance introduced by outliers: 14% (moderately inflated) 324 | 325 | benchmarking 30/Cofree:Map 326 | time 1.661 μs (1.640 μs .. 1.679 μs) 327 | 0.999 R² (0.999 R² .. 1.000 R²) 328 | mean 1.641 μs (1.629 μs .. 1.655 μs) 329 | std dev 44.37 ns (32.67 ns .. 64.99 ns) 330 | variance introduced by outliers: 35% (moderately inflated) 331 | 332 | benchmarking 30/Cofree:IntMap 333 | time 1.538 μs (1.528 μs .. 1.551 μs) 334 | 1.000 R² (0.999 R² .. 1.000 R²) 335 | mean 1.540 μs (1.530 μs .. 1.558 μs) 336 | std dev 42.94 ns (27.63 ns .. 71.28 ns) 337 | variance introduced by outliers: 37% (moderately inflated) 338 | 339 | ``` 340 | -------------------------------------------------------------------------------- /src/Semi.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -Wno-unused-imports #-} -- TEMP 2 | 3 | {-# OPTIONS_GHC -Wno-orphans #-} 4 | 5 | -- | Commutative monoids, semirings, and semimodules 6 | 7 | module Semi where 8 | 9 | import Prelude hiding (sum,product,(^)) 10 | 11 | import Control.Applicative (liftA2) 12 | import Control.Arrow (first) 13 | import GHC.Natural (Natural) 14 | import Data.Functor.Identity (Identity(..)) 15 | import Data.Maybe (fromMaybe,isNothing) 16 | import GHC.Exts (Coercible,coerce,Constraint) 17 | import Control.DeepSeq (NFData) 18 | import GHC.TypeLits (KnownNat) 19 | 20 | import Data.Map (Map) 21 | import qualified Data.Map as M 22 | import Data.IntMap.Lazy (IntMap) 23 | import qualified Data.IntMap.Lazy as IntMap 24 | import Data.Set (Set) 25 | import qualified Data.Set as S 26 | import Data.MemoTrie 27 | 28 | import Data.Finite (Finite) 29 | import Data.Vector.Sized (Vector) 30 | import qualified Data.Vector.Sized as V 31 | 32 | import Misc 33 | -- import Constrained 34 | 35 | #include "GenInstances.inc" 36 | 37 | {-------------------------------------------------------------------- 38 | Classes 39 | --------------------------------------------------------------------} 40 | 41 | -- Keyed functors. Useful for memoization in RegExp and maybe elsewhere. 42 | type family Key (h :: * -> *) :: * 43 | 44 | -- Taken from Data.Key 45 | class Functor f => Keyed f where 46 | mapWithKey :: (Key f -> a -> b) -> f a -> f b 47 | 48 | -- Inspired by Indexable from Data.Key in the keys library. 49 | class Indexable a b x | x -> a b where 50 | infixl 9 ! 51 | (!) :: x -> a -> b 52 | 53 | -- | Enumerate non-zero values 54 | class Indexable a b x => Listable a b x where toList :: x -> [(a,b)] 55 | 56 | -- TODO: rename toList to "nonZeros" or some such. Introduce early in the paper, 57 | -- and use for representations like Map. 58 | 59 | -- TODO: Laws: (!) must be natural; h must presere additivity, and !)| is an 60 | -- Additive homomorphism. 61 | 62 | -- | Commutative monoid 63 | class Additive b where 64 | zero :: b 65 | (<+>) :: b -> b -> b 66 | infixr 6 <+> 67 | 68 | class {- Additive b => -} DetectableZero b where 69 | isZero :: b -> Bool 70 | 71 | class Additive b => Semiring b where 72 | one :: b 73 | (<.>) :: b -> b -> b 74 | infixr 7 <.> 75 | 76 | fromBool :: Semiring b => Bool -> b 77 | fromBool False = zero 78 | fromBool True = one 79 | 80 | class {- Semiring b => -} DetectableOne b where 81 | isOne :: b -> Bool 82 | 83 | class Semiring b => StarSemiring b where 84 | star :: b -> b 85 | plus :: b -> b 86 | star x = one <+> plus x 87 | plus x = x <.> star x 88 | {-# INLINE star #-} 89 | {-# INLINE plus #-} 90 | 91 | class {- (Semiring s, Additive b) => -} LeftSemimodule s b | b -> s where 92 | scale :: s -> b -> b 93 | -- default scale :: (Semiring b, s ~ b) => s -> b -> b -- experimental 94 | -- scale = (<.>) 95 | default scale :: (Semiring s, Functor f, b ~ f s) => s -> b -> b -- experimental 96 | scale s = fmap (s <.>) 97 | {-# INLINE scale #-} 98 | 99 | -- TODO: Add the Semiring superclass, and remove redundant constraints 100 | -- elsewhere. Search for occurrences of LeftSemimodule. 101 | 102 | instance Semiring s => LeftSemimodule s (a -> s) where s `scale` f = \ a -> s <.> f a 103 | 104 | -- | 'scale' optimized for zero or one scalar 105 | infixr 7 .> 106 | (.>) :: (Additive b, LeftSemimodule s b, DetectableZero s, DetectableOne s) => s -> b -> b 107 | s .> b | isZero s = zero 108 | | isOne s = b 109 | | otherwise = s `scale` b 110 | {-# INLINE (.>) #-} 111 | 112 | #if 0 113 | type Additive1 = Con1 Additive 114 | type Semiring1 = Con1 Semiring 115 | type StarSemiring1 = Con1 StarSemiring 116 | 117 | type DetectableZero1 = Con1 DetectableZero 118 | type DetectableOne1 = Con1 DetectableOne 119 | #endif 120 | 121 | {-------------------------------------------------------------------- 122 | Singletons 123 | --------------------------------------------------------------------} 124 | 125 | class Indexable a b x => HasSingle a b x where 126 | infixr 2 +->, *-> 127 | (+->) :: a -> b -> x 128 | (*->) :: Set a -> b -> x 129 | a +-> b = S.singleton a *-> b 130 | default (*->) :: Additive x => Set a -> b -> x 131 | as *-> b = sum [a +-> b | a <- S.elems as] 132 | {-# MINIMAL (+->) | (*->) #-} 133 | 134 | single :: (HasSingle a b x, Semiring b) => a -> x 135 | single a = a +-> one 136 | 137 | singles :: (HasSingle a b x, Semiring b) => Set a -> x 138 | singles as = as *-> one 139 | 140 | value :: (HasSingle a b x, Monoid a) => b -> x 141 | value b = mempty +-> b 142 | 143 | -- instance HasSingle a Bool [a] where 144 | -- a +-> b = if b then [a] else [] 145 | 146 | -- instance HasSingle a Bool (Set a) where 147 | -- a +-> b = if b then S.singleton a else S.empty 148 | 149 | {-------------------------------------------------------------------- 150 | Instances 151 | --------------------------------------------------------------------} 152 | 153 | instance Additive Bool where 154 | zero = False 155 | (<+>) = (||) 156 | 157 | instance DetectableZero Bool where isZero = not 158 | 159 | instance Semiring Bool where 160 | one = True 161 | (<.>) = (&&) 162 | 163 | instance DetectableOne Bool where isOne = id 164 | 165 | instance StarSemiring Bool where star = const True 166 | 167 | Nums(Integer) 168 | Nums(Natural) 169 | Nums(Int) 170 | Nums(Float) 171 | Nums(Double) 172 | Nums(Rational) 173 | -- etc 174 | 175 | -- Experiment 176 | instance StarSemiring (Natural) where 177 | star 0 = 1 178 | star _ = 987654321 179 | 180 | -- ApplSemi((->) a) -- use monoid semiring instead for now 181 | -- etc 182 | 183 | -- ApplMono([]) 184 | -- ApplMono(Set) 185 | -- etc 186 | 187 | type instance Key ((->) a) = a 188 | 189 | instance Keyed ((->) a) where 190 | mapWithKey h f = \ a -> h a (f a) 191 | 192 | instance Indexable a b (a -> b) where 193 | f ! k = f k 194 | 195 | instance (Eq a, Additive b) => HasSingle a b (a -> b) where 196 | a +-> b = \ a' -> if a' == a then b else zero 197 | -- as *-> b = \ a' -> if a' `S.member` as then b else zero 198 | 199 | instance Additive b => Additive (a -> b) where 200 | zero = pure zero 201 | (<+>) = liftA2 (<+>) 202 | -- zero = \ _ -> zero 203 | -- f <+> g = \ a -> f a <+> g a 204 | 205 | type D01 b = (DetectableZero b, DetectableOne b) 206 | 207 | -- Short-circuiting <.> 208 | infixr 7 <..> 209 | (<..>) :: (Semiring b, DetectableZero b, DetectableOne b) => b -> b -> b 210 | x <..> y | isZero x = zero 211 | | isOne x = y 212 | | otherwise = x <.> y 213 | 214 | instance (Monoid a, Eq a, Splittable a, Semiring b, D01 b) => Semiring (a -> b) where 215 | one = single mempty 216 | f <.> g = \ w -> sum [f u <..> g v | (u,v) <- splits w] 217 | 218 | instance (Monoid a, Eq a, Splittable a, Semiring b, D01 b) => StarSemiring (a -> b) 219 | 220 | instance Ord a => Additive (Set a) where 221 | zero = S.empty 222 | (<+>) = S.union 223 | 224 | instance (Ord a, Additive b) => Additive (Map a b) where 225 | zero = M.empty 226 | (<+>) = M.unionWith (<+>) 227 | 228 | instance DetectableZero (Map a b) where isZero = M.null 229 | 230 | #if 1 231 | -- Experiment for Poly 232 | instance DetectableOne (Map a b) where isOne = const False 233 | #else 234 | instance (Monoid a, Ord a, Additive b, DetectableOne b) => DetectableOne (Map a b) where 235 | isOne m = isOne (m!mempty) && isZero (M.delete mempty m) 236 | #endif 237 | 238 | -- FunctorSemimodule(Map a) 239 | 240 | instance Semiring b => LeftSemimodule b (Map a b) where scale b = fmap (b <.>) 241 | 242 | instance (Ord a, Monoid a, Semiring b) => Semiring (Map a b) where 243 | one = mempty +-> one 244 | p <.> q = sum [u <> v +-> p!u <.> q!v | u <- M.keys p, v <- M.keys q] 245 | 246 | type instance Key (Map a) = a 247 | 248 | instance Keyed (Map a) where mapWithKey = M.mapWithKey 249 | 250 | instance (Ord a, Additive b) => Indexable a b (Map a b) where 251 | m ! a = M.findWithDefault zero a m 252 | 253 | instance (Ord a, Additive b) => Listable a b (Map a b) where toList = M.toList 254 | 255 | instance (Ord a, Additive b) => HasSingle a b (Map a b) where 256 | (+->) = M.singleton 257 | as *-> b = M.fromList [(a,b) | a <- S.elems as] 258 | 259 | -- type instance Key (Set a) = a 260 | 261 | -- instance Keyed (Set a) where mapWithKey = M.mapWithKey 262 | 263 | instance Ord a => Indexable a Bool (Set a) where 264 | m ! a = a `S.member` m 265 | 266 | instance Ord a => Listable a Bool (Set a) where 267 | toList = fmap (,True) . S.toList 268 | 269 | instance Ord a => HasSingle a Bool (Set a) where 270 | _ +-> False = S.empty 271 | a +-> True = S.singleton a 272 | _ *-> False = S.empty 273 | as *-> True = as 274 | 275 | -- newtype Identity b = Identity b 276 | 277 | type instance Key Identity = () 278 | 279 | instance Keyed Identity where mapWithKey h = fmap (h ()) 280 | 281 | instance Indexable () b (Identity b) where Identity a ! () = a 282 | 283 | instance Listable () b (Identity b) where toList (Identity b) = [((),b)] 284 | 285 | instance Additive b => HasSingle () b (Identity b) where 286 | -- us *-> b = if S.null us then zero else Identity b 287 | () +-> b = Identity b 288 | 289 | deriving instance Additive b => Additive (Identity b) 290 | deriving instance DetectableZero b => DetectableZero (Identity b) 291 | deriving instance DetectableOne b => DetectableOne (Identity b) 292 | deriving instance LeftSemimodule s b => LeftSemimodule s (Identity b) 293 | deriving instance Semiring b => Semiring (Identity b) 294 | 295 | -- newtype Id b = Id b deriving 296 | -- (Functor, Additive, DetectableZero, DetectableOne, LeftSemimodule s, Semiring) 297 | 298 | type instance Key Maybe = () 299 | 300 | instance Keyed Maybe where mapWithKey h = fmap (h ()) 301 | 302 | instance Additive b => Indexable () b (Maybe b) where 303 | -- Nothing ! () = zero 304 | -- Just b ! () = b 305 | mb ! () = fromMaybe zero mb 306 | 307 | instance Additive b => Listable () b (Maybe b) where 308 | toList Nothing = [] 309 | toList (Just b) = [((),b)] 310 | 311 | instance (DetectableZero b, Additive b) => HasSingle () b (Maybe b) where 312 | () +-> b | isZero b = Nothing 313 | | otherwise = Just b 314 | 315 | instance Additive b => Additive (Maybe b) where 316 | zero = Nothing 317 | Nothing <+> v = v 318 | u <+> Nothing = u 319 | Just a <+> Just b = Just (a <+> b) 320 | 321 | instance Semiring b => Semiring (Maybe b) where 322 | one = Just one 323 | Nothing <.> _ = zero 324 | _ <.> Nothing = zero 325 | Just a <.> Just b = Just (a <.> b) 326 | 327 | instance DetectableZero (Maybe b) where 328 | isZero = isNothing 329 | -- We could also check Just b for b==0 330 | 331 | instance DetectableOne b => DetectableOne (Maybe b) where 332 | isOne (Just b) | isOne b = True 333 | isOne _ = False 334 | 335 | {-------------------------------------------------------------------- 336 | Sum and product monoids 337 | --------------------------------------------------------------------} 338 | 339 | -- semiring-num defines 'add' and 'mul' via foldl', but I think I want foldr 340 | -- instead. 341 | 342 | newtype Sum a = Sum a deriving 343 | (Eq,Ord,NFData,Num,Real,Integral,Additive,DetectableZero,Semiring,DetectableOne,StarSemiring) 344 | 345 | instance Show a => Show (Sum a) where 346 | showsPrec d (Sum a) = showsPrec d a 347 | 348 | getSum :: Sum a -> a 349 | getSum (Sum a) = a 350 | 351 | instance Additive a => Semigroup (Sum a) where 352 | Sum a <> Sum b = Sum (a <+> b) 353 | 354 | instance Additive a => Monoid (Sum a) where 355 | mempty = Sum zero 356 | 357 | sum :: (Foldable f, Additive a) => f a -> a 358 | sum = getSum . foldMap Sum 359 | 360 | missing :: String -> String -> z 361 | missing ty op = error ("No " ++ op ++ " method for " ++ ty) 362 | 363 | noSum :: String -> z 364 | noSum = missing "Sum" "(*)" 365 | 366 | instance Enum a => Enum (Sum a) where 367 | toEnum = Sum . toEnum 368 | fromEnum = fromEnum . getSum 369 | 370 | newtype Product a = Product a deriving (Eq,Show) 371 | 372 | getProduct :: Product a -> a 373 | getProduct (Product a) = a 374 | 375 | instance Semiring a => Semigroup (Product a) where 376 | Product a <> Product b = Product (a <.> b) 377 | 378 | instance Semiring a => Monoid (Product a) where 379 | mempty = Product one 380 | 381 | product :: (Foldable f, Semiring a) => f a -> a 382 | product = getProduct . foldMap Product 383 | 384 | type N = Sum Natural 385 | 386 | type Z = Sum Integer 387 | 388 | -- Experiment 389 | class HasPow x n b | x n -> b where 390 | infixr 8 ^# 391 | (^#) :: x -> n -> b 392 | 393 | instance Semiring b => HasPow b N b where 394 | (^#) = (^) 395 | 396 | instance (HasPow x n b, Additive x, Ord k, Additive n, Semiring b) 397 | => HasPow (Map k x) (Map k n) b where 398 | bs ^# ns = product [b ^# ns!i | (i,b) <- toList bs] 399 | 400 | 401 | infixr 8 ^ 402 | (^), pow :: Semiring a => a -> N -> a 403 | pow = (^) -- useful for the paper 404 | #if 0 405 | a ^ n = product (replicate (fromIntegral n) a) 406 | #else 407 | -- Adapted from (^) in the GHC Prelude 408 | x0 ^ y0 | y0 == 0 = one 409 | | otherwise = f x0 y0 410 | where -- f : x0 ^ y0 = x ^ y 411 | f x y | even y = f (x <.> x) (y `quot` 2) 412 | | y == 1 = x 413 | | otherwise = g (x <.> x) (y `quot` 2) x -- See Note [Half of y - 1] 414 | -- g : x0 ^ y0 = (x ^ y) <.> z 415 | g x y z | even y = g (x <.> x) (y `quot` 2) z 416 | | y == 1 = x <.> z 417 | | otherwise = g (x <.> x) (y `quot` 2) (x <.> z) -- See Note [Half of y - 1] 418 | 419 | {- Note [Half of y - 1] 420 | ~~~~~~~~~~~~~~~~~~~~~ 421 | Since y is guaranteed to be odd and positive here, 422 | half of y - 1 can be computed as y `quot` 2, optimising subtraction away. 423 | -} 424 | 425 | #endif 426 | 427 | instance Splittable N where 428 | isEmpty n = n == 0 429 | splits n = [(i, n-i) | i <- [0 .. n]] 430 | 431 | {-------------------------------------------------------------------- 432 | Misc 433 | --------------------------------------------------------------------} 434 | 435 | instance DetectableZero () where isZero () = True 436 | instance DetectableOne () where isOne () = True 437 | 438 | type instance Key [] = N 439 | 440 | instance Keyed [] where mapWithKey h = zipWith h [0 ..] 441 | 442 | instance Additive b => Indexable N b [b] where 443 | [] ! _ = zero 444 | (b : _ ) ! 0 = b 445 | (_ : bs) ! n = bs ! (n-1) 446 | 447 | instance Additive b => Listable N b [b] where 448 | toList = zip [0 ..] 449 | 450 | instance (Additive b, DetectableZero b) => HasSingle N b [b] where 451 | 0 +-> b | isZero b = zero 452 | | otherwise = [b] 453 | n +-> b = zero : (n-1 +-> b) 454 | -- TODO: efficient *-> 455 | 456 | -- Should I really be using up lists here instead of saving them? 457 | -- Maybe wrap in a newtype 458 | 459 | -- [a] as a denotation of a <-- N 460 | 461 | -- See Cofree 462 | instance Additive b => Additive [b] where 463 | zero = [] 464 | [] <+> bs = bs 465 | as <+> [] = as 466 | (a : as) <+> (b : bs) = a <+> b : as <+> bs 467 | 468 | instance (Semiring b, D01 b) => Semiring [b] where 469 | one = one : zero 470 | [] <.> _ = [] -- 0 * q == 0 471 | (a : dp) <.> q = a .> q <+> (zero : dp <.> q) 472 | 473 | instance DetectableZero [b] where isZero = null 474 | 475 | instance DetectableOne b => DetectableOne [b] where 476 | isOne [b] = isOne b 477 | isOne _ = False 478 | 479 | instance Semiring b => LeftSemimodule b [b] where 480 | scale s = fmap (s <.>) 481 | 482 | instance (Semiring b, Num b, DetectableZero b, DetectableOne b) 483 | => Num [b] where 484 | fromInteger = value . fromInteger 485 | negate = ((-1) .>) 486 | (+) = (<+>) 487 | (*) = (<.>) 488 | abs = fmap abs 489 | signum = fmap signum 490 | 491 | 492 | -- TODO: generalize to other Integral or Enum types and add to Semi 493 | newtype CharMap b = CharMap (IntMap b) deriving Functor 494 | 495 | type instance Key CharMap = Char 496 | 497 | instance Keyed CharMap where 498 | mapWithKey h (CharMap m) = CharMap (IntMap.mapWithKey (h . toEnum) m) 499 | 500 | instance Additive b => Indexable Char b (CharMap b) where 501 | CharMap m ! a = IntMap.findWithDefault zero (fromEnum a) m 502 | 503 | instance Additive b => Listable Char b (CharMap b) where 504 | toList (CharMap m) = first toEnum <$> IntMap.toList m 505 | 506 | instance Additive b => HasSingle Char b (CharMap b) where 507 | a +-> b = CharMap (IntMap.singleton (fromEnum a) b) 508 | -- TODO: efficient *-> 509 | 510 | instance Additive b => Additive (CharMap b) where 511 | zero = CharMap IntMap.empty 512 | CharMap u <+> CharMap v = CharMap (IntMap.unionWith (<+>) u v) 513 | 514 | instance Additive b => DetectableZero (CharMap b) where isZero (CharMap m) = IntMap.null m 515 | 516 | type instance Key ((:->:) a) = a 517 | 518 | instance HasTrie a => Indexable a b (a :->: b) where 519 | (!) = untrie 520 | 521 | instance (HasTrie a, Eq a, Additive b) => HasSingle a b (a :->: b) where 522 | a +-> b = trie (a +-> b) 523 | -- TODO: efficient *-> 524 | 525 | instance (HasTrie a, Additive b) => Additive (a :->: b) where 526 | zero = trie zero 527 | u <+> v = trie (untrie u <+> untrie v) 528 | 529 | -- False negatives are okay. Only used for optimization 530 | instance (HasTrie a, Additive b) => DetectableZero (a :->: b) where isZero _ = False 531 | -- instance Additive b => DetectableOne (a :->: b) where isOne _ = False 532 | 533 | 534 | {-------------------------------------------------------------------- 535 | Vectors 536 | --------------------------------------------------------------------} 537 | 538 | -- instance Splittable (Finite n) where 539 | -- splits n = 540 | 541 | type Fin n = Sum (Finite n) 542 | 543 | -- Maybe Monoid won't cut it here, since Finite n isn't properly additive. 544 | 545 | type instance Key (Vector n) = Fin n 546 | 547 | instance Indexable (Fin n) b (Vector n b) where 548 | v ! Sum i = v `V.index` i 549 | 550 | instance (KnownNat n, DetectableZero b) => Listable (Fin n) b (Vector n b) where 551 | toList v = filter (not . isZero . snd) ([0 ..] `zip` V.toList v) 552 | 553 | instance (KnownNat n, Additive b) => HasSingle (Fin n) b (Vector n b) where 554 | Sum i +-> b = V.generate (\ j -> if j == i then b else zero) 555 | -- TODO: efficient *-> 556 | 557 | instance (KnownNat n, Additive b) => Additive (Vector n b) where 558 | zero = pure zero 559 | (<+>) = liftA2 (<+>) 560 | 561 | instance DetectableZero b => DetectableZero (Vector n b) where 562 | isZero = all isZero 563 | 564 | -- instance (KnownNat n, Semiring b) => Semiring (Vector n b) where 565 | -- one = 0 +-> one 566 | 567 | -- instance DetectableOne b => DetectableOne (Vector n b) where 568 | -- isOne = 569 | -------------------------------------------------------------------------------- /todo.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: To-do items for the paper and code 3 | substMap: [("<+>","+"),("<.>","∗"),(".>","·"),("+->","↦"),("<--","←")] 4 | ... 5 | 6 | ## To-do items for the paper and code 7 | 8 | ### Major 9 | 10 | * Conclusions and Future work 11 | * Why homomorphisms matter. 12 | * Comonad proof 13 | * See all of the issues flagged in the non-anonymous draft. 14 | * Regular expressions: 15 | * Optimizations. [done] 16 | * Try partially applying `(!)`. 17 | * Try implementing `(!)` by converting to `Cofree` (via `regexp`), and mention how it went. 18 | I expect the speed to be nearly indistinguishable from starting with `Cofree`. 19 | * Maybe include the implementation in the supplementary material. 20 | * Tweak `Makefile` to generate a tarball with the extended paper and source. 21 | * Remove author & copyright info from package.yaml, say via sed. 22 | * Check that the README instructions work in isolation. 23 | Include how to get `stack`. 24 | * I got all the way to a working tar ball but then noticed that it contained my user name in file ownership. 25 | There's a way around, but I'm not confident that the committee members will be able to access the files, including the extended pdf. 26 | Drop it. 27 | * Polynomials: 28 | * Univariate and multivariate. [done] 29 | * Power series: represent via list instead of stream. [done] 30 | * Prettier negative coefficients. [done] 31 | * Correctness theorem and proof for polynomial integration and differentiation. 32 | See commented-out start. 33 | * Image convolution example 34 | * Reduce page count in the ICFP submission. Some strategies: 35 | * Remove polynomials (and image convolution) 36 | * Move more proofs to the appendix. 37 | * Drop `Splittable` instance examples. 38 | * Drop the explicit enumeration of homomorphism equations in Lemma 10 ({atEps [c] -> b}) and maybe elsewhere. 39 | * Drop the re-formulation of `deriv` between Lemmas 11 and 12. 40 | * Rewrite some of my explicit lemmas and theorems as an in-line statement with a proof link. 41 | Keep the more formal style in the extended version. 42 | Make this change when I don't need to refer to a lemma/theorem other than from another proof. 43 | * Remove homomorphism examples. 44 | * Drop the whole section "Calculating Instances from Homomorphisms". 45 | * More side-by-side code or derivations. 46 | * Drop some intermediate steps, e.g., 47 | * `poly2` 48 | * The first generalized `poly` 49 | * Lots of small changes for succinctness or at least fewer lines. 50 | * Drop details for finite maps, leaving a short verbal description and pointer to extended version. 51 | * Tighten some spacing environments. 52 | * Move finite maps inside of another section. 53 | 54 | 55 | ### Misc 56 | 57 | * Reference to Brent's monoid paper. 58 | * More references to [*Fun with Semirings: A functional pearl on the abuse of linear algebra*]. 59 | * Probabilistic computation. 60 | * Reconsider title after writing the introduction and related work sections. 61 | * Wording improvements: 62 | * "*Uncovering* the method's essence" 63 | * Sets are to languages as binary relations are to what? 64 | Currying gets us to `String -> String -> Bool` (for sets). 65 | Is there anything interesting and/or useful here? 66 | Note that a (generalized) trie from pairs of strings is going to look like a curried trie anyway. 67 | * Super-memoization. 68 | See notes from 2018-12-02 and 2018-12-05. 69 | * Possibly some items in the Future Work section. 70 | * Consider unhiding page numbers in bib.bib. 71 | * A variant of `pow` that's syntactically quieter than having to write (and generate) "`(wrap (pow ...))`". 72 | Instead, just "`(ppow ...)`", with `ppow`'s `%format` directive dropping the parentheses. 73 | Better: use "`@@`" as a fake infix to help parsing. 74 | Still better: Parenthesize second argument of summations (and similar), so lhs2tex can parse correctly. 75 | Drop those parens in formatting, and sometimes add a second set. 76 | * Maybe drop `HasSingle` and define `single` via `fmap` and `one`. 77 | * Derive a semiring for lists based on a homomorphism from `[a]` to `a <-- Nat`. 78 | * From recognition to "parsing", i.e., generating rich representations. 79 | * Learn about tree grammars, and see how they fit with parsing with derivatives. 80 | * Try using `TMap` from [total-map](https://github.com/conal/total-map) in place of `Map` from containers, including the `Applicative` and `Monad` instances. 81 | I may have to add some operations. 82 | * Consider the following intuition. 83 | The result of `fmap h (F f)` is moving all of the values in `f` to new locations according to `h`, summing all values that get moved to the same place. 84 | Similarly for `liftA2`. 85 | Graphics has a similar issue! 86 | Spatial transformations may be one-to-many, especially if non-projective. 87 | * [Free semirings](http://hackage.haskell.org/package/semiring-num-1.6.0.1/docs/Data-Semiring-Free.html) 88 | * Generalize lists to end with a value, where the usual lists end with unit. 89 | Then monadic bind generalizes appending, i.e., `(++) = (>>)`. 90 | Now generalize from the unary/sequential nature of lists, and we get monadic bind as "substitution". 91 | I think the generalization here is to the free monad induced by some functor. 92 | For `[a]`, that functor is `(a :*)`. 93 | * Polynomials and perhaps integer multiplication. 94 | * Implement convolution (really `liftA2 add`) on statically sized arrays. 95 | I think I'll want an interface for enumerating all values of a type. 96 | The finite-typelits library has useful operations: 97 | ``` haskell 98 | finites :: KnownNat n => [Finite n] 99 | add :: Finite n -> Finite m -> Finite (n + m) 100 | ``` 101 | 102 | Try 1D and 2D examples. 103 | * Use semiring-num instead of my own classes for `Semiring`, `ClosedSemiring`, and `DetectableZero`. 104 | Consider uses for the other instances defined there. 105 | * Define `sum` and `product` at their first use. 106 | Consider renaming as in semiring-num. 107 | * Tropical semirings? 108 | * Generalize to other tries. 109 | * Counting and closure and infinity. 110 | * Probability distribution semiring: 111 | * Uses? 112 | * Closure 113 | * Understand and apply [this MathOverflow answer](https://math.stackexchange.com/a/1651127) on convolution and Day convolution. 114 | * Track down references for convolution over functions with arbitrary *monoidal* domains. 115 | Also "convolutional algebras". 116 | See, e.g., "[group algebra](https://www.encyclopediaofmath.org/index.php/Group_algebra)" and "[convolution algebra](https://ncatlab.org/nlab/show/convolution+algebra)". 117 | 118 | ### Some references 119 | 120 | * Brzozowski derivatives: 121 | * [Wikipedia page on the *Brzozowski derivative*](https://en.wikipedia.org/wiki/Brzozowski_derivative) 122 | * [*Derivatives of Regular Expressions*] 123 | * [*Rewriting Extended Regular Expressions*] 124 | * [*Some Properties of Brzozowski Derivatives of Regular Expressions*] 125 | * [*Derivatives of Regular Expressions and an Application*] 126 | * [*Regular-expression derivatives reexamined*] 127 | * [*Derivatives for Enhanced Regular Expressions*] 128 | * [*Regular expression sub-matching using partial derivatives*] 129 | * [*Testing Extended Regular Language Membership Incrementally by Rewriting*] 130 | * [*Yacc is dead*] 131 | * [*Yacc is dead: An update*] 132 | * Semirings: 133 | * [*Some Recent Applications of Semiring Theory*] 134 | * [*Fun with Semirings: A functional pearl on the abuse of linear algebra*] 135 | * [*Linear Algebra Over Semirings*] 136 | * On semirings and parsing: 137 | * [*Regenerate: a language generator for extended regular expressions*] 138 | * [*A Play on Regular Expressions*] (also targets arbitrary semirings) 139 | * [*Product Rules and Distributive Laws*] ("We give a categorical perspective on various product rules, including Brzozowski’s product rule ...") 140 | * [*Parsing Inside-Out*] 141 | * [*Semiring Parsing*] 142 | * [*Algebraic Foundation of Statistical Parsing: Semiring Parsing*] 143 | * [*Goodman: Semiring Parsing*] 144 | * Comonad references: 145 | * [Monads from Comonads](http://comonad.com/reader/2011/monads-from-comonads/) (blog post by Ed Kmett, 2011) 146 | * [*Monads from Comonads, Comonads from Monads*] 147 | * [*Should I use a Monad or a Comonad?*] 148 | * [*Moore for Less*] ([reddit discussion](https://www.reddit.com/r/haskell/comments/37lqxf/edward_kmett_moore_for_less/)) 149 | * [*Radix Sort, Trie Trees, And Maps From Representable Functors*] 150 | * Polynomials and power series: 151 | * [*In Praise of Sequence (Co-)Algebra and its implementation in Haskell*] 152 | * Doug McIlroy's work 153 | * Other: 154 | * [*Convolution as a Unifying Concept: Applications in Separation Logic, Interval Calculi, and Concurrency*] 155 | * [*Introduction to weighted automata theory*] 156 | * [*Weighted Automata*] 157 | * [*Quantifiers on languages and codensity monads*] 158 | * [What is a coalgebra intuitively?](https://mathoverflow.net/questions/76509/what-is-a-coalgebra-intuitively) 159 | * [*Bases as coalgebras*](https://arxiv.org/pdf/1309.0844.pdf) 160 | * [*The Dual of Substitution is Redecoration*] 161 | * [*Higher Dimensional Trees, Algebraically*] 162 | 163 | ## Did 164 | 165 | * Spell out the list semiring, or at least `(<.>)` 166 | * Check spacing with summations etc in the ICFP version. 167 | I made need some version-specific tweaks to `%format` definitions. 168 | * Benchmarking results, including recursively defined languages. 169 | * Acknowledge semiring-num, at least in related work. 170 | * Related work. Lots of references below. 171 | * Introduction, including contributions. 172 | * The comonad connection (in progress). 173 | * Arithmetic on infinite series. 174 | * Lots of needed references (`\\needcite`). 175 | * Benchmarking at least of regexp vs tries. 176 | Last I checked, my regexp implementations fail to terminate on $a^n n^bn$. 177 | * Convolution commutes with currying and with uncurrying. 178 | So do addition, zero, and one, but `single` is different. 179 | See 2019-01-28 notes. 180 | * Generalize decomposition-based semirings, and streamline the special cases. 181 | * Consider again removing the types that behave like `a -> b` instead of `b <-- a`. 182 | I don't know how I could then get the functor/applicative/monad instances I want, since the type parameters would be in the wrong order. 183 | One possibility is to save that discussion and the parallel types for the extended/journal version of the paper. 184 | Idea: Gloss over the distinction through most of the paper, giving just the convolution-style semiring instances, noting that we've lost compelling semiring instances and promising that we'll get them back later. 185 | Later, when I want FAM instances, I'll have to introduce `newtype` wrappers in order to swap type parameters. 186 | Or hopefully just a single `newtype` adapter for a variety of types homomorphic to `b <-- a`. 187 | At that point, we can restore the pointwise semiring instances for |a -> b|, inherit them via deriving, and add `Semiring` and `StarSemiring`. 188 | * Use the conference style files. 189 | * Factor `Additive` out of `Semiring`, and drop the `Monoid` requirement for `Additive (b :<-- a)` and `Applicative ((:<--) a)`. 190 | I'll have to return to defining my own classes. Tip my hat to semiring-num. 191 | * Remove a bunch of unused code, first moving to `Other`. 192 | * Summation (etc) notation style: 193 | * Consider moving the condition to the body where it becomes multiplication: 194 | 195 | ``` haskell 196 | liftA2 h (Pred f) (Pred g) = Pred (\ w -> or (u,v) (f u && g v && h u v == w)) 197 | ``` 198 | 199 | Note that `w` appears only in the third conjunct. 200 | * Generalizing from predicates to flipped functions, 201 | 202 | ``` haskell 203 | liftA2 h (F f) (F g) = F (\ w -> sum (u,v) (f u * g v * single (h u v) w)) 204 | ``` 205 | 206 | * Simplify further 207 | 208 | ``` haskell 209 | liftA2 h (F f) (F g) 210 | = F (\ w -> sum (u,v) (f u * g v * single (h u v) w)) 211 | = sum (u,v) (\ w -> f u * g v * single (h u v) w) 212 | = sum (u,v) ((f u * g v) .> (\ w -> single (h u v) w)) 213 | = sum (u,v) ((f u * g v) .> single (h u v)) 214 | ``` 215 | 216 | * Then simplify the "standard FunApp" proof. 217 | * Introduce notation "`a +-> b = b .> single a`". 218 | Then `liftA2 h (F f) (F g) = sum (u,v) (f u * g v +-> h u v)`. 219 | 220 | 221 | [*Differentiation of higher-order types*]: http://conal.net/blog/posts/differentiation-of-higher-order-types/ "blog post" 222 | 223 | [*Another angle on zippers*]: http://conal.net/blog/posts/another-angle-on-zippers/ "blog post" 224 | 225 | [*Derivatives of Regular Expressions*]: http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.98.4378 "paper by Janusz Brzozowski (1964" 226 | 227 | [*Rewriting Extended Regular Expressions*]: http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.54.7335 "paper by by Valentin Antimirov and Peter Mosses (1994)" 228 | 229 | [*Some Properties of Brzozowski Derivatives of Regular Expressions*]: https://arxiv.org/abs/1407.5902 "paper by N.Murugesan and O.V.Shanmuga Sundaram (2014)" 230 | 231 | [*Derivatives of Regular Expressions and an Application*]: https://www.researchgate.net/publication/221350925_Derivatives_of_Regular_Expressions_and_an_Application "paper by Haiming ChenHaiming ChenSheng and YuSheng Yu (2012)" 232 | 233 | [*Regular-expression derivatives reexamined*]: http://www.ccs.neu.edu/home/turon/re-deriv.pdf "paper by Scott Owens, John Reppy, and Aaron Turon (2009)" 234 | 235 | [*Derivatives for Enhanced Regular Expressions*]: https://arxiv.org/abs/1605.00817 "paper by Peter Thiemann (2016)" 236 | 237 | [*Regular expression sub-matching using partial derivatives*]: http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.605.6379 "by Martin Sulzmann, Kenny Zhuo, and Ming Lu (2012)" 238 | 239 | [*Testing Extended Regular Language Membership Incrementally by Rewriting*]: https://www.semanticscholar.org/paper/Testing-Extended-Regular-Language-Membership-by-Rosu-Viswanathan/90fdd53e5b29705967c3cc21c050463ded1b514d "paper by Grigore Rosu and Mahesh Viswanathan (2003)" 240 | 241 | [*Yacc is dead*]: https://arxiv.org/abs/1010.5023 "paper by Matthew Might and David Darais (2010)" 242 | 243 | [*Yacc is dead: An update*]: http://matt.might.net/articles/parsing-with-derivatives/ "blog post by Matt Might ()" 244 | 245 | [*DFINITY Technology Overview Series, Consensus System*]: https://arxiv.org/abs/1805.04548 "paper by Timo Hanke, Mahnush Movahedi, and Dominic Williams (2018)" 246 | 247 | [*Compiling to categories*]: http://conal.net/papers/compiling-to-categories "paper by Conal Elliott (2017)" 248 | 249 | [*The simple essence of automatic differentiation*]: http://conal.net/papers/essence-of-ad "paper by Conal Elliott (2018)" 250 | 251 | [*Generic functional parallel algorithms: Scan and FFT*]: http://conal.net/papers/generic-parallel-functional "paper by Conal Elliott (2017)" 252 | 253 | [*Parsing Inside-Out*]: https://arxiv.org/abs/cmp-lg/9805007 "doctoral thesis by Joshua Goodman (1998)" 254 | 255 | [*Semiring Parsing*]: http://www.aclweb.org/anthology/J99-4004 "Paper by Joshua Goodman (1999)" 256 | 257 | [*Algebraic Foundation of Statistical Parsing: Semiring Parsing*]: https://pdfs.semanticscholar.org/7938/c9b56de70eb641d946353b9c0fa255f48b4f.pdf "PhD Depth Examination Report by Yudong Liu (2004)" 258 | 259 | [*Goodman: Semiring Parsing*]: https://kevinbinz.com/2014/11/16/goodman-semiring-parsing/ "blog post by Kevin Binz (2014)" 260 | 261 | [*Monads from Comonads, Comonads from Monads*]: http://www.cs.ox.ac.uk/ralf.hinze/WG2.8/28/slides/Comonad.pdf "paper by Ralf Hinze (2011?)" 262 | 263 | [*Should I use a Monad or a Comonad?*]: https://www.semanticscholar.org/paper/Should-I-use-a-Monad-or-a-Comonad-%3F-Orchard/bec621991dd3e8b1e118fdd0a1d7b5819471a964 "paper by Dominic A. Orchard (2012)" 264 | 265 | [`Data.Functor.Day`]: https://hackage.haskell.org/package/kan-extensions/docs/Data-Functor-Day.html "Haskell source module by Edward Kmett (2014--2016)" 266 | 267 | [*Comonads and Day Convolution*]: https://blog.functorial.com/posts/2016-08-08-Comonad-And-Day-Convolution.html "blog post by Phil Freeman (2016)" 268 | 269 | [*Quantifiers on languages and codensity monads*]: https://arxiv.org/abs/1702.08841 "paper by Mai Gehrke, Daniela Petrisan, and Luca Reggio 270 | (2018)" 271 | 272 | [*Towards a Coalgebraic Chomsky Hierarchy*]: https://arxiv.org/abs/1401.5277v3 "paper by Sergey Goncharov, Stefan Milius, Alexandra Silva (2014)" 273 | 274 | [*The monads of classical algebra are seldom weakly cartesian*]: https://link.springer.com/article/10.1007/s40062-013-0063-2 "paper by Maria Manuel Clementino, Dirk Hofmann, and George Janelidze (2013)" 275 | 276 | [*Fun with Semirings: A functional pearl on the abuse of linear algebra*]: http://stedolan.net/research/semirings.pdf "paper by Stephen Dolan (2013)" 277 | 278 | [*The Dual of Substitution is Redecoration*]: http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.16.9369 "paper by Tarmo Uustalu and Varmo Vene (2002)" 279 | 280 | [*Higher Dimensional Trees, Algebraically*]: https://www.semanticscholar.org/paper/Higher-Dimensional-Trees%2C-Algebraically-Ghani-Kurz/3b650d5ee01ac35c721c5bd51e4859aebe3880e2 "paper by Neil Ghani, Alexander Kurz (2007)" 281 | 282 | [*Convolution as a Unifying Concept: Applications in Separation Logic, Interval Calculi, and Concurrency*]: https://dl.acm.org/citation.cfm?id=2874773 "paper by Brijesh Dongol, Ian J. Hayes, and Georg Struth (2016)" 283 | 284 | [*Introduction to weighted automata theory*]: https://perso.telecom-paristech.fr/jsaka/CONF/Files/IWAT.pdf "Presentation by Jacques Sakarovitch (year?)" 285 | 286 | [*Weighted Automata*]: https://www.semanticscholar.org/paper/Weighted-automata-Droste-Kuske/f8d5980f23814e1d69a737f1f178d4a2565f7c22 "paper by Manfred Droste and Dietrich Kuske (2012)" 287 | 288 | [*Some Recent Applications of Semiring Theory*]: http://moonstone.math.ncku.edu.tw/Conferences/BeidarConference/golantalk.pdf "Paper by Jonathan S. Golan (2005)" 289 | 290 | [*Linear Algebra Over Semirings*]: https://www.research.manchester.ac.uk/portal/files/54562608/FULL_TEXT.PDF "PhD thesis by David Wilding (2014)" 291 | 292 | [*Regenerate: a language generator for extended regular expressions*]: https://hal.archives-ouvertes.fr/hal-01788827/document "Paper by Gabriel Radanne and Peter Thiemann (2018)" 293 | 294 | [*A Play on Regular Expressions*]: https://sebfisch.github.io/haskell-regexp/regexp-play.pdf "Paper by Sebastian Fischer, Frank Huch, and 295 | Thomas Wilke (2010)" 296 | 297 | [*Product Rules and Distributive Laws*]: https://www.mimuw.edu.pl/~jwinter/articles/cmcs16.pdf "Paper by Joost Winter" 298 | 299 | [*Moore for Less*]: https://www.schoolofhaskell.com/user/edwardk/moore/for-less "Blog post by Edward Kmett (2015)" 300 | 301 | [*Radix Sort, Trie Trees, And Maps From Representable Functors*]: https://chrispenner.ca/posts/representable-discrimination "blog post by Chris Penner (2017)" 302 | 303 | [*In Praise of Sequence (Co-)Algebra and its implementation in Haskell*]: https://arxiv.org/abs/1812.05878 "paper by Kieran Clenaghan (2018)" 304 | -------------------------------------------------------------------------------- /was1.lhs: -------------------------------------------------------------------------------- 1 | % -*- latex -*- 2 | 3 | %% While editing/previewing, use 12pt and tiny margin. 4 | \documentclass[twoside]{article} % fleqn, 5 | \usepackage[margin=0.12in]{geometry} % 0.12in, 0.9in, 1in 6 | 7 | %% \geometry{paperheight=9in} % for 2-up on big monitor, larger text 8 | 9 | %% \documentclass{article} 10 | %% \usepackage{fullpage} 11 | 12 | %% Temporary title 13 | \def\tit{Efficient parsing and generalized convolution} 14 | 15 | \author{Conal Elliott} 16 | 17 | \usepackage{fancyhdr} 18 | \pagestyle{fancy} 19 | \fancyhf{} 20 | \fancyhead[LO]\tit 21 | \fancyhead[RE]{% 22 | Conal Elliott 23 | } 24 | \fancyhead[LE,RO]{\thepage} 25 | % \rnc{\headrulewidth}{0pt} 26 | 27 | %include polycode.fmt 28 | %include forall.fmt 29 | %include greek.fmt 30 | %include formatting.fmt 31 | 32 | \input{macros} 33 | 34 | \calculationcomments 35 | 36 | \usepackage[square]{natbib} 37 | \bibliographystyle{plainnat} 38 | 39 | %% % author-date form 40 | %% \usepackage[]{natbib} 41 | %% \bibpunct();A{}, 42 | \let\cite=\citep 43 | 44 | \title\tit 45 | \date{Early draft of \today} 46 | 47 | \setlength{\blanklineskip}{2ex} % blank lines in code environment 48 | 49 | \nc\proofLabel[1]{\label{proof:#1}} 50 | %if short 51 | \nc\proofRef[1]{proof in \citep[Appendix C]{Elliott-2018-convolution-extended}} 52 | %else 53 | \nc\proofRef[1]{Appendix \ref{proof:#1}} 54 | %endif 55 | \nc\provedIn[1]{\textnormal{See \proofRef{#1}}} 56 | 57 | \nc\set[1]{\{\,#1\,\}} 58 | \nc\Pow{\mathcal{P}} 59 | \nc\mempty{\varepsilon} 60 | \nc\closure[1]{#1^{\ast}} 61 | \nc\mappend{\diamond} 62 | \nc\cat{\mathop{}} 63 | \nc\single\overline 64 | \nc\union{\cup} 65 | \nc\bigunion{\bigcup} 66 | \nc\has[1]{\mathop{\delta_{#1}}} 67 | \nc\derivOp{\mathcal{D}} 68 | \nc\derivsOp{\derivOp^{\ast}} 69 | \nc\deriv[1]{\mathop{\derivOp_{#1}}} 70 | \nc\derivs[1]{\mathop{\derivsOp_{#1}}} 71 | %% \nc\conv{\ast} 72 | \nc\conv{*} 73 | \nc\zero{\mathbf{0}} 74 | \nc\one{\mathbf{1}} 75 | %% \nc\zero{0} 76 | %% \nc\one{1} 77 | \nc\hasEps{\mathop{\Varid{has}_{\mempty}}} 78 | \nc\id{\mathop{\Varid{id}}} 79 | \nc\ite[3]{\text{if}\ #1\ \text{then}\ #2\ \text{else}\ #3} 80 | \newcommand\iteB[3]{ 81 | \begin{cases} 82 | #2 & \text{if $#1$} \\ 83 | #3 & \text{otherwise} 84 | \end{cases}} 85 | \nc\lis{\mathop{\Varid{list}}} 86 | \nc\liftA{\mathop{\Varid{liftA}}} 87 | \nc\cons{\mathit{:}} 88 | 89 | \DeclareMathOperator{\true}{true} 90 | \DeclareMathOperator{\false}{false} 91 | 92 | \begin{document} 93 | 94 | \maketitle 95 | 96 | %% \begin{abstract} 97 | %% ... 98 | %% \end{abstract} 99 | 100 | \sectionl{Introduction} 101 | 102 | %format <+> = "+" 103 | %format <.> = "\conv" 104 | %% %format zero = 0 105 | %% %format one = 1 106 | %format zero = "\zero" 107 | %format one = "\one" 108 | 109 | \sectionl{Languages} 110 | 111 | \note{Summarize/review languages as sets, including singleton, union, concatenation, and star/closure.} 112 | 113 | A \emph{language} is a set of strings, where a string is a sequence of values of some given type (``symbols'' from an ``alphabet''). 114 | Languages are commonly built up via a few simple operations:\notefoot{I may want to parametrize by a monoid instead of an alphabet.} 115 | \begin{itemize} 116 | \item The \emph{empty} language $\emptyset = \set{}$. 117 | \item For a string $s$, the \emph{singleton} language $\single s = \set{s}$. 118 | \item For two languages $P$ and $Q$, the \emph{union} $P \union Q = \set{s \mid s \in P \lor s \in Q}$. 119 | \item For two languages $P$ and $Q$, the element-wise \emph{concatenation} $P \cat Q = \set{p \mappend q \mid p \in P \land q \in Q}$, where ``$\mappend$'' denotes string concatenation. 120 | \item For a language $P$, the \emph{closure} $\closure P = \bigunion_{n \ge 0} P^n $, where $P^n$ is $P$ concatenated with itself $n$ times\out{ (and $P^0 = \single{\mempty}$)}. 121 | \end{itemize} 122 | %if False 123 | \out{Note that $\closure P$ can also be given a recursive specification: $\closure P = \mempty \union P \cat \closure P$.{Syntactically, we'll take concatenation (``$\cat$'') to bind more tightly than union (``$\union$''), so the RHS of this definition is equivalent to $\mempty \union (P \cat \closure P)$} 124 | %endif 125 | These operations suffice to describe all \emph{regular} languages. 126 | The language specifications (language-denoting \emph{expressions} rather than languages themselves) finitely constructed from these operations are called \emph{regular expressions}. 127 | %(If we allow \emph{recursive} definitions, we get \emph{context-free} languages.) 128 | 129 | Some observations: 130 | \begin{itemize} 131 | \item Union is associative, with $\emptyset$ as its identity.\notefoot{Maybe state equations for this observations and the next two.} 132 | \item Element-Wise concatenation is associative and commutative, with $\single \mempty$ as its identity, where $\mempty$ is the empty string. 133 | \item Left- and right-concatenation distribute over union. 134 | \item The empty language annihilates under concatenation, i.e., $P \cat \emptyset = \emptyset \cat Q = \emptyset$. 135 | \item The $\closure P$ operation satisfies the equation $\closure P = \mempty \union (P \cat \closure P)$. 136 | \end{itemize} 137 | These observations are the defining properties of a \emph{star semiring} (also called a \emph{closed semiring}) \needcite{}. 138 | \figrefdef{classes}{Abstract interface for languages (and later generalizations)}{ 139 | \begin{code} 140 | class Semiring a where 141 | infixl 7 <.> 142 | infixl 6 <+> 143 | zero , one :: a 144 | (<+>) , (<.>) :: a -> a -> a 145 | 146 | sum, product :: (Foldable f, Semiring a) => f a -> a 147 | sum = foldr (<+>) zero 148 | product = foldr (<.>) one 149 | 150 | class Semiring a => ClosedSemiring a where 151 | closure :: a -> a 152 | closure p = q where q = one <+> p <.> q -- default 153 | 154 | class HasSingle a x where 155 | single :: x -> a 156 | 157 | instance Semiring Bool where 158 | zero = False 159 | one = True 160 | (<+>) = (||) 161 | (<.>) = (&&) 162 | 163 | instance ClosedSemiring Bool where 164 | closure _ = one 165 | \end{code} 166 | \vspace{-4ex} 167 | } shows Haskell classes for representations of languages (and later generalizations), combining the star semiring vocabulary with an operation for singletons. 168 | The singleton-forming operation must satisfy the following properties: 169 | \begin{align*} 170 | \single \mempty &= \one \\ 171 | \single {u \mappend v} &= \single u \conv \single v 172 | \end{align*} 173 | i.e., |single| is a monoid homomorphism (targeting the product monoid). 174 | As an example other than numbers and languages, \figref{classes} includes the closed semiring of boolean values. 175 | 176 | %format Set = "\mathcal P" 177 | %format emptyset = "\emptyset" 178 | %format single (s) = "\single{"s"}" 179 | %format set (e) = "\set{"e"}" 180 | %format bigunion (lim) (body) = "\bigunion_{" lim "}{" body "}" 181 | %format pow a (b) = a "^{" b "}" 182 | 183 | Languages fulfill this combined interface as described above and again in the pseudocode in \figrefdef{set as language}{Sets as a ``language''}{ 184 | \begin{code} 185 | instance Monoid s => Semiring (Set s) where 186 | zero = emptyset 187 | one = single mempty 188 | p <+> q = set (s | s `elem` p || s `elem` q) 189 | p <.> q = set (u <> v | u `elem` p && v `elem` q) 190 | 191 | instance ClosedSemiring (Set s) -- default |closure| 192 | instance HasSingle (Set s) s where single s = set s 193 | \end{code} 194 | \vspace{-4ex} 195 | %% closure p = bigunion (n >= 0) (pow p n) 196 | }, which generalizes from strings to any monoid.\footnote{The |Monoid| class defines $\mappend$ and $\mempty$.} 197 | 198 | \note{On second thought, postpone generalization from lists to monoids later.} 199 | 200 | \sectionl{Matching} 201 | 202 | Now consider how we can computably \emph{match} a string for membership in a language described in the vocabulary given in the previous section. 203 | The set-based language definition does not lead directly to effective string matching, because the sets may be infinite. 204 | We can get around this difficulty easily enough by a change of representation. 205 | Sets are isomorphic to membership predicates. 206 | \begin{code} 207 | newtype Pred s = Pred (s -> Bool) 208 | 209 | setPred :: Set a -> Pred a 210 | setPred as = Pred (\ a -> a `elem` as) 211 | 212 | predSet :: Pred a -> Set a 213 | predSet (Pred f) = set (a | f a) 214 | \end{code} 215 | It's easy to show that |setPred . predSet == id| and |predSet . setPred == id|. 216 | % See 2018-12-10 notes. 217 | 218 | This isomorphism suggests a simple specification for effective matching, namely the requirement that |setPred| (or |predSet|) is a \emph{homomorphism} with respect to the vocabulary of \figref{classes}. 219 | (This style of specification has proved useful for a range of problems \cite{Elliott-2009-tcm, Elliott-2018-ad-icfp}.) 220 | \begin{theorem}[\provedIn{theorem:Pred}]\thmlabel{Pred} 221 | Given the definitions in \figrefdef{Pred}{Predicates as a language (specified by homomorphicity of |predSet|/|setPred|)}{ 222 | \begin{code} 223 | instance Semiring (Pred [c]) where 224 | zero = Pred (const False) 225 | one = Pred null 226 | Pred f <+> Pred g = Pred (\ x -> f x || g x) 227 | Pred f <.> Pred g = Pred (\ x -> or [ f u && g v | (u,v) <- splits x ] ) 228 | 229 | instance ClosedSemiring (Pred [c]) -- default |closure| 230 | 231 | instance Eq s => HasSingle (Pred s) s where 232 | single s = Pred (== s) 233 | 234 | -- All ways of splitting a given list (inverting |(<>)|). 235 | splits :: [a] -> [([a],[a])] 236 | splits [] = [([],[])] 237 | splits (a:as') = ([],a:as') : [((a:l),r) | (l,r) <- splits as'] 238 | \end{code} 239 | \vspace{-4ex} 240 | }, |setPred| and |predSet| are homomorphisms with respect to each instantiated class. 241 | \end{theorem} 242 | 243 | \note{Try some examples, including |star| and even the classic non-regular language $a^n b^n$ or \href{https://en.wikipedia.org/wiki/Dyck_language}{the Dyck language}.} 244 | 245 | \sectionl{List of Successes} 246 | 247 | Although the predicate-based language implementation in \secref{Matching} is effective, it is terribly inefficient, due to the backtracking search involved in the definitions of |(<.>)| and |splits| in \figref{Pred}. 248 | An alternative technique commonly used in monadic parsing involves matching a language against \emph{prefixes} of a given string, yielding a corresponding ``residual'' suffix for each successful match \cite{Wadler-85-successes, HuttonMeijer-98-parsing}. 249 | If there is some way to match an \emph{entire} given string (i.e., if any matching residual is empty), then that string is in the language. 250 | As with |Pred|, we can package this technique in a new data type with an interpretation function that relates it to an already understood language representation: 251 | \begin{code} 252 | newtype Resid c = Resid ([c] -> [[c]]) 253 | 254 | residPred :: Resid c -> Pred [c] 255 | residPred (Resid f) = Pred (any null . f) 256 | \end{code} 257 | \begin{theorem}[\provedIn{theorem:Resid}]\thmlabel{Resid} 258 | Given the definitions in \figrefdef{Resid}{List-of-successes as a language (specified by homomorphicity of |residPred|)}{ 259 | \begin{code} 260 | instance Semiring (Resid c) where 261 | zero = Resid (const []) 262 | one = Resid (\ s -> [s]) 263 | Resid f <+> Resid g = Resid (\ s -> f s <> g s) 264 | Resid f <.> Resid g = Resid (\ s -> [s'' | s' <- f s, s'' <- g s']) 265 | 266 | -- Equivalent definition in monadic style 267 | instance Semiring (Resid c) where 268 | zero = Resid (fail "no match") 269 | one = Resid return 270 | Resid f <+> Resid g = Resid (liftA2 (<>) f g) 271 | Resid f <.> Resid g = Resid (f >=> g) 272 | 273 | instance ClosedSemiring (Resid c) 274 | 275 | instance Eq c => HasSingle (Resid c) [c] where 276 | single x = Resid (maybeToList . stripPrefix x) 277 | 278 | -- From |Data.List| 279 | stripPrefix :: Eq a => [a] -> [a] -> Maybe [a] 280 | stripPrefix [] ys = Just ys 281 | stripPrefix (x:xs) (y:ys) | x == y = stripPrefix xs ys 282 | stripPrefix _ _ = Nothing 283 | \end{code} 284 | \vspace{-4ex} 285 | }, |residPred| is a homomorphism with respect to each instantiated class. 286 | \end{theorem} 287 | 288 | \sectionl{Regular Expressions} 289 | 290 | %format :<+> = "\mathbin{:\!\!+}" 291 | %format :<.> = "\mathbin{:\!\!\conv}" 292 | 293 | Regular expressions are widely used as a syntactic description of regular languages and can be represented as an algebraic data type: 294 | \begin{code} 295 | infixl 6 :<+> 296 | infixl 7 :<.> 297 | 298 | data RegExp c = Char c 299 | | Zero 300 | | One 301 | | RegExp c :<+> RegExp c 302 | | RegExp c :<.> RegExp c 303 | | Closure (RegExp c) 304 | deriving Show 305 | \end{code} 306 | We can convert regular expressions to \emph{any} closed semiring with singletons: 307 | \begin{code} 308 | regexp :: (ClosedSemiring a, HasSingle a [c]) => RegExp c -> a 309 | regexp (Char c) = single [c] 310 | regexp Zero = zero 311 | regexp One = one 312 | regexp (u :<+> v) = regexp u <+> regexp v 313 | regexp (u :<.> v) = regexp u <.> regexp v 314 | regexp (Closure u) = closure (regexp u) 315 | \end{code} 316 | 317 | \begin{theorem}[\provedIn{theorem:RegExp}]\thmlabel{RegExp} 318 | Given the definitions in \figrefdef{RegExp}{Regular expressions as a language (specified by homomorphicity of |regexp|)}{ 319 | \begin{code} 320 | instance Eq c => Semiring (RegExp c) where 321 | zero = Zero 322 | one = One 323 | Zero <+> b = b 324 | a <+> Zero = a 325 | a :<.> c <+> b :<.> d | a == b = a <.> (c <+> d) 326 | a :<.> c <+> b :<.> d | c == d = (a <+> b) <.> c 327 | a <+> b = a :<+> b 328 | Zero <.> _ = Zero 329 | _ <.> Zero = Zero 330 | One <.> b = b 331 | a <.> One = a 332 | a <.> b = a :<.> b 333 | 334 | instance ClosedSemiring (RegExp c) where 335 | closure Zero = one 336 | closure e = Closure e 337 | 338 | instance HasSingle (RegExp c) [c] where 339 | single s = foldr (\ c e -> Char c <.> e) one s 340 | \end{code} 341 | \vspace{-4ex} 342 | }, |regexp| is a homomorphism with respect to each instantiated class.\notefoot{The |HasSingle| instance can use any |Foldable| in place of |[]|. 343 | One could also define balanced folding of sums and products via two monoid wrappers, probably a good idea.} 344 | (Note that the semiring laws allow optimization.\footnote{For idempotent semirings, one could also optimize |closure One| to |one|, but later interpretations will need a different value.}) 345 | 346 | \end{theorem} 347 | 348 | 349 | \sectionl{Derivatives of Languages} 350 | 351 | The language matching algorithms embodied in the |Pred| and |Resid| types (defined in \secreftwo{Matching}{List of Successes}) both perform backtracking. 352 | We can do much better. 353 | A classic solution is to add token lookahead, as in LR, LL, and LALR parsers \needcite{}. 354 | While such parser generators typically have relatively complex implementations and look a fixed number of tokens ahead, Janusz Brzozowski discovered a simple and efficient technique that looks arbitrarily far ahead and eliminates all backtracking. 355 | He applied this technique only to regular languages and expressed it as a transformation that he termed ``derivatives of regular expressions'' \citep{Brzozowski64} \note{additional references}. 356 | Much more recently \citet{Might2010YaccID} extended the technique from regular to \emph{context-free} languages as a transformation on context-free grammars. 357 | 358 | %format deriv (c) = "\derivOp_{"c"}" 359 | %format derivs (s) = "\derivsOp_{"s"}" 360 | 361 | %% %format deriv (c) = "\deriv{"c"}" 362 | %% %format derivs (s) = "\derivs{"s"}" 363 | 364 | \begin{definition} \deflabel{derivs} 365 | The \emph{derivative} $\derivs u p$ of a language $p$ with respect to a string $u$ is the set of $u$-suffixes of strings in $p$, i.e., 366 | $$ \derivs u p = \set{ v \mid u \mappend v \in p } $$ 367 | \end{definition} 368 | \begin{lemma}\lemlabel{derivs-member} 369 | For a string $s$ and language $p$, 370 | $$ s \in p \iff \mempty \in \derivs s p .$$ 371 | Proof: immediate from \defref{derivs}. 372 | \end{lemma} 373 | The practical value of \lemref{derivs-member} is that |derivs s p| and |mempty|-containment can be computed easily and efficiently, thanks to \lemrefs{derivs-monoid}{hasEps} below. 374 | \begin{lemma}[\provedIn{lemma:derivs-monoid}]\lemlabel{derivs-monoid} 375 | |derivs| satisfies the following properties: 376 | \begin{align*} 377 | \derivs\mempty p &= p \\ 378 | \derivs{u \mappend v} p &= \derivs v (\derivs u p) 379 | \end{align*} 380 | Equivalently, 381 | \begin{align*} 382 | \derivs\mempty &= \id \\ 383 | \derivs{u \mappend v} &= \derivs v \circ \derivs u 384 | \end{align*} 385 | where $\id$ is the identity function.\footnote{In other words, |derivs| is a contravariant monoid homomorphism (targeting the monoid of endo-functions).} 386 | \end{lemma} 387 | 388 | %% %format hasEps = "\hasEps" 389 | %format hasEps = "\Varid{has_{\mempty}}" 390 | 391 | \begin{definition} 392 | The derivative $\deriv c p$ of a language $p$ with respect to a single value (``symbol'') $c$ is the derivative of $p$ with respect to the singleton sequence $[c]$, i.e. $$\deriv c p = \derivs{[c]} p.$$ 393 | Equivalently, $\deriv c p = \set{v \mid c:v \in p}$, where ``$c:v$'' is the result of prepending $c$ to the sequence $v$ (so that $c:v = [c] \mappend v$). 394 | \end{definition} 395 | \begin{lemma}[\citet{Brzozowski64}, Theorem 3.1]\lemlabel{deriv} 396 | The $\derivOp$ operation has the following properties:\footnote{The fourth property can be written more directly as follows: 397 | $$\deriv c (p \conv q) = (\ite{\mempty \in p}{\deriv c q}0) + \deriv c p \conv q $$ 398 | or even 399 | $$\deriv c (p \conv q) = \iteB{\mempty \in p}{\deriv c q + \deriv c p \conv q}{\deriv c p \conv q}. $$} 400 | \begin{align*} 401 | \deriv c \zero &= \zero \\ 402 | \deriv c \one &= \zero \\ 403 | \deriv c (p + q) &= \deriv c p + \deriv c q \\ 404 | \deriv c (p \conv q) &= \delta\, p \conv \deriv c q + \deriv c p \conv q \\ 405 | \deriv c (\closure p) &= \deriv c (p \conv \closure p) \\ 406 | \end{align*} 407 | where $\delta\,p$ is the set containing just the empty string $\mempty$ if $\mempty \in p$ and otherwise the empty set itself:\notefoot{Consider eliminating |delta| in favor of just using |hasEps|.} 408 | $$ \delta\,p = \iteB{\mempty \in p}{\one}{\zero} . $$ 409 | \end{lemma} 410 | All that remains now is to see how to test whether $\mempty \in p$ for a language $p$. 411 | \begin{lemma}[\provedIn{lemma:hasEps}]\lemlabel{hasEps} 412 | The following properties hold:\notefoot{Move this definition to after \defref{derivs} and \lemref{derivs-member}, which motivate |hasEps|.} 413 | $$ \mempty \not\in \zero $$ 414 | $$ \mempty \in \one $$ 415 | $$ \mempty \in p + q \iff \mempty \in p \lor \mempty \in q $$ 416 | $$ \mempty \in p \conv q \iff \mempty \in p \land \mempty \in q $$ 417 | $$ \mempty \in \closure p $$ 418 | Recalling the nature of the closed-semiring of booleans from \figref{classes}, and defining $\hasEps p = \mempty \in p$, these properties amount to saying that $\hasEps$ is a closed-semiring homomorphism, i.e., 419 | \begin{align*} 420 | \hasEps \zero &= \zero \\ 421 | \hasEps \one &= \one \\ 422 | \hasEps (p + q) &= \hasEps p + \hasEps q \\ 423 | \hasEps (p \conv q) &= \hasEps p \conv \hasEps q \\ 424 | \hasEps (\closure p) &= \closure{(\hasEps p)}\\ 425 | \end{align*} 426 | \end{lemma} 427 | 428 | %% \noindent 429 | %% With this new vocabulary, \lemrefthree{derivs-member}{derivs-monoid}{deriv} can be interpreted much more broadly than languages as sets of sequences. 430 | 431 | Let's now package up these new operations as another abstract interface for language representations to implement. \lemrefs{derivs-member}{hasEps} can then be interpreted much more broadly than languages as sets of sequences. 432 | \begin{code} 433 | class HasDecomp a c | a -> c where 434 | hasEps :: a -> Bool 435 | deriv :: c -> a -> a 436 | 437 | instance Eq a => HasDecomp (Set a) a where 438 | hasEps p = [] `elem` p 439 | deriv c p = set (cs | c : cs `elem` p) 440 | 441 | derivs :: HasDecomp a c => [c] -> a -> a 442 | derivs s p = foldl (flip deriv) p s 443 | \end{code} 444 | As with |Semiring|, |ClosedSemiring|, and |HasSingle|, we can calculate instances of |HasDecomp|, as shown in \figref{HasDecomp}. 445 | \begin{theorem}[\provedIn{theorem:HasDecomp}]\thmlabel{HasDecomp} 446 | Given the definitions in \figrefdef{HasDecomp}{Decomposition of language representations (specified by homomorphicity)}{ 447 | \begin{code} 448 | instance HasDecomp (Pred [c]) c where 449 | hasEps (Pred f) = f [] 450 | deriv c (Pred f) = Pred (f . (c :)) 451 | 452 | instance HasDecomp (Resid s) s where 453 | hasEps (Resid f) = any null (f []) 454 | deriv c (Resid f) = Resid (f . (c :)) 455 | \end{code} 456 | \begin{code} 457 | instance Eq c => HasDecomp (RegExp c) c where 458 | hasEps (Char _) = zero 459 | hasEps Zero = zero 460 | hasEps One = one 461 | hasEps (p :<+> q) = hasEps p <+> hasEps q 462 | hasEps (p :<.> q) = hasEps p <.> hasEps q 463 | hasEps (Closure p) = closure (hasEps p) 464 | 465 | deriv c (Char c') = if c == c' then one else zero 466 | deriv _ Zero = zero 467 | deriv _ One = zero 468 | deriv c (p :<+> q) = deriv c p <+> deriv c q 469 | deriv c (p :<.> q) = delta p <.> deriv c q <+> deriv c p <.> q 470 | deriv c (Closure p) = deriv c (p <.> Closure p) 471 | \end{code} 472 | %% deriv c (Char c') = if c == c' then one else zero 473 | %% deriv c (Char c') | c == c' = one 474 | %% | otherwise = zero 475 | \vspace{-4ex} 476 | }, |predSet|, |residPred|, and |regexp| are |HasDecomp| homomorphisms. 477 | \end{theorem} 478 | 479 | Taken together, \lemrefs{derivs-member}{hasEps} give us an effective test for ``language'' membership, assuming that the language is expressed via |Semiring|, |ClosedSemiring|, and |HasSingle| and assuming that the language representation supports |HasDecomp|: 480 | \begin{code} 481 | accept :: HasDecomp a c => a -> [c] -> Bool 482 | accept p s = hasEps (derivs s p) 483 | \end{code} 484 | \note{Show some examples.} 485 | 486 | \sectionl{Tries} 487 | 488 | The definition of |accept| works for every language representation that implements the |HasDecomp| methods. 489 | A natural alternative representation is thus an implementation of those two methods, as shown in \figref{Decomp}. 490 | \begin{theorem}[\provedIn{theorem:Decomp}]\thmlabel{Decomp} 491 | Given the definitions in \figrefdef{Decomp}{Language representation as |Decomp| methods}{ 492 | %format :<: = "\mathrel{\Varid{:\!\blacktriangleleft}}" 493 | %format LazyPat = "\mathit{\sim}\!\!" 494 | \begin{code} 495 | data Decomp c = Bool :<: (c -> Decomp c) 496 | 497 | inDecomp :: Decomp c -> [c] -> Bool 498 | inDecomp (e :<: _ ) [] = e 499 | inDecomp (_ :<: ds ) (c:cs) = inDecomp (ds c) cs 500 | 501 | decompPred :: Decomp c -> Pred [c] 502 | decompPred = Pred . inDecomp 503 | 504 | instance Semiring (Decomp c) where 505 | zero = False :<: const zero 506 | one = True :<: const zero 507 | (a :<: ps') <+> (b :<: qs') = (a || b) :<: liftA2 (<+>) ps' qs' 508 | (a :<: ps') <.> (b :<: qs') = (a && b) :<: liftA2 h ps' qs' 509 | where 510 | h p' q' = (if a then b :<: qs' else zero) <+> p' <.> q 511 | 512 | instance ClosedSemiring (Decomp c) 513 | 514 | instance Eq c => HasSingle (Decomp c) [c] where 515 | single s = product (map symbol s) 516 | where symbol c = False :<: (\ c' -> if c'==c then one else zero) 517 | 518 | instance HasDecomp (Decomp c) c where 519 | hasEps (a :<: _ ) = a 520 | deriv c (_ :<: ds ) = ds c 521 | \end{code} 522 | \vspace{-4ex} 523 | }, |decompPred| is a homomorphism with respect to each instantiated class. 524 | \end{theorem} 525 | 526 | %format :| = "\mathrel{\Varid{:\!\!\triangleleft}}" 527 | %format `mat` = ! 528 | %format mat = (!) 529 | 530 | Although the |Decomp| representation caches |hasEps|, |deriv c| will be recomputed due to the use of a function in the |Decomp| representation. 531 | To further improve performance, we can \emph{memoize} these functions, e.g., with a generalized trie \needcite{} or a finite map. 532 | Given the sparseness of typical languages, the latter choice seems preferable as a naturally sparse representation, interpreting missing entries as $\zero$ (the empty language). 533 | The resulting representation is exactly a trie \needcite{}, and |accept| for |Trie| is the usual membership test for tries. 534 | Another route to ``derivative''-based language recognition was hiding in the standard notion of tries all along! 535 | \begin{theorem}[\provedIn{theorem:Trie}]\thmlabel{Trie} 536 | Given the definitions in \figrefdef{Trie}{Tries as language representation}{ 537 | \begin{code} 538 | data Trie c = Bool :| Map c (Trie c) 539 | 540 | inTrie :: Ord c => Trie c -> [c] -> Bool 541 | inTrie (e :| _ ) [] = e 542 | inTrie (_ :| ds ) (c:cs) = inTrie (ds `mat` c) cs 543 | 544 | mat :: (Ord c, Semiring a) => Map c a -> c -> a 545 | m `mat` c = findWithDefault zero c m 546 | 547 | triePred :: Ord c => Trie c -> Pred [c] 548 | triePred = Pred . inTrie 549 | 550 | instance Ord c => Semiring (Trie c) where 551 | zero = False :| empty 552 | one = True :| empty 553 | (a :| ps') <+> (b :| qs') = (a || b) :| unionWith (<+>) ps' qs' 554 | (a :| ps') <.> (b :| qs') = (a && b) :| unionWith (<+>) us vs 555 | where 556 | us = fmap (<.> NOP (b :| qs')) ps' 557 | vs = if a then qs' else empty 558 | 559 | instance Ord c => ClosedSemiring (Trie c) where 560 | closure (_ :| ds) = q where q = True :| fmap (<.> NOP q) ds 561 | 562 | instance Ord c => HasSingle (Trie c) [c] where 563 | single s = product (map symbol s) 564 | where symbol c = False :| singleton c one 565 | 566 | instance Ord c => HasDecomp (Trie c) c where 567 | hasEps (a :| _) = a 568 | deriv c (_ :| ds) = ds `mat` c 569 | \end{code} 570 | \vspace{-4ex} 571 | }, |triePred| is a homomorphism with respect to each instantiated class.% 572 | \notefoot{Briefly describe the operations used from |Data.Map|: |empty|, |unionWith|, |singleton|, and |findWithDefault|.} 573 | \end{theorem} 574 | 575 | \note{Examples, and maybe timing comparisons. Motivate the lazy pattern. Mention sharing work by memoizing the functions of characters.} 576 | 577 | \sectionl{Beyond Booleans} 578 | 579 | We began in with the question of language specification (\secref{Languages}) and recognition/matching (\secref{Matching}) and 580 | then moved on to regular expressions and language ``derivatives'' (\secreftwo{Regular Expressions}{Derivatives of Languages}). 581 | These derivatives turn out to arise in the classic \emph{trie} construction, yielding a simple and efficient means of language recognition (\secref{Tries}). 582 | Let's now \emph{generalize} the notion of languages along with the constructions introduced above for efficient recognition. 583 | 584 | %format `suchThat` = "\mid" 585 | As pointed out in \secref{Matching}, sets and predicates are isomorphic notions. 586 | For any set |s|, we can construct the membership predicate, |\ x -> x `elem` s|. 587 | Conversely, for any predicate |p|, we can construct a corresponding set of values, |set (x || p x)|. 588 | Moreover, these two conversions are inverses. 589 | 590 | A predicate over some type |a| is just a function from |a| to |Bool|, so the predicate perspective naturally suggests generalizing the result type beyond booleans. 591 | Examining the operations defined in \figref{Pred}, we can see that the needed operations on |Bool| are |False|, |True|, |(||||)|, and |(&&)|. 592 | As shown in \figref{classes}, those operations correspond to |zero|, |one|, |(+)|, and |(*)| operations for |Bool|. 593 | It therefore seems likely that we can generalize from |Bool| to \emph{any} semiring, and indeed we can do just that. 594 | We'll need to generalize |hasEps| as well to return a semiring value rather than just |Bool|: 595 | %format atEps = "\Varid{at_{\mempty}}" 596 | \begin{code} 597 | class HasDecomp a c s | a -> c s where 598 | atEps :: a -> s 599 | deriv :: c -> a -> a 600 | 601 | derivs :: HasDecomp a c s => [c] -> a -> a 602 | derivs s p = foldl (flip deriv) p s 603 | 604 | accept :: HasDecomp a c s => a -> [c] -> s 605 | accept p s = atEps (derivs s p) 606 | \end{code} 607 | \figrefdef{FunTo}{Semiring-generalized predicates}{ 608 | \begin{code} 609 | newtype FunTo s a = FunTo (a -> s) 610 | 611 | instance Semiring s => Semiring (FunTo s [c]) where 612 | zero = FunTo (const zero) 613 | one = FunTo (boolVal . null) 614 | FunTo f <+> FunTo g = FunTo (\ w -> f w <+> g w) 615 | FunTo f <.> FunTo g = FunTo (\ w -> sum [ f u <.> g v | (u,v) <- splits w ] ) 616 | 617 | boolVal :: Semiring s => Bool -> s 618 | boolVal False = zero 619 | boolVal True = one 620 | 621 | instance Semiring s => ClosedSemiring (FunTo s [c]) 622 | 623 | instance (Semiring s, Eq b) => HasSingle (FunTo s b) b where 624 | single x = FunTo (boolVal . (== x)) 625 | 626 | instance HasDecomp (FunTo s [c]) c s where 627 | atEps (FunTo f) = f [] 628 | deriv c (FunTo f) = FunTo (f . (c :)) 629 | \end{code} 630 | \vspace{-4ex} 631 | } shows the generalization of |Pred|, resulting from replacing boolean operations by their semiring generalizations in \figref{Pred}. 632 | In particular, omitting |FunTo| wrappers, the definition of |(*)| is equivalent to the following: 633 | \begin{align}\label{def:convolution} 634 | (f * g)\,w = \hspace{-1.5ex}\sum_{\substack{u,v \\ u \mappend v = w}}\hspace{-1ex} f\,u * g\,v 635 | \end{align} 636 | 637 | The other language representations also generalize easily as well. 638 | \figrefdef{RegExpFun}{Semiring-generalized regular expressions}{ 639 | \begin{code} 640 | infixl 6 :<+> 641 | infixl 7 :<.> 642 | 643 | data RegExp c s = Char c 644 | | Value s 645 | | RegExp c s :<+> RegExp c s 646 | | RegExp c s :<.> RegExp c s 647 | | Closure (RegExp c s) 648 | 649 | instance Semiring s => Semiring (RegExp c s) where 650 | zero = Value zero 651 | one = Value one 652 | (<+>) = (:<+>) 653 | (<.>) = (:<.>) 654 | 655 | instance Semiring s => ClosedSemiring (RegExp c s) where 656 | closure = Closure 657 | 658 | instance (Functor f, Foldable f, Semiring s) => HasSingle (RegExp c s) (f c) where 659 | single w = product (fmap Char w) 660 | 661 | instance (Eq c, ClosedSemiring s) => HasDecomp (RegExp c s) c s where 662 | atEps (Char _) = zero 663 | atEps (Value s) = s 664 | atEps (p :<+> q) = atEps p <+> atEps q 665 | atEps (p :<.> q) = atEps p <.> atEps q 666 | atEps (Closure p) = closure (atEps p) 667 | 668 | deriv c (Char c') = boolVal (c == c') 669 | deriv _ (Value _) = zero 670 | deriv c (p :<+> q) = deriv c p <+> deriv c q 671 | deriv c (p :<.> q) = Value (atEps p) <.> deriv c q <+> deriv c p <.> q 672 | deriv c (Closure p) = deriv c (p <.> Closure p) 673 | 674 | -- Interpret a regular expression 675 | regexp :: (ClosedSemiring s, HasSingle s [c]) => RegExp c s -> s 676 | regexp (Char c) = single [c] 677 | regexp (Value s) = s 678 | regexp (u :<+> v) = regexp u <+> regexp v 679 | regexp (u :<.> v) = regexp u <.> regexp v 680 | regexp (Closure u) = closure (regexp u) 681 | \end{code} 682 | \vspace{-4ex} 683 | } generalizes regular expressions from \figref{RegExp}. 684 | The only representation change is to replace the |Zero| and |One| constructors by a single general |Value| constructor taking a semiring value. 685 | Note for |deriv c (p :<.> q)| that |Value (atEps p)| replaces |delta p| and denotes (via |accept|) the function that maps |mempty| to |atEps p| and all other sequences to |zero|. 686 | \begin{theorem}\thmlabel{RegExpFun} 687 | Given the definitions in \figref{RegExpFun}, |regexp| is a homomorphism with respect to each instantiated class. 688 | \end{theorem} 689 | \noindent 690 | The proof is a straightforward adaptation of \proofRef{theorem:RegExp}. 691 | 692 | %format scaleT = scale"\!_"T 693 | The |Decomp| and |Trie| types from \figreftwo{Decomp}{Trie} also generalize without difficulty, with the latter generalization shown in \figrefdef{TrieFun}{Function-generalized list tries}{ 694 | \begin{code} 695 | infix 1 :| 696 | data Trie c s = s :| Map c (Trie c s) deriving Show 697 | 698 | scaleT :: (Ord c, DetectableZero s) => s -> Trie c s -> Trie c s 699 | scaleT s _ | isZero s = zero 700 | scaleT s (e :| ts) = (s <.> e) :| fmap (scaleT s) ts 701 | 702 | inTrie :: (Ord c, DetectableZero s) => Trie c s -> [c] -> s 703 | inTrie (e :| _ ) [] = e 704 | inTrie (_ :| ds) (c:cs) = inTrie (ds `mat` c) cs 705 | 706 | mat :: (Ord c, Semiring a) => Map c a -> c -> a 707 | m `mat` c = findWithDefault zero c m 708 | 709 | trieFunTo :: (Ord c, DetectableZero s) => Trie c s -> FunTo s [c] 710 | trieFunTo = FunTo . inTrie 711 | 712 | instance (Ord c, DetectableZero s) => Semiring (Trie c s) where 713 | zero = zero :| empty 714 | one = one :| empty 715 | (a :| ps') <+> (b :| qs') = (a <+> b) :| unionWith (<+>) ps' qs' 716 | (a :| ps') <.> q@(b :| qs') = 717 | (a <.> b) :| unionWith (<+>) (fmap (<.> q) ps') (fmap (scaleT a) qs') 718 | 719 | instance (Ord c, DetectableZero s) => ClosedSemiring (Trie c s) where 720 | closure (_ :| ds) = q where q = one :| fmap (<.> q) ds 721 | 722 | instance (Ord c, DetectableZero s) => HasSingle (Trie c s) [c] where 723 | single w = product (map symbol w) 724 | where 725 | symbol c = zero :| singleton c one 726 | 727 | instance (Ord c, DetectableZero s) => HasDecomp (Trie c s) c s where 728 | atEps (a :| _) = a 729 | deriv c (_ :| ds) = ds `mat` c 730 | \end{code} 731 | \vspace{-4ex} 732 | }, where |scaleT s t| multiplies all of the semiring values in the trie |t| by the value |s|, with |scaleT zero t == zero|. 733 | \citet{Hinze2000GGT} generalize tries from denoting sets to functions in this same way.\notefoot{To do: explore generalizing to tries over other key types.} 734 | \begin{theorem}\thmlabel{TrieFun} 735 | Given the definitions in \figref{TrieFun}, |trieFunTo| is a homomorphism with respect to each instantiated class. 736 | \end{theorem} 737 | \noindent 738 | The proof is a straightforward adaptation of \proofRef{theorem:Trie}.\notefoot{On second thought, work out the proofs for this generalized versions, and then specialize for \thmref{Trie}.} 739 | 740 | \workingHere 741 | 742 | \sectionl{Convolution} 743 | 744 | \note{Show that |(*)| corresponds to generalized convolution.} 745 | 746 | \sectionl{Beyond Convolution} 747 | 748 | \note{The free semimodule monad.} 749 | 750 | \sectionl{More Variations} 751 | 752 | \note{Variations: counting, probability distributions, temporal/spatial convolution.} 753 | 754 | \sectionl{What else?} 755 | 756 | \begin{itemize} 757 | \item Other applications: 758 | \begin{itemize} 759 | \item Univariate and multivariate polynomials. 760 | \item Convolution: discrete and continuous, one- and multi-dimensional, dense and sparse. 761 | \item 2D parsing? 762 | \end{itemize} 763 | \end{itemize} 764 | 765 | 766 | % % % % % % % % % % % % % % % % % % % % % % % % % % % % % % % % % % % % % % % 767 | 768 | \appendix 769 | 770 | \sectionl{Proofs} 771 | 772 | \subsection{\thmref{Pred}}\proofLabel{theorem:Pred} 773 | 774 | \subsection{\thmref{Resid}}\proofLabel{theorem:Resid} 775 | 776 | \subsection{\thmref{RegExp}}\proofLabel{theorem:RegExp} 777 | 778 | \subsection{\lemref{derivs-monoid}}\proofLabel{lemma:derivs-monoid} 779 | 780 | \begin{code} 781 | derivs mempty p 782 | == set ( w | mempty <> w `elem` p ) -- definition of |derivs mempty| 783 | == set ( w | w `elem` p ) -- monoid law 784 | == p -- set notation 785 | \end{code} 786 | 787 | \begin{code} 788 | derivs (u <> v) p 789 | == set ( w | (u <> v) <> w `elem` p ) -- definition of |derivs (u<>v)| 790 | == set ( w | u <> (v <> w) `elem` p ) -- monoid law 791 | == set ( w | v <> w `elem` derivs u p ) -- definition of |derivs u| 792 | == set ( w | w `elem` derivs v (derivs u p) ) -- definition of |derivs v| 793 | == derivs v (derivs u p) 794 | \end{code} 795 | 796 | \subsection{\lemref{hasEps}}\proofLabel{lemma:hasEps} 797 | 798 | \subsection{\thmref{HasDecomp}}\proofLabel{theorem:HasDecomp} 799 | 800 | \subsection{\thmref{Decomp}}\proofLabel{theorem:Decomp} 801 | 802 | \subsection{\thmref{Trie}}\proofLabel{theorem:Trie} 803 | 804 | \bibliography{bib} 805 | 806 | \end{document} 807 | 808 | --------------------------------------------------------------------------------