├── .gitignore
├── AddLinks.lhs
├── AnIssueWithTokenParsers.lhs
├── ApplicativeStyle.lhs
├── CombinatorReview.lhs
├── ErrorMessages.lhs
├── FromClause.lhs
├── FunctionsAndTypesForParsing.lhs
├── GettingStarted.lhs
├── IntroductionToCombinators.lhs
├── LICENSE
├── ParseFile.lhs
├── ParseSQLQueries.lhs
├── ParseString.lhs
├── ParsingTpch.lhs
├── PrettyPrinting0.lhs
├── QueryExpressions.lhs
├── README.asciidoc
├── Setup.hs
├── SimpleSQLQueryParser0.lhs
├── Text
└── Parsec
│ └── String
│ ├── Char.hs
│ ├── Combinator.hs
│ ├── Expr.hs
│ └── Parsec.hs
├── TextParsecExpr.lhs
├── TextParsecPerm.lhs
├── TextParsecToken.lhs
├── ValueExpressions.lhs
├── VerySimpleExpressions.lhs
├── main.css
├── make_website.sh
├── render
├── FixReadme.lhs
└── Render.lhs
└── source_text
/.gitignore:
--------------------------------------------------------------------------------
1 | /build/
2 | /nohup.out
3 | /cabal.sandbox.config
4 | /.cabal-sandbox
5 | /.ghc.environment.x86_64-linux-8.6.5
6 | /dist-newstyle/
7 | /dist/
--------------------------------------------------------------------------------
/AddLinks.lhs:
--------------------------------------------------------------------------------
1 |
2 | Little hack to add links to the navigation bars when generating the
3 | html website.
4 |
5 | > main :: IO ()
6 | > main = interact addLinks
7 |
8 |
9 | > addLinks :: String -> String
10 | > addLinks [] = error "not found"
11 | > addLinks ('<':'/':'u':'l':'>':'\n':'<':'/':'d':'i':'v':'>':xs) =
12 | > "" ++ linkSection ++ "\n" ++ xs
13 | > addLinks (x:xs) = x : addLinks xs
14 |
15 | > linkSection :: String
16 | > linkSection =
17 | > "
\n\
18 | > \\n"
25 |
--------------------------------------------------------------------------------
/AnIssueWithTokenParsers.lhs:
--------------------------------------------------------------------------------
1 |
2 | [[an-issue-with-token-parsers]]
3 | = An issue with token parsers
4 |
5 | This is a tutorial about an issue with the token parsing we have so
6 | far.
7 |
8 | > import Text.Parsec.String (Parser)
9 | > import Text.Parsec.String.Parsec (try)
10 | > import Text.Parsec.String.Combinator (many1, notFollowedBy)
11 | > import Text.Parsec.String.Char (digit, string, oneOf, satisfy, char, letter)
12 | >
13 | > import Control.Applicative ((<$>), (<*>), (<*), many, (<$), (<|>))
14 | > import Control.Monad (void, guard)
15 | >
16 | > import qualified Text.Parsec.String.Expr as E
17 | > import FunctionsAndTypesForParsing
18 |
19 | Here is a simplified expression type and parser:
20 |
21 | > data SimpleExpr = Num Integer
22 | > | BinaryOp SimpleExpr String SimpleExpr
23 | > deriving (Eq,Show)
24 | >
25 | > simpleExpr :: Parser SimpleExpr
26 | > simpleExpr = E.buildExpressionParser table num
27 | >
28 | > table :: [[E.Operator SimpleExpr]]
29 | > table = [[binary "<=" E.AssocNone
30 | > ,binary ">=" E.AssocNone]
31 | > ,[binary "<" E.AssocNone
32 | > ,binary ">" E.AssocNone]
33 | > ]
34 | > where
35 | > binary name assoc =
36 | > E.Infix (mkBinOp name <$ symbol name) assoc
37 | > mkBinOp nm a b = BinaryOp a nm b
38 | >
39 | > num :: Parser SimpleExpr
40 | > num = Num <$> integer
41 | >
42 | > whitespace :: Parser ()
43 | > whitespace = void $ many $ oneOf " \n\t"
44 | >
45 | > lexeme :: Parser a -> Parser a
46 | > lexeme p = p <* whitespace
47 | >
48 | > integer :: Parser Integer
49 | > integer = read <$> lexeme (many1 digit)
50 | >
51 | > symbol :: String -> Parser String
52 | > symbol s = lexeme $ string s
53 |
54 | Let's try it out:
55 |
56 | ```
57 | *Main> regularParse simpleExpr "1=2"
58 | Right (Num 1)
59 |
60 | *Main> regularParse simpleExpr "1>=2"
61 | Right (BinaryOp (Num 1) ">=" (Num 2))
62 |
63 | *Main> regularParse simpleExpr "1>2"
64 | Left (line 1, column 2):
65 | unexpected "2"
66 | expecting ">="
67 | ```
68 |
69 | What happened? The parser tried to parse > as >=, failed, and since
70 | the failure consumed some input (the first >), it failed completely.
71 |
72 | We are going to change the symbol parser to fix this. Here is a
73 | parameterized version of the simpleExpr parser so we can try a few
74 | variations out.
75 |
76 | > simpleExprP :: (String -> Parser String) -> Parser SimpleExpr
77 | > simpleExprP sym = E.buildExpressionParser (tableP sym) num
78 | >
79 | > tableP :: (String -> Parser String) -> [[E.Operator SimpleExpr]]
80 | > tableP sym = [[binary "<=" E.AssocNone
81 | > ,binary ">=" E.AssocNone]
82 | > ,[binary "<" E.AssocNone
83 | > ,binary ">" E.AssocNone]]
84 | > where
85 | > binary name assoc =
86 | > E.Infix (mkBinOp name <$ sym name) assoc
87 | > mkBinOp nm a b = BinaryOp a nm b
88 |
89 | Let's reproduce the failure:
90 |
91 | ```
92 | *Main> regularParse (simpleExprP symbol) "1>=2"
93 | Right (BinaryOp (Num 1) ">=" (Num 2))
94 |
95 | *Main> regularParse (simpleExprP symbol) "1>2"
96 | Left (line 1, column 2):
97 | unexpected "2"
98 | expecting ">="
99 | ```
100 |
101 | We are going to look at two possible solutions.
102 |
103 | 1. Let's use `try`:
104 |
105 | ```
106 | *Main> regularParse (simpleExprP (try . symbol)) "1>=2"
107 | Right (BinaryOp (Num 1) ">=" (Num 2))
108 |
109 | *Main> regularParse (simpleExprP (try . symbol)) "1>2"
110 | Right (BinaryOp (Num 1) ">" (Num 2))
111 | ```
112 |
113 | This seems to have done the job. There is still a problem
114 | though. Consider a case when the precedence is the other way round -
115 | the `<` and `>` are higher precedence than `<=` and `>=`,
116 |
117 | > simpleExprP1 :: (String -> Parser String) -> Parser SimpleExpr
118 | > simpleExprP1 sym = E.buildExpressionParser (tableP1 sym) num
119 | >
120 | > tableP1 :: (String -> Parser String) -> [[E.Operator SimpleExpr]]
121 | > tableP1 sym = [[binary "<" E.AssocNone
122 | > ,binary ">" E.AssocNone]
123 | > ,[binary "<=" E.AssocNone
124 | > ,binary ">=" E.AssocNone]]
125 | > where
126 | > binary name assoc =
127 | > E.Infix (mkBinOp name <$ sym name) assoc
128 | > mkBinOp nm a b = BinaryOp a nm b
129 |
130 | ```
131 | *Main> regularParse (simpleExprP1 (try . symbol)) "1>2"
132 | Right (BinaryOp (Num 1) ">" (Num 2))
133 |
134 | *Main> regularParse (simpleExprP1 (try . symbol)) "1>=2"
135 | Left (line 1, column 3):
136 | unexpected "="
137 | expecting digit
138 | ```
139 |
140 | Although the precendence order is a little contrived in this case,
141 | this issue could easily crop up for real when we start adding more
142 | operators. Let's fix it now.
143 |
144 | This could be solved by adding a `try` at a earlier place in the
145 | parsing. Because of how the `buildExpressionParser` function works,
146 | it's not obvious where the `try` could go.
147 |
148 | Let's try tackling the problem in a different way. One way of looking
149 | at this is to consider that the symbol parser stops parsing too soon:
150 |
151 | ```
152 | *Main> parseWithLeftOver (symbol ">") ">="
153 | Right (">","=")
154 | ```
155 |
156 | What it should do is keep parsing symbol characters until it gets a
157 | result string which can't be a symbol, and stop one character before
158 | this..
159 |
160 | Here is a slightly naive way of doing it, which will be good enough
161 | for quite a while:
162 |
163 | > symbol1 :: String -> Parser String
164 | > symbol1 s = try $ lexeme $ do
165 | > u <- many1 (oneOf "<>=+-^%/*")
166 | > guard (s == u)
167 | > return s
168 |
169 | Here is a similar alternative:
170 |
171 | > symbol2 :: String -> Parser String
172 | > symbol2 s = try $ lexeme $ do
173 | > void $ string s
174 | > notFollowedBy (oneOf "<>=+-^%/*")
175 | > return s
176 |
177 | Let's try them out:
178 |
179 | ```
180 | *Main> parseWithLeftOver (symbol1 ">") ">="
181 | Left (line 1, column 3):
182 | unexpected end of input
183 |
184 | *Main> parseWithLeftOver (symbol1 ">") ">"
185 | Right (">","")
186 |
187 | *Main> parseWithLeftOver (symbol1 ">") ">= 3"
188 | Left (line 1, column 3):
189 | unexpected " "
190 |
191 | *Main> parseWithLeftOver (symbol1 ">=") ">= 3"
192 | Right (">="," 3")
193 |
194 |
195 | ```
196 | The error messages don't seem very good, but it parses and fails to
197 | parse correctly.
198 |
199 | ```
200 | *Main> parseWithLeftOver (symbol2 ">") ">="
201 | Left (line 1, column 3):
202 | unexpected '='
203 |
204 | *Main> parseWithLeftOver (symbol2 ">") ">"
205 | Right (">","")
206 |
207 | *Main> parseWithLeftOver (symbol2 ">") ">= 3"
208 | Left (line 1, column 3):
209 | unexpected '='
210 |
211 | *Main> parseWithLeftOver (symbol2 ">=") ">= 3"
212 | Right (">="," 3")
213 | ```
214 |
215 | This one appears to give better error messages in this limited
216 | scenario, apart from that they both work the same.
217 |
218 | Let's try them out in the full expression parser:
219 |
220 | ```
221 | *Main> regularParse (simpleExprP symbol1) "1>=2"
222 | Right (BinaryOp (Num 1) ">=" (Num 2))
223 |
224 | *Main> regularParse (simpleExprP symbol1) "1>2"
225 | Right (BinaryOp (Num 1) ">" (Num 2))
226 |
227 | *Main> regularParse (simpleExprP symbol2) "1>=2"
228 | Right (BinaryOp (Num 1) ">=" (Num 2))
229 |
230 | *Main> regularParse (simpleExprP symbol2) "1>2"
231 | Right (BinaryOp (Num 1) ">" (Num 2))
232 | ```
233 |
234 | They both work fine here. Let's see some error messages in this
235 | context.
236 |
237 | ```
238 | *Main> parseWithEof (simpleExprP symbol1) "1>*2"
239 | Left (line 1, column 4):
240 | unexpected "2"
241 | expecting operator
242 |
243 | *Main> parseWithEof (simpleExprP symbol2) "1>*2"
244 | Left (line 1, column 4):
245 | unexpected '*'
246 | expecting operator
247 | ```
248 |
249 | Both error messages are a bit crap. So much for the second variation
250 | producing better error messages.
251 |
252 | Let's look at the equivalent issue with respect to keyword parsing. We
253 | can get a similar problem here.
254 |
255 | > keyword :: String -> Parser String
256 | > keyword s = try $ string s
257 |
258 | ```
259 | *Main> parseWithEof (keyword "not") "not"
260 | Right "not"
261 |
262 | *Main> parseWithEof (keyword "not") "nothing"
263 | Left (line 1, column 4):
264 | unexpected 'h'
265 | expecting end of input
266 |
267 | *Main> parseWithEof (keyword "not" <|> keyword "nothing") "nothing"
268 | Left (line 1, column 4):
269 | unexpected 'h'
270 | expecting end of input
271 |
272 | *Main> parseWithEof (keyword "nothing" <|> keyword "not") "nothing"
273 | Right "nothing"
274 | ```
275 |
276 | We can fix this overlapping prefix issue by reordering the
277 | choices. But let's fix the `keyword` parser in a similar way to the
278 | symbol parser.
279 |
280 | TODO: I don't know if symbol is the right name, I don't think Parsec
281 | usually uses symbol in this way. Maybe it should be called operator.
282 |
283 | > identifier :: Parser String
284 | > identifier = lexeme ((:) <$> firstChar <*> many nonFirstChar)
285 | > where
286 | > firstChar = letter <|> char '_'
287 | > nonFirstChar = digit <|> firstChar
288 |
289 | > keyword1 :: String -> Parser String
290 | > keyword1 k = try $ do
291 | > i <- identifier
292 | > guard (i == k)
293 | > return k
294 |
295 | TODO: later note in error messages about choosing identifier here
296 | instead of e.g. many1 letter.
297 |
298 | ```
299 | *Main> parseWithEof (keyword1 "not") "not"
300 | Right "not"
301 |
302 | *Main> parseWithEof (keyword1 "not") "nothing"
303 | Left (line 1, column 8):
304 | unexpected end of input
305 | expecting digit, letter or "_"
306 |
307 | *Main> parseWithEof (keyword1 "not" <|> keyword1 "nothing") "nothing"
308 | Right "nothing"
309 |
310 | *Main> parseWithEof (keyword1 "nothing" <|> keyword1 "not") "nothing"
311 | Right "nothing"
312 |
313 | *Main> parseWithEof (keyword1 "not" <|> keyword1 "nothing") "not"
314 | Right "not"
315 |
316 | *Main> parseWithEof (keyword1 "nothing" <|> keyword1 "not") "not"
317 | Right "not"
318 | ```
319 |
320 | Try implementing the `keyword2` parser which uses `notFollowedBy`
321 | instead of `guard`, using something analogous to the change from
322 | `symbol1` to `symbol2` above.
323 |
324 | After this, you can try reimplementing the expression parser from the
325 | Text.Parsec.Expr tutorial using the new symbol and keyword parsers.
326 |
--------------------------------------------------------------------------------
/ApplicativeStyle.lhs:
--------------------------------------------------------------------------------
1 |
2 | [[applicative-style-parsing-code]]
3 | = Applicative style parsing code
4 |
5 | Now we can go back over the expression parsing code written in the
6 | last tutorial, and make it much more concise, and also make it more
7 | readable. We are going to do this mainly by using functions from the
8 | typeclass Applicative.
9 |
10 | Remember you can (and should) use the functions `regularParse` and its
11 | variations (TODO list them here) to try out the all these parsers in
12 | ghci, and you can write your own variations to experiment with if you
13 | are unsure about anything.
14 |
15 | > import Text.Parsec.String (Parser)
16 | > import Text.Parsec.String.Char (oneOf, char, digit, letter, satisfy)
17 | > import Text.Parsec.String.Combinator (many1, chainl1)
18 | > import Control.Applicative ((<$>), (<*>), (<*), (*>), (<|>), many, (<$))
19 | > import Control.Monad (void, ap)
20 | > import Data.Char (isLetter, isDigit)
21 | > import FunctionsAndTypesForParsing
22 |
23 | Here is the SimpleExpr type again:
24 |
25 | > data SimpleExpr = Num Integer
26 | > | Var String
27 | > | Add SimpleExpr SimpleExpr
28 | > | Parens SimpleExpr
29 | > deriving (Eq,Show)
30 |
31 | Here is the basic pattern behind most of the rewrites we are going to
32 | cover. Here is a function which takes a constructor and two parsers
33 | for the two arguments for the constructor. It parses the two
34 | arguments, then applies the constructor to the results:
35 |
36 | > myParser1 :: (a -> b -> c) -> Parser a -> Parser b -> Parser c
37 | > myParser1 ctor pa pb = do
38 | > a <- pa
39 | > b <- pb
40 | > return $ ctor a b
41 |
42 | TODO: concrete example, plus examples at the bottom of this section
43 | (for ctor <$> a, ctor <$> a <*> b, ctor <$> a <*> b <*> c).
44 |
45 | This can be rewritten without the do syntactic sugar like this:
46 |
47 | > myParser2 :: (a -> b -> c) -> Parser a -> Parser b -> Parser c
48 | > myParser2 ctor pa pb =
49 | > pa >>= \a -> pb >>= \b -> return $ ctor a b
50 |
51 | And can also be rewritten like this:
52 |
53 | > myParser3 :: (a -> b -> c) -> Parser a -> Parser b -> Parser c
54 | > myParser3 ctor pa pb = ctor `fmap` pa `ap` pb
55 |
56 | (This uses functions from Applicative instead of Monad.) We replace
57 | the use of `>>=` with `fmap` and `ap`. This isn't always possible, but
58 | it often is.
59 |
60 | Here is the version using the operators for `fmap` and `ap` (`fmap`
61 | changed to `<$>`, and `ap` changed to `<*>`).
62 |
63 | > myParser4 :: (a -> b -> c) -> Parser a -> Parser b -> Parser c
64 | > myParser4 ctor pa pb = ctor <$> pa <*> pb
65 |
66 | This style takes less typing, and is often much simpler to write and
67 | read.
68 |
69 | This pattern 'scales', you can use:
70 |
71 | ```
72 | Ctor <$> pa
73 | ```
74 |
75 | for a single argument constructor. This might also be familiar to you
76 | as
77 |
78 | ```
79 | fmap Ctor pa
80 | ```
81 |
82 | or
83 |
84 | ```
85 | Ctor `fmap` pa
86 | ```
87 |
88 | All of which mean the same thing, just slightly different spellings.
89 |
90 | This can also be written using Monad operators:
91 |
92 | ```
93 | pa >>= liftM ctor
94 | ```
95 |
96 | or
97 |
98 | ```
99 | liftM ctor =<< pa
100 | ```
101 |
102 | (`liftM` is in the module `Control.Monad`)
103 |
104 | These `liftM` versions effectively mean the same thing as the previous
105 | versions with `fmap` and `<$>`.
106 |
107 | You can use
108 |
109 | ```
110 | Ctor <$> pa <*> pb <*> pc
111 | ```
112 |
113 | for three args, and so on. So you use `<$>` between the pure
114 | constructor and the first arg, then `<*>` between each subsequent arg.
115 |
116 | Let's go over the simple expression parsers and try to rewrite them
117 | using this style. We will see a few other new functions. I will break
118 | things down into a lot of steps.
119 |
120 | == lexeme
121 |
122 | Here is the old lexeme parser, 'D' suffix for 'do notation'.
123 |
124 | > lexemeD :: Parser a -> Parser a
125 | > lexemeD p = do
126 | > x <- p
127 | > whitespace
128 | > return x
129 |
130 | > whitespace :: Parser ()
131 | > whitespace = void $ many $ oneOf " \n\t"
132 |
133 | First, we can move the `whitespace` from its own separate line.
134 |
135 | > lexemeA0 :: Parser a -> Parser a
136 | > lexemeA0 p = do
137 | > x <- p <* whitespace
138 | > return x
139 |
140 | The expression `pa <* pb` means run `pa`, then run `pb`, and return
141 | the result of `pa`. It is sort of equivalent to this code:
142 |
143 | ```haskell
144 | (<*) :: Parser a -> Parser b -> Parser a
145 | (<*) pa pb = do
146 | a <- pa
147 | void pb
148 | return a
149 | ```
150 |
151 | (It isn't implemented this way, since `(<*)` only needs Applicative
152 | and not Monad.)
153 |
154 | Now we can use the usual monad syntax rewrites, first eliminate the
155 | name `x`.
156 |
157 | > lexemeA1 :: Parser a -> Parser a
158 | > lexemeA1 p = do
159 | > p <* whitespace
160 |
161 | Now remove the redundant do:
162 |
163 | > lexemeA :: Parser a -> Parser a
164 | > lexemeA p = p <* whitespace
165 |
166 | == num
167 |
168 | Now let's tackle the `num` parser.
169 |
170 | > numD :: Parser SimpleExpr
171 | > numD = do
172 | > n <- lexemeD $ many1 digit
173 | > return $ Num $ read n
174 |
175 | Let's move 'read' to the first line.
176 |
177 | > numA0 :: Parser SimpleExpr
178 | > numA0 = do
179 | > n <- read <$> lexemeA (many1 digit)
180 | > return $ Num n
181 |
182 | This uses `(<$>)` which we saw above. You may have done code rewrites
183 | like this using `fmap` with IO in other Haskell code.
184 |
185 | Now let's move the `Num` ctor as well:
186 |
187 | > numA1 :: Parser SimpleExpr
188 | > numA1 = do
189 | > n <- (Num . read) <$> lexemeA (many1 digit)
190 | > return n
191 |
192 | You can also write it in this way:
193 |
194 | > numA2 :: Parser SimpleExpr
195 | > numA2 = do
196 | > n <- Num <$> read <$> lexemeA (many1 digit)
197 | > return n
198 |
199 | Why does this work? It it equivalent to the previous version partly
200 | because of the applicative laws.
201 |
202 | Let's break it down:
203 |
204 | > numA2'' :: Parser SimpleExpr
205 | > numA2'' = do
206 | > n <- numb
207 | > return n
208 | > where
209 | > numb :: Parser SimpleExpr
210 | > numb = Num <$> int
211 | > int :: Parser Integer
212 | > int = read <$> lexemeA (many1 digit)
213 |
214 | In terms of style, which do you think looks better:
215 | `(a . b) <$> p` or `a <$> b <$> p`.
216 |
217 | The next step for num, we can eliminate the temporary name `n` and the
218 | `do`:
219 |
220 | > numA3 :: Parser SimpleExpr
221 | > numA3 = (Num . read) <$> lexemeA (many1 digit)
222 |
223 | In more 'industrial' parser code, I would usually write some
224 | tokenization parsers separately like this:
225 |
226 | > integerA4 :: Parser Integer
227 | > integerA4 = read <$> lexemeA (many1 digit)
228 |
229 | Then the num expression parser looks like this:
230 |
231 | > numA4 :: Parser SimpleExpr
232 | > numA4 = Num <$> integerA4
233 |
234 | and we also get a integer parser which we can reuse if we need to
235 | parse an integer in another context.
236 |
237 | == var
238 |
239 | Here is the previous var parser:
240 |
241 | > varD :: Parser SimpleExpr
242 | > varD = lexemeA $ do
243 | > fc <- firstChar
244 | > rest <- many nonFirstChar
245 | > return $ Var (fc:rest)
246 | > where
247 | > firstChar = satisfy (\a -> isLetter a || a == '_')
248 | > nonFirstChar = satisfy (\a -> isDigit a || isLetter a || a == '_')
249 |
250 | The first thing we can do is to make the `firstChar` and
251 | `nonFirstChar` a little easier to read, using `(<|>)`, `char`,
252 | `letter` and `digit`:
253 |
254 | > varA0 :: Parser SimpleExpr
255 | > varA0 = lexemeA $ do
256 | > fl <- firstChar
257 | > rest <- many nonFirstChar
258 | > return $ Var (fl:rest)
259 | > where
260 | > firstChar = letter <|> char '_'
261 | > nonFirstChar = digit <|> firstChar
262 |
263 | Here is another way of making the function a little better:
264 |
265 | > varA0' :: Parser SimpleExpr
266 | > varA0' = lexemeA $ do
267 | > fl <- satisfy validFirstChar
268 | > rest <- many (satisfy validNonFirstChar)
269 | > return $ Var (fl:rest)
270 | > where
271 | > validFirstChar a = isLetter a || a == '_'
272 | > validNonFirstChar a = validFirstChar a || isDigit a
273 |
274 | We can lift the `(:)` using the Applicative operators.
275 |
276 | > varA1 :: Parser SimpleExpr
277 | > varA1 = do
278 | > i <- iden
279 | > return $ Var i
280 | > where
281 | > iden = lexemeA ((:) <$> firstChar <*> many nonFirstChar)
282 | > firstChar = letter <|> char '_'
283 | > nonFirstChar = digit <|> firstChar
284 |
285 | We used the prefix version of `(:)` to use it with `(<$>)` and
286 | `(<*>)`. The lexemeA call was moved to the `iden` helper function.
287 |
288 | Now tidy it up using `(<$>)` with the `Var` constructor:
289 |
290 | > varA2 :: Parser SimpleExpr
291 | > varA2 = Var <$> iden
292 | > where
293 | > iden = lexemeA ((:) <$> firstChar <*> many nonFirstChar)
294 | > firstChar = letter <|> char '_'
295 | > nonFirstChar = digit <|> firstChar
296 |
297 | We could also split the `iden` into a separate top level function,
298 | with the same idea as with splitting the `integer` parser.
299 |
300 | == parens
301 |
302 | Here is the starting point:
303 |
304 | > parensD :: Parser SimpleExpr
305 | > parensD = do
306 | > void $ lexemeA $ char '('
307 | > e <- simpleExprD
308 | > void $ lexemeA $ char ')'
309 | > return $ Parens e
310 |
311 | Here is the rewrite in one step:
312 |
313 | > parensA0 :: Parser SimpleExpr
314 | > parensA0 =
315 | > Parens <$> (lexemeA (char '(')
316 | > *> simpleExprD
317 | > <* lexemeA (char ')'))
318 |
319 | // f'in asciidoc, the first operator following is (*>)
320 |
321 | Here you can see that there is a `(\*>)` which works in the opposite
322 | direction to `(<*)`. The precendence of these operators means that we
323 | have to use some extra parentheses (!) here.
324 |
325 | TODO: lost the chained <*. Put something below about this so there is
326 | a concrete example.
327 |
328 | == simple expr
329 |
330 | Here is the old version:
331 |
332 | > termD :: Parser SimpleExpr
333 | > termD = numD <|> varD <|> parensD
334 |
335 | > simpleExprD :: Parser SimpleExpr
336 | > simpleExprD = chainl1 termD op
337 | > where
338 | > op = do
339 | > void $ lexemeA $ char '+'
340 | > return Add
341 |
342 | We can simplify the `op` function using the techniques we've already
343 | seen:
344 |
345 | > simpleExprA0 :: Parser SimpleExpr
346 | > simpleExprA0 = chainl1 termD op
347 | > where op = lexemeA (char '+') *> return Add
348 |
349 | The pattern `p *> return f` can use a different operator like this:
350 | `f <$ p`. Here it is in the expression parser:
351 |
352 | > simpleExprA1 :: Parser SimpleExpr
353 | > simpleExprA1 = chainl1 termD op
354 | > where op = Add <$ lexemeA (char '+')
355 |
356 | You could also write the `op` parser inline:
357 |
358 | > simpleExprA2 :: Parser SimpleExpr
359 | > simpleExprA2 = chainl1 termD (Add <$ lexemeA (char '+'))
360 |
361 | Maybe this last step makes it less readable?
362 |
363 | == summary
364 |
365 | Here is the finished job for all the simple expression code without
366 | separate token parsers:
367 |
368 | > num' :: Parser SimpleExpr
369 | > num' = (Num . read) <$> lexemeA (many1 digit)
370 |
371 | > var' :: Parser SimpleExpr
372 | > var' = Var <$> iden
373 | > where
374 | > iden = lexemeA ((:) <$> firstChar <*> many nonFirstChar)
375 | > firstChar = letter <|> char '_'
376 | > nonFirstChar = digit <|> firstChar
377 |
378 | > parens' :: Parser SimpleExpr
379 | > parens' =
380 | > Parens <$> (lexemeA (char '(')
381 | > *> simpleExpr'
382 | > <* lexemeA (char ')'))
383 |
384 | > term' :: Parser SimpleExpr
385 | > term' = num' <|> var' <|> parens'
386 |
387 | > simpleExpr' :: Parser SimpleExpr
388 | > simpleExpr' = chainl1 term' op
389 | > where op = Add <$ lexemeA (char '+')
390 |
391 | Here they are with separate token parsers and a helper function:
392 |
393 | > lexeme :: Parser a -> Parser a
394 | > lexeme p = p <* whitespace
395 |
396 | > identifier :: Parser String
397 | > identifier = lexeme ((:) <$> firstChar <*> many nonFirstChar)
398 | > where
399 | > firstChar = letter <|> char '_'
400 | > nonFirstChar = digit <|> firstChar
401 |
402 | > integer :: Parser Integer
403 | > integer = read <$> lexeme (many1 digit)
404 |
405 | Here is a lexeme wrapper for parsing single character symbols.
406 |
407 | > symbol :: Char -> Parser ()
408 | > symbol c = void $ lexeme $ char c
409 |
410 | Here is another little helper function. It barely pays its way in this
411 | short example, but even though it is only used once, I think it is
412 | worth it to make the code clearer.
413 |
414 | > betweenParens :: Parser a -> Parser a
415 | > betweenParens p = symbol '(' *> p <* symbol ')'
416 |
417 | Now the expression parsers:
418 |
419 | > num :: Parser SimpleExpr
420 | > num = Num <$> integer
421 |
422 | > var :: Parser SimpleExpr
423 | > var = Var <$> identifier
424 |
425 | > parens :: Parser SimpleExpr
426 | > parens = Parens <$> betweenParens simpleExpr
427 |
428 | > term :: Parser SimpleExpr
429 | > term = num <|> var <|> parens
430 |
431 | > simpleExpr :: Parser SimpleExpr
432 | > simpleExpr = chainl1 term' op
433 | > where op = Add <$ lexemeA (char '+')
434 |
435 | Splitting the lexer parser layer out means that we have one place
436 | where we have to remember to add `lexeme` wrappers, and also I think
437 | makes the code easier to follow.
438 |
--------------------------------------------------------------------------------
/CombinatorReview.lhs:
--------------------------------------------------------------------------------
1 |
2 | [[combinator-review]]
3 | = Combinator review
4 |
5 | In this tutorial we will go through all the functions in
6 | Text.Parsec.Combinator, and some useful ones in Control.Applicative
7 | and Control.Monad as well.
8 |
9 | > import Text.Parsec (ParseError)
10 | > import Text.Parsec.String (Parser)
11 | > import Text.Parsec.String.Parsec (try)
12 | > import Text.Parsec.String.Char (oneOf, char, digit
13 | > ,string, letter, satisfy)
14 | > import Text.Parsec.String.Combinator (many1, choice, chainl1, between
15 | > ,count, option, optionMaybe, optional)
16 | > import Control.Applicative ((<$>), (<*>), (<$), (<*), (*>), (<|>), many)
17 | > import Control.Monad (void, ap, mzero)
18 | > import Data.Char (isLetter, isDigit)
19 | > import FunctionsAndTypesForParsing
20 |
21 | == Text.Parsec.Combinator
22 |
23 | You should look at the source for these functions and try to
24 | understand how they are implemented.
25 |
26 |
27 |
28 | The style of the source code in the Parsec library sources is a little
29 | different to what we used at the end of the last tutorial. You can try
30 | reimplementing each of the Text.Parsec.Combinator module functions
31 | using the Applicative style. See if you can find a way to reassure
32 | yourself that the rewritten versions you make are correct, perhaps via
33 | writing automated tests, or perhaps some other method.
34 |
35 | You should be able to easily understand the implementation of all the
36 | functions in Text.Parsec.Combinator except possibly `anyToken` and
37 | `notFollowedBy`.
38 |
39 | === choice
40 |
41 | ```haskell
42 | choice :: [Parser a] -> Parser a
43 | ```
44 |
45 | `choice ps` tries to apply the parsers in the list `ps` in order,
46 | until one of them succeeds. It returns the value of the succeeding
47 | parser.
48 |
49 | > a :: Parser Char
50 | > a = char 'a'
51 | >
52 | > b :: Parser Char
53 | > b = char 'b'
54 | >
55 | > aOrB :: Parser Char
56 | > aOrB = choice [a,b]
57 |
58 | ```
59 | *Main> regularParse aOrB "a"
60 | Right 'a'
61 |
62 | *Main> regularParse aOrB "b"
63 | Right 'b'
64 |
65 | *Main> regularParse aOrB "c"
66 | Left (line 1, column 1):
67 | unexpected "c"
68 | expecting "a" or "b"
69 | ```
70 |
71 | ==== using with try
72 |
73 | If a parser fails with `(<|>)` or `choice`, then it will only try the
74 | next parser if the last parser consumed no input.
75 |
76 | TODO: make the parsers return the keyword and update the examples
77 |
78 | > byKeyword :: Parser ()
79 | > byKeyword = void $ string "by"
80 | >
81 | > betweenKeyword :: Parser ()
82 | > betweenKeyword = void $ string "between"
83 |
84 | Since both of these have the same prefix - b - if we combine them
85 | using `choice` then it doesn't work correctly:
86 |
87 | ```
88 | *Main> regularParse byKeyword "by"
89 | Right ()
90 |
91 | *Main> regularParse byKeyword "between"
92 | Left (line 1, column 1):
93 | unexpected "e"
94 | expecting "by"
95 |
96 | *Main> regularParse betweenKeyword "between"
97 | Right ()
98 |
99 | *Main> regularParse betweenKeyword "by"
100 | Left (line 1, column 1):
101 | unexpected "y"
102 | expecting "between"
103 |
104 | *Main> regularParse (choice [betweenKeyword,byKeyword]) "between"
105 | Right ()
106 |
107 | *Main> regularParse (choice [betweenKeyword,byKeyword]) "by"
108 | Left (line 1, column 1):
109 | unexpected "y"
110 | expecting "between"
111 |
112 | *Main> regularParse (choice [byKeyword,betweenKeyword]) "between"
113 | Left (line 1, column 1):
114 | unexpected "e"
115 | expecting "by"
116 |
117 | *Main> regularParse (choice [byKeyword,betweenKeyword]) "by"
118 | Right ()
119 | ```
120 |
121 | If we use `try` on the first option, then it all works fine.
122 |
123 | ```
124 | *Main> regularParse (choice [try byKeyword,betweenKeyword]) "by"
125 | Right ()
126 |
127 | *Main> regularParse (choice [try byKeyword,betweenKeyword]) "between"
128 | Right ()
129 | ```
130 |
131 | === count
132 |
133 | ```haskell
134 | count :: Int -> Parser a -> Parser [a]
135 | ```
136 |
137 | `count n p` parses `n` occurrences of `p`. If `n` is smaller or equal
138 | to zero, the parser is equivalent to `return []`. It returns a list of
139 | the n values returned by `p`.
140 |
141 | ```
142 | *Main> regularParse (count 5 a) "aaaaa"
143 | Right "aaaaa"
144 |
145 | *Main> regularParse (count 5 a) "aaaa"
146 | Left (line 1, column 5):
147 | unexpected end of input
148 | expecting "a"
149 |
150 | *Main> regularParse (count 5 a) "aaaab"
151 | Left (line 1, column 5):
152 | unexpected "b"
153 | expecting "a"
154 |
155 | *Main> regularParse (count 5 aOrB) "aabaa"
156 | Right "aabaa"
157 | ```
158 |
159 | === between
160 |
161 | ```haskell
162 | between :: Parser open -> Parser close -> Parser a -> Parser a
163 | ```
164 |
165 | `between open close p` parses `open`, followed by `p` and
166 | `close`. It returns the value returned by `p`.
167 |
168 | We can replace the betweenParens from the previous tutorial using
169 | this:
170 |
171 | > betweenParens :: Parser a -> Parser a
172 | > betweenParens p = between (symbol '(') (symbol ')') p
173 |
174 | It hardly seems worth it to make this change, but it might be slightly
175 | quicker to read and understand if you aren't already familiar with
176 | some code or haven't viewed it for a while. This is good for 'code
177 | maintenance', where we need to fix bugs or add new features quickly to
178 | code we haven't looked at for two years or something.
179 |
180 | Here are the support functions for this parser.
181 |
182 | > symbol :: Char -> Parser Char
183 | > symbol c = lexeme $ char c
184 |
185 | > lexeme :: Parser a -> Parser a
186 | > lexeme p = p <* whitespace
187 |
188 | > whitespace :: Parser ()
189 | > whitespace = void $ oneOf " \n\t"
190 |
191 | === option
192 |
193 | ```haskell
194 | option :: a -> Parser a -> Parser a
195 | ```
196 |
197 | `option x p` tries to apply parser `p`. If `p` fails without
198 | consuming input, it returns the value `x`, otherwise the value returned
199 | by `p`.
200 |
201 | ```
202 | *Main> regularParse (option "" (count 5 aOrB)) "aaaaa"
203 | Right "aaaaa"
204 |
205 | *Main> regularParse (option "" (count 5 aOrB)) "caaaa"
206 | Right ""
207 |
208 | *Main> regularParse (option "" (count 5 aOrB)) "aaaa"
209 | Left (line 1, column 5):
210 | unexpected end of input
211 | expecting "a" or "b"
212 |
213 | *Main> regularParse (option "" (count 5 aOrB)) "aaaac"
214 | Left (line 1, column 5):
215 | unexpected "c"
216 | expecting "a" or "b"
217 |
218 | *Main> regularParse (option "" (try (count 5 aOrB))) "aaaa"
219 | Right ""
220 | ```
221 |
222 | === optionMaybe
223 |
224 | ```haskell
225 | optionMaybe :: Parser a -> Parser (Maybe a)
226 | ```
227 |
228 | `optionMaybe p` tries to apply parser `p`. If `p` fails without consuming
229 | input, it returns `Nothing`, otherwise it returns `Just` the value returned
230 | by `p`.
231 |
232 | ```
233 | *Main> regularParse (optionMaybe (count 5 aOrB)) "aaaaa"
234 | Right (Just "aaaaa")
235 |
236 | *Main> regularParse (optionMaybe (count 5 aOrB)) "caaaa"
237 | Right Nothing
238 |
239 | *Main> regularParse (optionMaybe (count 5 aOrB)) "caaa"
240 | Right Nothing
241 |
242 | *Main> regularParse (optionMaybe (count 5 aOrB)) "aaaa"
243 | Left (line 1, column 5):
244 | unexpected end of input
245 | expecting "a" or "b"
246 |
247 | *Main> regularParse (optionMaybe (count 5 aOrB)) "aaaac"
248 | Left (line 1, column 5):
249 | unexpected "c"
250 | expecting "a" or "b"
251 |
252 | *Main> regularParse (optionMaybe (try $ count 5 aOrB)) "aaaac"
253 | Right Nothing
254 | ```
255 |
256 | === optional
257 |
258 | ```haskell
259 | optional :: Parser a -> Parser ()
260 | ```
261 |
262 | `optional p` tries to apply parser `p`. It will parse `p` or
263 | nothing. It only fails if `p` fails after consuming input. It discards
264 | the result of `p`.
265 |
266 | ```
267 | *Main> parseWithLeftOver (optional (count 5 aOrB)) "aaaaa"
268 | Right ((),"")
269 |
270 | *Main> parseWithLeftOver (optional (count 5 aOrB)) "caaaa"
271 | Right ((),"caaaa")
272 |
273 | *Main> parseWithLeftOver (optional (count 5 aOrB)) "caaa"
274 | Right ((),"caaa")
275 |
276 | *Main> parseWithLeftOver (optional (count 5 aOrB)) "aaaa"
277 | Left (line 1, column 5):
278 | unexpected end of input
279 | expecting "a" or "b"
280 |
281 | *Main> parseWithLeftOver (optional (count 5 aOrB)) "aaaac"
282 | Left (line 1, column 5):
283 | unexpected "c"
284 | expecting "a" or "b"
285 |
286 | *Main> parseWithLeftOver (optional (try $ count 5 aOrB)) "aaaac"
287 | Right ((),"aaaac")
288 | ```
289 |
290 | === skipMany1
291 |
292 | ```haskell
293 | skipMany1 :: Parser a -> Parser ()
294 | ```
295 |
296 | `skipMany1 p` applies the parser `p` one or more times, skipping its result.
297 |
298 | === many1
299 |
300 | ```haskell
301 | many1 :: Parser a -> Parser [a]
302 | ```
303 |
304 | many1 p applies the parser p one or more times. Returns a list of the
305 | returned values of p.
306 |
307 | ```haskell
308 | word = many1 letter
309 | ```
310 |
311 | === sepBy
312 |
313 | ```haskell
314 | sepBy :: Parser a -> Parser sep -> Parser [a]
315 | ```
316 |
317 | sepBy p sep parses zero or more occurrences of p, separated by
318 | sep. Returns a list of values returned by p.
319 |
320 | ```haskell
321 | commaSep p = p `sepBy` (symbol ",")
322 | ```
323 |
324 | === sepBy1
325 |
326 | ```haskell
327 | sepBy1 :: Parser a -> Parser sep -> Parser [a]
328 | ```
329 |
330 | sepBy1 p sep parses one or more occurrences of p, separated by
331 | sep. Returns a list of values returned by p.
332 |
333 | === endBy
334 |
335 | ```haskell
336 | endBy :: Parser a -> Parser sep -> Parser [a]
337 | ```
338 |
339 | endBy p sep parses zero or more occurrences of p, seperated and ended
340 | by sep. Returns a list of values returned by p.
341 |
342 | ```haskell
343 | cStatements = cStatement `endBy` semi
344 | ```
345 |
346 | === endBy1
347 |
348 | ```haskell
349 | endBy1 :: Parser a -> Parser sep -> Parser [a]
350 | ```
351 |
352 | endBy1 p sep parses one or more occurrences of p, seperated and ended
353 | by sep. Returns a list of values returned by p.
354 |
355 | === sepEndBy
356 |
357 | ```haskell
358 | sepEndBy :: Parser a -> Parser sep -> Parser [a]
359 | ```
360 |
361 | sepEndBy p sep parses zero or more occurrences of p, separated and
362 | optionally ended by sep, ie. haskell style statements. Returns a list
363 | of values returned by p.
364 |
365 | ```haskell
366 | haskellStatements = haskellStatement `sepEndBy` semi
367 | ```
368 |
369 | === sepEndBy1
370 |
371 | ```haskell
372 | sepEndBy1 :: Parser a -> Parser sep -> Parser [a]
373 | ```
374 |
375 | sepEndBy1 p sep parses one or more occurrences of p, separated and
376 | optionally ended by sep. Returns a list of values returned by p.
377 |
378 | === chainl
379 |
380 | ```haskell
381 | chainl :: Parser a -> Parser (a -> a -> a) -> a -> Parser a
382 | ```
383 |
384 | chainl p op x parser zero or more occurrences of p, separated by
385 | op. Returns a value obtained by a left associative application of all
386 | functions returned by op to the values returned by p. If there are
387 | zero occurrences of p, the value x is returned.
388 |
389 | === chainl1
390 |
391 | ```haskell
392 | chainl1 :: Parser a -> Parser (a -> a -> a) -> Parser a
393 | ```
394 |
395 | chainl1 p op x parser one or more occurrences of p, separated by op
396 | Returns a value obtained by a left associative application of all
397 | functions returned by op to the values returned by p. . This parser
398 | can for example be used to eliminate left recursion which typically
399 | occurs in expression grammars.
400 |
401 | === chainr
402 |
403 | ```haskell
404 | chainr :: Parser a -> Parser (a -> a -> a) -> a -> Parser a
405 | ```
406 |
407 | chainr p op x parser zero or more occurrences of p, separated by op
408 | Returns a value obtained by a right associative application of all
409 | functions returned by op to the values returned by p. If there are no
410 | occurrences of p, the value x is returned.
411 |
412 | === chainr1
413 |
414 | ```haskell
415 | chainr1 :: Parser a -> Parser (a -> a -> a) -> Parser a
416 | ```
417 |
418 | chainr1 p op x parser one or more occurrences of `p`, separated by op
419 | Returns a value obtained by a right associative application of all
420 | functions returned by op to the values returned by p.
421 |
422 | === eof
423 |
424 | ```haskell
425 | eof :: Parser ()
426 | ```
427 |
428 | This parser only succeeds at the end of the input. This is not a
429 | primitive parser but it is defined using notFollowedBy.
430 |
431 | ```haskell
432 | eof = notFollowedBy anyToken > "end of input"
433 | ```
434 |
435 | The (>) operator is used for error messages. We will come back to
436 | error messages after writing the basic SQL parser.
437 |
438 | === notFollowedBy
439 |
440 | ```haskell
441 | notFollowedBy :: Show a => Parser a -> Parser ()
442 | ```
443 |
444 | notFollowedBy p only succeeds when parser p fails. This parser does
445 | not consume any input. This parser can be used to implement the
446 | 'longest match' rule. For example, when recognizing keywords (for
447 | example let), we want to make sure that a keyword is not followed by a
448 | legal identifier character, in which case the keyword is actually an
449 | identifier (for example lets). We can program this behaviour as
450 | follows:
451 |
452 | ```haskell
453 | keywordLet = try (do{ string "let"
454 | ; notFollowedBy alphaNum
455 | })
456 | ```
457 |
458 | === manyTill
459 |
460 | ```haskell
461 | manyTill :: Parser a -> Parser end -> Parser [a]
462 | ```
463 |
464 | manyTill p end applies parser p zero or more times until parser end
465 | succeeds. Returns the list of values returned by p. This parser can be
466 | used to scan comments:
467 |
468 | ```haskell
469 | simpleComment = do{ string ""))
471 | }
472 | ```
473 | // asciidoc: the string should be "-->"
474 |
475 | Note the overlapping parsers anyChar and string "-\->", and therefore
476 | the use of the try combinator.
477 |
478 | === lookAhead
479 |
480 | ```haskell
481 | lookAhead :: Parser a -> Parser a
482 | ```
483 |
484 | lookAhead p parses p without consuming any input.
485 |
486 | If p fails and consumes some input, so does lookAhead. Combine with
487 | try if this is undesirable.
488 |
489 | === anyToken
490 |
491 | ```haskell
492 | anyToken :: Parser Char
493 | ```
494 |
495 | The parser anyToken accepts any kind of token. It is for example used
496 | to implement eof. Returns the accepted token.
497 |
498 | == Control.Applicative
499 |
500 | Here are the functions from Applicative that are used:
501 |
502 | `(<$>)`, `(<*>)`, `(<$)`, `(<*)`, `(*>)`, `(<|>)`, `many`
503 |
504 | TODO: examples for all of these
505 |
506 | We've already seen all of these, except `(<$)`. This is often used to
507 | parse a keyword and return a no argument constructor:
508 |
509 | > data Something = Type1 | Type2 | Type3
510 |
511 | > something :: Parser Something
512 | > something = choice [Type1 <$ string "type1"
513 | > ,Type2 <$ string "type2"
514 | > ,Type3 <$ string "type3"]
515 |
516 | // the first symbol following should be (<**>)
517 | // the backslashes are for asciidoc
518 | // todo: do this in the render.lhs?
519 | // I wish I understood why you need backslashes in some places
520 | // and not others
521 |
522 | There is also `(<\**>)` which is `(<*>)` with the arguments flipped.
523 |
524 | TODO: double check using these from Parsec instead of
525 | Control.Applicative: possible performance implictions?
526 |
527 | == Control.Monad
528 |
529 | === return
530 |
531 | One use of return is to always succeed, and return a value:
532 |
533 | > alwaysX :: Parser Char
534 | > alwaysX = return 'x'
535 |
536 | ```
537 | *Main> parseWithLeftOver (a <|> alwaysX) "a"
538 | Right ('a',"")
539 |
540 | *Main> parseWithLeftOver (a <|> alwaysX) "b"
541 | Right ('x',"b")
542 | ```
543 |
544 | === mzero
545 |
546 | This function is used in the implementation of `choice`:
547 |
548 | > choice' :: [Parser a] -> Parser a
549 | > choice' ps = foldr (<|>) mzero ps
550 |
551 |
552 |
553 | TODO: go through a bunch of functions + do notation examples
554 |
555 | >>=
556 | =<<
557 | >>
558 | void
559 | mapM, mapM_
560 | sequence,sequence_
561 | guard
562 | return
563 | mzero
564 | mplus
565 | when, unless
566 | liftMN
567 | ap
568 | quick note about fail, will return to this in the error messages stage
569 |
570 | TODO: using trace
571 |
--------------------------------------------------------------------------------
/ErrorMessages.lhs:
--------------------------------------------------------------------------------
1 |
2 | [[error-messages]]
3 | = Error messages
4 |
5 | In this tutorial, we will start looking at the error messages
6 | generated by Parsec and how we can influence to improve them.
7 |
8 | TODO:
9 |
10 | review the different methods of generating failure in parsec and what
11 | they look like, including choice.
12 |
13 | examine how rearranging parsers can change the error messages,
14 | committing to a parsing branch to improve the messages
15 |
16 | also possibly look at adding additional parsing to detect common
17 | errors to provide a nicer error messages
18 |
19 | maybe talk about the idea of being permissive in the parsing proper
20 | stage, then doing additional syntax-type checks on the ast after
21 |
22 |
23 | == ways of generating errors in parsec
24 |
25 | == how parsec combines errors or drops them into the void
26 |
27 |
28 | == Parsing tpch
29 |
30 | Let's try the parser out on the TPC-H queries.
31 |
32 | ```
33 | ```
34 |
35 | Summary of errors so far:
36 | q1: typed literal: type_name 'literal value'
37 | q2: scalar subquery
38 | q3: typed literal
39 | q4: typed literal
40 | q5: typed literal
41 | q6: typed literal
42 | q7: ??
43 | q8: extract??
44 | q9: extract ??
45 | q10: typed literal
46 | q11: scalar sub query
47 | q12: ??
48 | q13: not like??
49 | q14: decimal literal
50 | q15: cte
51 | q16: count distinct
52 | q17: decimal literal
53 | q18: in subquery
54 | q19: in literal list
55 | q20: in subquery
56 | q21: exists subquery
57 | q22: substring
58 |
59 | TODO: try to figure out each issue. Not very easy: the error messages
60 | are not very good.
61 |
62 | Do some ad hoc stuff to try to improve each error message? Keep
63 | simplifying expressions then try to understand specific principles.
64 | Then make some generalizations that can be made across the parser
65 |
66 |
67 | == value expressions in isolation
68 |
69 | We can start with the real error code now by considering the
70 | valueexpression parser in isolation.
71 |
72 | ideas:
73 | error inside function parameter list
74 | bad identifier
75 | unrecognised operator
76 | unrecognised chars
77 | unmatching parens
78 | keyword errors in case
79 | unterminated string lit
80 | 3 dot components
81 | binary op with missing second arg
82 | associativity errors with binary ops?
83 | blacklist errors
84 |
85 | == query expressions
86 |
87 | mispelling keywords at this level
88 | clauses in wrong order
89 | adding extra tokens or removing them at clause/value expression
90 | boundaries
91 |
92 | == summarizing?
93 |
94 | == updated parsing code with improved error message behaviour
95 |
96 |
--------------------------------------------------------------------------------
/FromClause.lhs:
--------------------------------------------------------------------------------
1 |
2 | [[from-clause]]
3 | = From clause
4 |
5 | In this tutorial, we extend the from clause support to the following:
6 | we will support implicit and explicit joins, including keywords
7 | natural, inner, outer, left, right, full, cross, on and using, plus
8 | parens and simple aliases (e.g. select a from t u, but not select a
9 | from t(a,b)). We don't support oracle outer join syntax (+) or the
10 | other 'pre-ANSI' variations on this theme. No lateral keyword or apply
11 | or pivot.
12 |
13 | > {-# LANGUAGE TupleSections #-}
14 | >
15 | > --import Text.Groom (groom)
16 | > --import qualified Text.Parsec as P
17 | > import Text.Parsec.String (Parser)
18 | > import Text.Parsec.String.Parsec (try)
19 | > import Text.Parsec.String.Combinator
20 | > import Control.Applicative ((<$>),(<*), (*>),(<*>), (<$), (<|>))
21 | > import Control.Monad
22 | > import Data.Maybe ()
23 | > import qualified Test.HUnit as H
24 | > import FunctionsAndTypesForParsing
25 | > import Debug.Trace
26 |
27 | TODO: should explicitly import from these two modules (and same in
28 | QueryExpressions.lhs)
29 |
30 | > import ValueExpressions (ValueExpr(..), valueExpr, identifier, makeTest, parens)
31 | > import QueryExpressions (selectList,whereClause,groupByClause,having,orderBy
32 | > ,commaSep1, keyword_,blackListIdentifier)
33 |
34 | == Abstract syntax
35 |
36 | Here are is the updated `QueryExpr` and the new `TableRef` abstract
37 | syntax types.
38 |
39 | > data QueryExpr
40 | > = Select
41 | > {qeSelectList :: [(ValueExpr,Maybe String)]
42 | > ,qeFrom :: [TableRef]
43 | > ,qeWhere :: Maybe ValueExpr
44 | > ,qeGroupBy :: [ValueExpr]
45 | > ,qeHaving :: Maybe ValueExpr
46 | > ,qeOrderBy :: [ValueExpr]
47 | > } deriving (Eq,Show)
48 | >
49 | > makeSelect :: QueryExpr
50 | > makeSelect = Select {qeSelectList = []
51 | > ,qeFrom = []
52 | > ,qeWhere = Nothing
53 | > ,qeGroupBy = []
54 | > ,qeHaving = Nothing
55 | > ,qeOrderBy = []}
56 | >
57 | > data TableRef = TRSimple String
58 | > | TRJoin TableRef JoinType TableRef (Maybe JoinCondition)
59 | > | TRParens TableRef
60 | > | TRAlias TableRef String
61 | > | TRQueryExpr QueryExpr
62 | > deriving (Eq,Show)
63 |
64 | This syntax for table references can represent invalid syntax, for
65 | instance two nested aliases. The justification for this is that
66 | sometimes trying to accurately represent only exactly what is valid
67 | creates something much more complex. Maybe this is a good tradeoff in
68 | this situation, and maybe not.
69 |
70 | > data JoinType = JoinInner | JoinLeft | JoinRight | JoinFull | JoinCross
71 | > deriving (Eq,Show)
72 | >
73 | > data JoinCondition = JoinOn ValueExpr
74 | > | JoinUsing [String]
75 | > | JoinNatural
76 | > deriving (Eq,Show)
77 |
78 | With the join condition, we've done the opposite to TableRef - we've
79 | combined `natural` and `on`/`using`, since only one of these can be
80 | present, even though this departs a little from the concrete
81 | syntax.
82 |
83 | First we will develop the standalone from clause parser, then we will
84 | update the query expression syntax and parsing to incorporate our new
85 | from clause parser.
86 |
87 | == simple table name
88 |
89 | Let's start with something simple: a from clause can be multiple comma
90 | separated tablerefs, aka an implicit join.
91 |
92 | > multipleTRSimpleTests :: [(String, [TableRef])]
93 | > multipleTRSimpleTests = [("from a,b", [TRSimple "a", TRSimple "b"])]
94 | >
95 | > from0 :: Parser [TableRef]
96 | > from0 = keyword_ "from" *> commaSep1 (TRSimple <$> identifier)
97 |
98 | ```
99 | *Main> H.runTestTT $ H.TestList $ map (makeTest from0) multipleTRSimpleTests
100 | Cases: 1 Tried: 1 Errors: 0 Failures: 0
101 | Counts {cases = 1, tried = 1, errors = 0, failures = 0}
102 | ```
103 |
104 | Let's do the query expression, parens and alias first, before tackling
105 | joins.
106 |
107 | == subquery
108 |
109 | Here is the example:
110 |
111 | > trQueryExprTests :: [(String, [TableRef])]
112 | > trQueryExprTests =
113 | > [("from (select a from t)"
114 | > ,[TRQueryExpr $ makeSelect {qeSelectList = [(Iden "a", Nothing)]
115 | > ,qeFrom = [TRSimple "t"]}])]
116 |
117 | Here is the query expression parser we can use:
118 |
119 | > queryExpr1 :: Parser [TableRef] -> Parser QueryExpr
120 | > queryExpr1 from' = Select
121 | > <$> selectList
122 | > <*> option [] from'
123 | > <*> optionMaybe whereClause
124 | > <*> option [] groupByClause
125 | > <*> optionMaybe having
126 | > <*> option [] orderBy
127 | >
128 | > from1 :: Parser [TableRef]
129 | > from1 =
130 | > keyword_ "from" *> commaSep1 trefTerm
131 | > where
132 | > trefTerm = choice [TRSimple <$> identifier
133 | > ,TRQueryExpr <$> parens (queryExpr1 from1)]
134 |
135 | == parens
136 |
137 | We can't do a sensible example for these right now - we need explicit
138 | joins and then the parens can be used to override the associativity of
139 | a three way join, or to specify over what part of the expression to
140 | apply an alias.
141 |
142 | > trParensTests :: [(String, [TableRef])]
143 | > trParensTests = [("from (a)", [TRParens $ TRSimple "a"])]
144 |
145 | We can write some more tests for parens after we've done the explicit
146 | joins.
147 |
148 | > from2 :: Parser [TableRef]
149 | > from2 =
150 | > keyword_ "from" *> commaSep1 trefTerm
151 | > where
152 | > trefTerm = choice [TRSimple <$> identifier
153 | > ,try (TRQueryExpr <$> parens (queryExpr1 from2))
154 | > ,TRParens <$> parens trefTerm]
155 |
156 | == alias
157 |
158 | > trAliasTests :: [(String,[TableRef])]
159 | > trAliasTests = [("from a as b", [TRAlias (TRSimple "a") "b"])
160 | > ,("from a b", [TRAlias (TRSimple "a") "b"])]
161 |
162 | The alias can be treated like a postfix operator.
163 |
164 | > suffixWrapper :: (a -> Parser a) -> a -> Parser a
165 | > suffixWrapper p a = p a <|> return a
166 |
167 | TODO: ?? not sure about this
168 |
169 | > from3 :: Parser [TableRef]
170 | > from3 =
171 | > keyword_ "from" *> commaSep1 trefTerm
172 | > where
173 | > trefTerm = choice [TRSimple <$> identifier
174 | > ,try (TRQueryExpr <$> parens (queryExpr1 from3))
175 | > ,TRParens <$> parens trefTerm]
176 | > >>= suffixWrapper alias
177 | > alias tr = TRAlias tr <$> (optional (keyword_ "as") *> identifier)
178 |
179 | How to make it keep nesting?
180 |
181 | == joins
182 |
183 | Here is a casual sketch of the target grammar:
184 |
185 | ```
186 | tref
187 | (cross | [natural]
188 | ([inner]
189 | | left [outer]
190 | | right [outer]
191 | | full [outer]
192 | )
193 | join tref
194 | [on expr | using (...)]
195 | ```
196 |
197 | Let's start with parsers for the 'join operator' in the middle and for
198 | the join condition:
199 |
200 | === join type
201 |
202 | > joinType :: Parser JoinType
203 | > joinType = choice
204 | > [JoinCross <$ keyword_ "cross" <* keyword_ "join"
205 | > ,JoinInner <$ keyword_ "inner" <* keyword_ "join"
206 | > ,JoinLeft <$ keyword_ "left"
207 | > <* optional (keyword_ "outer")
208 | > <* keyword_ "join"
209 | > ,JoinRight <$ keyword_ "right"
210 | > <* optional (keyword_ "outer")
211 | > <* keyword_ "join"
212 | > ,JoinFull <$ keyword_ "full"
213 | > <* optional (keyword_ "outer")
214 | > <* keyword_ "join"
215 | > ,JoinInner <$ keyword_ "join"]
216 |
217 | ```
218 | *Main> parseWithEof joinType "cross join"
219 | Right JoinCross
220 |
221 | *Main> parseWithEof joinType "inner join"
222 | Right JoinInner
223 |
224 | *Main> parseWithEof joinType "left outer join"
225 | Right JoinLeft
226 |
227 | *Main> parseWithEof joinType "left join"
228 | Right JoinLeft
229 |
230 | *Main> parseWithEof joinType "right outer join"
231 | Right JoinRight
232 |
233 | *Main> parseWithEof joinType "right join"
234 | Right JoinRight
235 |
236 | *Main> parseWithEof joinType "full outer join"
237 | Right JFull
238 |
239 | *Main> parseWithEof joinType "full join"
240 | Right JoinFull
241 |
242 | *Main> parseWithEof joinType "join"
243 | Right JoinInner
244 | ```
245 |
246 | I thought about factoring out the common bits with the joinType parser:
247 |
248 | > joinType0 :: Parser JoinType
249 | > joinType0 = choice
250 | > [choice
251 | > [JoinCross <$ try (keyword_ "cross")
252 | > ,JoinInner <$ try (keyword_ "inner")
253 | > ,choice [JoinLeft <$ try (keyword_ "left")
254 | > ,JoinRight <$ try (keyword_ "right")
255 | > ,JoinFull <$ try (keyword_ "full")]
256 | > <* optional (try $ keyword_ "outer")]
257 | > <* keyword_ "join"
258 | > ,JoinInner <$ keyword_ "join"]
259 |
260 | But I think the longer version is much easier to follow, even if it is
261 | a little more boring.
262 |
263 | === join condition
264 |
265 | The idea with the join condition is that we pass a bool to say whether
266 | we've already seen the 'natural' keyword. If so, then we don't try to
267 | parse 'on' or 'using'.
268 |
269 | > joinCondition :: Bool -> Parser JoinCondition
270 | > joinCondition nat =
271 | > choice [guard nat >> return JoinNatural
272 | > ,keyword_ "on" >> JoinOn <$> valueExpr []
273 | > ,keyword_ "using" >> JoinUsing <$> parens (commaSep1 identifier)
274 | > ]
275 |
276 | ```
277 | *Main> parseWithEof (joinCondition False) "on a"
278 | Right (JoinOn (Iden "a"))
279 |
280 | *Main> parseWithEof (joinCondition False) "on a + b"
281 | Right (JoinOn (BinOp (Iden "a") "+" (Iden "b")))
282 |
283 | *Main> parseWithEof (joinCondition False) "using (a,b)"
284 | Right (JoinUsing ["a","b"])
285 |
286 | *Main> parseWithEof (joinCondition True) "using (a,b)"
287 | Left (line 1, column 1):
288 | unexpected 'u'
289 | expecting end of input
290 |
291 | *Main> parseWithEof (joinCondition True) ""
292 | Right JoinNatural
293 | ```
294 |
295 | === simple binary join
296 |
297 | Let's try some simple binary joins:
298 |
299 | > simpleBinaryJoinTests :: [(String,[TableRef])]
300 | > simpleBinaryJoinTests =
301 | > [("from a join b"
302 | > ,[TRJoin (TRSimple "a") JoinInner (TRSimple "b") Nothing])
303 | >
304 | > ,("from a natural join b"
305 | > ,[TRJoin (TRSimple "a") JoinInner (TRSimple "b") (Just JoinNatural)])
306 | >
307 | > ,("from a join b on a.x = b.y"
308 | > ,[TRJoin (TRSimple "a") JoinInner (TRSimple "b")
309 | > (Just $ JoinOn $ BinOp (DIden "a" "x") "="(DIden "b" "y"))])
310 | >
311 | > ,("from a join b using(x,y)"
312 | > ,[TRJoin (TRSimple "a") JoinInner (TRSimple "b")
313 | > (Just $ JoinUsing ["x","y"])])
314 | >
315 | > ,("from a cross join b"
316 | > ,[TRJoin (TRSimple "a") JoinCross (TRSimple "b") Nothing])
317 | >
318 | > ]
319 |
320 | We want to parse the first table, then optionally parse the 'natural'
321 | keyword, then the join type, then the second table, then optionally
322 | parse the join condition.
323 |
324 | > from4 :: Parser [TableRef]
325 | > from4 = keyword_ "from" >> (:[]) <$> do
326 | > t0 <- simpleTref
327 | > nat <- option False (True <$ keyword_ "natural")
328 | > jt <- joinType
329 | > t1 <- simpleTref
330 | > jc <- optionMaybe (joinCondition nat)
331 | > return $ TRJoin t0 jt t1 jc
332 | > where
333 | > simpleTref = TRSimple <$> identifier
334 |
335 | Let's start extending this into the full target parser. In this next
336 | version, I've tried to combine all the versions we've seen so far.
337 |
338 | > from5 :: Parser [TableRef]
339 | > from5 = keyword_ "from" >> commaSep1 tref
340 | > where
341 | > tref = nonJoinTref >>= suffixWrapper joinTrefSuffix
342 | > joinTrefSuffix t0 = do
343 | > nat <- option False (True <$ keyword_ "natural")
344 | > jt <- joinType
345 | > t1 <- nonJoinTref
346 | > jc <- optionMaybe (joinCondition nat)
347 | > return $ TRJoin t0 jt t1 jc
348 | > nonJoinTref = choice [TRSimple <$> identifier
349 | > ,try (TRQueryExpr <$> parens (queryExpr1 from5))
350 | > ,TRParens <$> parens tref]
351 | > >>= suffixWrapper alias
352 | > alias tr = try (TRAlias tr <$> (optional (keyword_ "as") *> identifier))
353 |
354 | ```
355 | *Main> H.runTestTT $ H.TestList $ map (makeTest from5) (multipleTRSimpleTests ++ trQueryExprTests ++ trParensTests ++ trAliasTests ++ simpleBinaryJoinTests)
356 | ### Failure in: 5:from a join b
357 | (line 1, column 14):
358 | unexpected end of input
359 | expecting digit, letter, "_", "--" or "/*"
360 | ### Failure in: 6:from a natural join b
361 | from a natural join b
362 | expected: [TRJoin (TRSimple "a") JoinInner (TRSimple "b") (Just JoinNatural)]
363 | but got: [TRJoin (TRAlias (TRSimple "a") "natural") JoinInner (TRSimple "b") Nothing]
364 | ### Failure in: 7:from a join b on a.x = b.y
365 | (line 1, column 15):
366 | unexpected "o"
367 | expecting "--" or "/*"
368 | ### Failure in: 8:from a join b using(x,y)
369 | (line 1, column 15):
370 | unexpected "u"
371 | expecting "--" or "/*"
372 | ### Failure in: 9:from a cross join b
373 | from a cross join b
374 | expected: [TRJoin (TRSimple "a") JoinCross (TRSimple "b") Nothing]
375 | but got: [TRJoin (TRAlias (TRSimple "a") "cross") JoinInner (TRSimple "b") Nothing]
376 | Cases: 10 Tried: 10 Errors: 0 Failures: 5
377 | Counts {cases = 10, tried = 10, errors = 0, failures = 5}
378 | ```
379 |
380 | What's going wrong? If you look at some of the issues, it looks like
381 | we are getting keywords parsed as aliases. Let's fix that first:
382 |
383 | > from6 :: Parser [TableRef]
384 | > from6 = keyword_ "from" >> commaSep1 tref
385 | > where
386 | > tref = nonJoinTref >>= suffixWrapper joinTrefSuffix
387 | > joinTrefSuffix t0 = do
388 | > nat <- option False (True <$ keyword_ "natural")
389 | > jt <- joinType
390 | > t1 <- nonJoinTref
391 | > jc <- optionMaybe (joinCondition nat)
392 | > return $ TRJoin t0 jt t1 jc
393 | > nonJoinTref = choice [TRSimple <$> identifier
394 | > ,try (TRQueryExpr <$> parens (queryExpr1 from6))
395 | > ,TRParens <$> parens tref]
396 | > >>= suffixWrapper alias
397 | > alias tr = TRAlias tr <$> (optional (keyword_ "as") *> aliasIdentifier)
398 | > aliasIdentifier = blackListIdentifier
399 | > ["natural"
400 | > ,"inner"
401 | > ,"outer"
402 | > ,"cross"
403 | > ,"left"
404 | > ,"right"
405 | > ,"full"
406 | > ,"join"
407 | > ,"on"
408 | > ,"using"]
409 |
410 | That didn't solve the problem. I think we also have a problem since
411 | the `alias` can now fail after consuming input, we need to use `try`.
412 |
413 | > from7 :: Parser [TableRef]
414 | > from7 = keyword_ "from" >> commaSep1 tref
415 | > where
416 | > tref = nonJoinTref >>= suffixWrapper joinTrefSuffix
417 | > joinTrefSuffix t0 = do
418 | > nat <- option False (True <$ keyword_ "natural")
419 | > jt <- joinType
420 | > t1 <- nonJoinTref
421 | > jc <- optionMaybe (joinCondition nat)
422 | > return $ TRJoin t0 jt t1 jc
423 | > nonJoinTref = choice [TRSimple <$> identifier
424 | > ,try (TRQueryExpr <$> parens (queryExpr1 from7))
425 | > ,TRParens <$> parens tref]
426 | > >>= suffixWrapper (try . alias)
427 | > alias tr = try (TRAlias tr <$> (optional (keyword_ "as") *> aliasIdentifier))
428 | > aliasIdentifier = blackListIdentifier
429 | > ["natural"
430 | > ,"inner"
431 | > ,"outer"
432 | > ,"cross"
433 | > ,"left"
434 | > ,"right"
435 | > ,"full"
436 | > ,"join"
437 | > ,"on"
438 | > ,"using"]
439 |
440 | The final step is to make it parse n-way explicit joins.
441 |
442 | > threeWayJoinTests :: [(String,[TableRef])]
443 | > threeWayJoinTests =
444 | > [("from a join b join c"
445 | > ,[TRJoin
446 | > (TRJoin (TRSimple "a") JoinInner (TRSimple "b") Nothing)
447 | > JoinInner (TRSimple "c") Nothing])]
448 |
449 |
450 | > from8 :: Parser [TableRef]
451 | > from8 = keyword_ "from" >> commaSep1 tref
452 | > where
453 | > tref = nonJoinTref >>= suffixWrapper joinTrefSuffix
454 | > joinTrefSuffix t0 = (do
455 | > nat <- option False (True <$ keyword_ "natural")
456 | > TRJoin t0 <$> joinType
457 | > <*> nonJoinTref
458 | > <*> optionMaybe (joinCondition nat))
459 | > >>= suffixWrapper joinTrefSuffix
460 | > nonJoinTref = choice [TRSimple <$> identifier
461 | > ,try (TRQueryExpr <$> parens (queryExpr1 from8))
462 | > ,TRParens <$> parens tref]
463 | > >>= suffixWrapper (try . alias)
464 | > alias tr = try (TRAlias tr <$> (optional (keyword_ "as") *> aliasIdentifier))
465 | > aliasIdentifier = blackListIdentifier
466 | > ["natural"
467 | > ,"inner"
468 | > ,"outer"
469 | > ,"cross"
470 | > ,"left"
471 | > ,"right"
472 | > ,"full"
473 | > ,"join"
474 | > ,"on"
475 | > ,"using"]
476 |
477 | We get left associative with this code. I don't know if this is correct.
478 |
479 | We should do some more testing to make sure this code is good. TODO
480 |
481 | == query expressions
482 |
483 | Let's create the full query expression parser now:
484 |
485 | > queryExprJoinTests :: [(String,QueryExpr)]
486 | > queryExprJoinTests =
487 | > [("select a from t"
488 | > ,ms [TRSimple "t"])
489 | >
490 | > ,("select a from t,u"
491 | > ,ms [TRSimple "t", TRSimple "u"])
492 | >
493 | > ,("select a from t inner join u on expr"
494 | > ,ms [TRJoin (TRSimple "t") JoinInner (TRSimple "u")
495 | > (Just $ JoinOn $ Iden "expr")])
496 | >
497 | > ,("select a from t left join u on expr"
498 | > ,ms [TRJoin (TRSimple "t") JoinLeft (TRSimple "u")
499 | > (Just $ JoinOn $ Iden "expr")])
500 | >
501 | > ,("select a from t right join u on expr"
502 | > ,ms [TRJoin (TRSimple "t") JoinRight (TRSimple "u")
503 | > (Just $ JoinOn $ Iden "expr")])
504 | >
505 | > ,("select a from t full join u on expr"
506 | > ,ms [TRJoin (TRSimple "t") JoinFull (TRSimple "u")
507 | > (Just $ JoinOn $ Iden "expr")])
508 | >
509 | > ,("select a from t cross join u"
510 | > ,ms [TRJoin (TRSimple "t") JoinCross (TRSimple "u") Nothing])
511 | >
512 | > ,("select a from t natural inner join u"
513 | > ,ms [TRJoin (TRSimple "t") JoinInner (TRSimple "u")
514 | > (Just JoinNatural)])
515 | >
516 | > ,("select a from t inner join u using(a,b)"
517 | > ,ms [TRJoin (TRSimple "t") JoinInner (TRSimple "u")
518 | > (Just $ JoinUsing ["a", "b"])])
519 | >
520 | > ,("select a from (select a from t)"
521 | > ,ms [TRQueryExpr $ ms [TRSimple "t"]])
522 | >
523 | > ,("select a from t as u"
524 | > ,ms [TRAlias (TRSimple "t") "u"])
525 | >
526 | > ,("select a from t u"
527 | > ,ms [TRAlias (TRSimple "t") "u"])
528 | >
529 | > ,("select a from (t cross join u) as u"
530 | > ,ms [TRAlias (TRParens $ TRJoin (TRSimple "t") JoinCross
531 | > (TRSimple "u") Nothing) "u"])
532 | > ]
533 | > where
534 | > ms f = makeSelect {qeSelectList = [(Iden "a",Nothing)]
535 | > ,qeFrom = f}
536 |
537 | Here are all the other query expression tests updated with the new
538 | QueryExpr type.
539 |
540 | > singleSelectItemTests :: [(String,QueryExpr)]
541 | > singleSelectItemTests =
542 | > [("select 1", makeSelect {qeSelectList = [(NumLit 1,Nothing)]})]
543 |
544 | > multipleSelectItemsTests :: [(String,QueryExpr)]
545 | > multipleSelectItemsTests =
546 | > [("select a"
547 | > ,makeSelect {qeSelectList = [(Iden "a",Nothing)]})
548 | > ,("select a,b"
549 | > ,makeSelect {qeSelectList = [(Iden "a",Nothing)
550 | > ,(Iden "b",Nothing)]})
551 | > ,("select 1+2,3+4"
552 | > ,makeSelect {qeSelectList =
553 | > [(BinOp (NumLit 1) "+" (NumLit 2),Nothing)
554 | > ,(BinOp (NumLit 3) "+" (NumLit 4),Nothing)]})
555 | > ]
556 |
557 | > selectListTests :: [(String,QueryExpr)]
558 | > selectListTests =
559 | > [("select a as a, b as b"
560 | > ,makeSelect {qeSelectList = [(Iden "a", Just "a")
561 | > ,(Iden "b", Just "b")]})
562 | >
563 | > ,("select a a, b b"
564 | > ,makeSelect {qeSelectList = [(Iden "a", Just "a")
565 | > ,(Iden "b", Just "b")]})
566 | > ] ++ multipleSelectItemsTests
567 | > ++ singleSelectItemTests
568 |
569 | > fromTests :: [(String,QueryExpr)]
570 | > fromTests =
571 | > [("select a from t"
572 | > ,makeSelect {qeSelectList = [(Iden "a",Nothing)]
573 | > ,qeFrom = [TRSimple "t"]})]
574 |
575 | > whereTests :: [(String,QueryExpr)]
576 | > whereTests =
577 | > [("select a from t where a = 5"
578 | > ,makeSelect {qeSelectList = [(Iden "a",Nothing)]
579 | > ,qeFrom = [TRSimple "t"]
580 | > ,qeWhere = Just $ BinOp (Iden "a") "=" (NumLit 5)})
581 | > ]
582 |
583 | > groupByTests :: [(String,QueryExpr)]
584 | > groupByTests =
585 | > [("select a,sum(b) from t group by a"
586 | > ,makeSelect {qeSelectList = [(Iden "a",Nothing)
587 | > ,(App "sum" [Iden "b"],Nothing)]
588 | > ,qeFrom = [TRSimple "t"]
589 | > ,qeGroupBy = [Iden "a"]
590 | > })
591 | >
592 | > ,("select a,b,sum(c) from t group by a,b"
593 | > ,makeSelect {qeSelectList = [(Iden "a",Nothing)
594 | > ,(Iden "b",Nothing)
595 | > ,(App "sum" [Iden "c"],Nothing)]
596 | > ,qeFrom = [TRSimple "t"]
597 | > ,qeGroupBy = [Iden "a",Iden "b"]
598 | > })
599 | > ]
600 |
601 | > havingTests :: [(String,QueryExpr)]
602 | > havingTests =
603 | > [("select a,sum(b) from t group by a having sum(b) > 5"
604 | > ,makeSelect {qeSelectList = [(Iden "a",Nothing)
605 | > ,(App "sum" [Iden "b"],Nothing)]
606 | > ,qeFrom = [TRSimple "t"]
607 | > ,qeGroupBy = [Iden "a"]
608 | > ,qeHaving = Just $ BinOp (App "sum" [Iden "b"]) ">" (NumLit 5)
609 | > })
610 | > ]
611 |
612 | > orderByTests :: [(String,QueryExpr)]
613 | > orderByTests =
614 | > [("select a from t order by a"
615 | > ,ms [Iden "a"])
616 | >
617 | > ,("select a from t order by a, b"
618 | > ,ms [Iden "a", Iden "b"])
619 | > ]
620 | > where
621 | > ms o = makeSelect {qeSelectList = [(Iden "a",Nothing)]
622 | > ,qeFrom = [TRSimple "t"]
623 | > ,qeOrderBy = o}
624 |
625 | ```
626 | *Main> H.runTestTT $ H.TestList $ map (makeTest (queryExpr1 from8)) (selectListTests ++ fromTests ++ whereTests ++ groupByTests ++ havingTests ++ orderByTests ++ queryExprJoinTests)
627 | ### Failure in: 7:select a from t where a = 5
628 | (line 1, column 25):
629 | unexpected "="
630 | expecting "--" or "/*"
631 | ### Failure in: 8:select a,sum(b) from t group by a
632 | (line 1, column 33):
633 | unexpected "a"
634 | expecting "--" or "/*"
635 | ### Failure in: 9:select a,b,sum(c) from t group by a,b
636 | (line 1, column 35):
637 | unexpected "a"
638 | expecting "--" or "/*"
639 | ### Failure in: 10:select a,sum(b) from t group by a having sum(b) > 5
640 | (line 1, column 33):
641 | unexpected "a"
642 | expecting "--" or "/*"
643 | ### Failure in: 11:select a from t order by a
644 | (line 1, column 26):
645 | unexpected "a"
646 | expecting "--" or "/*"
647 | ### Failure in: 12:select a from t order by a, b
648 | (line 1, column 26):
649 | unexpected "a"
650 | expecting "--" or "/*"
651 | Cases: 26 Tried: 26 Errors: 0 Failures: 6
652 | Counts {cases = 26, tried = 26, errors = 0, failures = 6}
653 | ```
654 |
655 | The problem is the table alias parser is trying to parse keywords
656 | again. Here is the `from` parser with the alias name blacklist
657 | expanded.
658 |
659 | > from :: Parser [TableRef]
660 | > from = keyword_ "from" >> commaSep1 tref
661 | > where
662 | > tref = nonJoinTref >>= suffixWrapper joinTrefSuffix
663 | > joinTrefSuffix t0 = (do
664 | > nat <- option False (True <$ keyword_ "natural")
665 | > TRJoin t0 <$> joinType
666 | > <*> nonJoinTref
667 | > <*> optionMaybe (joinCondition nat))
668 | > >>= suffixWrapper joinTrefSuffix
669 | > nonJoinTref = choice [TRSimple <$> identifier
670 | > ,try (TRQueryExpr <$> parens queryExpr)
671 | > ,TRParens <$> parens tref]
672 | > >>= suffixWrapper (try . alias)
673 | > alias tr = try (TRAlias tr <$> (optional (keyword_ "as") *> aliasIdentifier))
674 | > aliasIdentifier = blackListIdentifier
675 | > [-- join keywords
676 | > "natural"
677 | > ,"inner"
678 | > ,"outer"
679 | > ,"cross"
680 | > ,"left"
681 | > ,"right"
682 | > ,"full"
683 | > ,"join"
684 | > ,"on"
685 | > ,"using"
686 | > -- subsequent clause keywords
687 | > ,"where"
688 | > ,"group"
689 | > ,"having"
690 | > ,"order"
691 | > ]
692 |
693 | Here is the final query expression parser:
694 |
695 | > queryExpr :: Parser QueryExpr
696 | > queryExpr = Select
697 | > <$> selectList
698 | > <*> option [] from
699 | > <*> optionMaybe whereClause
700 | > <*> option [] groupByClause
701 | > <*> optionMaybe having
702 | > <*> option [] orderBy
703 |
--------------------------------------------------------------------------------
/FunctionsAndTypesForParsing.lhs:
--------------------------------------------------------------------------------
1 |
2 | [[functions-and-types-for-parsing]]
3 | = Functions and types for parsing
4 |
5 | In this file is the source and explanation for the parsing functions
6 | which we've been using, and some limited notes about the wrappers and
7 | full types in Parsec.
8 |
9 | > module FunctionsAndTypesForParsing where
10 | >
11 | > import Text.Parsec (ParseError)
12 | > import Text.Parsec.String (Parser)
13 | > import Text.Parsec.String.Parsec (parse)
14 | > import Text.Parsec.String.Char (oneOf)
15 | > import Text.Parsec.String.Combinator (eof,manyTill,anyToken)
16 | > import Control.Applicative ((<$>), (<*>), (<*), (*>), many)
17 | > import Control.Monad (void)
18 |
19 |
20 | == Functions for parsing
21 |
22 | Here are the testing functions which were used earlier:
23 |
24 | The basic parse function: this is a pretty simple wrapper. The parse
25 | function from parsec just adds a filename to use in parse errors,
26 | which is set as the empty string here.
27 |
28 | > regularParse :: Parser a -> String -> Either ParseError a
29 | > regularParse p = parse p ""
30 |
31 | 'parse' is a basic function in the family of functions for running
32 | parsers in Parsec. You can compose the parser functions in the Parser
33 | monad, then run the top level function using 'parse' and get back an
34 | 'Either ParserError a' as the result. There are a few alternatives to
35 | 'parse' in Parsec, mostly when you are using a more general parser
36 | type instead of 'Parser a' (which is an alias for 'ParsecT String ()
37 | Identity a'). Have a look in the Text.Parsec.Prim module for these
38 | .
39 |
40 | This function will run the parser, but additionally fail if it doesn't
41 | consume all the input.
42 |
43 | > parseWithEof :: Parser a -> String -> Either ParseError a
44 | > parseWithEof p = parse (p <* eof) ""
45 |
46 | This function will apply the parser, then also return any left over
47 | input which wasn't parsed.
48 |
49 | > parseWithLeftOver :: Parser a -> String -> Either ParseError (a,String)
50 | > parseWithLeftOver p = parse ((,) <$> p <*> leftOver) ""
51 | > where leftOver = manyTill anyToken eof
52 |
53 | TODO: what happens when you use 'many anyToken <* eof' variations
54 | instead? Maybe should talk about greediness? Or talk about it in a
55 | better place in the tutorial.
56 |
57 | > parseWithWSEof :: Parser a -> String -> Either ParseError a
58 | > parseWithWSEof p = parseWithEof (whiteSpace *> p)
59 | > where whiteSpace = void $ many $ oneOf " \n\t"
60 |
61 | You should have a look at the two helper executables, and see if you
62 | can understand the code now. You can see them online here:
63 |
64 |
65 |
66 |
67 |
68 | == type signatures revisited
69 |
70 | todo: update this to refer to real parsec instead of the string
71 | wrappers here.
72 |
73 | I think you should always use type signatures with Parsec. Because the
74 | Parsec code is really generalized, without the type GHC will refuse to
75 | compile this code. Try commenting out the type signature above and
76 | loading into ghci to see the error message.
77 |
78 | There is an alternative: you can get this code to compile without a
79 | type signature by using the NoMonomorphismRestriction language
80 | pragma. You can also see the type signature that GHC will choose for
81 | this function by commenting the type signature and using -Wall and
82 | -XNoMonomorphismRestriction together. Using NoMonomorphismRestriction
83 | is a popular solution to these sorts of problems in haskell.
84 |
85 | It's up to you whether you prefer to always write type signatures when
86 | you are developing parsing code, or use the NoMonomorphismRestriction
87 | pragma. Even if you can use NoMonomorphismRestriction, when using
88 | explicit type signatures you usually get much simpler compiler error
89 | messages.
90 |
91 |
92 | == Parser
93 |
94 | The definition of Parser and a partial explanation of the full type
95 | signature.
96 |
97 | ```
98 | type Parser = Parsec String ()
99 | ```
100 |
101 | This means that a function returning Parser a parses from a String
102 | with () as the initial state.
103 |
104 | The Parsec type is defined like this:
105 |
106 | ```
107 | type Parsec s u = ParsecT s u Identity
108 | ```
109 |
110 | ParsecT is a monad transformer, I think it is the primitive one in the
111 | Parsec library, and the 'Parsec' type is a type alias which sets the
112 | base monad to be Identity.
113 |
114 | Here is the haddock for the ParsecT type:
115 |
116 | `ParsecT s u m a` is a parser with stream type `s`, user state type `u`,
117 | underlying monad `m` and return type `a`.
118 |
119 | The full types that you see like this:
120 |
121 | ```
122 | satisfy :: Stream s m Char => (Char -> Bool) -> ParsecT s u m Char
123 | ```
124 |
125 | refer to the same things (stream type s, user state type u, underlying
126 | monad m).
127 |
128 | We are using String as the stream type (i.e. the input type), () as
129 | the user state type (this effectively means no user state, since ()
130 | only has one value), and the underlying monad is Identity: we are
131 | using no other underlying monad, so `Parser a` expands to `ParsecT
132 | String () Identity a`.
133 |
134 | I.e. the source is String, the user state is (), and the underlying monad
135 | is Identity.
136 |
137 | == Other information
138 |
139 | TODO: Here is some other information on Parsec and Haskell:
140 | links, tutorials on fp, section in rwh, lyah?, old parsec docs,
141 | parsec docs on hackage, other parser combinator libs (uu, polyparse,
142 | trifecta?)
143 |
--------------------------------------------------------------------------------
/GettingStarted.lhs:
--------------------------------------------------------------------------------
1 |
2 | [[getting-started]]
3 | = Getting started
4 |
5 | This is an introduction to parsing with Haskell and Parsec.
6 |
7 | Prerequisites: you should know some basic Haskell and have GHC and
8 | cabal-install installed (installing the Haskell Platform will give you
9 | this).
10 |
11 | This tutorial was originally written using GHC 7.6.3 and Parsec 3.1.3,
12 | which are the versions which come with the Haskell Platform
13 | 2013.2.0.0. It should also work fine with GHC 7.8.4 and GHC 7.10.2 and
14 | through to at least the latest release of Parsec 3.1.x.
15 |
16 | This tutorial was written using Literate Haskell files available here:
17 | link:https://github.com/JakeWheat/intro_to_parsing[].
18 |
19 | I recommend you download them all, and follow along in your favourite
20 | editor, and use GHCi to experiment. To download the intro_to_parsing
21 | files, use:
22 |
23 | ```
24 | git clone https://github.com/JakeWheat/intro_to_parsing.git
25 | ```
26 |
27 | Here are the imports.
28 |
29 | > import Text.Parsec.String (Parser)
30 | > import Text.Parsec.String.Char (anyChar)
31 | > import Text.Parsec.String.Char
32 | > import FunctionsAndTypesForParsing (regularParse, parseWithEof, parseWithLeftOver)
33 | > import Data.Char
34 | > import Text.Parsec.String.Combinator (many1)
35 |
36 | == First parser
37 |
38 | The first parser:
39 |
40 | ```
41 | anyChar :: Parser Char
42 | ```
43 |
44 | This parser is in the module `Text.Parsec.Char`. There is a wrapper in
45 | this tutorial's project, `Text.Parsec.String.Char`, which gives this
46 | function a simplified type.
47 |
48 | Whenever we write a parser which parses to a value of type `a`, we give
49 | it the return type of `Parser a`. In this case, we parse a character
50 | so the return type is `Parser Char`. The `Parser` type itself is in
51 | the module `Text.Parsec.String`. We will cover this in more detail
52 | later.
53 |
54 | Let's use this parser. I will assume you have GHC and cabal-install
55 | installed (which provides the 'cabal' executable) and both are in your
56 | PATH. The Haskell Platform is one way that provides this.
57 |
58 | Change to the directory where you downloaded the intro_to_parsing
59 | source files (which will contain the GettingStarted.lhs file). Then
60 | you can set up a cabal sandbox and be ready to work with the code by
61 | running the following commands in that directory:
62 |
63 | ```
64 | cabal v1-update
65 | cabal v1-sandbox init
66 | cabal v1-install parsec HUnit
67 | cabal v1-repl
68 | ```
69 |
70 | Now you will get the ghci prompt. Type in ':l GettingStarted.lhs'. You
71 | can run the parser using a wrapper, enter the following at the ghci
72 | prompt: `regularParse anyChar "a"`.
73 |
74 | Here is a transcript of running ghci via 'cabal repl':
75 |
76 | ```
77 | $ cabal repl
78 | Warning: The repl command is a part of the legacy v1 style of cabal usage.
79 |
80 | Please switch to using either the new project style and the new-repl command
81 | or the legacy v1-repl alias as new-style projects will become the default in
82 | the next version of cabal-install. Please file a bug if you cannot replicate a
83 | working v1- use case with the new-style commands.
84 |
85 | For more information, see: https://wiki.haskell.org/Cabal/NewBuild
86 |
87 | GHCi, version 8.6.5: http://www.haskell.org/ghc/ :? for help
88 | Prelude> :l GettingStarted.lhs
89 | [1 of 5] Compiling Text.Parsec.String.Char ( Text/Parsec/String/Char.hs, interpreted )
90 | [2 of 5] Compiling Text.Parsec.String.Combinator ( Text/Parsec/String/Combinator.hs, interpreted )
91 | [3 of 5] Compiling Text.Parsec.String.Parsec ( Text/Parsec/String/Parsec.hs, interpreted )
92 | [4 of 5] Compiling FunctionsAndTypesForParsing ( FunctionsAndTypesForParsing.lhs, interpreted )
93 | [5 of 5] Compiling Main ( GettingStarted.lhs, interpreted )
94 | Ok, five modules loaded.
95 | *Main> regularParse anyChar "a"
96 | Right 'a'
97 | *Main>
98 | ```
99 |
100 | You can exit ghci by entering ':quit' or using Ctrl-d. From now on, to
101 | start ghci again, you can just change to the directory with
102 | GettingStarted.lhs and run 'cabal repl'. ghci should have readline
103 | support so you can browse through your command history using up and
104 | down arrow, etc.
105 |
106 | This is the type of `regularParse`. It is wrapper which takes a parser
107 | function such as anyChar, and wraps it so you can parse a string to
108 | either a parse error, or the return value from your parser function:
109 |
110 | ```
111 | regularParse :: Parser a -> String -> Either ParseError a
112 | ```
113 |
114 |
115 | Here are some examples of running this parser on various input:
116 |
117 | ```
118 | *Main> regularParse anyChar "a"
119 | Right 'a'
120 |
121 | *Main> regularParse anyChar "b"
122 | Right 'b'
123 |
124 | *Main> regularParse anyChar "0"
125 | Right '0'
126 |
127 | *Main> regularParse anyChar " "
128 | Right ' '
129 |
130 | *Main> regularParse anyChar "\n"
131 | Right '\n'
132 |
133 | *Main> regularParse anyChar "aa"
134 | Right 'a'
135 |
136 | *Main> regularParse anyChar ""
137 | Left (line 1, column 1):
138 | unexpected end of input
139 |
140 | *Main> regularParse anyChar " a"
141 | Right ' '
142 | ```
143 |
144 |
145 | You can see that if there are no characters, we get an error.
146 | Otherwise, it takes the first character and returns it, and throws
147 | away any trailing characters. The details of the helper function
148 | `regularParse` will come later.
149 |
150 | Here are two alternatives to `regularParse` you can also use for
151 | experimenting for the time being:
152 |
153 | ```
154 | parseWithEof :: Parser a -> String -> Either ParseError a
155 | ```
156 |
157 | ```
158 | parseWithLeftOver :: Parser a -> String -> Either ParseError (a,String)
159 | ```
160 |
161 | These can be useful when you are not sure if your parser is consuming
162 | all your input string or not. The eof parser will error if you haven't
163 | consumed all the input, and the leftover parser can instead tell you
164 | what was not consumed from the input.
165 |
166 | ```
167 | *Main> regularParse anyChar "a"
168 | Right 'a'
169 |
170 | *Main> parseWithEof anyChar "a"
171 | Right 'a'
172 |
173 | *Main> parseWithLeftOver anyChar "a"
174 | Right ('a',"")
175 |
176 | *Main> *Main> regularParse anyChar ""
177 | Left (line 1, column 1):
178 | unexpected end of input
179 |
180 | *Main> parseWithEof anyChar ""
181 | Left (line 1, column 1):
182 | unexpected end of input
183 |
184 | *Main> parseWithLeftOver anyChar ""
185 | Left (line 1, column 1):
186 | unexpected end of input
187 |
188 | *Main> regularParse anyChar "aa"
189 | Right 'a'
190 |
191 | *Main> parseWithEof anyChar "aa"
192 | Left (line 1, column 2):
193 | unexpected 'a'
194 | expecting end of input
195 |
196 | *Main> parseWithLeftOver anyChar "aa"
197 | Right ('a',"a")
198 |
199 | *Main> parseWithLeftOver anyChar "abc"
200 | Right ('a',"bc")
201 |
202 | ```
203 |
204 | You can use these functions and ghci to experiment. Try running all
205 | the parsers in ghci on various input strings as you work through the
206 | document to get a good feel for all the different features. Tip: you
207 | can also write the parsers inline in the function call, for example:
208 |
209 | ```
210 | *Main> regularParse (many1 digit) "1"
211 | Right "1"
212 |
213 | *Main> regularParse (many1 digit) "122"
214 | Right "122"
215 | ```
216 |
217 | This can be used to quickly try out new ad hoc parsing code.
218 |
219 | == Type signatures
220 |
221 | The real Parsec functions have quite complex type signatures. This
222 | makes a lot of things very tricky before you understand them, and can
223 | make some of the error messages you'll see really difficult to
224 | understand. I've created some wrapper modules, which set the types of
225 | all the functions from Parsec we use to be much more restricted. This
226 | will make the types easy to understand, and reduce the amount of
227 | tricky to understand compiler errors you get. You can use this
228 | approach when writing your own parser code with Parsec. These wrapper
229 | modules are created with the following name pattern:
230 | `Text.Parsec.Char` -> `Text.Parsec.String.Char`.
231 |
232 | Later on, we will look at the general types in more detail.
233 |
234 | == Text.Parsec.Char
235 |
236 | Let's go through some of the functions in `Text.Parsec.Char` module from
237 | the Parsec package. The haddock is available here:
238 | .
239 |
240 | Here is the `satisfy` function, with its full type signature.
241 |
242 | ```
243 | satisfy :: Stream s m Char => (Char -> Bool) -> ParsecT s u m Char
244 | ```
245 |
246 | This is one of the main primitive functions in Parsec. This looks at
247 | the next character from the current input, and if the function (`Char
248 | -> Bool`) returns true for this character, it 'pops' it from the input
249 | and returns it. In this way, the current position in the input string
250 | is tracked behind the scenes.
251 |
252 | In the simplified type wrappers, the `satisfy` function's type is this:
253 |
254 | ```
255 | satisfy :: (Char -> Bool) -> Parser Char
256 | ```
257 |
258 | This makes it a bit clearer what it is doing. All the functions in
259 | `Text.Parsec.Char` are reproduced in the local `Text.Parsec.String.Char`
260 | module with simplified types
261 | ().
262 |
263 | Here are some examples of satisfy in action.
264 |
265 | ```
266 | *Main> parseWithEof (satisfy (=='a')) "a"
267 | Right 'a'
268 |
269 | *Main> parseWithEof (satisfy (=='b')) "a"
270 | Left (line 1, column 1):
271 | unexpected "a"
272 |
273 | *Main> parseWithEof (satisfy (`elem` "abc")) "a"
274 | Right 'a'
275 |
276 | *Main> parseWithEof (satisfy (`elem` "abc")) "d"
277 | Left (line 1, column 1):
278 | unexpected "d"
279 |
280 | *Main> parseWithEof (satisfy isDigit) "d"
281 | Left (line 1, column 1):
282 | unexpected "d"
283 |
284 | *Main> parseWithEof (satisfy isDigit) "1"
285 | Right '1'
286 | ```
287 |
288 | You can see that it is easy to use `==`, or `elem` or one of the
289 | functions from the Data.Char module.
290 |
291 | If you look at the docs on hackage
292 | ,
293 | you can view the source. The implementations of most of the functions
294 | in `Text.Parsec.Char` are straightforward. I recommend you look at the
295 | source for all of these functions.
296 |
297 | You can see in the source that the `satisfy` function is a little more
298 | primitive than the other functions.
299 |
300 | Here is the parser we used above in the `anyChar` parser:
301 |
302 | ```
303 | anyChar :: Parser Char
304 | ```
305 |
306 | If you look at the source via the haddock link above, you can see it
307 | uses `satisfy`.
308 |
309 | Here are some other simple wrappers of `satisfy` from
310 | `Text.Parsec.Char` which use different validation functions.
311 |
312 | The `char` parser parses a specific character which you supply:
313 |
314 | ```
315 | char :: Char -> Parser Char
316 | ```
317 |
318 | ```
319 | *Main> regularParse (char 'a') "a"
320 | Right 'a'
321 |
322 | *Main> regularParse (char 'a') "b"
323 | Left (line 1, column 1):
324 | unexpected "b"
325 | expecting "a"
326 | ```
327 |
328 | These parsers all parse single hardcoded characters
329 |
330 | ```
331 | space :: Parser Char
332 | newline :: Parser Char
333 | tab :: Parser Char
334 | ```
335 |
336 | They all return a `Char`. You might be able to guess what `Char` each
337 | of them returns, you can double check your intuition using ghci.
338 |
339 | These parser all parse one character from a hardcoded set of
340 | characters:
341 |
342 | ```
343 | upper :: Parser Char
344 | lower :: Parser Char
345 | alphaNum :: Parser Char
346 | letter :: Parser Char
347 | digit :: Parser Char
348 | hexDigit :: Parser Char
349 | octDigit :: Parser Char
350 | ```
351 |
352 | In these cases, the return value is less redundant.
353 |
354 | `oneOf` and `noneOf` parse any of the characters in the given list
355 |
356 | ```
357 | oneOf :: [Char] -> Parser Char
358 | noneOf :: [Char] -> Parser Char
359 | ```
360 |
361 | These are just simple wrappers of satisfy using `elem`.
362 |
363 |
364 | You should try all these parsers out in ghci, e.g.:
365 |
366 | ```
367 |
368 | regularParse space " "
369 |
370 | regularParse upper "A"
371 |
372 | regularParse (char 'b') "B"
373 |
374 | regularParse (oneOf "abc") "c"
375 |
376 | ```
377 |
378 | Here are the final functions in `Text.Parsec.Char`:
379 |
380 | `string` matches a complete string, one character at a time. I think
381 | the implementation of this function is like it is for efficiency when
382 | parsing from, e.g., `Data.Text.Text`, instead of `String`, but I'm not
383 | sure. We will skip the detailed explanation of the implementation for
384 | now.
385 |
386 | ```
387 | string :: String -> Parser String
388 | ```
389 |
390 | ```
391 | *Main> regularParse (string "one") "one"
392 | Right "one"
393 |
394 | *Main> regularParse (string "one") "two"
395 | Left (line 1, column 1):
396 | unexpected "t"
397 | expecting "one"
398 | ```
399 |
400 | Here is the `spaces` parser, which, if you look at the source, you can
401 | see uses a combinator (`skipMany`). We will cover this combinator
402 | shortly.
403 |
404 | ```
405 | spaces :: Parser ()
406 | ```
407 |
408 | ```
409 | *Main> regularParse spaces ""
410 | Right ()
411 |
412 | *Main> regularParse spaces " "
413 | Right ()
414 |
415 | *Main> regularParse spaces " "
416 | Right ()
417 |
418 | *Main> regularParse spaces " a "
419 | Right ()
420 |
421 | *Main> regularParse spaces "a a "
422 | Right ()
423 | ```
424 |
425 | It always succeeds.
426 |
427 | == A couple of helper executables
428 |
429 | Here are two exes which you can use to parse either a string or a file
430 | to help you experiment. This will save you having to figure out how to
431 | write this boilerplate until later.
432 |
433 |
434 |
435 |
436 |
437 | Now you can easily experiment using ghci, or with a string on the
438 | command line, or by putting the text to parse into a file and parsing
439 | that.
440 |
--------------------------------------------------------------------------------
/IntroductionToCombinators.lhs:
--------------------------------------------------------------------------------
1 |
2 | = Introduction to combinators
3 |
4 | In this tutorial we will develop a parser for a very simple expression
5 | language, and start learning about the set of combinators which comes
6 | with Parsec.
7 |
8 | > import Text.Parsec (ParseError)
9 | > import Text.Parsec.String (Parser)
10 | > import Text.Parsec.String.Parsec (try)
11 | > import Text.Parsec.String.Char (oneOf, char, digit
12 | > ,string, letter, satisfy)
13 | > import Text.Parsec.String.Combinator (many1, choice, chainl1, between)
14 | > import Control.Applicative ((<$>), (<*>), (<$), (<*), (*>), (<|>), many)
15 | > import Control.Monad (void, ap)
16 | > import Data.Char (isLetter, isDigit)
17 | > import FunctionsAndTypesForParsing
18 |
19 | Let's create a very simple expression language:
20 |
21 | > data SimpleExpr = Num Integer
22 | > | Var String
23 | > | Add SimpleExpr SimpleExpr
24 | > | Parens SimpleExpr
25 | > deriving (Eq,Show)
26 |
27 | It's a bit simple and almost useless at the moment, but we will expand
28 | on this a lot in later tutorials.
29 |
30 | Here are some examples:
31 |
32 | > simpleExprExamples :: [(String,SimpleExpr)]
33 | > simpleExprExamples =
34 | > [("a", Var "a")
35 | > ,("1", Num 1)
36 | > ,("2 + 3", Add (Num 2) (Num 3))
37 | > ,("(42)", Parens (Num 42))]
38 |
39 | TODO: some more complex examples
40 |
41 | Let's write a simple parser for these, and introduce a few things
42 | along the way.
43 |
44 | We will write a parser for each constructor separately, then look at
45 | how we can write a parser for all of them together.
46 |
47 | ==== Num
48 |
49 | To parse a number, we need to parse one or more digits, and then read
50 | the resulting string. We can use the combinator 'many1' to help with
51 | this. We will also use do notation.
52 |
53 | > num :: Parser SimpleExpr
54 | > num = do
55 | > n <- many1 digit
56 | > return (Num (read n))
57 |
58 | Let's try it out. TODO: examples
59 |
60 | How does it work? First, we parse one or more (many1) digits (digit),
61 | and give the result the name 'n'. Then we convert the string to an
62 | integer using read, and wrap it in a Num constructor.
63 |
64 | The many1 function's type looks like this:
65 |
66 | ```
67 | many1 :: Parser a -> Parser [a]
68 | ```
69 |
70 | It applies the parser given one or more times, returning the result.
71 |
72 | TODO: example show what happens when you use 'many' instead of 'many1'
73 |
74 | ==== Var
75 |
76 | For var, we have to decide on a syntax for the identifiers. Let's go
77 | for a common choice: identifiers must start with a letter or
78 | underscore, and then they can be followed by zero or more letters,
79 | underscores or digits in any combination.
80 |
81 | > var :: Parser SimpleExpr
82 | > var = do
83 | > fc <- firstChar
84 | > rest <- many nonFirstChar
85 | > return (Var (fc:rest))
86 | > where
87 | > firstChar = satisfy (\a -> isLetter a || a == '_')
88 | > nonFirstChar = satisfy (\a -> isDigit a || isLetter a || a == '_')
89 |
90 | This time, we create two helper parsers: firstChar - which parses a
91 | letter or underscore, and nonFirstChar = which parses a digit, letter
92 | or underscore. This time, we use the 'many' function instead of
93 | 'many1': TODO - demonstrate why using examples only.
94 |
95 | > add :: Parser SimpleExpr
96 | > add = do
97 | > e0 <- num
98 | > void $ char '+'
99 | > e1 <- num
100 | > return (Add e0 e1)
101 |
102 | There are two simplifications here. We don't have a general simple
103 | expression parser yet, so it only supports parsing adding two number
104 | literals, and we don't deal with whitespace yet, so you must write
105 | "1+2" and not "1 + 2". We will deal with both of these issues below.
106 |
107 | We used void to ignore the return of "char '+'". This is not required,
108 | but supresses a warning which you should get (since you are using
109 | -Wall, right?) and I think it is also good style to explicitly say
110 | that the result is being ignored.
111 |
112 | To use -Wall in ghci, enter the following at the prompt:
113 |
114 | ```
115 | *Main> :set -Wall
116 | ```
117 |
118 | Try it out, then replace the line
119 |
120 | ```
121 | void $ char '+'
122 | ```
123 |
124 | with
125 |
126 | ```
127 | char '+'
128 | ```
129 |
130 | And check you see the warning. Another way of avoiding the warning is
131 | to write this:
132 |
133 | ```
134 | _ <- char '+'
135 | ```
136 |
137 | ==== parens
138 |
139 | > parens :: Parser SimpleExpr
140 | > parens = do
141 | > void $ char '('
142 | > e <- num
143 | > void $ char ')'
144 | > return (Parens e)
145 |
146 | The same two issues from the 'add' parser apply here: whitespace and
147 | lack of a general simple expression parser, so it just uses the num
148 | parser again instead. Now we will tackle the whitespace issue.
149 |
150 | === whitespace and lexeme parsing
151 |
152 | Here is a parser which will skip zero or more whitespace characters.
153 |
154 | > whiteSpace :: Parser ()
155 | > whiteSpace = void $ many $ oneOf " \n\t"
156 |
157 | In the original parsec documentation, one of the concepts mentioned is
158 | the idea of 'lexeme' parsing. This is a style in which every token
159 | parser should also consume and ignore any trailing whitespace. This is
160 | a simple convention which with a bit of care allows skipping
161 | whitespace exactly once wherever it needs to be skipped. To complete
162 | the lexeme style, we should also always skip leading whitespace at the
163 | top level only.
164 |
165 | > parseWithWhitespace :: Parser a -> String -> Either ParseError a
166 | > parseWithWhitespace p = parseWithEof wrapper
167 | > where
168 | > wrapper = do
169 | > whiteSpace
170 | > p
171 |
172 | the wrapper function can also use (>>) to make it a bit shorter:
173 |
174 | ```
175 | wrapper = whiteSpace >> p
176 | ```
177 |
178 | Here is a shorter version of this function using (>>):
179 |
180 | > parseWithWhitespace' :: Parser a -> String -> Either ParseError a
181 | > parseWithWhitespace' p = parseWithEof (whiteSpace >> p)
182 |
183 | Here is the num parser rewritten in the lexeme style:
184 |
185 | > lexeme :: Parser a -> Parser a
186 | > lexeme p = do
187 | > x <- p
188 | > whiteSpace
189 | > return x
190 |
191 | TODO: review the placement of the function 'lexeme' in all the code
192 | below. Maybe something could be said about all the places that it can
193 | be put.
194 |
195 | > num' :: Parser SimpleExpr
196 | > num' = lexeme $ do
197 | > n <- many1 digit
198 | > return (Num (read n))
199 |
200 | Here it is in action:
201 |
202 | ```
203 | *Main Data.List> parseWithEof num "1"
204 | Right (Num 1)
205 |
206 | *Main Data.List> parseWithEof num " 1"
207 | Left (line 1, column 1):
208 | unexpected " "
209 | expecting digit
210 |
211 | *Main Data.List> parseWithEof num "1 "
212 | Left (line 1, column 2):
213 | unexpected ' '
214 | expecting digit or end of input
215 |
216 | *Main Data.List> parseWithEof num " 1 "
217 | Left (line 1, column 1):
218 | unexpected " "
219 | expecting digit
220 |
221 | *Main Data.List> parseWithWhitespace num' "1"
222 | Right (Num 1)
223 |
224 | *Main Data.List> parseWithWhitespace num' " 1"
225 | Right (Num 1)
226 |
227 | *Main Data.List> parseWithWhitespace num' "1 "
228 | Right (Num 1)
229 |
230 | *Main Data.Lst> parseWithWhitespace num' " 1 "
231 | Right (Num 1)
232 | ```
233 |
234 | Here are the other functions in lexeme style.
235 |
236 | > var' :: Parser SimpleExpr
237 | > var' = lexeme $ do
238 | > fl <- firstChar
239 | > rest <- many nonFirstChar
240 | > return (Var (fl:rest))
241 | > where
242 | > firstChar = satisfy (\a -> isLetter a || a == '_')
243 | > nonFirstChar = satisfy (\a -> isDigit a || isLetter a || a == '_')
244 |
245 | > add' :: Parser SimpleExpr
246 | > add' = do
247 | > e0 <- num'
248 | > void $ lexeme $ char '+'
249 | > e1 <- num'
250 | > return (Add e0 e1)
251 |
252 | > parens' :: Parser SimpleExpr
253 | > parens' = do
254 | > void $ lexeme $ char '('
255 | > e <- num'
256 | > void $ lexeme $ char ')'
257 | > return (Parens e)
258 |
259 | In this style, you have to be slightly careful to make sure you call
260 | whitespace at the right points.
261 |
262 | Let's try and implement the simpleExpr parser. We can use a function
263 | called 'choice':
264 |
265 | > numOrVar :: Parser SimpleExpr
266 | > numOrVar = choice [num', var']
267 |
268 | It tries each parser one at a time, finishing with the first one that
269 | succeeds. There are some more details about this later on.
270 |
271 | Here is another way to write the numOrVar parser:
272 |
273 | > numOrVar' :: Parser SimpleExpr
274 | > numOrVar' = num' <|> var'
275 |
276 | In general, you can write 'choice [p0, p1, p2, ...]' as 'p0 <|> p1 <|>
277 | p2 <|> ...'.
278 |
279 | TODO: a bunch of examples
280 |
281 | Here is the first version of the simpleExpr parser:
282 |
283 | > simpleExpr :: Parser SimpleExpr
284 | > simpleExpr = choice [num', var', add', parens']
285 |
286 | TODO: a bunch of examples
287 |
288 | It works well for some of the parsers, but fails with add': the num'
289 | always partially succeeds first, then fails, so the add' is never
290 | tried. We can rearrange this parser like this:
291 |
292 | > simpleExpr' :: Parser SimpleExpr
293 | > simpleExpr' = choice [add', num', var', parens']
294 |
295 | TODO: examples again
296 |
297 | We have another problem now: when we start parsing the add', see a
298 | num, then fail, it gives up completely. This is because the 'choice'
299 | function (and <|>) will only try the next parser if the parser that
300 | failed consumed no input before failing.
301 |
302 | TODO: show examples with satify and choice on 1 char match/no match,
303 | and two char match/no match.
304 |
305 | Here is one way to fix it:
306 |
307 | > simpleExpr'' :: Parser SimpleExpr
308 | > simpleExpr'' = choice [try add', num', var', parens']
309 |
310 | The try function implements backtracking. When this is used in a
311 | choice like this, it means that if the add' parser fails, it will undo
312 | the consumed input and carry on with the next option, instead of
313 | failing completely. If there is another place using try higher up in
314 | the call stack, then we will continue there, otherwise the whole parse
315 | will fail immediately.
316 |
317 | The same happens with <|>, we can implement the simpleExpr parser like
318 | this also:
319 |
320 | > simpleExpr''' :: Parser SimpleExpr
321 | > simpleExpr''' = try add' <|> num' <|> var' <|> parens'
322 |
323 | TODO: show the examples all working
324 |
325 | Now we can make 'parens' and 'add' use a general simple expression
326 | parser. Parens is simple:
327 |
328 | > parens'' :: Parser SimpleExpr
329 | > parens'' = do
330 | > void $ lexeme $ char '('
331 | > e <- simpleExpr'''
332 | > void $ lexeme $ char ')'
333 | > return (Parens e)
334 |
335 | There is a problem implementing 'add' in the same way:
336 |
337 | > add'' :: Parser SimpleExpr
338 | > add'' = do
339 | > e0 <- simpleExpr'''
340 | > void $ lexeme $ char '+'
341 | > e1 <- simpleExpr'''
342 | > return (Add e0 e1)
343 |
344 | It will never return since it calls simpleExpr''' which calls add''
345 | again.
346 |
347 | Let's look at another problem:
348 |
349 | ```
350 | *Main> parseWithWhitespace simpleExpr''' " 1 + 1 + 1"
351 | Left (line 1, column 8):
352 | unexpected '+'
353 | expecting end of input
354 | ```
355 |
356 | Our parser will only parse one operator, and not a chain of them.
357 |
358 | Here is one way to solve it:
359 |
360 | > simpleExpr4 :: Parser SimpleExpr
361 | > simpleExpr4 = do
362 | > e <- term
363 | > maybeAddSuffix e
364 | > where
365 | > maybeAddSuffix e = addSuffix e <|> return e
366 | > addSuffix e0 = do
367 | > void $ lexeme $ char '+'
368 | > e1 <- term
369 | > maybeAddSuffix (Add e0 e1)
370 | > term = num' <|> var' <|> parens'
371 |
372 | TODO: explain how this works in much more detail
373 |
374 | This forces our Add operator to be left associative. This also solves
375 | the previous problem with add calling simpleExpr recursively. There is
376 | lots of discussion about these issues in Parsing theory documents you
377 | can find online, etc..
378 |
379 | == general simple expression parser
380 |
381 | Here is the all the parser code written out again for clarity.
382 |
383 | > numD :: Parser SimpleExpr
384 | > numD = lexeme $ do
385 | > n <- many1 digit
386 | > return $ Num $ read n
387 |
388 | > varD :: Parser SimpleExpr
389 | > varD = lexeme $ do
390 | > fl <- firstChar
391 | > rest <- many nonFirstChar
392 | > return $ Var (fl:rest)
393 | > where
394 | > firstChar = satisfy (\a -> isLetter a || a == '_')
395 | > nonFirstChar = satisfy (\a -> isDigit a || isLetter a || a == '_')
396 |
397 | > parensD :: Parser SimpleExpr
398 | > parensD = do
399 | > void $ lexeme $ char '('
400 | > e <- simpleExprD
401 | > void $ lexeme $ char ')'
402 | > return $ Parens e
403 |
404 | > simpleExprD :: Parser SimpleExpr
405 | > simpleExprD = do
406 | > e <- term
407 | > maybeAddSuffix e
408 | > where
409 | > maybeAddSuffix e =
410 | > choice [addSuffix e
411 | > ,return e]
412 | > addSuffix e0 = do
413 | > void $ lexeme $ char '+'
414 | > e1 <- term
415 | > maybeAddSuffix (Add e0 e1)
416 | > term = numD <|> varD <|> parensD
417 |
418 |
419 | === Testing with the examples
420 |
421 | TODO: write a little manual tester that accepts a parser and a list of
422 | examples, and checks they all parse correctly.
423 |
424 | === Testing with quickcheck
425 |
426 | Let's see if we can check with quickcheck. It's a bit tricky testing
427 | parsers in this way, but one way to do something useful is to generate
428 | random asts, convert them to concrete syntax, parse them, and check
429 | the result. We can write a simple 'pretty printer' to convert an ast
430 | to concrete syntax.
431 |
432 | ==== a pretty printer
433 |
434 | TODO: a really simple pretty printer just pasting strings together, no
435 | layout.
436 |
437 | ==== the quick check code
438 |
439 | TODO: write a quickcheck property and arbitary instance and show
440 | running it at the ghci prompt
441 |
442 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2013-2019, Jake Wheat
2 |
3 | All rights reserved.
4 |
5 | Redistribution and use in source and binary forms, with or without
6 | modification, are permitted provided that the following conditions are met:
7 |
8 | * Redistributions of source code must retain the above copyright
9 | notice, this list of conditions and the following disclaimer.
10 |
11 | * Redistributions in binary form must reproduce the above
12 | copyright notice, this list of conditions and the following
13 | disclaimer in the documentation and/or other materials provided
14 | with the distribution.
15 |
16 | * Neither the name of Jake Wheat nor the names of other
17 | contributors may be used to endorse or promote products derived
18 | from this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 |
--------------------------------------------------------------------------------
/ParseFile.lhs:
--------------------------------------------------------------------------------
1 |
2 | = Parsing a file executable
3 |
4 | This is the example exe file which will parse a string from the file
5 | given on the command line. You can substitute your own parser for the
6 | parser given below.
7 |
8 |
9 | > import System.Environment
10 | > import Text.Parsec
11 | > import Text.Parsec.String
12 | > import Control.Monad
13 |
14 | > main :: IO ()
15 | > main = do
16 | > a <- getArgs
17 | > case a of
18 | > [str] -> parseFromFile myParser str >>= either print print
19 | > _ -> error "please pass one argument with the file containing the text to parse"
20 |
21 | This is the parser which you can replace with your own code:
22 |
23 | > myParser :: Parser ()
24 | > myParser = void $ string "correct"
25 |
26 | Here is an example of running this program:
27 |
28 | $ echo x > source_text && runhaskell ParseFile.lhs source_text
29 | "source_text" (line 1, column 1):
30 | unexpected "x"
31 | expecting "correct"
32 | $ echo correct > source_text && runhaskell ParseFile.lhs source_text
33 | ()
34 |
--------------------------------------------------------------------------------
/ParseSQLQueries.lhs:
--------------------------------------------------------------------------------
1 |
2 | = Parsing a file with SQL queries
3 |
4 | This is an example exe which parses a list of SQL queries in a file
5 | separated by semi colons.
6 |
7 | > import System.Environment
8 | > import Text.Parsec (ParseError, errorPos, sourceName, sourceLine, sourceColumn)
9 | > import Text.Parsec.String
10 | > import Text.Parsec.String.Char
11 | > import Text.Parsec.String.Combinator
12 | > import Control.Applicative ((<$>),(<*), (*>),(<*>), (<$), (<|>), many)
13 | > import Control.Monad
14 | > import SimpleSQLQueryParser0
15 | > import Data.List
16 | > import qualified PrettyPrinting0 as P
17 |
18 | > main :: IO ()
19 | > main = do
20 | > a <- getArgs
21 | > case a of
22 | > [str] -> parseFromFile myParser str
23 | > >>= either (putStrLn . showError)
24 | > (putStrLn . intercalate "\n" . map P.prettyQueryExpr)
25 | > _ -> error "please pass one argument with the file containing the queries to parse"
26 |
27 | > myParser :: Parser [QueryExpr]
28 | > myParser = whitespace
29 | > *> sepBy1 queryExpr semi
30 | > <* optional semi
31 | > <* eof
32 | > where semi = void $ lexeme $ char ';'
33 |
34 | > showError :: ParseError -> String
35 | > showError e =
36 | > let p = errorPos e
37 | > in sourceName p ++ ":" ++ show (sourceLine p) ++ ":"
38 | > ++ show (sourceColumn p) ++ ":\n"
39 | > ++ show e
40 |
--------------------------------------------------------------------------------
/ParseString.lhs:
--------------------------------------------------------------------------------
1 |
2 | = Parsing a string executable
3 |
4 | This is the example exe file which will parse a string given on the
5 | command line. You can substitute your own parser for the parser given
6 | below.
7 |
8 |
9 | > import System.Environment
10 | > import Text.Parsec
11 | > import Text.Parsec.String
12 | > import Control.Monad
13 |
14 | > main :: IO ()
15 | > main = do
16 | > a <- getArgs
17 | > case a of
18 | > [str] -> either print print $ parse myParser "" str
19 | > _ -> error "please pass one argument with the string to parse"
20 |
21 | This is the parser which you can replace with your own code:
22 |
23 | > myParser :: Parser ()
24 | > myParser = void $ string "correct"
25 |
26 | Here is an example of running this program:
27 |
28 | ```
29 | $ runhaskell ParseString.lhs x
30 | (line 1, column 1):
31 | unexpected "x"
32 | expecting "correct"
33 |
34 | $ runhaskell ParseString.lhs correct
35 | ()
36 | ```
37 |
--------------------------------------------------------------------------------
/ParsingTpch.lhs:
--------------------------------------------------------------------------------
1 |
2 | = Parsing TPC-H queries
3 |
4 | In this file we are going to get the TPC-H queries parsing. This can
5 | be an example of taking a parser, identifying some missing features,
6 | and adding them. The TPC-H queries are slightly more complex than
7 | trivial, and will also serve to exercise the human readable layout
8 | feature of the pretty printer.
9 |
10 | Let's try the parser out on the TPC-H queries.
11 |
12 | ```
13 | ```
14 |
15 | Summary of errors so far:
16 | q1: typed literal: type_name 'literal value'
17 | q2: scalar subquery
18 | q3: typed literal
19 | q4: typed literal
20 | q5: typed literal
21 | q6: typed literal
22 | q7: ??
23 | q8: extract??
24 | q9: extract ??
25 | q10: typed literal
26 | q11: scalar sub query
27 | q12: ??
28 | q13: not like??
29 | q14: decimal literal
30 | q15: cte
31 | q16: count distinct
32 | q17: decimal literal
33 | q18: in subquery
34 | q19: in literal list
35 | q20: in subquery
36 | q21: exists subquery
37 | q22: substring
38 |
--------------------------------------------------------------------------------
/PrettyPrinting0.lhs:
--------------------------------------------------------------------------------
1 |
2 | [[pretty-printing]]
3 | = Pretty printing
4 |
5 | Here is a pretty printer for the parser version 0.
6 |
7 | It uses Text.PrettyPrint module. I think this module is a bit out of
8 | date these days, it would be nice to update this code to a more
9 | fashionable pretty printing library. I haven't put much
10 | comments/explanation here.
11 |
12 | > module PrettyPrinting0 where
13 |
14 | > import Prelude hiding ((<>))
15 | > import SimpleSQLQueryParser0 (ValueExpr(..), QueryExpr(..), TableRef(..)
16 | > ,JoinType(..), JoinCondition(..))
17 | > import qualified SimpleSQLQueryParser0 as S
18 | > import Text.PrettyPrint (render, vcat, text, (<>), (<+>), empty, parens,
19 | > nest, Doc, punctuate, comma, sep, quotes,
20 | > doubleQuotes, hsep)
21 | > import Data.Maybe (maybeToList, catMaybes)
22 | > import FunctionsAndTypesForParsing
23 | > --import Text.Parsec.String.Combinator (eof)
24 | > import qualified Test.HUnit as H
25 | > import Text.Parsec (parse, ParseError)
26 | > import Control.Applicative ((<$>),(<*), (*>),(<*>), (<$), (<|>), many)
27 | > --import Text.Parsec.String (Parser)
28 |
29 | The basic concept in this pretty printer is we convert the ast into
30 | `Doc` values, then convert these into a string. We have a bunch of
31 | functions to convert `String` to `Doc`s, and combine these docs with
32 | different layouts.
33 |
34 | By using a pretty printer library, we can get human readable source
35 | very easily compared with trying to convert directly to strings
36 | ourselves.
37 |
38 | == api
39 |
40 | > prettyQueryExpr :: QueryExpr -> String
41 | > prettyQueryExpr = render . queryExpr
42 |
43 | == value expressions
44 |
45 | > valueExpr :: ValueExpr -> Doc
46 | > valueExpr (StringLit s) = quotes $ text s
47 | > valueExpr (NumLit i) = text $ show i
48 | > valueExpr (Iden s) = text s
49 | > valueExpr (DIden q i) = text q <> text "." <> text i
50 | > valueExpr Star = text "*"
51 | > valueExpr (DStar q) = text q <> text "." <> text "*"
52 | > valueExpr (App f es) = text f <> parens (commaSep $ map valueExpr es)
53 | > valueExpr (PrefOp op e) = sep[text op, valueExpr e]
54 | > valueExpr (BinOp e0 op e1) = sep [valueExpr e0, text op, valueExpr e1]
55 | > valueExpr (Case test whens els) =
56 | > sep [text "case" <+> maybe empty valueExpr test
57 | > ,nest 5 $ sep (map wh whens
58 | > ++ [maybe empty
59 | > (\e -> text "else" <+> valueExpr e)
60 | > els])
61 | > ,text "end"]
62 | > where wh (w,t) = sep [text "when" <+> valueExpr w
63 | > ,text "then" <+> valueExpr t]
64 | > valueExpr (Parens e) = parens $ valueExpr e
65 |
66 | == query expressions
67 |
68 | > queryExpr :: QueryExpr -> Doc
69 | > queryExpr (Select sl fr wh gb hv ob) = sep
70 | > [text "select" <+> nest 7 (commaSep $ map selectItem sl)
71 | > -- from
72 | > ,ml fr $ \f -> text "from" <+> nest 7 (commaSep $ map tref f)
73 | > -- where
74 | > ,me wh $ \w -> text "where" <+> nest 6 (valueExpr w)
75 | > -- group by
76 | > ,ml gb $ \g -> text "group by" <+> nest 9 (commaSep $ map valueExpr g)
77 | > -- having
78 | > ,me hv $ \h -> text "having" <+> nest 6 (valueExpr h)
79 | > -- order by
80 | > ,ml ob $ \o -> text "order by" <+> nest 9 (commaSep $ map valueExpr o)
81 | > ]
82 | > where
83 | > selectItem (e,a) = valueExpr e <+> me a (\a' -> text "as" <+> text a')
84 | >
85 | > tref (TRSimple t) = text t
86 | > tref (TRParens t) = parens $ tref t
87 | > tref (TRAlias t a) = tref t <+> text "as" <+> text a
88 | > tref (TRQueryExpr q) = parens $ queryExpr q
89 | > tref (TRJoin t0 jt t1 jc) = sep
90 | > [tref t0
91 | > ,joinName jt jc <+> tref t1
92 | > ,case jc of
93 | > Just (JoinOn e) -> text "on" <+> valueExpr e
94 | > Just (JoinUsing is) -> text "using" <+> parens (commaSep $ map text is)
95 | > Just JoinNatural -> empty
96 | > Nothing -> empty]
97 | > joinName jt jc =
98 | > hsep [case jc of
99 | > Just JoinNatural -> text "natural"
100 | > _ -> empty
101 | > ,case jt of
102 | > JoinInner -> text "inner join"
103 | > JoinCross -> text "cross join"
104 | > JoinLeft -> text "left join"
105 | > JoinRight -> text "right join"
106 | > JoinFull -> text "full join"]
107 | > me e r = maybe empty r e
108 | > ml [] _ = empty
109 | > ml l r = r l
110 |
111 | == helpers
112 |
113 | > commaSep :: [Doc] -> Doc
114 | > commaSep = sep . punctuate comma
115 |
116 | Have a look at the haddock for this module and see if you can work out
117 | how the code above works.
118 |
119 | ```
120 | *PrettyPrinting0> either (error . show) valueExpr (parseWithEof (S.valueExpr []) "a and b")
121 | a and b
122 |
123 | *PrettyPrinting0> either (error . show) queryExpr (parseWithEof S.queryExpr "select a from t inner join u using(a,b)")
124 | select a from t inner join u using (a, b)
125 | ```
126 |
127 | == tests
128 |
129 | Now we can do some tests: we take the previous test data, and for each
130 | test add an additional test which pretty prints then parses the
131 | results to see that it is unchanged.
132 |
133 | > makeTest :: (Eq a, Show a) =>
134 | > (String -> Either ParseError a)
135 | > -> (a -> String)
136 | > -> (String,a)
137 | > -> H.Test
138 | > makeTest parser pretty (src,expected) = H.TestLabel src $ H.TestCase $ do
139 | > let gote = parser src
140 | > case gote of
141 | > Left e -> H.assertFailure $ show e
142 | > Right got -> do
143 | > H.assertEqual src expected got
144 | > let prsql = pretty got
145 | > gotpretty = parser prsql
146 | > case gotpretty of
147 | > Left e -> H.assertFailure $ "pretty: " ++ prsql ++ "\n" ++ show e
148 | > Right gotp -> H.assertEqual ("pretty: " ++ prsql) expected gotp
149 |
150 | TODO: fix parsing issue
151 |
152 | ```
153 | *PrettyPrinting0> H.runTestTT $ H.TestList $ map (makeTest S.parseQueryExpr (render . queryExpr)) S.allQueryExprTests
154 | Cases: 26 Tried: 26 Errors: 0 Failures: 0
155 | Counts {cases = 26, tried = 26, errors = 0, failures = 0}
156 | *PrettyPrinting0> H.runTestTT $ H.TestList $ map (makeTest S.parseValueExpr (render . valueExpr)) S.allValueExprTests
157 | ### Failure in: 34:case when a=1 then 2 when a=3 then 4 else 5 end
158 | (line 1, column 11):
159 | unexpected "a"
160 | expecting "--" or "/*"
161 | Cases: 35 Tried: 35 Errors: 0 Failures: 1
162 | Counts {cases = 35, tried = 35, errors = 0, failures = 1}
163 | ```
164 |
--------------------------------------------------------------------------------
/QueryExpressions.lhs:
--------------------------------------------------------------------------------
1 |
2 | [[query-expressions]]
3 | = Query expressions
4 |
5 | We can now start on the 'select' parser. In the SQL standard, it
6 | refers to these things as 'query expressions' to distinguish then from
7 | 'value expressions', so we will reuse this language here.
8 |
9 | The subset of SQL we will support is this:
10 |
11 | TODO: write lots of examples here
12 |
13 | select queries only, no union, intersect or except. No common table
14 | expressions.
15 |
16 | we will support all the value expressions that the value expression
17 | parser above supports
18 |
19 | we will support select lists with optional aliases, with the 'as'
20 | optional in the alias
21 |
22 | we will support the * as in 'select * from t', and the variation
23 | 'select t.* from t', but not the alias version select * as (a,b,c)
24 | from t.
25 |
26 | we support two part dotted identifiers in value expressions, but no
27 | other sort (such as 3-part dotted value expression identifiers, or
28 | schema qualified table names or function names).
29 |
30 | for the from clause, we will only support optional 'from table_name'.
31 |
32 | supports where
33 |
34 | we will support regular group by lists, but not the new group by
35 | options in SQL2003 (group by (), grouping sets, cube, rollup).
36 |
37 | we will support having
38 |
39 | we support order by, with multiple columns, but not explicit asc or
40 | desc , and no 'nulls first' or 'nulls last' syntax.
41 |
42 |
43 | No support for offset and fetch first, or variations.
44 |
45 | > {-# LANGUAGE TupleSections #-}
46 | > module QueryExpressions where
47 | >
48 | > --import Text.Groom (groom)
49 | > --import qualified Text.Parsec as P
50 | > import Text.Parsec.String (Parser)
51 | > import Text.Parsec (try,optionMaybe, optional, sepBy1,option)
52 | > import Control.Applicative ((<$>),(*>),(<*>))
53 | > import Control.Monad (void,guard)
54 | > --import Debug.Trace
55 | > --import Data.List (intercalate)
56 | > import Data.Maybe ()
57 | > import qualified Test.HUnit as H
58 | > import FunctionsAndTypesForParsing
59 |
60 | > import ValueExpressions (ValueExpr(..), valueExpr, identifier, symbol, keyword, comma)
61 |
62 | Here is the datatype for query expressions to get started with. In
63 | this tutorial, we will only support an optional single table in the
64 | from clause, and this will be expanded in the next tutorial.
65 |
66 | TODO: rearrange the from/where/groupby/having/orderby to a separate
67 | datatype which is optional in a select, and the from part is mandatory
68 | in this new type. This follows the standard and is more accurate
69 | syntax.
70 |
71 | > data QueryExpr
72 | > = Select
73 | > {qeSelectList :: [(ValueExpr,Maybe String)]
74 | > ,qeFrom :: Maybe String
75 | > ,qeWhere :: Maybe ValueExpr
76 | > ,qeGroupBy :: [ValueExpr]
77 | > ,qeHaving :: Maybe ValueExpr
78 | > ,qeOrderBy :: [ValueExpr]
79 | > } deriving (Eq,Show)
80 |
81 | Here is a default value which can be used to easily construct query
82 | expression values.
83 |
84 | > makeSelect :: QueryExpr
85 | > makeSelect = Select {qeSelectList = []
86 | > ,qeFrom = Nothing
87 | > ,qeWhere = Nothing
88 | > ,qeGroupBy = []
89 | > ,qeHaving = Nothing
90 | > ,qeOrderBy = []}
91 |
92 | == select lists
93 |
94 | Let's start with something simple:
95 |
96 | ```
97 | select [value expr]
98 | ```
99 |
100 | TODO: shorten the names of these examples
101 |
102 | > singleSelectItemTests :: [(String,QueryExpr)]
103 | > singleSelectItemTests =
104 | > [("select 1", makeSelect {qeSelectList = [(NumLit 1,Nothing)]})]
105 |
106 | Here are a couple of wrappers for `symbol` and `keyword` which wrap
107 | them with void.
108 |
109 | > keyword_ :: String -> Parser ()
110 | > keyword_ = void . keyword
111 |
112 | > symbol_ :: String -> Parser ()
113 | > symbol_ = void . symbol
114 |
115 | > singleSelectItem :: Parser QueryExpr
116 | > singleSelectItem = do
117 | > keyword_ "select"
118 | > e <- valueExpr []
119 | > return $ makeSelect {qeSelectList = [(e,Nothing)]}
120 |
121 | You can use the old test runner to check these:
122 |
123 | ```
124 | *QueryExpressions> H.runTestTT $ H.TestList $ map (makeTest singleSelectItem) parseSingleSelectItemTestData
125 | Cases: 1 Tried: 1 Errors: 0 Failures: 0
126 | Counts {cases = 1, tried = 1, errors = 0, failures = 0}
127 | ```
128 |
129 | Let's rewrite it in the Applicative and mostly point free style.
130 |
131 | > singleSelectItemApplicative :: Parser QueryExpr
132 | > singleSelectItemApplicative =
133 | > (\sl -> makeSelect {qeSelectList = sl})
134 | > <$> (keyword_ "select" *> (((:[]) . (,Nothing)) <$> valueExpr []))
135 |
136 | That didn't go so well. Using a wrapper:
137 |
138 | > singleSelectItemApplicative' :: Parser QueryExpr
139 | > singleSelectItemApplicative' =
140 | > ms <$> (keyword_ "select" *> valueExpr [])
141 | > where
142 | > ms e = makeSelect {qeSelectList = [(e,Nothing)]}
143 |
144 | Now let's write something that supports multiple value expressions,
145 | e.g.
146 |
147 | ```
148 | select 1+2, 3+4;
149 | ```
150 |
151 | > multipleSelectItemsTests :: [(String,QueryExpr)]
152 | > multipleSelectItemsTests =
153 | > [("select a"
154 | > ,makeSelect {qeSelectList = [(Iden "a",Nothing)]})
155 | > ,("select a,b"
156 | > ,makeSelect {qeSelectList = [(Iden "a",Nothing)
157 | > ,(Iden "b",Nothing)]})
158 | > ,("select 1+2,3+4"
159 | > ,makeSelect {qeSelectList =
160 | > [(BinOp (NumLit 1) "+" (NumLit 2),Nothing)
161 | > ,(BinOp (NumLit 3) "+" (NumLit 4),Nothing)]})
162 | > ]
163 |
164 | > selectMultipleItems :: Parser QueryExpr
165 | > selectMultipleItems = do
166 | > keyword_ "select"
167 | > es <- commaSep1 (valueExpr [])
168 | > return $ makeSelect {qeSelectList = map (,Nothing) es}
169 |
170 | > commaSep1 :: Parser a -> Parser [a]
171 | > commaSep1 = (`sepBy1` comma)
172 |
173 | === aliases
174 |
175 | We can write names for the columns produced from a select list using
176 | the keyword `as`, and we can miss out the `as`:
177 |
178 | ```sql
179 | select a as a1, b as b1, f(c) as c1;
180 |
181 | -- no as
182 | select a a1, b b1;
183 | ```
184 |
185 | > selectListTests :: [(String,QueryExpr)]
186 | > selectListTests =
187 | > [("select a as a1, b as b1"
188 | > ,makeSelect {qeSelectList = [(Iden "a", Just "a1")
189 | > ,(Iden "b", Just "b1")]})
190 | > ,("select a a1, b b1"
191 | > ,makeSelect {qeSelectList = [(Iden "a", Just "a1")
192 | > ,(Iden "b", Just "b1")]})
193 | > ] ++ multipleSelectItemsTests
194 | > ++ singleSelectItemTests
195 |
196 | Finally, here is the select list parser and the helper for select
197 | items:
198 |
199 | > selectItem0 :: Parser (ValueExpr, Maybe String)
200 | > selectItem0 = (,) <$> valueExpr [] <*> optionMaybe (try alias)
201 | > where alias = optional (keyword_ "as") *> identifier
202 |
203 | > selectList0 :: Parser [(ValueExpr, Maybe String)]
204 | > selectList0 = keyword_ "select" *> commaSep1 selectItem0
205 |
206 | > queryExpr0 :: Parser QueryExpr
207 | > queryExpr0 = mkSelect <$> selectList0
208 | > where mkSelect sl = makeSelect {qeSelectList = sl}
209 |
210 | == from clause
211 |
212 | > from :: Parser String
213 | > from = keyword_ "from" *> identifier
214 |
215 | > fromTests :: [(String,QueryExpr)]
216 | > fromTests =
217 | > [("select a from t"
218 | > ,makeSelect {qeSelectList = [(Iden "a",Nothing)]
219 | > ,qeFrom = (Just "t")})]
220 |
221 | > queryExpr1 :: Parser QueryExpr
222 | > queryExpr1 = mkSelect
223 | > <$> selectList0
224 | > <*> optionMaybe from
225 | > where mkSelect sl fr = makeSelect {qeSelectList = sl
226 | > ,qeFrom = fr}
227 |
228 | ```
229 | *QueryExpressions> H.runTestTT $ H.TestList $ map (makeTest queryExpr1) (selectListTests ++ fromTests)
230 | ### Failure in: 6:select a from t
231 | (line 1, column 16):
232 | unexpected end of input
233 | expecting digit, letter, "_", "--" or "/*"
234 | Cases: 7 Tried: 7 Errors: 0 Failures: 1
235 | Counts {cases = 7, tried = 7, errors = 0, failures = 1}
236 | ```
237 |
238 | This is a keyword issue again. We are parsing the `from` as if it was
239 | a column alias and then getting stuck.
240 |
241 | > blackListIdentifier :: [String] -> Parser String
242 | > blackListIdentifier bl = do
243 | > i <- identifier
244 | > guard (i `notElem` bl)
245 | > return i
246 |
247 | > selectItem :: Parser (ValueExpr, Maybe String)
248 | > selectItem = (,) <$> valueExpr [] <*> optionMaybe (try alias)
249 | > where alias = optional (keyword_ "as") *> blackListIdentifier ["from"]
250 |
251 | > selectList :: Parser [(ValueExpr, Maybe String)]
252 | > selectList = keyword_ "select" *> commaSep1 selectItem
253 |
254 | > queryExpr2 :: Parser QueryExpr
255 | > queryExpr2 = mkSelect
256 | > <$> selectList
257 | > <*> optionMaybe from
258 | > where mkSelect sl fr = makeSelect {qeSelectList = sl
259 | > ,qeFrom = fr}
260 |
261 | That did the job for now.
262 |
263 | == where
264 |
265 | The where, group by, having, and order by parsers are simple.
266 |
267 | > whereTests :: [(String,QueryExpr)]
268 | > whereTests =
269 | > [("select a from t where a = 5"
270 | > ,makeSelect {qeSelectList = [(Iden "a",Nothing)]
271 | > ,qeFrom = Just "t"
272 | > ,qeWhere = Just $ BinOp (Iden "a") "=" (NumLit 5)})
273 | > ]
274 |
275 | > whereClause :: Parser ValueExpr
276 | > whereClause = keyword_ "where" *> valueExpr []
277 |
278 | > queryExpr3 :: Parser QueryExpr
279 | > queryExpr3 = mkSelect
280 | > <$> selectList
281 | > <*> optionMaybe from
282 | > <*> optionMaybe whereClause
283 | > where mkSelect sl fr wh =
284 | > makeSelect {qeSelectList = sl
285 | > ,qeFrom = fr
286 | > ,qeWhere = wh}
287 |
288 | == group by
289 |
290 | > groupByTests :: [(String,QueryExpr)]
291 | > groupByTests =
292 | > [("select a,sum(b) from t group by a"
293 | > ,makeSelect {qeSelectList = [(Iden "a",Nothing)
294 | > ,(App "sum" [Iden "b"],Nothing)]
295 | > ,qeFrom = Just "t"
296 | > ,qeGroupBy = [Iden "a"]
297 | > })
298 | > ,("select a,b,sum(c) from t group by a,b"
299 | > ,makeSelect {qeSelectList = [(Iden "a",Nothing)
300 | > ,(Iden "b",Nothing)
301 | > ,(App "sum" [Iden "c"],Nothing)]
302 | > ,qeFrom = Just "t"
303 | > ,qeGroupBy = [Iden "a",Iden "b"]
304 | > })
305 | > ]
306 |
307 | > groupByClause :: Parser [ValueExpr]
308 | > groupByClause = keyword_ "group" *> keyword_ "by"
309 | > *> commaSep1 (valueExpr [])
310 |
311 | > queryExpr4 :: Parser QueryExpr
312 | > queryExpr4 = mkSelect
313 | > <$> selectList
314 | > <*> optionMaybe from
315 | > <*> optionMaybe whereClause
316 | > <*> option [] groupByClause
317 | > where mkSelect sl fr wh gr =
318 | > makeSelect {qeSelectList = sl
319 | > ,qeFrom = fr
320 | > ,qeWhere = wh
321 | > ,qeGroupBy = gr}
322 |
323 | == having
324 |
325 | > havingTests :: [(String,QueryExpr)]
326 | > havingTests =
327 | > [("select a,sum(b) from t group by a having sum(b) > 5"
328 | > ,makeSelect {qeSelectList = [(Iden "a",Nothing)
329 | > ,(App "sum" [Iden "b"],Nothing)]
330 | > ,qeFrom = (Just "t")
331 | > ,qeGroupBy = [Iden "a"]
332 | > ,qeHaving = Just $ BinOp (App "sum" [Iden "b"]) ">" (NumLit 5)
333 | > })
334 | > ]
335 |
336 | > having :: Parser ValueExpr
337 | > having = keyword_ "having" *> (valueExpr [])
338 |
339 | > queryExpr5 :: Parser QueryExpr
340 | > queryExpr5 = mkSelect
341 | > <$> selectList
342 | > <*> optionMaybe from
343 | > <*> optionMaybe whereClause
344 | > <*> option [] groupByClause
345 | > <*> optionMaybe having
346 | > where mkSelect sl fr wh gr hv =
347 | > makeSelect {qeSelectList = sl
348 | > ,qeFrom = fr
349 | > ,qeWhere = wh
350 | > ,qeGroupBy = gr
351 | > ,qeHaving = hv}
352 |
353 | Looking nice so far. Did you run the tests for each stage?
354 |
355 |
356 | == order by
357 |
358 | > orderByTests :: [(String,QueryExpr)]
359 | > orderByTests =
360 | > [("select a from t order by a"
361 | > ,ms [Iden "a"])
362 | > ,("select a from t order by a, b"
363 | > ,ms [Iden "a", Iden "b"])
364 | > ]
365 | > where
366 | > ms o = makeSelect {qeSelectList = [(Iden "a",Nothing)]
367 | > ,qeFrom = (Just "t")
368 | > ,qeOrderBy = o}
369 |
370 | > orderBy :: Parser [ValueExpr]
371 | > orderBy = keyword_ "order" *> keyword_ "by"
372 | > *> commaSep1 (valueExpr [])
373 |
374 | > queryExpr6 :: Parser QueryExpr
375 | > queryExpr6 = Select
376 | > <$> selectList
377 | > <*> optionMaybe from
378 | > <*> optionMaybe whereClause
379 | > <*> option [] groupByClause
380 | > <*> optionMaybe having
381 | > <*> option [] orderBy
382 |
383 | ```
384 | *QueryExpressions> H.runTestTT $ H.TestList $ map (makeTest queryExpr6) (selectListTests ++ fromTests ++ whereTests ++ groupByTests ++ havingTests ++ orderByTests)
385 | Cases: 13 Tried: 13 Errors: 0 Failures: 0
386 | Counts {cases = 13, tried = 13, errors = 0, failures = 0}
387 | ```
388 |
389 | TODO: talk about putting the maybes/default values inside from, where, etc??
390 |
--------------------------------------------------------------------------------
/README.asciidoc:
--------------------------------------------------------------------------------
1 | = Intro to Parsing with Parsec in Haskell
2 |
3 | == Overview
4 |
5 | WIP, a tutorial which demonstrates the basics of Parsec and goes on to
6 | build a SQL query parser.
7 |
8 | You can view this tutorial as HTML online here:
9 |
10 | http://jakewheat.github.io/intro_to_parsing/
11 |
12 | and you can view the files directly in the github repository here:
13 |
14 | https://github.com/JakeWheat/intro_to_parsing
15 |
16 | === Summary of sections
17 |
18 | // the first link for each section will work in the readme on github,
19 | // the second link is for the rendered html and doesn't work here
20 |
21 | link:GettingStarted.lhs[]
22 |
23 | // <>
24 |
25 | Introduction to parsing with Parsec, including a review of
26 | Text.Parsec.Char functions.
27 |
28 | link:VerySimpleExpressions.lhs[]
29 |
30 | // <>
31 |
32 | Creating a very simple expression language parser, and introducing
33 | some functions from Text.Parsec.Combinator.
34 |
35 | link:ApplicativeStyle.lhs[]
36 |
37 | // <>
38 |
39 | Rewriting the simple expression parser code in a more succinct style.
40 |
41 | link:CombinatorReview.lhs[]
42 |
43 | // <>
44 |
45 | Review and examples of all functions from Text.Parsec.Combinator, and
46 | some from Control.Applicative and Control.Monad.
47 |
48 | link:FunctionsAndTypesForParsing.lhs[]
49 |
50 | // <>
51 |
52 | The utility functions used in the previous tutorials, plus some notes
53 | on types in Parsec.
54 |
55 | link:TextParsecExpr.lhs[]
56 |
57 | // <>
58 |
59 | This covers using the Text.Parsec.Expr for expression parsing with
60 | prefix, postfix and infix operators with fixity.
61 |
62 | link:AnIssueWithTokenParsers.lhs[]
63 |
64 | // <>
65 |
66 | Looks at an issue we have with the way the symbol parser in the
67 | Text.Parsec.Expr tutorial was used, and some possible fixes.
68 |
69 | link:TextParsecPerm.lhs[]
70 |
71 | // <>
72 |
73 | This covers the Text.Parsec.Perm module which is used for parsing
74 | different things in flexible order.
75 |
76 | link:TextParsecToken.lhs[]
77 |
78 | // <>
79 |
80 | This covers Text.Parsec.Token which can be used to create token
81 | parsers easily.
82 |
83 | link:ValueExpressions.lhs[]
84 |
85 | // <>
86 |
87 | This covers building a parser a subset of value expressions from SQL,
88 | which are an extension of the simple expression types and parsers
89 | covered in previous tutorials.
90 |
91 | link:QueryExpressions.lhs[]
92 |
93 | // <>
94 |
95 | This covers building a parser to parse query expressions with select
96 | lists, simple from, where, group by, having and order by.
97 |
98 | link:FromClause.lhs[]
99 |
100 | // <>
101 |
102 | This extend the parser for query expressions to support a from clause
103 | with much more features including joins.
104 |
105 | link:SimpleSQLQueryParser0.lhs[]
106 |
107 | // <>
108 |
109 | Here is the code from ValueExpressions, QueryExpressions and
110 | FromClause plus tests put together and rearranged as a coherent
111 | standalone module.
112 |
113 | link:PrettyPrinting0.lhs[]
114 |
115 | // <>
116 |
117 | This quick module covers a simple pretty printer for our SQL ast.
118 |
119 | link:ErrorMessages.lhs[]
120 |
121 | // <>
122 |
123 | In this document, we will explore error messages with parsec and how
124 | restructuring parser code can lead to better or worse error messages.
125 |
126 | === Going further
127 |
128 | If you are interested in SQL parsing, check out the project to build
129 | a complete SQL parser here:
130 | http://jakewheat.github.io/simple-sql-parser/latest. The parsing code in the
131 | simple-sql-parser project is based on this tutorial code.
132 |
133 | ////
134 |
135 | Later documents
136 |
137 | Additional provisional documents not yet started:
138 |
139 | Parsing TPC-H queries
140 |
141 | We will use the tpch queries as examples to help improve the pretty
142 | printer. First there are a few extra bits of syntax to be able to
143 | parse these queries
144 |
145 | Pretty printing part 2
146 |
147 | some tweaks to the pretty printer to improve the layout for the tpch
148 | queries
149 |
150 | Writing tests
151 |
152 | Here we will take the ad hoc tests and build an organised test suite
153 | with a wrapper for hunit, wrapper for test.framework wrapper and maybe
154 | tasty
155 |
156 | Refactored project + cabal package
157 |
158 | In this tutorial, we will take the sql parser, pretty printer and
159 | tests, and create a complete cabal package.
160 |
161 | TODO: talk about robustness and the casual way the parser has been put
162 | together and the casual way issues have been tackled.
163 |
164 | Writing a command line sql interface
165 |
166 | quick experiment to try to implement the front end for a multiline sql
167 | command line using fake incremental parsing which parsec doesn't
168 | support directly.
169 |
170 | Position annotation
171 |
172 | In this tutorial, we will add position annotation to the parsing, so
173 | that a later stage could, e.g., provide type error messages with the
174 | correct line and column numbers.
175 |
176 | Dialects
177 |
178 | In this tutorial, we will discuss how we can support other SQL dialects
179 |
180 | Separate lexer
181 |
182 | In this tutorial, we will look at creating a proper separate lexer to
183 | see how it is done, and remark on what the tradeoffs seem to be.
184 |
185 | Fixing fixity
186 |
187 | Parsing full SQL expressions is a mess, and trying to do the fixity at
188 | parse time has many downsides. Here is another approach, to ignore
189 | fixity at parse time and fix it in a pass on the ast after parsing.
190 |
191 | Quasiquotes
192 |
193 | In this tutorial, we will create quasiquoters for sql query
194 | expressions and value expressions, and see how powerful this can be
195 |
196 |
197 | Something about syntax highlighting, generating documentation + links?
198 |
199 | ////
200 |
201 | === Extras
202 |
203 | * link:ParseString.lhs[]
204 |
205 | an executable which contains the boilerplate to run a parsec parser on
206 | a string passed as an argument
207 |
208 | * link:ParseFile.lhs[]
209 |
210 | an executable which contains the boilerplate to run a parsec parser on
211 | a file passed as an argument
212 |
213 | Contact: +++jakewheatmail@gmail.com+++
214 |
215 | License: BSD3
216 |
--------------------------------------------------------------------------------
/Setup.hs:
--------------------------------------------------------------------------------
1 | import Distribution.Simple
2 | main = defaultMain
3 |
--------------------------------------------------------------------------------
/SimpleSQLQueryParser0.lhs:
--------------------------------------------------------------------------------
1 |
2 | [[simple-sql-query-parser]]
3 | = Simple SQL query parser
4 |
5 | Here is the complete syntax, parser and tests for the value and query
6 | expressions so far as a self contained module
7 |
8 | > module SimpleSQLQueryParser0 where
9 | >
10 | > import Text.Parsec.String (Parser)
11 | > import Text.Parsec.String.Parsec (try)
12 | > import Text.Parsec.String.Char
13 | > import Text.Parsec.String.Combinator
14 | > import Text.Parsec (parse,ParseError)
15 | > import Control.Applicative ((<$>),(<*), (*>),(<*>), (<$), (<|>), many)
16 | > import qualified Text.Parsec.String.Expr as E
17 | > import Control.Monad
18 | > --import Data.List (intercalate)
19 | > import Data.Maybe ()
20 | > import qualified Test.HUnit as H
21 | > import FunctionsAndTypesForParsing
22 | > import Debug.Trace
23 |
24 | == Supported SQL
25 |
26 | === comments
27 |
28 | ```sql
29 | -- single line comment
30 | /*
31 | multiline
32 | comment
33 | */
34 | ```
35 |
36 | The `/* */` comments do not nest.
37 |
38 | === value expressions
39 |
40 | ==== literals
41 |
42 | postive integral literals and string literals with single quote,
43 | without escaping of single quote within string (so there is no way to
44 | create a string literal with a single quote in it).
45 |
46 | ```sql
47 | 1
48 | 500
49 | 'string literal'
50 | ```
51 | ==== identifiers
52 |
53 | Unquoted identifiers only, an identifier may start with a letter or
54 | underscore, and contain letters, underscores and digits.
55 |
56 | ```sql
57 | a
58 | something
59 | _test_
60 | a123
61 | ```
62 |
63 | ==== dotted identifiers
64 |
65 | Supports two part dotted identifiers only. Both parts must parse
66 | according to the rules for regular identifiers.
67 |
68 | ```sql
69 | t.a
70 | something.something_else
71 | ```
72 |
73 | ==== star
74 |
75 | Star, plus dotted star using the identifier rules for the first part.
76 |
77 | ```sql
78 | *
79 | t.*
80 | ```
81 |
82 | ==== function application
83 |
84 | The function name must parse as a valid identifier.
85 |
86 | ```sql
87 | f()
88 | g(1)
89 | h(2,'something')
90 | ```
91 |
92 | ==== operators
93 |
94 | Here is the range of operators supported.
95 |
96 | ```sql
97 | a = b
98 | a > b
99 | a < b
100 | a >= b
101 | a <= b
102 | a != b
103 | a <> b
104 | a and b
105 | a or b
106 | 1 + 2
107 | 1 - 2
108 | 1 * 2
109 | 1 / 2
110 | 'some' || 'thing'
111 | a like b
112 | not a
113 | ```
114 |
115 | ==== case expression
116 |
117 | ```sql
118 | case a
119 | when 3 then 'got three'
120 | when 5 then 'got five'
121 | else 'neither'
122 | end
123 | ```
124 |
125 | ```sql
126 | case
127 | when a = 3 then 'a is three'
128 | when b = 4 then 'b is four'
129 | else 'neither'
130 | end
131 | ```
132 |
133 | The else branch is optional in both cases.
134 |
135 | ==== parentheses
136 |
137 | ```sql
138 | (1 + 2) * 3
139 | ```
140 |
141 | Parentheses are explicit in the abstract syntax.
142 |
143 | === query expressions
144 |
145 | TODO: examples
146 |
147 | select queries only, no union, intersect or except. No common table
148 | expressions.
149 |
150 | select list aliases, with the 'as' optional in the alias
151 |
152 | 'select * from t'
153 | 'select t.* from t'
154 | but not the alias version 'select * as (a,b,c) from t'.
155 |
156 | from clause
157 |
158 | implicit and explicit joins, including keywords
159 | natural, inner, outer, left, right, full, cross, on and using, plus
160 | parens and simple aliases (e.g. select a from t u, but not select a
161 | from t(a,b)).
162 |
163 | where
164 |
165 | group by lists
166 | but not the new group by options in SQL2003 (group by (), grouping
167 | sets, cube, rollup).
168 |
169 | having
170 |
171 | order by, with multiple columns, but not explicit asc or
172 | desc , and no 'nulls first' or 'nulls last' syntax.
173 |
174 | No support for offset and fetch first, or variations.
175 |
176 |
177 | == Abstract syntax
178 |
179 | > data ValueExpr = StringLit String
180 | > | NumLit Integer
181 | > | Iden String
182 | > | DIden String String -- a.b
183 | > | Star
184 | > | DStar String -- t.*
185 | > | App String [ValueExpr]
186 | > | PrefOp String ValueExpr
187 | > | BinOp ValueExpr String ValueExpr
188 | > | Case (Maybe ValueExpr) -- test value
189 | > [(ValueExpr,ValueExpr)] -- when branches
190 | > (Maybe ValueExpr) -- else value
191 | > | Parens ValueExpr
192 | > deriving (Eq,Show)
193 |
194 | > data QueryExpr
195 | > = Select
196 | > {qeSelectList :: [(ValueExpr,Maybe String)]
197 | > ,qeFrom :: [TableRef]
198 | > ,qeWhere :: Maybe ValueExpr
199 | > ,qeGroupBy :: [ValueExpr]
200 | > ,qeHaving :: Maybe ValueExpr
201 | > ,qeOrderBy :: [ValueExpr]
202 | > } deriving (Eq,Show)
203 |
204 | > makeSelect :: QueryExpr
205 | > makeSelect = Select {qeSelectList = []
206 | > ,qeFrom = []
207 | > ,qeWhere = Nothing
208 | > ,qeGroupBy = []
209 | > ,qeHaving = Nothing
210 | > ,qeOrderBy = []}
211 |
212 | > data TableRef = TRSimple String
213 | > | TRJoin TableRef JoinType TableRef (Maybe JoinCondition)
214 | > | TRParens TableRef
215 | > | TRAlias TableRef String
216 | > | TRQueryExpr QueryExpr
217 | > deriving (Eq,Show)
218 |
219 | > data JoinType = JoinInner | JoinLeft | JoinRight | JoinFull | JoinCross
220 | > deriving (Eq,Show)
221 |
222 | > data JoinCondition = JoinOn ValueExpr
223 | > | JoinUsing [String]
224 | > | JoinNatural
225 | > deriving (Eq,Show)
226 |
227 | == Value expression parsing
228 |
229 | === term components
230 |
231 | > num :: Parser ValueExpr
232 | > num = NumLit <$> integer
233 |
234 | > iden :: [String] -> Parser ValueExpr
235 | > iden blacklist = Iden <$> identifierBlacklist blacklist
236 |
237 | > parensValue :: Parser ValueExpr
238 | > parensValue = Parens <$> parens (valueExpr [])
239 |
240 | > stringLit :: Parser ValueExpr
241 | > stringLit = StringLit <$> stringToken
242 |
243 | > dIden :: Parser ValueExpr
244 | > dIden = DIden <$> identifier <*> (dot *> identifier)
245 |
246 | > star :: Parser ValueExpr
247 | > star = Star <$ symbol "*"
248 |
249 | > dstar :: Parser ValueExpr
250 | > dstar = DStar <$> (identifier <* dot <* symbol "*")
251 |
252 | > app :: Parser ValueExpr
253 | > app = App <$> identifier <*> parens (commaSep $ valueExpr [])
254 |
255 | === case
256 |
257 | > caseValue :: Parser ValueExpr
258 | > caseValue =
259 | > Case
260 | > <$> (keyword "case" *> optionMaybe caseVal)
261 | > <*> many1 whenClause
262 | > <*> optionMaybe elseClause
263 | > <* keyword "end"
264 | > where
265 | > whenClause = (,) <$> (keyword "when" *> caseVal)
266 | > <*> (keyword "then" *> caseVal)
267 | > elseClause = keyword "else" *> caseVal
268 | > caseVal = valueExpr blackList
269 | > blackList = ["case", "when", "then", "else", "end"]
270 |
271 | === term
272 |
273 | > term :: [String] -> Parser ValueExpr
274 | > term blackList = choice [caseValue
275 | > ,try app
276 | > ,try dstar
277 | > ,try dIden
278 | > ,iden blackList
279 | > ,num
280 | > ,parensValue
281 | > ,stringLit
282 | > ,star]
283 |
284 | === operators
285 |
286 | > table :: [[E.Operator ValueExpr]]
287 | > table = [[prefix "-", prefix "+"]
288 | > ,[binary "^" E.AssocLeft]
289 | > ,[binary "*" E.AssocLeft
290 | > ,binary "/" E.AssocLeft
291 | > ,binary "%" E.AssocLeft]
292 | > ,[binary "+" E.AssocLeft
293 | > ,binary "-" E.AssocLeft]
294 | > ,[binary "<=" E.AssocRight
295 | > ,binary ">=" E.AssocRight
296 | > ,binaryK "like" E.AssocNone
297 | > ,binary "!=" E.AssocRight
298 | > ,binary "<>" E.AssocRight
299 | > ,binary "||" E.AssocRight]
300 | > ,[binary "<" E.AssocNone
301 | > ,binary ">" E.AssocNone]
302 | > ,[binary "=" E.AssocRight]
303 | > ,[prefixK "not"]
304 | > ,[binaryK "and" E.AssocLeft]
305 | > ,[binaryK "or" E.AssocLeft]]
306 | > where
307 | > binary name assoc =
308 | > E.Infix (mkBinOp name <$ symbol name) assoc
309 | > mkBinOp nm a b = BinOp a nm b
310 | > prefix name = E.Prefix (PrefOp name <$ symbol name)
311 | > binaryK name assoc =
312 | > E.Infix (mkBinOp name <$ keyword name) assoc
313 | > prefixK name = E.Prefix (PrefOp name <$ keyword name)
314 |
315 | === valueExpr
316 |
317 | > valueExpr :: [String] -> Parser ValueExpr
318 | > valueExpr blackList = E.buildExpressionParser table (term blackList)
319 |
320 | == Query expression parsing
321 |
322 | > selectList :: Parser [(ValueExpr, Maybe String)]
323 | > selectList = keyword_ "select" *> commaSep1 selectItem
324 |
325 | > selectItem :: Parser (ValueExpr, Maybe String)
326 | > selectItem = (,) <$> valueExpr [] <*> optionMaybe (try alias)
327 | > where alias = optional (keyword_ "as") *> identifierBlacklist ["from"]
328 |
329 | > whereClause :: Parser ValueExpr
330 | > whereClause = keyword_ "where" *> valueExpr []
331 |
332 | > groupByClause :: Parser [ValueExpr]
333 | > groupByClause = keyword_ "group" *> keyword_ "by"
334 | > *> commaSep1 (valueExpr [])
335 |
336 | > having :: Parser ValueExpr
337 | > having = keyword_ "having" *> (valueExpr [])
338 |
339 | > orderBy :: Parser [ValueExpr]
340 | > orderBy = keyword_ "order" *> keyword_ "by"
341 | > *> commaSep1 (valueExpr [])
342 |
343 | === from clause
344 |
345 | > from :: Parser [TableRef]
346 | > from = keyword_ "from" >> commaSep1 tref
347 | > where
348 | > tref = nonJoinTref >>= suffixWrapper joinTrefSuffix
349 | > joinTrefSuffix t0 = (do
350 | > nat <- option False (True <$ keyword_ "natural")
351 | > TRJoin t0 <$> joinType
352 | > <*> nonJoinTref
353 | > <*> optionMaybe (joinCondition nat))
354 | > >>= suffixWrapper joinTrefSuffix
355 | > nonJoinTref = choice [TRSimple <$> identifier
356 | > ,try (TRQueryExpr <$> parens queryExpr)
357 | > ,TRParens <$> parens tref]
358 | > >>= suffixWrapper (try . alias)
359 | > alias tr = try (TRAlias tr <$> (optional (keyword_ "as") *> aliasIdentifier))
360 | > aliasIdentifier = identifierBlacklist
361 | > [-- join keywords
362 | > "natural"
363 | > ,"inner"
364 | > ,"outer"
365 | > ,"cross"
366 | > ,"left"
367 | > ,"right"
368 | > ,"full"
369 | > ,"join"
370 | > ,"on"
371 | > ,"using"
372 | > -- subsequent clause keywords
373 | > ,"where"
374 | > ,"group"
375 | > ,"having"
376 | > ,"order"
377 | > ]
378 |
379 | > joinType :: Parser JoinType
380 | > joinType = choice
381 | > [JoinCross <$ keyword_ "cross" <* keyword_ "join"
382 | > ,JoinInner <$ keyword_ "inner" <* keyword_ "join"
383 | > ,JoinLeft <$ keyword_ "left"
384 | > <* optional (keyword_ "outer")
385 | > <* keyword_ "join"
386 | > ,JoinRight <$ keyword_ "right"
387 | > <* optional (keyword_ "outer")
388 | > <* keyword_ "join"
389 | > ,JoinFull <$ keyword_ "full"
390 | > <* optional (keyword_ "outer")
391 | > <* keyword_ "join"
392 | > ,JoinInner <$ keyword_ "join"]
393 |
394 | > joinCondition :: Bool -> Parser JoinCondition
395 | > joinCondition nat =
396 | > choice [guard nat >> return JoinNatural
397 | > ,keyword_ "on" >> JoinOn <$> valueExpr []
398 | > ,keyword_ "using" >> JoinUsing <$> parens (commaSep1 identifier)
399 | > ]
400 |
401 | === queryExpr
402 |
403 | > queryExpr :: Parser QueryExpr
404 | > queryExpr = Select
405 | > <$> selectList
406 | > <*> option [] from
407 | > <*> optionMaybe whereClause
408 | > <*> option [] groupByClause
409 | > <*> optionMaybe having
410 | > <*> option [] orderBy
411 |
412 | == tokens
413 |
414 | > whitespace :: Parser ()
415 | > whitespace =
416 | > choice [simpleWhitespace *> whitespace
417 | > ,lineComment *> whitespace
418 | > ,blockComment *> whitespace
419 | > ,return ()]
420 | > where
421 | > lineComment = try (string "--")
422 | > *> manyTill anyChar (void (char '\n') <|> eof)
423 | > blockComment = try (string "/*")
424 | > *> manyTill anyChar (try $ string "*/")
425 | > simpleWhitespace = void $ many1 (oneOf " \t\n")
426 |
427 | > lexeme :: Parser a -> Parser a
428 | > lexeme p = p <* whitespace
429 |
430 | > integer :: Parser Integer
431 | > integer = read <$> lexeme (many1 digit)
432 |
433 | > identifier :: Parser String
434 | > identifier = lexeme ((:) <$> firstChar <*> many nonFirstChar)
435 | > where
436 | > firstChar = letter <|> char '_'
437 | > nonFirstChar = digit <|> firstChar
438 |
439 | > symbol :: String -> Parser String
440 | > symbol s = try $ lexeme $ do
441 | > u <- many1 (oneOf "<>=+-^%/*!|")
442 | > guard (s == u)
443 | > return s
444 |
445 | > openParen :: Parser Char
446 | > openParen = lexeme $ char '('
447 |
448 | > closeParen :: Parser Char
449 | > closeParen = lexeme $ char ')'
450 |
451 | > stringToken :: Parser String
452 | > stringToken = lexeme (char '\'' *> manyTill anyChar (char '\''))
453 |
454 | > dot :: Parser Char
455 | > dot = lexeme $ char '.'
456 |
457 | > comma :: Parser Char
458 | > comma = lexeme $ char ','
459 |
460 | == helper functions
461 |
462 | > keyword :: String -> Parser String
463 | > keyword k = try $ do
464 | > i <- identifier
465 | > guard (i == k)
466 | > return k
467 |
468 | > parens :: Parser a -> Parser a
469 | > parens = between openParen closeParen
470 |
471 | > commaSep :: Parser a -> Parser [a]
472 | > commaSep = (`sepBy` comma)
473 |
474 | > keyword_ :: String -> Parser ()
475 | > keyword_ = void . keyword
476 |
477 | > symbol_ :: String -> Parser ()
478 | > symbol_ = void . symbol
479 |
480 | > commaSep1 :: Parser a -> Parser [a]
481 | > commaSep1 = (`sepBy1` comma)
482 |
483 | > identifierBlacklist :: [String] -> Parser String
484 | > identifierBlacklist bl = do
485 | > i <- identifier
486 | > guard (i `notElem` bl)
487 | > return i
488 |
489 | > suffixWrapper :: (a -> Parser a) -> a -> Parser a
490 | > suffixWrapper p a = p a <|> return a
491 |
492 | == the parser api
493 |
494 | > parseQueryExpr :: String -> Either ParseError QueryExpr
495 | > parseQueryExpr = parse (whitespace *> queryExpr <* eof) ""
496 |
497 | > parseValueExpr :: String -> Either ParseError ValueExpr
498 | > parseValueExpr = parse (whitespace *> valueExpr [] <* eof) ""
499 |
500 |
501 | == tests
502 |
503 | > data TestItem = Group String [TestItem]
504 | > | ValueExpressionTest String ValueExpr
505 | > | QueryExpressionTest String QueryExpr
506 |
507 | > numLitTests :: [(String,ValueExpr)]
508 | > numLitTests =
509 | > [("1", NumLit 1)
510 | > ,("54321", NumLit 54321)]
511 | >
512 | > idenTests :: [(String,ValueExpr)]
513 | > idenTests =
514 | > [("test", Iden "test")
515 | > ,("_something3", Iden "_something3")]
516 | >
517 | > operatorTests :: [(String,ValueExpr)]
518 | > operatorTests =
519 | > map (\o -> (o ++ " a", PrefOp o (Iden "a"))) ["not", "+", "-"]
520 | > ++ map (\o -> ("a " ++ o ++ " b", BinOp (Iden "a") o (Iden "b")))
521 | > ["=",">","<", ">=", "<=", "!=", "<>"
522 | > ,"and", "or", "+", "-", "*", "/", "||", "like"]
523 | >
524 | > parensTests :: [(String,ValueExpr)]
525 | > parensTests = [("(1)", Parens (NumLit 1))]
526 |
527 | > basicTests :: [(String,ValueExpr)]
528 | > basicTests = numLitTests ++ idenTests ++ operatorTests ++ parensTests
529 |
530 | > stringLiteralTests :: [(String,ValueExpr)]
531 | > stringLiteralTests =
532 | > [("''", StringLit "")
533 | > ,("'test'", StringLit "test")]
534 |
535 | > dIdenTests :: [(String,ValueExpr)]
536 | > dIdenTests =
537 | > [("t.a", DIden "t" "a")]
538 |
539 | > starTests :: [(String,ValueExpr)]
540 | > starTests = [("*", Star)]
541 |
542 |
543 | > dStarTests :: [(String,ValueExpr)]
544 | > dStarTests = [("t.*", DStar "t")]
545 |
546 | > appTests :: [(String,ValueExpr)]
547 | > appTests = [("f()", App "f" [])
548 | > ,("f(1)", App "f" [NumLit 1])
549 | > ,("f(1,a)", App "f" [NumLit 1, Iden "a"])]
550 |
551 | > caseTests :: [(String,ValueExpr)]
552 | > caseTests =
553 | > [("case a when 1 then 2 end"
554 | > ,Case (Just $ Iden "a") [(NumLit 1,NumLit 2)] Nothing)
555 | >
556 | > ,("case a when 1 then 2 when 3 then 4 end"
557 | > ,Case (Just $ Iden "a")
558 | > [(NumLit 1, NumLit 2)
559 | > ,(NumLit 3, NumLit 4)]
560 | > Nothing)
561 | >
562 | > ,("case a when 1 then 2 when 3 then 4 else 5 end"
563 | > ,Case (Just $ Iden "a")
564 | > [(NumLit 1, NumLit 2)
565 | > ,(NumLit 3, NumLit 4)]
566 | > (Just $ NumLit 5))
567 | >
568 | > ,("case when a=1 then 2 when a=3 then 4 else 5 end"
569 | > ,Case Nothing
570 | > [(BinOp (Iden "a") "=" (NumLit 1), NumLit 2)
571 | > ,(BinOp (Iden "a") "=" (NumLit 3), NumLit 4)]
572 | > (Just $ NumLit 5))
573 | > ]
574 |
575 | > allValueExprTests :: [(String,ValueExpr)]
576 | > allValueExprTests = concat [basicTests
577 | > ,stringLiteralTests
578 | > ,dIdenTests
579 | > ,starTests
580 | > ,dStarTests
581 | > ,appTests
582 | > ,caseTests]
583 |
584 | > singleSelectItemTests :: [(String,QueryExpr)]
585 | > singleSelectItemTests =
586 | > [("select 1", makeSelect {qeSelectList = [(NumLit 1,Nothing)]})]
587 |
588 | > multipleSelectItemsTests :: [(String,QueryExpr)]
589 | > multipleSelectItemsTests =
590 | > [("select a"
591 | > ,makeSelect {qeSelectList = [(Iden "a",Nothing)]})
592 | > ,("select a,b"
593 | > ,makeSelect {qeSelectList = [(Iden "a",Nothing)
594 | > ,(Iden "b",Nothing)]})
595 | > ,("select 1+2,3+4"
596 | > ,makeSelect {qeSelectList =
597 | > [(BinOp (NumLit 1) "+" (NumLit 2),Nothing)
598 | > ,(BinOp (NumLit 3) "+" (NumLit 4),Nothing)]})
599 | > ]
600 |
601 | > selectListTests :: [(String,QueryExpr)]
602 | > selectListTests =
603 | > [("select a as a, b as b"
604 | > ,makeSelect {qeSelectList = [(Iden "a", Just "a")
605 | > ,(Iden "b", Just "b")]})
606 | > ,("select a a, b b"
607 | > ,makeSelect {qeSelectList = [(Iden "a", Just "a")
608 | > ,(Iden "b", Just "b")]})
609 | > ] ++ multipleSelectItemsTests
610 | > ++ singleSelectItemTests
611 |
612 | > fromTests :: [(String,QueryExpr)]
613 | > fromTests =
614 | > [("select a from t"
615 | > ,makeSelect {qeSelectList = [(Iden "a",Nothing)]
616 | > ,qeFrom = [TRSimple "t"]})]
617 |
618 | > whereTests :: [(String,QueryExpr)]
619 | > whereTests =
620 | > [("select a from t where a = 5"
621 | > ,makeSelect {qeSelectList = [(Iden "a",Nothing)]
622 | > ,qeFrom = [TRSimple "t"]
623 | > ,qeWhere = Just $ BinOp (Iden "a") "=" (NumLit 5)})
624 | > ]
625 |
626 | > groupByTests :: [(String,QueryExpr)]
627 | > groupByTests =
628 | > [("select a,sum(b) from t group by a"
629 | > ,makeSelect {qeSelectList = [(Iden "a",Nothing)
630 | > ,(App "sum" [Iden "b"],Nothing)]
631 | > ,qeFrom = [TRSimple "t"]
632 | > ,qeGroupBy = [Iden "a"]
633 | > })
634 | > ,("select a,b,sum(c) from t group by a,b"
635 | > ,makeSelect {qeSelectList = [(Iden "a",Nothing)
636 | > ,(Iden "b",Nothing)
637 | > ,(App "sum" [Iden "c"],Nothing)]
638 | > ,qeFrom = [TRSimple "t"]
639 | > ,qeGroupBy = [Iden "a",Iden "b"]
640 | > })
641 | > ]
642 |
643 | > havingTests :: [(String,QueryExpr)]
644 | > havingTests =
645 | > [("select a,sum(b) from t group by a having sum(b) > 5"
646 | > ,makeSelect {qeSelectList = [(Iden "a",Nothing)
647 | > ,(App "sum" [Iden "b"],Nothing)]
648 | > ,qeFrom = [TRSimple "t"]
649 | > ,qeGroupBy = [Iden "a"]
650 | > ,qeHaving = Just $ BinOp (App "sum" [Iden "b"]) ">" (NumLit 5)
651 | > })
652 | > ]
653 |
654 | > orderByTests :: [(String,QueryExpr)]
655 | > orderByTests =
656 | > [("select a from t order by a"
657 | > ,ms [Iden "a"])
658 | > ,("select a from t order by a, b"
659 | > ,ms [Iden "a", Iden "b"])
660 | > ]
661 | > where
662 | > ms o = makeSelect {qeSelectList = [(Iden "a",Nothing)]
663 | > ,qeFrom = [TRSimple "t"]
664 | > ,qeOrderBy = o}
665 |
666 | > queryExprJoinTests :: [(String,QueryExpr)]
667 | > queryExprJoinTests =
668 | > [("select a from t"
669 | > ,ms [TRSimple "t"])
670 | > ,("select a from t,u"
671 | > ,ms [TRSimple "t", TRSimple "u"])
672 | >
673 | > ,("select a from t inner join u on expr"
674 | > ,ms [TRJoin (TRSimple "t") JoinInner (TRSimple "u")
675 | > (Just $ JoinOn $ Iden "expr")])
676 | >
677 | > ,("select a from t left join u on expr"
678 | > ,ms [TRJoin (TRSimple "t") JoinLeft (TRSimple "u")
679 | > (Just $ JoinOn $ Iden "expr")])
680 | >
681 | > ,("select a from t right join u on expr"
682 | > ,ms [TRJoin (TRSimple "t") JoinRight (TRSimple "u")
683 | > (Just $ JoinOn $ Iden "expr")])
684 | >
685 | > ,("select a from t full join u on expr"
686 | > ,ms [TRJoin (TRSimple "t") JoinFull (TRSimple "u")
687 | > (Just $ JoinOn $ Iden "expr")])
688 | >
689 | > ,("select a from t cross join u"
690 | > ,ms [TRJoin (TRSimple "t") JoinCross (TRSimple "u") Nothing])
691 | >
692 | > ,("select a from t natural inner join u"
693 | > ,ms [TRJoin (TRSimple "t") JoinInner (TRSimple "u")
694 | > (Just JoinNatural)])
695 | >
696 | > ,("select a from t inner join u using(a,b)"
697 | > ,ms [TRJoin (TRSimple "t") JoinInner (TRSimple "u")
698 | > (Just $ JoinUsing ["a", "b"])])
699 | >
700 | > ,("select a from (select a from t)"
701 | > ,ms [TRQueryExpr $ ms [TRSimple "t"]])
702 | >
703 | > ,("select a from t as u"
704 | > ,ms [TRAlias (TRSimple "t") "u"])
705 | >
706 | > ,("select a from t u"
707 | > ,ms [TRAlias (TRSimple "t") "u"])
708 | >
709 | > ,("select a from (t cross join u) as u"
710 | > ,ms [TRAlias (TRParens $ TRJoin (TRSimple "t") JoinCross
711 | > (TRSimple "u") Nothing) "u"])
712 | > ]
713 | > where
714 | > ms f = makeSelect {qeSelectList = [(Iden "a",Nothing)]
715 | > ,qeFrom = f}
716 |
717 | > allQueryExprTests :: [(String,QueryExpr)]
718 | > allQueryExprTests = concat [selectListTests ++ fromTests ++ whereTests ++ groupByTests ++ havingTests ++ orderByTests ++ queryExprJoinTests]
719 |
720 | todo: use external api parsing code
721 |
722 | > makeTest :: (Eq a, Show a) => Parser a -> (String,a) -> H.Test
723 | > makeTest parser (src,expected) = H.TestLabel src $ H.TestCase $ do
724 | > let gote = parse (whitespace *> parser <* eof) "" src
725 | > case gote of
726 | > Left e -> H.assertFailure $ show e
727 | > Right got -> H.assertEqual src expected got
728 |
--------------------------------------------------------------------------------
/Text/Parsec/String/Char.hs:
--------------------------------------------------------------------------------
1 | module Text.Parsec.String.Char where
2 |
3 | {-
4 |
5 | Wrappers for the Text.Parsec.Char module with the types fixed to
6 | 'Text.Parsec.String.Parser a', i.e. the stream is String, no user
7 | state, Identity monad.
8 |
9 | -}
10 |
11 | import qualified Text.Parsec.Char as C
12 | import Text.Parsec.String (Parser)
13 |
14 | spaces :: Parser ()
15 | spaces = C.spaces
16 |
17 | space :: Parser Char
18 | space = C.space
19 |
20 | newline :: Parser Char
21 | newline = C.newline
22 |
23 | tab :: Parser Char
24 | tab = C.tab
25 |
26 | upper :: Parser Char
27 | upper = C.upper
28 |
29 | lower :: Parser Char
30 | lower = C.lower
31 |
32 | alphaNum :: Parser Char
33 | alphaNum = C.alphaNum
34 |
35 | letter :: Parser Char
36 | letter = C.letter
37 |
38 | digit :: Parser Char
39 | digit = C.digit
40 |
41 | hexDigit :: Parser Char
42 | hexDigit = C.hexDigit
43 |
44 | octDigit :: Parser Char
45 | octDigit = C.octDigit
46 |
47 | char :: Char -> Parser Char
48 | char = C.char
49 |
50 | string :: String -> Parser String
51 | string = C.string
52 |
53 | anyChar :: Parser Char
54 | anyChar = C.anyChar
55 |
56 | oneOf :: [Char] -> Parser Char
57 | oneOf = C.oneOf
58 |
59 | noneOf :: [Char] -> Parser Char
60 | noneOf = C.noneOf
61 |
62 | satisfy :: (Char -> Bool) -> Parser Char
63 | satisfy = C.satisfy
64 |
--------------------------------------------------------------------------------
/Text/Parsec/String/Combinator.hs:
--------------------------------------------------------------------------------
1 |
2 | module Text.Parsec.String.Combinator where
3 |
4 | {-
5 |
6 | Wrappers for the Text.Parsec.Combinator module with the types fixed to
7 | 'Text.Parsec.String.Parser a', i.e. the stream is String, no user
8 | state, Identity monad.
9 |
10 | -}
11 |
12 | import qualified Text.Parsec.Combinator as C
13 | import Text.Parsec.String (Parser)
14 |
15 | choice :: [Parser a] -> Parser a
16 | choice = C.choice
17 |
18 |
19 | count :: Int -> Parser a -> Parser [a]
20 | count = C.count
21 |
22 | between :: Parser open -> Parser close -> Parser a -> Parser a
23 | between = C.between
24 |
25 |
26 | option :: a -> Parser a -> Parser a
27 | option = C.option
28 |
29 | optionMaybe :: Parser a -> Parser (Maybe a)
30 | optionMaybe = C.optionMaybe
31 |
32 | optional :: Parser a -> Parser ()
33 | optional = C.optional
34 |
35 | skipMany1 :: Parser a -> Parser ()
36 | skipMany1 = C.skipMany1
37 |
38 | many1 :: Parser a -> Parser [a]
39 | many1 = C.many1
40 |
41 | sepBy :: Parser a -> Parser sep -> Parser [a]
42 | sepBy = C.sepBy
43 |
44 | sepBy1 :: Parser a -> Parser sep -> Parser [a]
45 | sepBy1 = C.sepBy1
46 |
47 | endBy :: Parser a -> Parser sep -> Parser [a]
48 | endBy = C.endBy
49 |
50 | endBy1 :: Parser a -> Parser sep -> Parser [a]
51 | endBy1 = C.endBy1
52 |
53 | sepEndBy :: Parser a -> Parser sep -> Parser [a]
54 | sepEndBy = C.sepEndBy
55 |
56 | sepEndBy1 :: Parser a -> Parser sep -> Parser [a]
57 | sepEndBy1 = C.sepEndBy1
58 |
59 | chainl :: Parser a -> Parser (a -> a -> a) -> a -> Parser a
60 | chainl = C.chainl
61 |
62 | chainl1 :: Parser a -> Parser (a -> a -> a) -> Parser a
63 | chainl1 = C.chainl1
64 |
65 | chainr :: Parser a -> Parser (a -> a -> a) -> a -> Parser a
66 | chainr = C.chainr
67 |
68 | chainr1 :: Parser a -> Parser (a -> a -> a) -> Parser a
69 | chainr1 = C.chainr1
70 |
71 | eof :: Parser ()
72 | eof = C.eof
73 |
74 | notFollowedBy :: Show a => Parser a -> Parser ()
75 | notFollowedBy = C.notFollowedBy
76 |
77 | manyTill :: Parser a -> Parser end -> Parser [a]
78 | manyTill = C.manyTill
79 |
80 | lookAhead :: Parser a -> Parser a
81 | lookAhead = C.lookAhead
82 |
83 | anyToken :: Parser Char
84 | anyToken = C.anyToken
85 |
--------------------------------------------------------------------------------
/Text/Parsec/String/Expr.hs:
--------------------------------------------------------------------------------
1 |
2 | module Text.Parsec.String.Expr (buildExpressionParser
3 | ,Operator(..)
4 | ,OperatorTable
5 | ,E.Assoc(..)
6 | )where
7 |
8 | {-
9 |
10 | Wrappers for the Text.Parsec.Expr module with simplified types.
11 |
12 | -}
13 |
14 | import Text.Parsec.String (Parser)
15 | import qualified Text.Parsec.Expr as E
16 |
17 | -- not sure if this is neccessary, or a type alias would be good
18 | -- enough
19 | data Operator a = Infix (Parser (a -> a -> a)) E.Assoc
20 | | Prefix (Parser (a -> a))
21 | | Postfix (Parser (a -> a))
22 |
23 | type OperatorTable a = [[Operator a]]
24 |
25 | buildExpressionParser :: OperatorTable a -> Parser a -> Parser a
26 | buildExpressionParser t = E.buildExpressionParser (map (map f) t)
27 | where
28 | f (Infix p a) = E.Infix p a
29 | f (Prefix p) = E.Prefix p
30 | f (Postfix p) = E.Postfix p
31 |
--------------------------------------------------------------------------------
/Text/Parsec/String/Parsec.hs:
--------------------------------------------------------------------------------
1 |
2 | module Text.Parsec.String.Parsec where
3 |
4 | {-
5 |
6 | wrapper for some Text.Parsec functions which use a simplified type
7 |
8 | -}
9 |
10 | import Text.Parsec.String (Parser)
11 | import qualified Text.Parsec as P
12 |
13 |
14 | try :: Parser a -> Parser a
15 | try = P.try
16 |
17 | parse :: Parser a -> P.SourceName -> String -> Either P.ParseError a
18 | parse = P.parse
19 |
--------------------------------------------------------------------------------
/TextParsecExpr.lhs:
--------------------------------------------------------------------------------
1 |
2 | [[parsing-expressions-with-fixity]]
3 | = Parsing expressions with fixity
4 |
5 | Text.Parsec.Expr allows building expression parsers with a range of
6 | operators with different precedences and associativities
7 | easily. Fixity is the (not completely standard) term for precendence
8 | and associativity together.
9 |
10 | `Text.Parsec.Expr` can be great to quickly get a parser for simple
11 | expressions or a simple programming language with simple expressions
12 | up and running.
13 |
14 | > import Text.Parsec.String (Parser)
15 | > import Text.Parsec.String.Combinator (many1, between)
16 | > import Text.Parsec.String.Char (letter, char, digit, string, oneOf)
17 | >
18 | > import Control.Applicative ((<$>), (<*>), (<*), (<|>), many, (<$))
19 | > import Control.Monad (void)
20 | >
21 | > import qualified Text.Parsec.String.Expr as E
22 | > import FunctionsAndTypesForParsing
23 |
24 | Let's extend the SimpleExpression type and parsers to cover a range of
25 | operators with different precedences and associativity.
26 |
27 | == expressions with plus and times
28 |
29 | Let's start with a simple case: + and * with the usual fixity. Here is
30 | the abstract syntax:
31 |
32 | > data PlusTimesExpr = PteVar String
33 | > | PteNum Integer
34 | > | PteParens PlusTimesExpr
35 | > | Plus PlusTimesExpr PlusTimesExpr
36 | > | Times PlusTimesExpr PlusTimesExpr
37 | > deriving (Eq,Show)
38 |
39 | > plusTimesExpr :: Parser PlusTimesExpr
40 | > plusTimesExpr = E.buildExpressionParser pteTable pteTerm
41 |
42 | > pteTable :: [[E.Operator PlusTimesExpr]]
43 | > pteTable = [[E.Infix (Times <$ symbol "*") E.AssocLeft]
44 | > ,[E.Infix (Plus <$ symbol "+") E.AssocLeft]
45 | > ]
46 |
47 | Here you can see the operator parsers are the same as the previous
48 | `SimpleExpr` parser which used `chainl1`: `Times <$ symbol "*"` and
49 | `Plus <$ symbol "+"`. We just wrapped these up in the `E.Infix`
50 | constructor with the associativity, and put them in a list of lists
51 | which represents the precendence classes.
52 |
53 | Here is the term parser and components. All this is just the same as
54 | the `SimpleExpr` parser a previous tutorial.
55 |
56 | > pteTerm :: Parser PlusTimesExpr
57 | > pteTerm = pteVar <|> pteNum <|> pteParens
58 |
59 | > pteNum :: Parser PlusTimesExpr
60 | > pteNum = PteNum <$> integer
61 |
62 | > pteVar :: Parser PlusTimesExpr
63 | > pteVar = PteVar <$> identifier
64 |
65 | > pteParens :: Parser PlusTimesExpr
66 | > pteParens = PteParens <$> between (symbol "(") (symbol ")") plusTimesExpr
67 |
68 | support functions:
69 |
70 | > whitespace :: Parser ()
71 | > whitespace = void $ many $ oneOf " \n\t"
72 |
73 | > lexeme :: Parser a -> Parser a
74 | > lexeme p = p <* whitespace
75 |
76 | > integer :: Parser Integer
77 | > integer = read <$> lexeme (many1 digit)
78 |
79 | > identifier :: Parser String
80 | > identifier = lexeme ((:) <$> firstChar <*> many nonFirstChar)
81 | > where
82 | > firstChar = letter <|> char '_'
83 | > nonFirstChar = digit <|> firstChar
84 |
85 | > symbol :: String -> Parser String
86 | > symbol s = lexeme $ string s
87 |
88 | Here you can see the precendence in action:
89 |
90 | ```
91 | *Main> regularParse plusTimesExpr "a + b * c"
92 | Right (Plus (PteVar "a") (Times (PteVar "b") (PteVar "c")))
93 |
94 | *Main> regularParse plusTimesExpr "a * b + c"
95 | Right (Plus (Times (PteVar "a") (PteVar "b")) (PteVar "c"))
96 | ```
97 |
98 | == a full featured expression type
99 |
100 | Now let's try a much bigger example with lots more operators. Now we
101 | are thinking ahead to the first version of the SQL query parser, and
102 | preparing for this.
103 |
104 | Here are our new operators in precedence order:
105 |
106 | === unary + -
107 |
108 | ```
109 | +a
110 | -3
111 | ```
112 |
113 | === exponentiation
114 |
115 | ```
116 | a ^ 3
117 | ```
118 |
119 | associativity: left
120 |
121 | === multiplication, division, modulo
122 |
123 | ```
124 | a * 3
125 | 3 / b
126 | a % 5
127 | ```
128 |
129 | associativity: left
130 |
131 | === addition, subtraction
132 |
133 | ```
134 | a + b
135 | a - b
136 | ```
137 |
138 | associativity: left
139 |
140 | === less than, greater than
141 |
142 | ```
143 | a < b
144 | a > b
145 | ```
146 |
147 | associativity: none
148 |
149 | === equals
150 |
151 | ```
152 | a = 3
153 | ```
154 |
155 | associativity: right
156 |
157 | === not
158 |
159 | ```
160 | not a
161 | ```
162 |
163 | === and
164 |
165 | ```
166 | a and b
167 | ```
168 |
169 | associativity: left
170 |
171 | === or
172 |
173 | ```
174 | a or b
175 | ```
176 |
177 | associativity: left
178 |
179 | Here is the abstract syntax type:
180 |
181 | > data SimpleExpr = Num Integer
182 | > | Var String
183 | > | Parens SimpleExpr
184 | > | PrefixOp String SimpleExpr
185 | > | BinaryOp SimpleExpr String SimpleExpr
186 | > deriving (Eq,Show)
187 |
188 | Here is the new expression parser:
189 |
190 | > simpleExpr :: Parser SimpleExpr
191 | > simpleExpr = E.buildExpressionParser table term
192 |
193 | > table :: [[E.Operator SimpleExpr]]
194 | > table = [[prefix "-", prefix "+"]
195 | > ,[binary "^" E.AssocLeft]
196 | > ,[binary "*" E.AssocLeft
197 | > ,binary "/" E.AssocLeft
198 | > ,binary "%" E.AssocLeft]
199 | > ,[binary "+" E.AssocLeft
200 | > ,binary "-" E.AssocLeft]
201 | > ,[binary "<" E.AssocNone
202 | > ,binary ">" E.AssocNone]
203 | > ,[binary "=" E.AssocRight]
204 | > ,[prefix "not"]
205 | > ,[binary "and" E.AssocLeft]
206 | > ,[binary "or" E.AssocLeft]
207 | > ]
208 | > where
209 | > binary name assoc =
210 | > E.Infix (mkBinOp name <$ symbol name) assoc
211 | > mkBinOp nm a b = BinaryOp a nm b
212 | > prefix name = E.Prefix (PrefixOp name <$ symbol name)
213 |
214 | TODO: expand and explain the bits.
215 |
216 | Here is the term parser.
217 |
218 | > term :: Parser SimpleExpr
219 | > term = var <|> num <|> parens
220 |
221 | > num :: Parser SimpleExpr
222 | > num = Num <$> integer
223 |
224 | > var :: Parser SimpleExpr
225 | > var = Var <$> identifier
226 |
227 | > parens :: Parser SimpleExpr
228 | > parens = between (symbol "(") (symbol ")") simpleExpr
229 |
230 | TODO: write lots of parsing examples, including parse failures with
231 | ambiguity.
232 |
233 | issue: double prefix op, link to bug on parsec bug tracker.
234 |
235 | The source in Text.Parsec.Expr is not too big. You can have a look and
236 | try to understand it. There are several standard approaches in parsing
237 | theory to parse expressions with data driven precendences and
238 | associativity. I don't know which one Text.Parsec.Expr uses, but if
239 | you find these and read about them, then the source of
240 | Text.Parsec.Expr might be a bit more understandable.
241 |
--------------------------------------------------------------------------------
/TextParsecPerm.lhs:
--------------------------------------------------------------------------------
1 |
2 | [[permutation-parsing]]
3 | = Permutation parsing
4 |
5 | This can parse a bunch of different things in any order. TODO:
6 | examples.
7 |
--------------------------------------------------------------------------------
/TextParsecToken.lhs:
--------------------------------------------------------------------------------
1 |
2 | [[token-parsing]]
3 | = Token parsing
4 |
5 | This can be used quickly create a set of token parsers handling lots
6 | of little issues which you otherwise have to deal with manually.
7 |
8 |
--------------------------------------------------------------------------------
/VerySimpleExpressions.lhs:
--------------------------------------------------------------------------------
1 |
2 | [[very-simple-expression-parsing]]
3 | = Very simple expression parsing
4 |
5 | In this tutorial we will develop a parser for a very simple expression
6 | language, and start learning about the set of combinators which comes
7 | with Parsec.
8 |
9 | > import Text.Parsec (ParseError)
10 | > import Text.Parsec.String (Parser)
11 | > import Text.Parsec.String.Parsec (try)
12 | > import Text.Parsec.String.Char (oneOf, char, digit, satisfy)
13 | > import Text.Parsec.String.Combinator (many1, choice, chainl1)
14 | > import Control.Applicative ((<|>), many)
15 | > import Control.Monad (void)
16 | > import Data.Char (isLetter, isDigit)
17 | > import FunctionsAndTypesForParsing
18 |
19 | == num
20 |
21 | The first element we will have in this expression language is positive
22 | integral numbers:
23 |
24 | > numberExamples :: [(String,Integer)]
25 | > numberExamples = [("1", 1)
26 | > ,("23", 23)]
27 |
28 | TODO: make examples with parsing failures for all of the example
29 | scripts below?
30 |
31 | To parse a number, we need to parse one or more digits, and then read
32 | the resulting string. We can use the combinator `many1` to help with
33 | this. We will also use do notation.
34 |
35 | > num :: Parser Integer
36 | > num = do
37 | > n <- many1 digit
38 | > return (read n)
39 |
40 | Let's try it out.
41 |
42 | ```
43 | *Main> regularParse num "1"
44 | Right 1
45 |
46 | *Main> regularParse num "123456"
47 | Right 123456
48 |
49 | *Main> regularParse num "aa"
50 | Left (line 1, column 1):
51 | unexpected "a"
52 | expecting digit
53 |
54 | ```
55 |
56 | How does it work? First, we parse one or more (`many1`) digits (`digit`),
57 | and give the result the name 'n'. Then we convert the string to an
58 | integer using `read`.
59 |
60 | The `many1` function's type looks like this:
61 |
62 | ```
63 | many1 :: Parser a -> Parser [a]
64 | ```
65 |
66 | It applies the parser given one or more times, returning the result.
67 |
68 | Let's see what happens when we use the `many` combinator which parses
69 | zero or more items instead of one or more.
70 |
71 | > num1 :: Parser Integer
72 | > num1 = do
73 | > n <- many digit
74 | > return (read n)
75 |
76 | ```
77 | *Main> regularParse num1 "1"
78 | Right 1
79 |
80 | *Main> regularParse num1 "123456"
81 | Right 123456
82 |
83 | *Main> regularParse num1 "aa"
84 | Right *** Exception: Prelude.read: no parse
85 | ```
86 |
87 | == var
88 |
89 | For var, we have to decide on a syntax for the identifiers. Let's go
90 | for a common choice: identifiers must start with a letter or
91 | underscore, and then they can be followed by zero or more letters,
92 | underscores or digits in any combination.
93 |
94 | > varExamples :: [(String,String)]
95 | > varExamples = [("test", "test")
96 | > ,("_stuff", "_stuff")
97 | > ,("_1234", "_1234")]
98 |
99 | > var :: Parser String
100 | > var = do
101 | > fc <- firstChar
102 | > rest <- many nonFirstChar
103 | > return (fc:rest)
104 | > where
105 | > firstChar = satisfy (\a -> isLetter a || a == '_')
106 | > nonFirstChar = satisfy (\a -> isDigit a || isLetter a || a == '_')
107 |
108 | This time, we create two helper parsers: `firstChar`, which parses a
109 | letter or underscore, and `nonFirstChar` which parses a digit, letter
110 | or underscore. This time, we use the `many` function instead of
111 | `many1`.
112 |
113 | Try it out in ghci. I like to try things which you expect to work, and
114 | also to try things which you expect to not work and make sure you get
115 | an error.
116 |
117 | == parens
118 |
119 | The parens parser will eventually parse any expression inside
120 | parentheses. First it will just parse integers inside parentheses.
121 |
122 | > data Parentheses = Parentheses Integer
123 | > deriving (Eq,Show)
124 | >
125 | > parensExamples :: [(String, Parentheses)]
126 | > parensExamples = [("(1)", Parentheses 1)
127 | > ,("(17)", Parentheses 17)]
128 |
129 | > parens :: Parser Parentheses
130 | > parens = do
131 | > void $ char '('
132 | > e <- many1 digit
133 | > void $ char ')'
134 | > return (Parentheses (read e))
135 |
136 | There is a new function: `void`. This might be familiar to you
137 | already. This is used to ignore the result of the `char` parser, since
138 | we are not interested in this value. You can also write the function
139 | without `void`, but ghc will give you a warning if you have warnings
140 | turned on.
141 |
142 | One way of turning warnings on in ghci is to enter `:set -Wall` at the
143 | ghci prompt.
144 |
145 | > parens' :: Parser Parentheses
146 | > parens' = do
147 | > char '('
148 | > e <- many1 digit
149 | > char ')'
150 | > return (Parentheses (read e))
151 |
152 | ```
153 | *Main> :set -Wall
154 | *Main> :l "VerySimpleExpressions.lhs"
155 |
156 | ...
157 |
158 | FirstRealParsing.lhs:140:7: Warning:
159 | A do-notation statement discarded a result of type Char.
160 | Suppress this warning by saying "_ <- char '('",
161 | or by using the flag -fno-warn-unused-do-bind
162 |
163 | FirstRealParsing.lhs:142:7: Warning:
164 | A do-notation statement discarded a result of type Char.
165 | Suppress this warning by saying "_ <- char ')'",
166 | or by using the flag -fno-warn-unused-do-bind
167 |
168 | ...
169 |
170 | ```
171 |
172 | As you can see, another way to suppress the warning is to use
173 | `_ <- char '('`.
174 |
175 | One issue with this parser is that it doesn't handle whitespace:
176 |
177 | ```
178 | *Main> regularParse parens "(1)"
179 | Right (Parentheses 1)
180 |
181 | *Main> regularParse parens "( 1)"
182 | Left (line 1, column 2):
183 | unexpected " "
184 | expecting digit
185 |
186 | *Main> regularParse parens "(1 )"
187 | Left (line 1, column 3):
188 | unexpected " "
189 | expecting digit or ")"
190 | ```
191 |
192 | We will look at this issue below.
193 |
194 | == add
195 |
196 | Now we will write a little parser to parse strings like 'a+b' where a
197 | and b are numbers.
198 |
199 | > data SingleAdd = SingleAdd Integer Integer
200 | > deriving (Eq,Show)
201 | >
202 | > singleAddExamples :: [(String, SingleAdd)]
203 | > singleAddExamples = [("1+2", SingleAdd 1 2)
204 | > ,("101+202", SingleAdd 101 202)]
205 |
206 | > add :: Parser SingleAdd
207 | > add = do
208 | > e0 <- many1 digit
209 | > void $ char '+'
210 | > e1 <- many1 digit
211 | > return (SingleAdd (read e0) (read e1))
212 |
213 | It has the same whitespace issues as the parens parser.
214 |
215 | ```
216 | *Main> regularParse add "1+2"
217 | Right (SingleAdd 1 2)
218 |
219 | *Main> regularParse add "1 +2"
220 | Left (line 1, column 2):
221 | unexpected " "
222 | expecting digit or "+"
223 |
224 | ```
225 |
226 | == whitespace
227 |
228 | Here is a parser which will skip zero or more whitespace characters.
229 |
230 | > whitespace :: Parser ()
231 | > whitespace = void $ many $ oneOf " \n\t"
232 |
233 | We can use this to make our parsers handle whitespace better.
234 |
235 | ```
236 | *Main> regularParse whitespace " "
237 | Right ()
238 | *Main> regularParse whitespace " "
239 | Right ()
240 | *Main> regularParse whitespace "\t"
241 | Right ()
242 | *Main> regularParse whitespace " \n "
243 | Right ()
244 | *Main> regularParse whitespace ""
245 | Right ()
246 | ```
247 |
248 | Notice that it always succeeds.
249 |
250 | Here is the parens parser rewritten with a common approach to
251 | whitespace handling:
252 |
253 | > parensW :: Parser Parentheses
254 | > parensW = do
255 | > whitespace
256 | > void $ char '('
257 | > whitespace
258 | > e <- many1 digit
259 | > whitespace
260 | > void $ char ')'
261 | > whitespace
262 | > return (Parentheses (read e))
263 |
264 | ```
265 | *Main> regularParse parensW "(1)"
266 | Right (Parentheses 1)
267 |
268 | *Main> regularParse parensW " (1)"
269 | Right (Parentheses 1)
270 |
271 | *Main> regularParse parensW " (1 )"
272 | Right (Parentheses 1)
273 |
274 | *Main> regularParse parensW " ( 1 ) "
275 | Right (Parentheses 1)
276 | ```
277 |
278 | Looks good.
279 |
280 | In the original parsec documentation, one of the concepts mentioned is
281 | the idea of 'lexeme' parsing. This is a style in which every token
282 | parser should also consume and ignore any trailing whitespace.
283 |
284 | This is a simple convention which with a bit of care allows skipping
285 | whitespace exactly once wherever it needs to be skipped. To complete
286 | the lexeme style, we should also always skip leading whitespace at the
287 | top level only. This feels more elegant than spamming all the parsing
288 | code with many calls to `whitespace`.
289 |
290 | > lexeme :: Parser a -> Parser a
291 | > lexeme p = do
292 | > x <- p
293 | > whitespace
294 | > return x
295 |
296 | > parseWithWhitespace :: Parser a -> String -> Either ParseError a
297 | > parseWithWhitespace p = parseWithEof wrapper
298 | > where
299 | > wrapper = do
300 | > whitespace
301 | > p
302 |
303 |
304 | Here is the parens parser rewritten to use lexeme:
305 |
306 | > parensL :: Parser Parentheses
307 | > parensL = do
308 | > void $ lexeme $ char '('
309 | > e <- lexeme $ many1 digit
310 | > void $ lexeme $ char ')'
311 | > return (Parentheses (read e))
312 |
313 | ```
314 | *Main> parseWithWhitespace parensL "(1)"
315 | Right (Parentheses 1)
316 |
317 | *Main> parseWithWhitespace parensL " (1)"
318 | Right (Parentheses 1)
319 |
320 | *Main> parseWithWhitespace parensL " ( 1)"
321 | Right (Parentheses 1)
322 |
323 | *Main> parseWithWhitespace parensL " ( 1 ) "
324 | Right (Parentheses 1)
325 | ```
326 |
327 | The `parseWithWhitespace` function can also use `(>>)` to make it a
328 | bit shorter, `wrapper = whiteSpace >> p`.
329 |
330 | Here is the shorter version of this function using `(>>)`:
331 |
332 | > parseWithWhitespace' :: Parser a -> String -> Either ParseError a
333 | > parseWithWhitespace' p = parseWithEof (whitespace >> p)
334 |
335 | Try rewriting the SingleAdd parser to use `lexeme`, and test it out to
336 | convince yourself that it skips whitespace correctly.
337 |
338 | == simple expr
339 |
340 | Now we are ready to write a parser which parses simple expressions
341 | made from these components. Here is the data type for these
342 | expressions.
343 |
344 | > data SimpleExpr = Num Integer
345 | > | Var String
346 | > | Add SimpleExpr SimpleExpr
347 | > | Parens SimpleExpr
348 | > deriving (Eq,Show)
349 |
350 | It's so simple that it is almost useless at the moment.
351 |
352 | > simpleExprExamples :: [(String,SimpleExpr)]
353 | > simpleExprExamples =
354 | > [("a", Var "a")
355 | > ,("1", Num 1)
356 | > ,("2 + 3", Add (Num 2) (Num 3))
357 | > ,("(42)", Parens (Num 42))]
358 |
359 | TODO: some more complex examples
360 |
361 | Here are all our component parsers with `lexeme`, and with the
362 | `SimpleExpr` constructors:
363 |
364 | > numE :: Parser SimpleExpr
365 | > numE = do
366 | > n <- lexeme $ many1 digit
367 | > return $ Num $ read n
368 |
369 | There doesn't seem to be a unique obviously correct place to put the
370 | lexeme call in the var parser:
371 |
372 | > varE :: Parser SimpleExpr
373 | > varE = lexeme $ do
374 | > fc <- firstChar
375 | > rest <- many nonFirstChar
376 | > return $ Var (fc:rest)
377 | > where
378 | > firstChar = satisfy (\a -> isLetter a || a == '_')
379 | > nonFirstChar = satisfy (\a -> isDigit a || isLetter a || a == '_')
380 |
381 | Here is an alternative, with the call to lexeme in a different place,
382 | but gives effectively the same function.
383 |
384 | > varE' :: Parser SimpleExpr
385 | > varE' = do
386 | > fc <- firstChar
387 | > rest <- lexeme $ many nonFirstChar
388 | > return $ Var (fc:rest)
389 | > where
390 | > firstChar = satisfy (\a -> isLetter a || a == '_')
391 | > nonFirstChar = satisfy (\a -> isDigit a || isLetter a || a == '_')
392 |
393 | > parensE :: Parser SimpleExpr
394 | > parensE = do
395 | > void $ lexeme $ char '('
396 | > e <- lexeme $ many1 digit
397 | > void $ lexeme $ char ')'
398 | > return $ Parens $ Num $ read e
399 |
400 | In the parens parser, we can reuse the `numE` parser like this:
401 |
402 | > parensE' :: Parser SimpleExpr
403 | > parensE' = do
404 | > void $ lexeme $ char '('
405 | > e <- numE
406 | > void $ lexeme $ char ')'
407 | > return $ Parens e
408 |
409 | Here is the add parser using `numE` also.
410 |
411 | > addE :: Parser SimpleExpr
412 | > addE = do
413 | > e0 <- numE
414 | > void $ lexeme $ char '+'
415 | > e1 <- numE
416 | > return $ Add e0 e1
417 |
418 | === choice
419 |
420 | To combine these, we can use an operator called `(<|>)`:
421 |
422 | > numOrVar :: Parser SimpleExpr
423 | > numOrVar = numE <|> varE
424 |
425 | It tries the first parser, and it if fails (without consuming any
426 | input), it tries the second parser. More about the 'consuming input'
427 | concept later.
428 |
429 | Here is another way to write the numOrVar parser:
430 |
431 | > numOrVar' :: Parser SimpleExpr
432 | > numOrVar' = choice [numE,varE]
433 |
434 | `choice` is just wrapper around `(<|>)`. You can choose which one to
435 | use based on which is more readable in each particular case.
436 |
437 | ```
438 | *Main> parseWithWhitespace numOrVar "a"
439 | Right (Var "a")
440 |
441 | *Main> parseWithWhitespace numOrVar "1"
442 | Right (Num 1)
443 |
444 | *Main> parseWithWhitespace numOrVar "!"
445 | Left (line 1, column 1):
446 | unexpected "!"
447 | expecting digit
448 | ```
449 |
450 | Here is the first version of the simpleExpr parser:
451 |
452 | > simpleExpr :: Parser SimpleExpr
453 | > simpleExpr = numE <|> varE <|> addE <|> parensE
454 |
455 | ```
456 | *Main> parseWithWhitespace simpleExpr "12"
457 | Right (Num 12)
458 |
459 | *Main> parseWithWhitespace simpleExpr "aa"
460 | Right (Var "aa")
461 |
462 | *Main> parseWithWhitespace simpleExpr "1+2"
463 | Left (line 1, column 2):
464 | unexpected '+'
465 | expecting digit or end of input
466 |
467 | *Main> parseWithWhitespace simpleExpr "(1)"
468 | Right (Parens (Num 1))
469 |
470 | *Main> parseWithWhitespace simpleExpr "(aa)"
471 | Left (line 1, column 2):
472 | unexpected "a"
473 | expecting digit
474 |
475 | ```
476 |
477 | It works well for some of the parsers. One problem is that the `addE`
478 | and `parensE` parsers don't parse general expressions as the
479 | components, but just `numE`. Another problem is that the `addE`
480 | doesn't work at all: the `numE` parser parses the first number, and
481 | the `addE` parser is never tried. This is an example of `(<|>)` not
482 | trying the second parser if the first parser succeeds, even if a later
483 | alternative would consume more input or successfully parse the whole
484 | input.
485 |
486 | Let's try and rearrange the order:
487 |
488 | > simpleExpr1 :: Parser SimpleExpr
489 | > simpleExpr1 = addE <|> numE <|> varE <|> parensE
490 |
491 | ```
492 | *Main> parseWithWhitespace simpleExpr1 "12"
493 | Left (line 1, column 3):
494 | unexpected end of input
495 | expecting digit or "+"
496 |
497 | *Main> parseWithWhitespace simpleExpr1 "aa"
498 | Right (Var "aa")
499 |
500 | *Main> parseWithWhitespace simpleExpr1 "1+2"
501 | Right (Add (Num 1) (Num 2))
502 |
503 | *Main> parseWithWhitespace simpleExpr1 "(1)"
504 | Right (Parens (Num 1))
505 |
506 | ```
507 |
508 | We swapped one problem for another. Let's fix this using the `try`
509 | function.
510 |
511 |
512 | > simpleExpr2 :: Parser SimpleExpr
513 | > simpleExpr2 = try addE <|> numE <|> varE <|> parensE
514 |
515 | ```
516 | *Main> parseWithWhitespace simpleExpr2 "12"
517 | Right (Num 12)
518 |
519 | *Main> parseWithWhitespace simpleExpr2 "aa"
520 | Right (Var "aa")
521 |
522 | *Main> parseWithWhitespace simpleExpr2 "1+2"
523 | Right (Add (Num 1) (Num 2))
524 |
525 | *Main> parseWithWhitespace simpleExpr2 "(1)"
526 | Right (Parens (Num 1))
527 | ```
528 |
529 | Now everything seems to work fine. The `try` function is very powerful
530 | and easy to use, and can be used where in a more traditional parsing
531 | approach you would have to use left factoring or something else.
532 |
533 | The `try` function implements backtracking. When this is used with
534 | `(<|>)`, it means that if the first parser fails, it will undo the
535 | consumed input and carry on with the next option, instead of failing
536 | completely. This works even if the `try` is nested deeply within the
537 | first parser given to `(<|>)`.
538 |
539 | `try` has its downsides (some of which we will see later), and I
540 | usually try to minimise its use or eliminate it completely. I found I
541 | often got into a complete mess when I used `try` too much when writing
542 | parsers for something a little tricky like SQL, and that although
543 | doing some left-factoring appeared at first to be tedious and appeared
544 | to make the code less readable, I eventually decided that for me it
545 | made the code more readable since what was happening was more
546 | transparent.
547 |
548 | Now we are going to fix this parser to parse arbitrarily nested
549 | expressions. In a way, the method used will roughly mean we are left
550 | factoring the `numE` and `addE` common prefix.
551 |
552 | Here is the naive implementation:
553 |
554 | > parensE3 :: Parser SimpleExpr
555 | > parensE3 = do
556 | > void $ lexeme $ char '('
557 | > e <- simpleExpr3
558 | > void $ lexeme $ char ')'
559 | > return $ Parens e
560 |
561 |
562 | > addE3 :: Parser SimpleExpr
563 | > addE3 = do
564 | > e0 <- simpleExpr3
565 | > void $ lexeme $ char '+'
566 | > e1 <- simpleExpr3
567 | > return $ Add e0 e1
568 |
569 |
570 | > simpleExpr3 :: Parser SimpleExpr
571 | > simpleExpr3 = try addE3 <|> numE <|> varE <|> parensE3
572 |
573 | If you run this parser, it will enter an infinite loop, since
574 | `simpleExpr3` and `addE3` will keep calling each other recursively
575 | without making any progress.
576 |
577 | ```
578 | *Main> parseWithWhitespace simpleExpr3 "a+b"
579 | C-c Interrupted.
580 | ```
581 | Let's try without `add`.
582 |
583 | > parensE4 :: Parser SimpleExpr
584 | > parensE4 = do
585 | > void $ lexeme $ char '('
586 | > e <- simpleExpr4
587 | > void $ lexeme $ char ')'
588 | > return $ Parens e
589 |
590 | > simpleExpr4 :: Parser SimpleExpr
591 | > simpleExpr4 = numE <|> varE <|> parensE4
592 |
593 | ```
594 | *Main> parseWithWhitespace simpleExpr4 "a"
595 | Right (Var "a")
596 |
597 | *Main> parseWithWhitespace simpleExpr4 "1"
598 | Right (Num 1)
599 |
600 | *Main> parseWithWhitespace simpleExpr4 "(1)"
601 | Right (Parens (Num 1))
602 |
603 | *Main> parseWithWhitespace simpleExpr4 "((a))"
604 | Right (Parens (Parens (Var "a")))
605 |
606 | ```
607 |
608 | At least this part seems to work OK.
609 |
610 | Let's try to stop the add parser from calling itself indirectly:
611 |
612 | Here is a parameterized parens parser where we supply the nested
613 | expression parser as an argument. This is used here to try to make the
614 | code easier to follow and avoid rewriting this parser out again and
615 | again in full.
616 |
617 | > parensEN :: Parser SimpleExpr -> Parser SimpleExpr
618 | > parensEN simpleExprImpl = do
619 | > void $ lexeme $ char '('
620 | > e <- simpleExprImpl
621 | > void $ lexeme $ char ')'
622 | > return $ Parens e
623 |
624 | Here is a new parser, which parses expressions except add.
625 |
626 | > term :: Parser SimpleExpr -> Parser SimpleExpr
627 | > term simpleExprImpl = numE <|> varE <|> parensEN simpleExprImpl
628 |
629 | > term5 :: Parser SimpleExpr
630 | > term5 = term simpleExpr5
631 |
632 | > addE5 :: Parser SimpleExpr
633 | > addE5 = do
634 | > e0 <- term5
635 | > void $ lexeme $ char '+'
636 | > e1 <- term5
637 | > return $ Add e0 e1
638 |
639 | > simpleExpr5 :: Parser SimpleExpr
640 | > simpleExpr5 = try addE5 <|> term5
641 |
642 | ```
643 | *Main> parseWithWhitespace simpleExpr5 "1"
644 | Right (Num 1)
645 |
646 | *Main> parseWithWhitespace simpleExpr5 "a"
647 | Right (Var "a")
648 |
649 | *Main> parseWithWhitespace simpleExpr5 "(a)"
650 | Right (Parens (Var "a"))
651 |
652 | *Main> parseWithWhitespace simpleExpr5 "1+2"
653 | Right (Add (Num 1) (Num 2))
654 |
655 | *Main> parseWithWhitespace simpleExpr5 "1+a"
656 | Right (Add (Num 1) (Var "a"))
657 |
658 | *Main> parseWithWhitespace simpleExpr5 "(1+a)"
659 | Right (Parens (Add (Num 1) (Var "a")))
660 |
661 | *Main> parseWithWhitespace simpleExpr5 "1+a+b"
662 | Left (line 1, column 4):
663 | unexpected '+'
664 | expecting end of input
665 | ```
666 |
667 | Almost. Let's see what happens when the second `term` in `add` is
668 | changed to the general expression parser.
669 |
670 | > term6 :: Parser SimpleExpr
671 | > term6 = term simpleExpr6
672 |
673 | > addE6 :: Parser SimpleExpr
674 | > addE6 = do
675 | > e0 <- term6
676 | > void $ lexeme $ char '+'
677 | > e1 <- simpleExpr6
678 | > return $ Add e0 e1
679 |
680 | > simpleExpr6 :: Parser SimpleExpr
681 | > simpleExpr6 = try addE6 <|> term6
682 |
683 | ```
684 | *Main> parseWithWhitespace simpleExpr6 "a + b + c"
685 | Right (Add (Var "a") (Add (Var "b") (Var "c")))
686 | ```
687 |
688 | Maybe it looks like we've made it. But there is a problem. We've
689 | parsed the + operator as if it has right associativity:
690 |
691 | ```
692 | a + b + c -> a + (b + c)
693 | ```
694 |
695 | But it should be left associative:
696 |
697 | ```
698 | a + b + c -> (a + b) + c
699 | ```
700 |
701 | Let's left factor the parsing and fix this:
702 |
703 | > term7 :: Parser SimpleExpr
704 | > term7 = term simpleExpr7
705 |
706 | > simpleExpr7 :: Parser SimpleExpr
707 | > simpleExpr7 = do
708 | > -- first parse a term
709 | > e <- term7
710 | > -- then see if it is followed by an '+ expr' suffix
711 | > maybeAddSuffix e
712 | > where
713 | > -- this function takes an expression, and parses a
714 | > -- '+ expr' suffix, returning an Add expression
715 | > -- it recursively calls itself via the maybeAddSuffix function
716 | > addSuffix e0 = do
717 | > void $ lexeme $ char '+'
718 | > e1 <- term7
719 | > maybeAddSuffix (Add e0 e1)
720 | > -- this is the wrapper for addSuffix, which adapts it so that if
721 | > -- addSuffix fails, it returns just the original expression
722 | > maybeAddSuffix e = addSuffix e <|> return e
723 |
724 | ```
725 | *Main> parseWithWhitespace simpleExpr7 "a + b + c"
726 | Right (Add (Add (Var "a") (Var "b")) (Var "c"))
727 | ```
728 |
729 | Now the parser seems to work for everything it should.
730 |
731 | There is a combinator function in Parsec we can use which abstracts
732 | this sort of pattern, `chainl1`.
733 |
734 | > simpleExpr8 :: Parser SimpleExpr
735 | > simpleExpr8 = chainl1 term8 op
736 | > where
737 | > op = do
738 | > void $ lexeme $ char '+'
739 | > return Add
740 | > term8 = term simpleExpr8
741 |
742 | How does this work? Here is the type of `chainl1`:
743 |
744 | ```haskell
745 | chainl1 :: Parser a -> Parser (a -> a -> a) -> Parser a
746 | chainl1 term op = ...
747 | ```
748 |
749 | The type of the Add constructor in pseudocode is:
750 |
751 | ```haskell
752 | Add :: SimpleExpr -> SimpleExpr -> SimpleExpr
753 | ```
754 |
755 | The `op` parser here now just parses the operator itself, i.e. '+'
756 | (and not the second expression like our simpleExpr7 parser). The
757 | return from the `op` function is a function which accepts two elements
758 | and combines them using the appropriate operator representation. In
759 | this case, the represenation is the `Add` constructor.
760 |
761 | You can look at the source
762 |
763 | and see if you can understand how it works. If you can't work it out,
764 | you could come back to it later when you have more experience writing
765 | parsing code.
766 |
767 | == Testing with the examples
768 |
769 | TODO: write a little manual tester that accepts a parser and a list of
770 | examples, and checks they all parse correctly.
771 |
772 | == Testing with quickcheck
773 |
774 | Let's see if we can check with quickcheck. It's a bit tricky testing
775 | parsers in this way, but one way to do something useful is to generate
776 | random asts, convert them to concrete syntax, parse them, and check
777 | the result. We can write a simple 'pretty printer' to convert an ast
778 | to concrete syntax.
779 |
780 | === a pretty printer
781 |
782 | TODO: a really simple pretty printer just pasting strings together, no
783 | layout.
784 |
785 | === the quick check code
786 |
787 | TODO: write a quickcheck property and arbitary instance and show
788 | running it at the ghci prompt
789 |
790 |
--------------------------------------------------------------------------------
/main.css:
--------------------------------------------------------------------------------
1 | h1, h2 {
2 | display:block;
3 | background-color: #f0f0f0;
4 | border-top: thin #c0c0c0 solid;
5 | /*position:relative;*/
6 | padding-left:1ex;
7 | /*z-index: -10;*/
8 | }
9 | h1 {
10 | font-size: x-large;
11 | /*left: -3ex;*/
12 | margin-top: 3ex;
13 | /*width: 100%;*/
14 | }
15 | h2 {
16 | font-size: large;
17 | /*left: -1.5ex;*/
18 | margin-top: 1.5ex;
19 | /*width: 100%;*/
20 | }
21 | body {
22 | margin-left: 5em;
23 | margin-right: 5em;
24 | margin-bottom: 5em;
25 | margin-top: 2em;
26 | }
27 | #TOC {
28 | float:right;
29 | z-index:10;
30 | background-color: #f0f0f0;
31 | border: thin #c0c0c0 solid;
32 | padding:2ex;
33 | }
34 | .header {
35 | /*position:relative;
36 | left: -4ex;*/
37 | border-top: thin #c0c0c0 solid;
38 | border-bottom: thin #c0c0c0 solid;
39 | display:inline;
40 | padding: 1ex;
41 | background-color: #f0f0f0;
42 | font-weight: bold;
43 | }
44 | .footer {
45 | text-align: center;
46 | font-size: small;
47 | }
48 | pre {
49 | padding: 0.5ex;
50 | }
51 |
52 |
53 | pre {
54 | background-color: #f0f0f0;
55 | }
56 |
57 | /*
58 | .SqlPostgresql pre.sourceCode {
59 | padding: 0.5em;
60 | background-color: #f0f6f6;
61 | }
62 | .sql pre.sourceCode {
63 | padding: 0.5em;
64 | background-color: #f0f6f6;
65 | }
66 | .GeneratedSql .SqlPostgresql pre.sourceCode, .SqlPostgresql .GeneratedSql pre.sourceCode {
67 | padding: 0.5em;
68 | background-color: #f0f6e0;
69 | }
70 | .UnusedSql .SqlPostgresql pre.sourceCode, .SqlPostgresql .UnusedSql pre.sourceCode {
71 | padding: 0.5em;
72 | background-color: #e9e9e9;
73 | }
74 | .haskell,.Haskell pre.sourceCode {
75 | background-color: #f5f5d9;
76 | }
77 |
78 | .sh pre.sourceCode {
79 | padding: 0.5em;
80 | background-color: #f0f0f0;
81 | }
82 | */
83 | table, tr, td {
84 | border-collapse:collapse;
85 | cell-padding:2px;
86 | cell-spacing:2px;
87 | /* padding:2px
88 | spacing:2px
89 | margin:2px*/
90 | vertical-align:top;
91 | }
92 | td pre {
93 | width: 98%;
94 | height: 98%;
95 | vertical-align:top;
96 | }
97 | table {
98 | width:100%;
99 | table-layout:fixed;
100 | }
101 | td {
102 | width: 50%;
103 | vertical-align:top;
104 | overflow:auto;
105 | }
106 | hr {
107 | border: 0;
108 | color: black;
109 | background-color: black;
110 | height: 1px;
111 | width: 75%;
112 | }
113 |
114 | .tablediv {
115 | width:100%;
116 | }
117 |
118 | /* higlighting kate */
119 |
120 | table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode, table.sourceCode pre
121 | { /*margin: 2; padding: 2; border: 2; */ vertical-align: baseline; border: none; }
122 | td.lineNumbers { border-right: 1px solid #AAAAAA; text-align: right; color: #AAAAAA; padding-right: 5px; padding-left: 5px; }
123 | td.sourceCode { padding-left: 5px; }
124 | pre.sourceCode { }
125 | pre.sourceCode span.Normal { }
126 | pre.sourceCode span.Keyword { color: #007020; font-weight: bold; }
127 | pre.sourceCode span.DataType { color: #902000; }
128 | pre.sourceCode span.DecVal { color: #40a070; }
129 | pre.sourceCode span.BaseN { color: #40a070; }
130 | pre.sourceCode span.Float { color: #40a070; }
131 | pre.sourceCode span.Char { color: #4070a0; }
132 | pre.sourceCode span.String { color: #4070a0; }
133 | pre.sourceCode span.Comment { color: #60a0b0; font-style: italic; }
134 | pre.sourceCode span.Others { color: #007020; }
135 | pre.sourceCode span.Alert { color: red; font-weight: bold; }
136 | pre.sourceCode span.Function { color: #06287e; }
137 | pre.sourceCode span.RegionMarker { }
138 | pre.sourceCode span.Error { color: red; font-weight: bold; }
139 |
--------------------------------------------------------------------------------
/make_website.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e
4 |
5 | # this file is used to render the html via asciidoc from the source
6 | # files and the README.asciidoc
7 |
8 | mkdir -p build
9 |
10 | set +e
11 | rm build/IntroToParsing.lhs
12 | set -e
13 |
14 | cat GettingStarted.lhs VerySimpleExpressions.lhs ApplicativeStyle.lhs CombinatorReview.lhs FunctionsAndTypesForParsing.lhs TextParsecExpr.lhs AnIssueWithTokenParsers.lhs TextParsecPerm.lhs TextParsecToken.lhs ValueExpressions.lhs QueryExpressions.lhs FromClause.lhs SimpleSQLQueryParser0.lhs PrettyPrinting0.lhs ErrorMessages.lhs > build/IntroToParsing.lhs
15 |
16 | echo > build/intro_to_parsing.asciidoc
17 | echo :toc: right >> build/intro_to_parsing.asciidoc
18 | echo :sectnums: >> build/intro_to_parsing.asciidoc
19 | echo :toclevels: 10 >> build/intro_to_parsing.asciidoc
20 | echo :source-highlighter: pygments >> build/intro_to_parsing.asciidoc
21 | echo >> build/intro_to_parsing.asciidoc
22 |
23 | cat README.asciidoc | runhaskell render/FixReadme.lhs >> build/intro_to_parsing.asciidoc
24 | cat build/IntroToParsing.lhs | runhaskell render/Render.lhs >> build/intro_to_parsing.asciidoc
25 |
26 | cat build/intro_to_parsing.asciidoc | asciidoctor -d book - | runhaskell AddLinks.lhs > build/index.html
27 |
--------------------------------------------------------------------------------
/render/FixReadme.lhs:
--------------------------------------------------------------------------------
1 |
2 | Fix the README for the html genereation
3 |
4 | > import Data.List
5 |
6 | > main :: IO ()
7 | > main = do
8 | > ls <- getContents
9 | > putStrLn $ unlines $ map fixLinks $ lines ls
10 |
11 | > fixLinks x | "link:" `isPrefixOf` x = []
12 | > fixLinks x | "// <<" `isPrefixOf` x = drop 3 x
13 | > fixLinks x = x
14 |
--------------------------------------------------------------------------------
/render/Render.lhs:
--------------------------------------------------------------------------------
1 |
2 | This code is to take the .lhs and add some formatting so it renders
3 | nicely with asciidoctor.
4 |
5 | TODO
6 |
7 | concat docs then render as multiple pages
8 | get the inter/intra doc links working
9 |
10 | find a better fix for the * issue
11 |
12 | > import System.Environment
13 | > import Data.Char
14 | > import System.IO
15 | > import System.FilePath
16 | > import Data.List
17 |
18 | > main :: IO ()
19 | > main = do
20 | > --[fn] <- getArgs
21 | > src <- getContents -- readFile fn
22 | > --let title = makeTitle $ takeBaseName fn
23 | > --putStrLn $ ":toc: right\n:toclevels: 8\n:sectnums:\n"
24 | > --putStrLn "= Introduction to parsing in Haskell with Parsec\n"
25 | > putStrLn . unlines . map fixHeadings
26 | > . addBlocks False . lines $ src
27 | > where
28 | > makeTitle (c:c1:cs) | isUpper c && isUpper c1 =
29 | > let (rest,cs') = span isUpper cs
30 | > in case (rest,cs') of
31 | > ([],_) -> ' ' : c : ' ' : c1 : makeTitle cs'
32 | > (_, d:_) | isDigit d -> ' ' : c : c1 :
33 | > (rest ++ " " ++ makeTitle cs')
34 | > _ -> ' ' : c : c1 : (init rest ++ " " ++ [last rest]
35 | > ++ makeTitle cs')
36 | > makeTitle (c:cs) | isUpper c = ' ' : toUpper c : makeTitle cs
37 | > makeTitle (c:cs) = c : makeTitle cs
38 | > makeTitle [] = []
39 | > fixHeadings ('=':xs) = '=':'=':xs
40 | > fixHeadings xs = xs
41 | > addBlocks False (l:ls)
42 | > | isLiterate l =
43 | > "[source,haskell]\n----" : stripLiterate l
44 | > : addBlocks True ls
45 | > | otherwise = l : addBlocks False ls
46 |
47 | > addBlocks True (l:ls)
48 | > | isLiterate l =
49 | > stripLiterate l : addBlocks True ls
50 | > | otherwise = "----" : l : addBlocks False ls
51 | > addBlocks _ [] = []
52 | > stripLiterate ('>':' ':l) = l
53 | > stripLiterate (">") = ""
54 | > stripLiterate l = l
55 | > isLiterate ('>':_) = True
56 | > isLiterate _ = False
57 |
58 | escape asterisk: rules worked out by experiment. If there is an
59 | asterisk on a non code block line, and there are more asterisks on the
60 | line, then put a backslash before the first asterisk only to stop it
61 | from being interpreted as bold formatting. Not sure why only the first
62 | one should have a backslash, or why if you put a backslash in front of
63 | subsequence asterisks, then the backslash is rendered ...
64 |
65 | Doesn't work, need to parse paragraphs not lines, todo, and need to
66 | make sure it doesn't replace inside blocks
67 |
68 | > {-escapeAsterisk l
69 | > | length (elemIndices '*' l) > 1 =
70 | > let (a,b) = break (=='*') l
71 | > in a ++ ('\\':b)
72 | > | otherwise = l-}
73 |
--------------------------------------------------------------------------------
/source_text:
--------------------------------------------------------------------------------
1 | correct
2 |
--------------------------------------------------------------------------------