├── .gitignore
├── LICENSE
├── NOTES.txt
├── examples
    ├── adder.src
    ├── fizzbuzz.src
    ├── fizzbuzz2.src
    └── fizzbuzz3.src
├── shell.nix
├── simplecompiler.cabal
├── src
    ├── AST.hs
    ├── IR.hs
    ├── LLVM.hs
    ├── Main.hs
    ├── MyPrelude.hs
    ├── Name.hs
    ├── Pretty.hs
    ├── Token.hs
    └── Type.hs
└── stack.yaml


/.gitignore:
--------------------------------------------------------------------------------
 1 | dist
 2 | dist-*
 3 | cabal-dev
 4 | *.o
 5 | *.hi
 6 | *.chi
 7 | *.chs.h
 8 | *.dyn_o
 9 | *.dyn_hi
10 | .hpc
11 | .hsenv
12 | .cabal-sandbox/
13 | cabal.sandbox.config
14 | *.prof
15 | *.aux
16 | *.hp
17 | *.eventlog
18 | .stack-work/
19 | cabal.project.local
20 | .HTF/
21 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 Gábor Lehel
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/NOTES.txt:
--------------------------------------------------------------------------------
  1 | TITLE: Compiling first-class functions as programs at runtime
  2 | 
  3 | ABSTRACT:
  4 | A difficult problem when writing a program with the goal of high performance is if some crucial input or parameter only becomes known at runtime, but if the program uses that input in its computations as a completely unknown dynamic value, it is undesirably inefficient. An example for illustration is calculations such as matrix multiplication which can implemented and compiled more efficiently when the size of the matrix is known at compile time, but where, unfortunately, the matrix and its size only become known at runtime.
  5 | 
  6 | The usual solution to this problem at present is to use techniques such as staged compilation and program generation (forms of metaprogramming), where the program constructs a second program at runtime, building the parameters and input it has received into it, and then compiles and executes the result. The drawback of this approach is that it requires the first program to construct the second one at a syntactic level: instead of writing the code for an algorithm directly, one must write code to construct the syntactic representation of a program which will perform the algorithm, which is unwieldy.
  7 | 
  8 | We implement a compiler for a small language with first-class functions, in which the compiler itself is available as a runtime function `compile: (a -> b) -> (a -> b)`, which interprets a function value together with its closed-over environment as a program, where the values in the environment are now "known at compile time", and recompiles it into a form where those values have been directly inlined and specialized into the resulting function's code. This `compile` function is semantically the identity function, but the returned function will execute faster than the one that was passed in. With this available, it is sufficient to write the code for an algorithm directly and wrap it up in a function value partially applying it to the input, and pass that to `compile`, instead of having to deal with syntactic representations. We will evaluate the effectiveness of this approach by benchmarking small programs written in our language, comparing cases where a given input is handled as a runtime value "normally", where the same value is inlined into the source code of the program by hand, and where the input is handled as a runtime value but optimized using `compile`.
  9 | 
 10 | 
 11 | Problem: specializing program on values you receive at runtime
 12 | Current solutions: metaprogramming, staging, program generation
 13 | Problem with those: Syntactic
 14 | Our solution: compile :: (a -> b) -> (a -> b)
 15 | Investigate with benchmarks etc.
 16 | 
 17 | Re-opening Closures
 18 | Andrew W. Appel
 19 | https://www.cs.princeton.edu/~appel/papers/reo.pdf
 20 | 
 21 | CONSTRAINTS
 22 |   Compiler has to work as both
 23 |     Command-line application
 24 |     Library loaded into the compiled application (hopefully statically?)
 25 |   Compiler has to be able to work with input of both
 26 |     Text files containing source code
 27 |     Closures/values in memory with the ABI of the compiled program
 28 |   Compiler has to able to produce output of both
 29 |     Binary executable programs (and/or libraries)
 30 |     In-memory functions with the ABI of the compiled program
 31 |       (this, in turn, must include its "source", just all other compiled functions!)
 32 | 
 33 | Is this related to NBE by any chance?
 34 |   Literals are related to normal forms...
 35 |   We have
 36 |     (compilerBackend :: Literal -> Value)
 37 |     quote :: Value -> Literal
 38 |     runTheProgram :: Expr ->  Value ??
 39 | 
 40 | PLAN:
 41 |   Make a "vertical" prototype first, then expand "horizontally"
 42 |   I.e. first make the smallest possible thing working "all the way through", then add more features and capabilities
 43 |     This extends to the whole thesis
 44 |     Get to a "minimum viable thesis" first, then extend/improve it as time allows
 45 |   Dynamically typed, or a simple static type system (e.g. no polymorphism)?
 46 |     The type of `compile` itself would have be polymorphic(?), but we could just handle it as a primitive
 47 |   What types do we want? Functions... ints, structs, arrays, bool, enums? Depends on the use cases!
 48 |     At first just: int and function(int) -> int? or do we need function(a) -> a?
 49 |   At first just stdin, stdout, stderr, error code as interface to external world?
 50 |   -----
 51 |   Compiler in Haskell with llvm-hs
 52 |   Separate frontend and backend
 53 |   Backend linked into compiled application
 54 |     foreign exports a `compile` function taking UnserIR (in what format?) as input
 55 |       (not a ptr to the closure itself! conversion to UnserIR happens on the generated-code side of things, at least according to the current plan)
 56 |       returns a C pointer to the compiled function
 57 |       the compiled program will contain a shim function around `compile()` to do the UnserIR conversions and stuff
 58 |   Stages: Source --Lex--> Syntax --Parse--> AST --Check--> UnserIR --Trans--> LLVM IR --LLVM--> Machine code
 59 |                  ^^^^^^^^^^^^^^^^^FRONTEND^^^^^^^^^^^^^^^^         ^^^^^^^^^^^BACKEND^^^^^^^^^^^
 60 |   At a negative offset from the machine code of generated functions, we store a pointer to a description table
 61 |     Contains two things:
 62 |       * UnserIR of the function (in some format.. can we avoid having to serialize/deserialize?)
 63 |         * Can we use compact regions for this?
 64 |           * Seems like yes, but it's only in GHC 8.2
 65 |             * But it'll be released in June?
 66 |               * But it seems to only work for FilePaths not Ptrs/ByteStrings...
 67 |               * And it requires "info table" pointers to be in the exact same place, not sure this is satisfied btwn run of compiler vs. program
 68 |         * If we want to be hardcore we could probably define some binary encoding and machinery around it on the Haskell side that works on it directly,
 69 |           w/o an explicit serialize/deserialize step, maybe making it nicer with view patterns + pattern synonyms
 70 |         * But let's just do some dumb & easy serialization at first
 71 |       * For each of its parameters, a pointer to a function that translates that type to a literal in UnserIR
 72 |         * QUESTION: How does this work for user-defined and generic types?
 73 |           Where do these functions "live"? When/how are they generated??
 74 |             Presumably we would generate AST or UnserIR for them (kinda like type class instances) and codegen it normally
 75 |         * For closures this will be precisely steps 1-3 below!
 76 |           * For functions embedded in structs, the function code itself will still need to be emitted globally and we just store the ptr!
 77 |             * Or is this only a concern at LLVM IR level not UnserIR?
 78 |           * Note: Unlike other types, closures on the heap may have cycles, need to handle it!
 79 |             * This will be codegenned as mutual recursion.
 80 |             * (Or heck, what about single recursion - is that any easier?)
 81 |             * Is it possible for a variable in a closure's environment to contain, somewhere within it, (a ptr/ref to) the closure itself?
 82 |           * Heck: broader problem: sharing!!
 83 |             * Three basic strategies:
 84 |               A. No sharing. Each literal is duplicated in the UnserIR as many times as it is referred to.
 85 |               B. Preserve sharing. Shared objects become `let`s, and objects occur the UnserIR exactly as many times as in the runtime object graph.
 86 |               C. Maximize sharing. All objects are hashed and refer to each other by hash. Each unique term occurs in the UnserIR only once.
 87 |                 * (Subquestion: what does "equivalent" mean. Syntactic equality? Alpha-equality?)
 88 |             * I am predisposed towards B. How do we do it?
 89 |               * How far can we leverage reference counting?
 90 |               * At a minimum we can do:
 91 |                 * If RC == 1, the term is translated in-place (as if A.)
 92 |                 * We keep a Set<Pointer> of already-translated objects
 93 |                 * If RC > 1, we look the pointer up in the set
 94 |                   * If it exists, nothing more to do
 95 |                   * If it doesn't, translate the `let`, and add the pointer to the set
 96 |                     * How do we ensure `let`s are scoped correctly? Just make everything a global?
 97 |                   * The name of the let becomes the numeric value of the pointer, which uniquely and globally identifies it
 98 |               * Is there any way we can avoid having to keep even the auxiliary Set structure around?
 99 |             * We might also just implement all three and make it a parameter. For testing/benchmarking if nothing else.
100 |           * What about "not actually closures" but pointers to static fns with an empty env?
101 |             * Do we also recompile their IR? Or just leave them as a plain fn call?
102 |             * What about fn calls to static fns as part of the code of recompiled fns?
103 |             * Probably these should be exposed as options
104 |               * Hmm... is there any way to give LLVM some extra IR with instructions to "inline this maybe, but don't actually compile it"?
105 |         * Mutable references: we need to put it into the generated code by-pointer, not its contents!
106 |           * So the we probably just turn it into a pointer literal.
107 |           * Hmmm... and we need to bump the reference count!!
108 |             * By how much?? Can we know / control it?
109 |             * Or maybe we just need to represent it as a function which bumps the reference count and returns the pointer.
110 |               * Do we need to do this for other things besides references? Theoretically no, because other types are by-value...
111 |    Algorithm of `compile()` function:
112 |      1. Get description table of fn ptr
113 |      2. For each argument available in the env, call the corresponding fn ptr to translate it to UnserIR
114 |         2a. Given the environment may contain closures, this process is recursive!
115 |      3. Transform the IR of the fn to take N fewer arguments, and replace them with global constants containing results from step 2
116 |      4. Invoke the compiler backend on the resulting IR in JIT mode
117 |        4a. Needs to contain the description table and ptr to it as well!
118 |      5. Return the resulting fn ptr
119 |    Hmm... maybe we shouldn't actually restrict it to type `(a -> b) -> (a -> b)`! Like augustss said, `a -> a` instead.
120 |      Like what if you want to partially apply / optimize multiple functions at the same time, e.g. a whole (e.g. linear algebra) API?
121 |      Put them in a record and pass that in.
122 |      So it'll compile any functions anywhere inside the structure that was passed to it.
123 |        On the output end, we should re-construct any function-less "outer structure" directly in the program instead of recompiling it into a static literal (would be kind of pointless)?
124 |          Though might be interesting to see if that works at all.
125 |          And if there's no functions in it anywhere I guess we should skip that step entirely?
126 |          We should also try to preserve sharing with the original structure where possible.
127 |        If we use the "full sharing" option this could be used to optimize memory usage even if no functions are involved.
128 |    QUESTION: when do we init/deinit the GHC runtime? Does a static constructor handle this for us?
129 |    How do we persuade LLVM to try to inline everything into the entry point fn (and not vice versa)?
130 |    Memory management:
131 |      Is there any way to determine whether a pointer points into static memory or the heap?
132 |        Maybe: https://stackoverflow.com/a/35206941
133 |      If yes we can use this to call the JIT's deallocation fn on the code ptr when deallocing closures iff it points to heap.
134 |        Although... we'd need to keep a reference count too? :\
135 |        Does the reference count always match that of the env?
136 |          Actually there isn't an env. So we can repurpose the env ptr as a ptr to a refcount?
137 |          Unless we can get LLVM to put a reference count next to the machine code for us.
138 |      Potential problem: if the JITed memory contains static/global variables, and other variables referring to them outlive the JITted closure itself
139 |        Closure dies -> whole JITted memory is deallocated -> including the global/static variables in it -> use after free
140 |        Solution 1: the reference count of the static variables redirects to the reference count for the whole JITted memory itself
141 |          This can lead to "memory leaks" in the cases where use-after-free would otherwise have resulted
142 |        Solution 2: instead of global variables, just make them let-bound at the beginning of the function, or inline at each use site
143 |          Can we tell LLVM not to turn things into globals even as an optimization? (Would it want to do that otherwise?)
144 |        Solution 3: instead of storing a static T, store a static Rc<T>, that is the JITted memory just has one reference to the global variable instead of owning it
145 |          Can we attach a 'static destructor' to the JITted memory to make it decrement the reference counts of the statics when it is deallocated??
146 |          If not, is there any reasonable way accomplish it manually?
147 |          This seems like a nice solution provided that we actually want globals.
148 |          The question from S.2. still applies here!
149 |    > Whoa, how does the JIT know about sin and cos? The answer is surprisingly simple: in this example, the JIT started execution of a function and got to a function call. It realized that the function was not yet JIT compiled and invoked the standard set of routines to resolve the function. In this case, there is no body defined for the function, so the JIT ended up calling dlsym("sin") on the Kaleidoscope process itself. Since "sin" is defined within the JIT's address space, it simply patches up calls in the module to call the libm version of sin directly.
150 |    > The LLVM JIT provides a number of interfaces for controlling how unknown functions get resolved. It allows us to establish explicit mappings between IR objects and addresses (useful for LLVM global variables that we want to map to static tables, for example), allows us to dynamically decide on the fly based on the function name, and even allows us JIT compile functions lazily the first time they're called.
151 | 
152 | BLAH
153 |   Can also take full advantage of all CPU-specific instructions!!!
154 |     This is the case even for functions that have already "been compiled at compile time" (it's worthwhile to recompile them)
155 |     http://www.agner.org/optimize/blog/read.php?i=167
156 |     Might this be relevant? https://ispc.github.io/
157 |       ispc compiles a C-based SPMD programming language to run on the SIMD units of CPUs and the Intel Xeon Phi™ architecture; it frequently provides a 3x or more speedup on CPUs with 4-wide vector SSE units and 5x-6x on CPUs with 8-wide AVX vector units, without any of the difficulty of writing intrinsics code. Parallelization across multiple cores is also supported by ispc, making it possible to write programs that achieve performance improvement that scales by both number of cores and vector unit size.
158 | 
159 | CLI: optparse-applicative? there seem to be many options here!
160 |   https://www.stackage.org/haddock/lts-9.0/optparse-generic-1.2.2/Options-Generic.html
161 |   http://hackage.haskell.org/package/cli-0.1.2/docs/Console-Options.html
162 |   http://hackage.haskell.org/package/optparse-applicative-simple
163 |   http://hackage.haskell.org/package/optparse-text
164 | 
165 | LEXING: inchworm? (scanner, lexer-applicative?)
166 | 
167 | PARSING: Earley, grammatical-parsers, trifecta?
168 |   Earley maybe...?
169 |   Attribute Grammars?? https://www.reddit.com/r/haskell/comments/6j6dtd/uu_attribute_grammar_manual/
170 |   http://teh.id.au/posts/2017/06/07/round-trip-property/index.html
171 |     round trip hedgehog/quickcheck properties for testing parsing <-> pp
172 | 
173 | PRETTY PRINTING
174 |   https://hackage.haskell.org/package/prettyprinter
175 | 
176 | PRETTY PRINTING of Show instances: groom, pretty-show, show-prettyprint, pretty-simple
177 |   http://hackage.haskell.org/package/reprinter modify AST and re-print it preserving original layout, generically
178 | 
179 | NAME MANAGEMENT: either zabt or bound
180 | 
181 | SOURCE LOCATIONS: `loc` seems the nicest!
182 |   also: srcloc, located
183 | 
184 | AST manipulation: lens?
185 | 
186 | UNIFICATION: monad-unify, unification-fd, cmu? other? handroll?
187 |   we won't need this for a while(?)
188 | 
189 | IR REPRESENTATION: CPS, SSA, ANF, Join Points, Sea-of-Nodes, Thorin, ...?
190 |   ANF with join points sounds good! (~SSA with basic block arguments)
191 |   mid-level semantic operations like clone (refcount+-), borrow, move should be explicitly represented here
192 |   nominal types -> structural types (struct, enum, function, generic, abstract, recursive, array, ref, other primitives..)
193 |   do we also want to represent indirections explicitly?
194 |   presumably not enum discriminants or refcount fields and things like that?
195 |   how do we deal with structs (unordered fields) vs tuples (ordered fields) if we want to do automatic reordering for the former?
196 |     do we want to do the reordering beforehand, so IR only has a single kind of struct?
197 |     what about the case of structural sums, where we'd want/need a discriminant of fixed size, unlike named enums?
198 |   if the IR has a closed universe of types, can we actually put (part/most of) the `quote` operation in the compiler...?
199 |   https://wingolog.org/archives/2011/07/12/static-single-assignment-for-functional-programmers
200 |   for possible future reference:
201 |     Simple and Efficient Construction of Static Single Assignment Form
202 |     http://pp.ipd.kit.edu/uploads/publikationen/braun13cc.pdf
203 | 
204 | OPTIMIZATION: Hoopl?
205 |   Does it work with ANFj?
206 |   Are there any competitors?
207 |   Can we use this for CFG analysis and checking as well? instead of just optimization?
208 |     liveness analysis, which it can do, seems like exactly what we need to implement "eager drops"
209 |   http://blog.ezyang.com/2011/02/picturing-hoopl-transferrewrite-functions/
210 |   http://blog.ezyang.com/2011/04/hoopl-guided-tour-base-system/
211 |   http://blog.ezyang.com/2011/04/hoopl-dataflow-lattices/
212 |   http://blog.ezyang.com/2011/04/hoopl-dataflow-analysis/
213 | 
214 | SERIALIZATION: store, flat, cereal? (packman?)
215 | 
216 | TESTING:
217 |   https://github.com/unisonweb/unison/tree/master/yaks/easytest
218 |   hedgehog
219 |   there was one where it automatically generated a test based on the current output?
220 | 
221 | OTHER
222 |   generic-lens
223 |   reflection, dependent-map, prim-uniq, tie-knot
224 |   hashtables{,-plus}, keys, unordered-containers, hamtmap, disjoint-sets-st
225 |   unordered-graphs, algebraic-graphs
226 |   static-hash. quickset, perfecthash/PerfectHash
227 |   intern
228 |   (transformations, TTTAS, syntactic, ho-rewriting)
229 |   (hindley-milner, boomerang, cassette)
230 |   https://github.com/ermine-language/ermine/blob/master/ermine.cabal
231 |   https://twanvl.nl/blog/haskell/traversing-syntax-trees
232 |   http://hackage.haskell.org/package/brick CLI "GUI" library
233 | 
234 | MEMORY MANAGEMENT
235 |   Reference counting? Just leak?
236 |   Just leak at first, if necessary or time allows do something more
237 | 
238 | USE CASES
239 |   Linear algebra (vector/matrix stuff)
240 |   Parsers
241 |   Interpreters?
242 |   GUI loaded from XML?
243 |   "Ágens alapú szimuláció"? Van Ndb agent amik csinálnak dolgokat, őket szimuláljuk, a számuk-viselkedésük futásidőben jön
244 |   "Agent-based simulation"? Simulating N agents which do stuff, their number and behavior is determined at runtime
245 |   Network protocol written in XML, evolving(?)
246 |   A router, the packet switching rules change dynamically
247 |   Unreal Editor
248 |   Software raytracing + effects (Pali, Open Shading Language)
249 |   Neural networks
250 |   Specializing parallel code dynamically for the available hardware/resources (splitting-into-blocks, number of threds, cache sizes etc.)
251 |   Equation solver, expression evaluator
252 |   Data processing, e.g. grep
253 |     We can first semantically "compile" the source expression into an `a -> b` function that's "algorithmically optimal"(?), and then call `compile`/`optimize` on that!
254 |       i.o.w. the kinds of optimizations we'd otherwise perform syntactically, we can perform internally w/ plain function-based transformations?
255 |     The crucial difference is this:
256 |       Suppose we have `grep :: String -> ([String] -> [String])`
257 |       `grep "foo"` can be either (a) a "no-op" partial application, OR (b) it can do actual work and return a `[String] -> [String]` that's optimal for grepping "foo"!
258 |         In the latter case, `optimize (grep "foo")` has an easier job and just needs to turn virtual calls into static ones and inline stuff, not actually partially evaluate anything
259 |     Suppose a `compile`d closure also makes static function calls. Do we also "ASTify" and re-compile *those*? That would potentially be the whole program! Is it possible & sufficient to use the same inlining heuristics as at "normal compile time" and only reconsider the source code of the statically called functions in that case?
260 |     Hmm the runtime-compiled code will need to be memory managed somehow too
261 |   database queries
262 |   FRP
263 |   constructive solid geometry
264 |   Unification/substituation in DT typechecker?
265 | 
266 | BENCHMARK:
267 |   * Program written to take runtime input
268 |   * Program with the "runtime input" hardwired into `main()`
269 |   * Program with the "runtime input" hand-specialized into the relevant functions
270 |   * Program written to take runtime input, but partially applying and `compile()`ing it at runtime
271 |     * When the `compile`d function is just a top-level function partially applied to an argument
272 |     * When the `compile`d function is a pre-generated in-memory structure of nested closures etc.
273 |   * Program written in different language (C, C++, Rust?)
274 | https://reviews.llvm.org/rL303769
275 | 
276 | (BENEFIT: Turn runtime values into static ones and dynamic calls into static calls, inlining, specialization(?), etc)
277 | 
278 | RELATED WORK
279 |   http://compilers.cs.uni-saarland.de/papers/gpce15.pdf How related is this?? (Impala)
280 | 
281 | SIMILAR TECH
282 |   Template metaprogramming
283 |     Eigen
284 |   Stream fusion?
285 |     Not really the same thing...?
286 |     Rewrite rules are more of a "has synergy" thing?
287 |       Rewrite rules only work with statically-visible things, here we make more things "statically-visible"
288 |   Macros, Template Haskell, staging?
289 |   Terra? Julia? LMS?
290 | 
291 | Optimization in the frontend?
292 |   Inlining
293 |   Specialization-on-values
294 |   Rewrite rules?
295 |   (Reference counting elision?)
296 | 
297 | CRAZY IDEAS (future directions):
298 |  * *Cross-compiling*?? (to GPU)
299 |  * Render function as source code program?
300 |    * for debugging, print to file, serialization of functions, sending them over network, ...
301 |      (but normally the `compile()` function would be the compiler's backend, not its frontend...)
302 |      (hmm, that's OK for serialization, but not debugging)
303 |  * Futamura projections
304 |  * Hmm... if this runtime recompiles/optimizes code, might there be anything analogous for data/types?
305 |    * Relatedly or not??, how might this interact with the "intensional type analysis" representation of polymorphism?
306 |      * Well, at a minimum, if a polymorphic function's been applied to a type, that is, if a function's been partially applied to
307 |        its size and offset parameters, then those'll be optimized away. Yay.
308 |      * I wonder if there's more to it...
309 | 
310 | Being able to generate function pointers at runtime could also help with C FFI?
311 | (What about C++? Probably not: think we'd need to link in clang to really be able to deal with templates...:\)
312 | 
313 | coding conventions used:
314 |  - avoid nonstandard binary operators
315 |  - use $ only if necessary to prevent parentheses spanning multiple lines
316 |  - do notation is Good
317 |  - eschew function defns by multiple bindings: use \case instead (possible exception: needing to simultaneously match on multiple args)
318 |  - only one unqualified-mass-import per module is allowed
319 |  - avoid abbreviating things where reasonably possible
320 |  - try to have consistent 4-space indents, where reasonable
321 |  - avoid repeatedly importing the same "standard" things in multiple modules
322 |  - (likewise, avoid repeatedly specifying the same language extensions)
323 |  - avoid heavy machinery unless absolutely necessary
324 |  - don't abstract without reason
325 |  - maybe I should re-allow name shadowing?
326 |  - avoid unnecessary hierarchy for modules
327 |  - use names short enough that qualifying with them doesn't hurt
328 | 
329 | 
330 | 
331 | 
332 | 
333 | 
334 | 
335 | 
336 | 
337 | 
338 | # OUTDATED NOTES ABOUT IR STRUCTURE
339 | 
340 | 
341 | -- blocks and variable decls should follow the lexically scoped structure of the source program
342 | -- jumps ("join points") should be distinguished from calls
343 | -- do we want "basic blocks"?
344 | --   yes, if we distinguish jumps from calls, and jumps are always in tail position, that means we have BBs
345 | --   wait... does that mean we have BBs or EBBs??
346 | -- what about "basic block arguments"??
347 | --   IINM these are equivalent to PHIs, and they're needed when mutating locals?? are they used for anything else?
348 | --   we don't want it for mutation I don't think
349 | --   what about for expressions like `bar(if foo { a } else { b })`?
350 | --     seems natural to represent that as a block which takes 1 argument and calls `bar()` with it?
351 | --     IINM this means blocks would always have just 1 argument, not more...? an expr can't evaluate to more than 1 thing?
352 | --       what about `baz(if foo { a } else { b }, if bar { c } else { d })`?
353 | --       if nothing else match arms would expect multiple incoming values! so we should just go with multiple regardless
354 | --         would this let us handle enum destructuring in a type-safe way?
355 | --           is a "primitive switch" construct sufficient? what about e.g. `if` guards?
356 | --         (based on join points paper: yes, this seems correct)
357 | --         in fact, if we ever have existential types we'd need polymorphic join points to destructure them!
358 | --       (the question of how to translate nested control flow is still interesting though)
359 | --          maybe: first, extract into temporaries:
360 | --            let tmp1 = if foo { a } else { b }; let tmp2 = if bar { c } else { d }; baz(tmp1, tmp2)
361 | --          then introduce join points for each with `tmp1` and `tmp2` becoming parameters?
362 | --            the second one would be within the scope of the first, with `tmp1` in scope implicitly?
363 | 
364 | -- once we add struct fields, will field access count as a Value? kinda seems like it should...
365 | --   this means the Value vs Expression distinction isn't quite the same as introduction vs. elimination forms ("literals vs figuratives")
366 | --     (should we still have that distinction at a previous stage, e.g. for typechecking?)
367 | -- what about things that require a load (not just a GEP)?
368 | --   how would recursive structs be represented?
369 | --   for `ref`s`, it seems pretty clear that load-and-store should be explicit
370 | 
371 | -- if/when we add intensional polymorphism, maybe we'd want a dependently typed IR with Types as explicit values?
372 |    -- if/when we add HKT too, it'll be `Type -> Type` values and such...
373 |    -- or maybe this should just be a succeeding pass, I'm not sure if there's anything in particular we want to do with Types at this level?
374 | -- RankNTypes: We can just restrict `compile` to monomorphic functions to start with if we don't want to figure it out yet?
375 |    -- How even would you specify "a function that can be arbitrarily polymorphic" in a signature??
376 |      -- Ah - I think with ImpredicativeTypes: as `T`.
377 |      -- Impredicativity ofc means you can instantiate type variables with polymorphic types if you want...
378 |          -- Whereas RankNTypes means only functions can be polymorphic, iirc?
379 |          -- Can impredicativity work with intensional polymorphism at all???
380 |          -- Are the questions of having polymorphic non-function-types and having impredicativity connected or separate?
381 |          -- What about the question of instantiating type variables "inside of" a type?
382 |            -- One thing we DEFINITELY can't do is instantiate `List<foreach<T> Foo<T>>` to a specific `List<Foo<UInt8>>` e.g. without having a uniform representation
383 |              -- `foreach<T> Foo<T>` would be represented as a function, while `Foo<UInt8>` would be unboxed!
384 |              -- We could somehow restrict it to types which inherently have uniform representation (e.g. refs), but it's unlikely we'd want this complexity
385 |            -- Apparent options:
386 |              -- (1) Forbid instantiating type variables inside types. Is this reasonable?
387 |                -- This is "one step down"; instantiating type variables with polytypes is a "kinding question", this is a "typing question"?
388 |              -- (2) Only functions can be polymorphic. Would a generic fn and its instantiation have compatible reprs in this case??
389 |                 -- The generic fn would be taking additional arguments so it's not clear they would be...
390 |                 -- Maybe every fn could have its first argument be to its type-args, null for monomorphic ones?
391 |                   -- (What's the perf impact?)
392 |                     -- I guess you could have this as a wrapper over the version without the arg, so it'd only hit in higher-order cases?
393 |                   -- The other impact of course is that the type-args would have to be accessed through an additional indirection
394 |                 -- Could this be folded together with the env-arg for closures or would they have to be separate?
395 |                   -- I think they'd have to be separate -- the env is determined when creating the closure, the type-args when calling it?
396 |                 -- WAIT I don't think this works at all
397 |                   -- To instantiate a generic function a type, it'd have to be partially applied to some type arguments
398 |                     -- Quite physically, with intensional polymorphism
399 |                   -- Which requires mapping over the List just the same
400 |                   -- So I guess restriction (1) is the only game in town? Does it have a name?
401 | 
402 | {-
403 | 
404 | the source program:
405 | var n = 0
406 | forever {
407 |     say("Running total:")
408 |     write(n)
409 |     let m = ask("Number to add:")
410 |     if m == 0 {
411 |         return
412 |     }
413 |     n = n + m
414 | }
415 | 
416 | should be translated to:
417 | 
418 | let n = 0
419 | block forever0() {
420 |     say("Running total:")
421 |     write(n)
422 |     let m = ask("Number to add:")
423 |     let tmp1 = (m == 0)
424 | 
425 |     if tmp1 {
426 |         jump if0true()
427 |     } else {
428 |         jump if0join()
429 |     }
430 | 
431 |     block if0true() {
432 |         return
433 |     }
434 | 
435 |     block if0join() {
436 |         let tmp2 = n + m
437 |         n = tmp2
438 |         jump forever0()
439 |     }
440 | }
441 | 
442 | seems we either have to abandon decl-before-use, or we have to invert the ordering of control flow a bit
443 | not sure which is better
444 | otoh what's the point of translating the source program to globally unique names if we go back to scoping again here???
445 |     maybe this is where it makes sense to start using a name management lib like (un)bound?
446 |     I think what requiring well-scoped names gets us is the guaranteed absence of using-unitialized-memory?
447 |     together with basic block arguments it seems ideal for also having explicit moves?
448 |       though it'd require substructural typing too - to avoid using a variable that's been moved out of...
449 |         (unless every time a move happens, it transfers control to a different block where the old variable's literally not in scope any more...)
450 |           dunno how feasible/practical that is?
451 | 
452 | 
453 | -}
454 | 
455 | TODO
456 |   clean up "P.note (P.Identifier (P.IdentInfo ..." and similar ugliness?
457 |   IR: fix syntax highlighting of `return` as a keyword
458 | 
459 | TODO
460 |   make IDs follow each other in program order
461 |   implement astname-preservation for logical ops
462 |   keep let/var distinction in IR?
463 |   make `load`s from `var`s explicit? (simplifies translation to LLVM?)
464 |   do SSA conversion on the IR?
465 |   only ASTNames should become allocas!! temporaries can just become plain operands!
466 | 
467 | Babby's First Compiler
468 |   why haskell and not rust
469 |   why first post - impostor?
470 |   Earley (how to pronounce?) + tokenizing = :(
471 |   Fortuity: parameterize over name -> class ResolveNamesIn -> Functor, Foldable, Traversable
472 |   always start trying to write as a function w/ recursion etc -- too painful, make a monad!
473 |   design of IR - ANF vs CPS, join points, scoping, SSA
474 |   Tardis monad debugging https://www.reddit.com/r/reflexfrp/comments/71q6lk/how_do_you_guys_find_accidental_infinite_loops_in/dnd4udc/
475 |   emitBlock :: m Transfer -> ...
476 |     vs. fall off into continuation (+ question about that), but doesn't work if there are arguments
477 |   thisBlock vs nextBlock question ("braiding")
478 |   tfw your IR has better highlighting than your editor :(
479 |     work on pretty printing just so I can tell whether the IR is correct
480 |   Fortuity: frontend in terms of abstract monadic interface -> can have tardis-based vs. two-pass backends
481 |   LLVM pass puzzlement: where should I use IR types, where LLVM ones?
482 |     e.g. in LLVM monad: all LLVM types
483 |   huge string literal code
484 |   Managed
485 |   what is the right name management library? what is the right command line args library?
486 |     who knows?
487 |     DON'T SOLVE A PAIN POINT BEFORE YOU HAVE ONE
488 | 
489 | 
490 | TODO
491 |   nicer fix for innermostBlock assert
492 |   make a lens alias for innermostBlock . blah?
493 |   prettyprinting lost the block descriptions, current fix is ugly, make it nicer (how did it work earlier?)


--------------------------------------------------------------------------------
/examples/adder.src:
--------------------------------------------------------------------------------
 1 | function main()
 2 | {
 3 |     var n = 0;
 4 |     forever {
 5 |         say("Running total:");
 6 |         write(n);
 7 |         let m = ask("Number to add:");
 8 |         if m == 0 {
 9 |             return;
10 |         }
11 |         n = n + m;
12 |     }
13 | }
14 | 


--------------------------------------------------------------------------------
/examples/fizzbuzz.src:
--------------------------------------------------------------------------------
 1 | function main()
 2 | {
 3 |     var number = 1;
 4 |     while number <= 100 {
 5 |         let fizz = "Fizz";
 6 |         let buzz = "Buzz";
 7 |         let fizzBuzz = "FizzBuzz";
 8 |         let isFizz = number % 3 == 0;
 9 |         let isBuzz = number % 5 == 0;
10 |         if isFizz && isBuzz {
11 |             say(fizzBuzz);
12 |         } else {
13 |             if isFizz {
14 |                 say(fizz);
15 |             } else {
16 |                 if isBuzz {
17 |                     say(buzz);
18 |                 } else {
19 |                     write(number);
20 |                 }
21 |             }
22 |         }
23 |         number = number + 1;
24 |     }
25 | }
26 | 


--------------------------------------------------------------------------------
/examples/fizzbuzz2.src:
--------------------------------------------------------------------------------
 1 | function main()
 2 | {
 3 |     var number = 1;
 4 |     while number <= 100 {
 5 |         if number % 3 == 0 && number % 5 == 0 {
 6 |             say("FizzBuzz");
 7 |         } else {
 8 |             if number % 3 == 0 {
 9 |                 say("Fizz");
10 |             } else {
11 |                 if number % 5 == 0 {
12 |                     say("Buzz");
13 |                 } else {
14 |                     write(number);
15 |                 }
16 |             }
17 |         }
18 |         number = number + 1;
19 |     }
20 | }
21 | 


--------------------------------------------------------------------------------
/examples/fizzbuzz3.src:
--------------------------------------------------------------------------------
 1 | function isFizz(number: Int) returns Bool
 2 | {
 3 |     return (number % 3 == 0);
 4 | }
 5 | 
 6 | function isBuzz(number: Int) returns Bool
 7 | {
 8 |     return (number % 5 == 0);
 9 | }
10 | 
11 | function main()
12 | {
13 |     var number = 1;
14 |     while number <= 100 {
15 |         let fizz = "Fizz";
16 |         let buzz = "Buzz";
17 |         let fizzBuzz = "FizzBuzz";
18 |         if isFizz(number) && isBuzz(number) {
19 |             say(fizzBuzz);
20 |         } else {
21 |             if isFizz(number) {
22 |                 say(fizz);
23 |             } else {
24 |                 if isBuzz(number) {
25 |                     say(buzz);
26 |                 } else {
27 |                     write(number);
28 |                 }
29 |             }
30 |         }
31 |         number = number + 1;
32 |     }
33 | }
34 | 


--------------------------------------------------------------------------------
/shell.nix:
--------------------------------------------------------------------------------
1 | with (import <unstable> {});
2 | 
3 | haskell.lib.buildStackProject {
4 |     name              = "simplecompiler";
5 |     nativeBuildInputs = [ haskell.packages.ghc862.ghc ];
6 |     buildInputs       = [ llvm_7 ];
7 | }
8 | 


--------------------------------------------------------------------------------
/simplecompiler.cabal:
--------------------------------------------------------------------------------
  1 | name:                simplecompiler
  2 | version:             0.1.0.0
  3 | -- synopsis:
  4 | -- description:
  5 | homepage:            https://github.com/glaebhoerl/simplecompiler
  6 | license:             MIT
  7 | license-file:        LICENSE
  8 | author:              Gábor Lehel
  9 | maintainer:          glaebhoerl@gmail.com
 10 | copyright:           2017 Gábor Lehel
 11 | category:            Compilers
 12 | build-type:          Simple
 13 | cabal-version:       >=1.10
 14 | -- extra-source-files:  README.md
 15 | 
 16 | executable simplecompiler
 17 |   hs-source-dirs:      src
 18 |   main-is:             Main.hs
 19 |   other-modules:       MyPrelude, Pretty, Token, AST, Name, Type, IR, LLVM
 20 | 
 21 | -- Here we define the language dialect we will be using:
 22 |   default-language:    Haskell2010
 23 |   default-extensions:  AllowAmbiguousTypes,
 24 | --                     ^^^^^^^^^^^^^^^^^^^ sounds scary, but just means you can define functions which you need `TypeApplications` to use
 25 |                        ApplicativeDo,
 26 |                        AutoDeriveTypeable,
 27 |                        BangPatterns,
 28 |                        BinaryLiterals,
 29 |                        BlockArguments,
 30 | --                     ^^^^^^^^^^^^^^ <3
 31 |                        ConstrainedClassMethods,
 32 |                        ConstraintKinds,
 33 |                        DataKinds,
 34 |                        DefaultSignatures,
 35 |                        DeriveFoldable,
 36 |                        DeriveFunctor,
 37 |                        DeriveGeneric,
 38 |                        DeriveTraversable,
 39 |                        DerivingVia,
 40 |                        EmptyCase,
 41 |                        ExistentialQuantification,
 42 |                        FlexibleContexts,
 43 |                        FlexibleInstances,
 44 |                        GADTs,
 45 |                        GeneralizedNewtypeDeriving,
 46 |                        InstanceSigs,
 47 |                        KindSignatures,
 48 |                        LambdaCase,
 49 | --                     ^^^^^^^^^^ <3
 50 |                        MultiParamTypeClasses,
 51 |                        NamedFieldPuns,
 52 |                        NegativeLiterals,
 53 |                        NoImplicitPrelude,
 54 | --                     ^^^^^^^^^^^^^^^^^ `import MyPrelude` instead!
 55 |                        NumericUnderscores,
 56 |                        OverloadedStrings,
 57 |                        PartialTypeSignatures,
 58 |                        PatternSynonyms,
 59 |                        PolyKinds,
 60 |                        QuantifiedConstraints,
 61 |                        RankNTypes,
 62 |                        ScopedTypeVariables,
 63 |                        StandaloneDeriving,
 64 |                        StrictData,
 65 | --                     ^^^^^^^^^^ NOTE!!
 66 |                        TupleSections,
 67 |                        TypeApplications,
 68 |                        TypeFamilies,
 69 |                        TypeOperators,
 70 |                        TypeSynonymInstances,
 71 |                        ViewPatterns
 72 | 
 73 | -- NOTABLY MISSING:
 74 | --     CPP, TemplateHaskell:
 75 | --         Would like to avoid these if at all possible. Use a local `LANGUAGE` pragma if/where we can't.
 76 | --     DeriveAnyClass:
 77 | --         Would be nice, but in GHC 8.0 it's badly implemented and conflicts with GeneralizedNewtypeDeriving.
 78 | --     FunctionalDependencies:
 79 | --         Use TypeFamilies instead.
 80 | --     OverloadedLists:
 81 | --         I got a type ambiguity error that I think was caused by this, and otherwise haven't needed it.
 82 | --     RecordWildCards:
 83 | --         Tryna keep it obvious where names are coming from.
 84 | --     RoleAnnotations:
 85 | --         Seems niche and it's quite possible we won't ever need it.
 86 | --     TypeInType:
 87 | --         Very new and fancy and powerful. Give it some time.
 88 | --     UndecidableInstances, UndecidableSuperClasses:
 89 | --         Not evil, but use a local `LANGUAGE` pragma to call it out if it's necessary.
 90 | --     RecursiveDo:
 91 | --         Every time I have tried to use this, I eventually got undebuggable <<loop>>s.
 92 | 
 93 |   build-depends:       base >= 4.7 && < 5,
 94 |                        bytestring,
 95 |                        containers,
 96 |                        directory,
 97 |                        Earley,
 98 |                        generic-lens,
 99 |                        lexer-applicative,
100 |                        llvm-hs,
101 |                        llvm-hs-pure,
102 |                        managed,
103 |                        mtl,
104 |                        pretty-simple,
105 |                        prettyprinter,
106 |                        prettyprinter-ansi-terminal,
107 |                        process,
108 |                        profunctors,
109 |                        regex-applicative,
110 |                        srcloc,
111 |                        text
112 |   ghc-options:         -O
113 |                        -W -Wall -Wcompat
114 |                        -Wincomplete-uni-patterns -Wincomplete-record-updates -Wredundant-constraints
115 |                        -Wno-name-shadowing
116 | 
117 | source-repository head
118 |   type:     git
119 |   location: https://github.com/glaebhoerl/simplecompiler
120 | 


--------------------------------------------------------------------------------
/src/AST.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE RecursiveDo #-} -- needed for Earley
  2 | 
  3 | module AST (Type (..), Expression (..), BindingType (..), Statement (..), Block (..), Argument (..), Function (..), AST, Error (..), parse, RenderName (..)) where
  4 | 
  5 | import MyPrelude
  6 | 
  7 | import qualified Text.Earley as E
  8 | 
  9 | import qualified Pretty as P
 10 | import qualified Token  as T
 11 | 
 12 | import Pretty (Render, render)
 13 | 
 14 | 
 15 | ----------------------------------------------------------------------------- types
 16 | 
 17 | data Type metadata name
 18 |     = NamedType
 19 |         name
 20 |     | FunctionType
 21 |         [NodeWith Type metadata name]
 22 |         (NodeWith Type metadata name)
 23 |     deriving (Generic, Eq, Show, Functor, Foldable, Traversable)
 24 | 
 25 | data Expression metadata name
 26 |     = Named
 27 |         name
 28 |     | Call
 29 |         (NodeWith Expression metadata name)
 30 |         [NodeWith Expression metadata name]
 31 |     | NumberLiteral
 32 |         Integer
 33 |     | TextLiteral
 34 |         Text
 35 |     | UnaryOperator
 36 |         UnaryOperator
 37 |         (NodeWith Expression metadata name)
 38 |     | BinaryOperator
 39 |         (NodeWith Expression metadata name)
 40 |         BinaryOperator
 41 |         (NodeWith Expression metadata name)
 42 |     deriving (Generic, Eq, Show, Functor, Foldable, Traversable)
 43 | 
 44 | data BindingType
 45 |     = Let
 46 |     | Var
 47 |     deriving (Generic, Eq, Show)
 48 | 
 49 | data Statement metadata name
 50 |     = Expression
 51 |         (NodeWith Expression metadata name)
 52 |     | Binding
 53 |         BindingType
 54 |         name
 55 |         (NodeWith Expression metadata name)
 56 |     | Assign
 57 |         name
 58 |         (NodeWith Expression metadata name)
 59 |     | IfThen
 60 |         (NodeWith Expression metadata name)
 61 |         (NodeWith Block      metadata name)
 62 |     | IfThenElse
 63 |         (NodeWith Expression metadata name)
 64 |         (NodeWith Block      metadata name)
 65 |         (NodeWith Block      metadata name)
 66 |     | Forever
 67 |         (NodeWith Block      metadata name)
 68 |     | While
 69 |         (NodeWith Expression metadata name)
 70 |         (NodeWith Block      metadata name)
 71 |     | Return
 72 |         name -- return and break refer to the `exitTarget` in `Block`; these are "phantom names", not present in the source code
 73 |         (Maybe (NodeWith Expression metadata name))
 74 |     | Break
 75 |         name -- see above
 76 |     deriving (Generic, Eq, Show, Functor, Foldable, Traversable)
 77 | 
 78 | data Block metadata name = Block {
 79 |     exitTarget :: Maybe name, -- "phantom", see above
 80 |     statements :: [NodeWith Statement metadata name]
 81 | } deriving (Generic, Eq, Show, Functor, Foldable, Traversable)
 82 | 
 83 | data Argument metadata name = Argument {
 84 |     argumentName :: name,
 85 |     argumentType :: NodeWith Type metadata name
 86 | } deriving (Generic, Eq, Show, Functor, Foldable, Traversable)
 87 | 
 88 | data Function metadata name = Function {
 89 |     functionName :: name,
 90 |     arguments    :: [NodeWith Argument metadata name],
 91 |     returns      :: Maybe (NodeWith Type metadata name),
 92 |     body         :: NodeWith Block metadata name
 93 | } deriving (Generic, Eq, Show, Functor, Foldable, Traversable)
 94 | 
 95 | 
 96 | 
 97 | ----------------------------------------------------------------------------- parsing
 98 | 
 99 | type Expected         = Text
100 | type Prod      r      = Compose (E.Prod r Expected (With Loc T.Token)) (With Loc)
101 | type Grammar   r node = E.Grammar r (Prod r (node Loc Text))
102 | 
103 | token :: T.Token -> Prod r ()
104 | token = unused . Compose . E.token . pure
105 | 
106 | keyword :: T.Keyword -> Prod r ()
107 | keyword = token . T.Keyword
108 | 
109 | terminal :: (T.Token -> Maybe a) -> Prod r a
110 | terminal f = Compose (E.terminal (\(With loc a) -> fmap (With loc) (f a)))
111 | 
112 | tokenConstructor :: forall name inner r. AsConstructor' name T.Token inner => Prod r inner
113 | tokenConstructor = terminal (match @name)
114 | 
115 | bracketed :: T.BracketKind -> Prod r output -> Prod r output
116 | bracketed kind inner = do
117 |     token (T.Bracket' (T.Bracket kind T.Open))
118 |     output <- inner
119 |     token (T.Bracket' (T.Bracket kind T.Close))
120 |     return output
121 | 
122 | separatedBy :: T.Token -> Prod r output -> Prod r [output]
123 | separatedBy t element = oneOf [pure [], liftA2 prepend element (zeroOrMore (token t `followedBy` element))]
124 | 
125 | followedBy :: Prod r a -> Prod r b -> Prod r b
126 | followedBy = (*>)
127 | 
128 | -- This may seem somewhat surprising -- why do we need to /duplicate/ the location info? Doesn't the Applicative instance handle this for us?
129 | -- The explanation is that Applicative only handles combining the sublocations into the location of the final result --
130 | --   but we don't just want the location of the whole tree, we also want the locations of all the sub-nodes!
131 | -- So this takes a snapshot of the location for the subnode, and also lets `Applicative` go on combining it into the location of the parent node.
132 | located :: Prod r (node Loc Text) -> Prod r (NodeWith node Loc Text)
133 | located = Compose . fmap dupLocated . getCompose where
134 |     dupLocated node = With (getMetadata node) (NodeWith node)
135 | 
136 | nodeRule :: Prod r (node Loc Text) -> Grammar r node
137 | nodeRule = fmap Compose . E.rule . getCompose
138 | 
139 | locatedNode :: Prod r (node Loc Text) -> Grammar r (NodeWith node)
140 | locatedNode = nodeRule . located
141 | 
142 | -- from tightest to loosest; operators within a group have equal precedence
143 | precedenceGroups :: [[BinaryOperator]]
144 | precedenceGroups = assert (justIf isWellFormed listOfGroups) where
145 |     isWellFormed = all exactly1 (enumerate :: [BinaryOperator]) && not (any null listOfGroups)
146 |     exactly1 op  = length (filter (== op) (concat listOfGroups)) == 1
147 |     listOfGroups =
148 |         [map ArithmeticOperator [Mul, Div, Mod],
149 |          map ArithmeticOperator [Add, Sub],
150 |          map ComparisonOperator [Less, LessEqual, Greater, GreaterEqual],
151 |          map ComparisonOperator [Equal, NotEqual],
152 |          map LogicalOperator    [And],
153 |          map LogicalOperator    [Or]]
154 | 
155 | data BinaryOperationList metadata name
156 |     = SingleExpression (NodeWith Expression metadata name)
157 |     | BinaryOperation  (NodeWith Expression metadata name) BinaryOperator (BinaryOperationList metadata name)
158 |     deriving Show
159 | 
160 | resolvePrecedences :: BinaryOperationList Loc Text -> NodeWith Expression Loc Text
161 | resolvePrecedences binaryOperationList = finalResult where
162 |     finalResult = case allPrecedencesResolved of
163 |         SingleExpression expr -> expr
164 |         list                  -> bug ("Unresolved binary operator precedence: " ++ prettyShow list)
165 |     allPrecedencesResolved = foldl' resolveOnePrecedenceLevel binaryOperationList precedenceGroups
166 |     resolveOnePrecedenceLevel binOpList precedenceGroup = case binOpList of
167 |         BinaryOperation expr1 op1 (BinaryOperation expr2 op2 rest)
168 |             | elem op1 precedenceGroup -> resolveOnePrecedenceLevel (BinaryOperation (locatedBinaryOperator expr1 op1 expr2) op2 rest) precedenceGroup
169 |             | otherwise                -> BinaryOperation expr1 op1 (resolveOnePrecedenceLevel (BinaryOperation expr2 op2 rest) precedenceGroup)
170 |         BinaryOperation expr1 op (SingleExpression expr2)
171 |             | elem op  precedenceGroup -> SingleExpression (locatedBinaryOperator expr1 op expr2)
172 |         other -> other
173 |     locatedBinaryOperator expr1 op expr2 = NodeWith (With combinedLoc (BinaryOperator expr1 op expr2)) where
174 |         combinedLoc = mconcat (map nodeMetadata [expr1, expr2])
175 | 
176 | expressionGrammar :: Grammar r (NodeWith Expression)
177 | expressionGrammar = mdo
178 |     atom <- (locatedNode . oneOf)
179 |         [
180 |             liftA1 Named         (tokenConstructor @"Name"),
181 |             liftA1 NumberLiteral (tokenConstructor @"Number"),
182 |             liftA1 TextLiteral   (tokenConstructor @"Text"),
183 |             liftA1 nodeWithout   (bracketed T.Round expression)
184 |         ]
185 |     call <- (locatedNode . oneOf)
186 |         [
187 |             liftA2 Call        call (bracketed T.Round (separatedBy T.Comma expression)),
188 |             liftA1 nodeWithout atom
189 |         ]
190 |     unary <- (locatedNode . oneOf)
191 |         [
192 |             liftA2 UnaryOperator (tokenConstructor @"UnaryOperator") unary,
193 |             liftA1 nodeWithout call
194 |         ]
195 |     binaries <- (nodeRule . oneOf)
196 |         [
197 |             liftA3 BinaryOperation  unary (tokenConstructor @"BinaryOperator") binaries,
198 |             liftA1 SingleExpression unary
199 |         ]
200 |     let expression = liftA1 resolvePrecedences binaries
201 |     return expression
202 | 
203 | blockGrammar :: Grammar r (NodeWith Block)
204 | blockGrammar = mdo
205 |     expression <- expressionGrammar
206 |     -----------------------------------------------------------
207 |     binding <- locatedNode do
208 |         letvar <- terminal (\case T.Keyword T.K_let -> Just Let; T.Keyword T.K_var -> Just Var; _ -> Nothing) -- TODO prism?
209 |         name   <- tokenConstructor @"Name"
210 |         token T.EqualsSign
211 |         rhs    <- expression
212 |         token T.Semicolon
213 |         return (Binding letvar name rhs)
214 |     assign <- locatedNode do
215 |         lhs <- tokenConstructor @"Name"
216 |         token T.EqualsSign
217 |         rhs <- expression
218 |         token T.Semicolon
219 |         return (Assign lhs rhs)
220 |     ifthen <- locatedNode do
221 |         keyword T.K_if
222 |         cond <- expression
223 |         body <- block
224 |         return (IfThen cond body)
225 |     ifthenelse <- locatedNode do
226 |         keyword T.K_if
227 |         cond  <- expression
228 |         body1 <- block
229 |         keyword T.K_else
230 |         body2 <- block
231 |         return (IfThenElse cond body1 body2)
232 |     forever <- locatedNode do
233 |         keyword T.K_forever
234 |         body <- block
235 |         return (Forever (mapNode (set (field @"exitTarget") (Just "break")) body))
236 |     while <- locatedNode do
237 |         keyword T.K_while
238 |         cond <- expression
239 |         body <- block
240 |         return (While cond (mapNode (set (field @"exitTarget") (Just "break")) body))
241 |     ret <- locatedNode do
242 |         keyword T.K_return
243 |         arg <- liftA1 head (zeroOrOne expression)
244 |         token T.Semicolon
245 |         return (Return "return" arg)
246 |     break <- locatedNode do
247 |         keyword T.K_break
248 |         token T.Semicolon
249 |         return (Break "break")
250 |     exprStatement <- locatedNode do
251 |         expr <- expression
252 |         token T.Semicolon
253 |         return (Expression expr)
254 |     -----------------------------------------------------
255 |     statement <- nodeRule (oneOf [binding, assign, ifthen, ifthenelse, forever, while, ret, break, exprStatement])
256 |     block <- locatedNode do
257 |         statements <- bracketed T.Curly (oneOrMore statement)
258 |         return Block { exitTarget = Nothing, statements }
259 |     return block
260 | 
261 | typeGrammar :: Grammar r (NodeWith Type)
262 | typeGrammar = mdo
263 |     functionType <- nodeRule do
264 |         keyword T.K_function
265 |         parameters <- bracketed T.Round (separatedBy T.Comma type')
266 |         keyword T.K_returns
267 |         returns <- type'
268 |         return (FunctionType parameters returns)
269 |     type' <- locatedNode (oneOf [liftA1 NamedType (tokenConstructor @"Name"), functionType])
270 |     return type'
271 | 
272 | functionGrammar :: Grammar r (NodeWith Function)
273 | functionGrammar = do
274 |     block <- blockGrammar
275 |     type' <- typeGrammar
276 |     argument <- locatedNode do
277 |         argumentName <- tokenConstructor @"Name"
278 |         token T.Colon
279 |         argumentType <- type'
280 |         return Argument { argumentName, argumentType }
281 |     locatedNode do
282 |         keyword T.K_function
283 |         functionName <- tokenConstructor @"Name"
284 |         arguments    <- bracketed T.Round (separatedBy T.Comma argument)
285 |         returns      <- liftA1 head (zeroOrOne (keyword T.K_returns `followedBy` type'))
286 |         body         <- block
287 |         return Function { functionName, arguments, returns, body = mapNode (set (field @"exitTarget") (Just "return")) body }
288 | 
289 | type AST metadata name = [NodeWith Function metadata name]
290 | 
291 | data Error
292 |     = Invalid Int [Expected] [With Loc T.Token]
293 |     | Ambiguous [AST Loc Text]
294 |     deriving (Generic, Show)
295 | 
296 | parse :: [With Loc T.Token] -> Either Error (AST Loc Text)
297 | parse = checkResult . E.fullParses parser where
298 |     parser = E.parser (liftM oneOrMore (fmap (fmap unWith . getCompose) functionGrammar))
299 |     checkResult = \case
300 |         ([], E.Report a b c) ->
301 |             Left (Invalid a b c)
302 |         ([one], _) ->
303 |             Right one
304 |         (more,  _) ->
305 |             Left (Ambiguous more)
306 | 
307 | 
308 | 
309 | 
310 | 
311 | ----------------------------------------------------------------------------- pretty-printing
312 | 
313 | renderBlock :: RenderName name => NodeWith Block metadata name -> P.Document
314 | renderBlock block =
315 |     P.braces (P.nest 4 (P.hardline ++ render block) ++ P.hardline)
316 | 
317 | class RenderName name where
318 |     renderName :: P.DefinitionOrUse -> name -> P.Document
319 | 
320 | instance RenderName Text where
321 |     renderName defOrUse name =
322 |         P.note (P.Identifier (P.IdentInfo name defOrUse P.Unknown False)) (P.pretty name)
323 | 
324 | instance RenderName name => Render (Type metadata name) where
325 |     listSeparator = ", "
326 |     render = \case
327 |         NamedType name ->
328 |             renderName P.Use name
329 |         FunctionType parameters returns ->
330 |             P.keyword "function" ++ P.parens (render parameters) ++ " " ++ P.keyword "returns" ++ " " ++ render returns
331 | 
332 | instance RenderName name => Render (Expression metadata name) where
333 |     listSeparator = ", "
334 |     render = \case
335 |         Named name ->
336 |             renderName P.Use name
337 |         Call fn args ->
338 |             render fn ++ P.parens (render args)
339 |         NumberLiteral number->
340 |             P.number number
341 |         TextLiteral text ->
342 |             P.string text
343 |         UnaryOperator op expr->
344 |             P.unaryOperator op ++ render expr
345 |         BinaryOperator expr1 op expr2 ->
346 |             render expr1 ++ " " ++ P.binaryOperator op ++ " " ++ render expr2
347 | 
348 | instance Render BindingType where
349 |     render = P.keyword . \case
350 |         Let -> "let"
351 |         Var -> "var"
352 | 
353 | instance RenderName name => Render (Statement metadata name) where
354 |     render = \case
355 |         Binding btype name expr ->
356 |             render btype ++ " " ++ renderName P.Definition name ++ " " ++ P.defineEquals ++ " " ++ render expr ++ P.semicolon
357 |         Assign name expr ->
358 |             renderName P.Use name ++ " " ++ P.assignEquals ++ " " ++ render expr ++ P.semicolon
359 |         IfThen expr block ->
360 |             P.keyword "if" ++ " " ++ render expr ++ " " ++ renderBlock block
361 |         IfThenElse expr block1 block2 ->
362 |             render (IfThen expr block1) ++ " " ++ P.keyword "else" ++ " " ++ renderBlock block2
363 |         Forever block ->
364 |             P.keyword "forever" ++ " " ++ renderBlock block
365 |         While expr block ->
366 |             P.keyword "while" ++ " " ++ render expr ++ " " ++ renderBlock block
367 |         Return _ maybeExpr ->
368 |             P.keyword "return" ++ (maybe "" (\expr -> " " ++ render expr) maybeExpr) ++ P.semicolon
369 |         Break _ ->
370 |             P.keyword "break" ++ P.semicolon
371 |         Expression expr ->
372 |             render expr ++ P.semicolon
373 | 
374 | instance RenderName name => Render (Block metadata name) where
375 |     render Block { statements } = render statements
376 | 
377 | instance RenderName name => Render (Argument metadata name) where
378 |     listSeparator = ", "
379 |     render Argument { argumentName, argumentType } =
380 |         renderName P.Definition argumentName ++ P.colon ++ " " ++ render argumentType
381 | 
382 | instance RenderName name => Render (Function metadata name) where
383 |     render Function { functionName, arguments, returns, body } =
384 |         renderedHead ++ renderedArguments ++ renderedReturns ++ renderedBody where
385 |             renderedHead      = P.keyword "function" ++ " " ++ renderName P.Definition functionName
386 |             renderedArguments = P.parens (render arguments)
387 |             renderedReturns   = maybe "" (\returnType -> " " ++ P.keyword "returns" ++ " " ++ render returnType) returns
388 |             renderedBody      = P.hardline ++ renderBlock body
389 | 


--------------------------------------------------------------------------------
/src/IR.hs:
--------------------------------------------------------------------------------
  1 | module IR (
  2 |     Type, BlockType (..), ID (..), NameWithType (..), Name, BlockName,
  3 |     Literal (..), Value (..), Expression (..), Statement (..), Block (..), Transfer (..), Target (..), Function (..),
  4 |     typeOf, typeOfBlock, translateFunction, validate, eliminateTrivialBlocks
  5 | ) where
  6 | 
  7 | import MyPrelude
  8 | 
  9 | import qualified Data.Map as Map
 10 | 
 11 | import qualified Pretty as P
 12 | import qualified AST    as AST
 13 | import qualified Name   as Name
 14 | import qualified Type   as Type
 15 | 
 16 | import Pretty (Render, render)
 17 | import Type   (Type)
 18 | 
 19 | 
 20 | ---------------------------------------------------------------------------------------------------- TYPE DEFINITIONS
 21 | 
 22 | newtype BlockType = BlockType {
 23 |     parameters :: [Type]
 24 | } deriving (Generic, Eq, Show)
 25 | 
 26 | data ID
 27 |     = ID      Int
 28 |     | ASTName Name.Name -- this includes all functions, as well as `return` and `break` points
 29 |     deriving (Generic, Eq, Ord, Show)
 30 | 
 31 | data NameWithType nameType = Name {
 32 |     nameID      :: ID,
 33 |     nameType    :: nameType,
 34 |     description :: Text
 35 | } deriving (Generic, Show, Functor)
 36 | 
 37 | type Name      = NameWithType Type
 38 | type BlockName = NameWithType BlockType
 39 | 
 40 | instance Eq  (NameWithType nameType) where
 41 |     (==) = (==) `on` nameID
 42 | 
 43 | instance Ord (NameWithType nameType) where
 44 |     compare = compare `on` nameID
 45 | 
 46 | data Literal
 47 |     = Int  Int64
 48 |     | Text Text
 49 |     | Unit
 50 |     deriving (Generic, Eq, Show)
 51 | 
 52 | data Value
 53 |     = Literal Literal
 54 |     | Named   Name
 55 |     deriving (Generic, Eq, Show)
 56 | 
 57 | data Expression
 58 |     = Value          Value
 59 |     | UnaryOperator  UnaryOperator Value
 60 |     | BinaryOperator Value BinaryOperator Value
 61 |     | Call           Value [Value]
 62 |     deriving (Generic, Eq, Show)
 63 | 
 64 | data Statement
 65 |     = BlockDecl BlockName Block
 66 |     | Let       Name      Expression -- also used for "expression statements" -- the name is simply ignored
 67 |     | Assign    Name      Value
 68 |     deriving (Generic, Eq, Show)
 69 | 
 70 | data Block = Block {
 71 |     arguments :: [Name],
 72 |     body      :: [Statement],
 73 |     transfer  :: Transfer
 74 | } deriving (Generic, Eq, Show)
 75 | 
 76 | data Transfer
 77 |     = Jump   Target
 78 |     | Branch Value [Target] -- targets are in "ascending order": false, then true
 79 |     deriving (Generic, Eq, Show)
 80 | 
 81 | data Target = Target {
 82 |     targetBlock :: BlockName,
 83 |     targetArgs  :: [Value]
 84 | } deriving (Generic, Eq, Show)
 85 | 
 86 | data Function = Function {
 87 |     functionID   :: ID,
 88 |     functionBody :: Block,
 89 |     returnBlock  :: BlockName
 90 | } deriving (Generic, Eq, Show)
 91 | 
 92 | functionName :: Function -> Name
 93 | functionName Function { functionID, functionBody, returnBlock } =
 94 |     Name functionID (Type.Function argumentTypes returnType) ""
 95 |     where argumentTypes = map nameType (arguments functionBody)
 96 |           returnType    = assert (head (parameters (nameType returnBlock)))
 97 | 
 98 | 
 99 | typeOf :: Expression -> Type
100 | typeOf = \case
101 |     Value (Literal literal)    -> case literal of
102 |         Int  _                 -> Type.Int
103 |         Text _                 -> Type.Text
104 |         Unit                   -> Type.Unit
105 |     Value (Named name)         -> nameType name
106 |     UnaryOperator Not      _   -> Type.Bool
107 |     UnaryOperator Negate   _   -> Type.Int
108 |     BinaryOperator _ op    _   -> case op of
109 |         ArithmeticOperator _   -> Type.Int
110 |         ComparisonOperator _   -> Type.Bool
111 |         LogicalOperator    _   -> Type.Bool
112 |     Call                   f _ -> case typeOf (Value f) of
113 |         Type.Function      _ r -> r
114 |         _                      -> bug "Call of non-function in IR"
115 | 
116 | typeOfBlock :: Block -> BlockType
117 | typeOfBlock = BlockType . map nameType . arguments
118 | 
119 | 
120 | 
121 | 
122 | ---------------------------------------------------------------------------------------------------- TRANSLATION FRONTEND
123 | 
124 | class Monad m => TranslateM m where
125 |     emitStatement      :: Statement -> m ()
126 |     emitLet            :: Maybe Type.TypedName -> Expression -> m Name
127 |     emitBlock          :: Text -> BlockType -> m Transfer -> m BlockName
128 |     withContinuation   :: Either Type.TypedName (Text, BlockType) -> (BlockName -> m Transfer) -> m ()
129 |     emitTransfer       :: Transfer -> m ()
130 |     currentBlock       :: m BlockName
131 |     currentArguments   :: m [Name]
132 | 
133 | translateTemporary :: TranslateM m => NodeWith AST.Expression metadata Type.TypedName -> m Value
134 | translateTemporary = translateExpression Nothing . nodeWithout
135 | 
136 | translateBinding :: TranslateM m => Type.TypedName -> NodeWith AST.Expression metadata Type.TypedName -> m Value
137 | translateBinding name = translateExpression (Just name) . nodeWithout
138 | 
139 | translateExpression :: TranslateM m => Maybe Type.TypedName -> AST.Expression metadata Type.TypedName -> m Value
140 | translateExpression providedName = let emitNamedLet = emitLet providedName in \case
141 |     AST.Named name -> do
142 |         return (Named (translateName name))
143 |     AST.NumberLiteral num -> do
144 |         let value = Literal (Int (fromIntegral num))
145 |         if isJust providedName
146 |             then do
147 |                 name <- emitNamedLet (Value value)
148 |                 return (Named name)
149 |             else do
150 |                 return value
151 |     AST.TextLiteral text -> do -- TODO refactor
152 |         let value = Literal (Text text)
153 |         if isJust providedName
154 |             then do
155 |                 name <- emitNamedLet (Value value)
156 |                 return (Named name)
157 |             else do
158 |                 return value
159 |     AST.UnaryOperator op expr -> do
160 |         value <- translateTemporary expr
161 |         name  <- emitNamedLet (UnaryOperator op value)
162 |         return (Named name)
163 |     -- Logical operators are short-circuiting, so we can't just emit them as simple statements, except when the RHS is already a Value.
164 |     AST.BinaryOperator expr1 (LogicalOperator op) expr2 | ((nodeWithout expr2) `isn't` constructor @"NumberLiteral" && (nodeWithout expr2) `isn't` constructor @"Named") -> do -- ugh
165 |         value1 <- translateTemporary expr1
166 |         let opName = toLower (showText op)
167 |         -- TODO use the provided name for the arg!
168 |         withContinuation (Right ("join_" ++ opName, BlockType [Type.Bool])) \joinPoint -> do
169 |             rhsBlock <- emitBlock opName (BlockType []) do
170 |                 value2 <- translateTemporary expr2
171 |                 return (Jump (Target joinPoint [value2]))
172 |             let branches = case op of
173 |                     And -> [Target joinPoint [value1], Target rhsBlock  []]
174 |                     Or  -> [Target rhsBlock  [],       Target joinPoint [value1]]
175 |             return (Branch value1 branches)
176 |         args <- currentArguments -- (TODO this still works right?)
177 |         return (Named (assert (head args)))
178 |     AST.BinaryOperator expr1 op expr2 -> do
179 |         value1 <- translateTemporary expr1
180 |         value2 <- translateTemporary expr2
181 |         name   <- emitNamedLet (BinaryOperator value1 op value2)
182 |         return (Named name)
183 |     AST.Call fn args -> do
184 |         fnValue   <- translateTemporary fn
185 |         argValues <- mapM translateTemporary args
186 |         name      <- emitNamedLet (Call fnValue argValues)
187 |         return (Named name)
188 | 
189 | translateStatement :: TranslateM m => NodeWith AST.Statement metadata Type.TypedName -> m ()
190 | translateStatement = (flip (.)) nodeWithout \case -- HACK
191 |     AST.Binding _ name expr -> do
192 |         _ <- translateBinding name expr
193 |         return ()
194 |     AST.Assign name expr -> do
195 |         value <- translateTemporary expr
196 |         emitStatement (Assign (translateName name) value)
197 |     AST.IfThen expr block -> do
198 |         value <- translateTemporary expr
199 |         withContinuation (Right ("join_if", BlockType [])) \joinPoint -> do
200 |             thenBlock <- emitBlock "if" (BlockType []) do
201 |                 translateStatements block
202 |                 return (Jump (Target joinPoint []))
203 |             return (Branch value [Target joinPoint [], Target thenBlock []])
204 |     AST.IfThenElse expr block1 block2 -> do
205 |         value <- translateTemporary expr
206 |         withContinuation (Right ("join_if_else", BlockType [])) \joinPoint -> do
207 |             thenBlock <- emitBlock "if" (BlockType []) do
208 |                 translateStatements block1
209 |                 return (Jump (Target joinPoint []))
210 |             elseBlock <- emitBlock "else" (BlockType []) do
211 |                 translateStatements block2
212 |                 return (Jump (Target joinPoint []))
213 |             return (Branch value [Target elseBlock [], Target thenBlock []])
214 |     AST.Forever blockWith -> do
215 |         let block = nodeWithout blockWith
216 |         withContinuation (Left (assert (AST.exitTarget block))) \_ -> do
217 |             foreverBlock <- emitBlock "forever" (BlockType []) do
218 |                 blockBody <- currentBlock
219 |                 translateStatements blockWith
220 |                 return (Jump (Target blockBody []))
221 |             return (Jump (Target foreverBlock []))
222 |     AST.While expr blockWith -> do
223 |         let block = nodeWithout blockWith
224 |         withContinuation (Left (assert (AST.exitTarget block))) \joinPoint -> do
225 |             whileBlock <- emitBlock "while" (BlockType []) do
226 |                 conditionTest <- currentBlock
227 |                 blockBody <- emitBlock "while_body" (BlockType []) do
228 |                     translateStatements blockWith
229 |                     return (Jump (Target conditionTest []))
230 |                 value <- translateTemporary expr
231 |                 return (Branch value [Target joinPoint [Literal Unit], Target blockBody []])
232 |             return (Jump (Target whileBlock []))
233 |     AST.Return target maybeExpr -> do
234 |         maybeValue <- mapM translateTemporary maybeExpr
235 |         emitTransfer (Jump (Target (translateBlockName target) [fromMaybe (Literal Unit) maybeValue]))
236 |     AST.Break target -> do
237 |         emitTransfer (Jump (Target (translateBlockName target) [Literal Unit]))
238 |     AST.Expression expr -> do
239 |         unused (translateTemporary expr)
240 | 
241 | translateStatements :: TranslateM m => NodeWith AST.Block metadata Type.TypedName -> m ()
242 | translateStatements = mapM_ translateStatement . AST.statements .  nodeWithout
243 | 
244 | 
245 | ---------------------------------------------------------------------------------------------------- TRANSLATION BACKEND
246 | 
247 | translateName :: Type.TypedName -> Name
248 | translateName (Name.NameWith name ty) = Name (ASTName name) (translatedType ty) (Name.unqualifiedName name) where
249 |     translatedType = \case
250 |         Type.HasType ty -> ty
251 |         Type.IsType  _  -> bug "Use of typename as local"
252 | 
253 | -- TODO it's not clear in when we should copy the `unqualifiedName` as the `description` and when not...?
254 | -- right now it's inconsistent between lets and blocks
255 | 
256 | translateBlockName :: Type.TypedName -> BlockName
257 | translateBlockName (Name.NameWith name ty) = Name (ASTName name) (translatedType ty) "" where
258 |     translatedType = \case
259 |         Type.HasType ty -> BlockType [ty]
260 |         Type.IsType  _  -> bug "Use of typename as exit target"
261 | 
262 | translateFunction :: AST.Function metadata Type.TypedName -> Function
263 | translateFunction AST.Function { AST.functionName, AST.arguments, AST.body = functionBody } = result where
264 |     result        = evalState initialState (runTranslate translateImpl)
265 |     initialState  = TranslateState  { lastID = 0, innermostBlock = BlockState (ID 0) "root" rootBlockArgs [] Nothing Nothing }
266 |     rootBlockArgs = map (translateName . AST.argumentName . nodeWithout) arguments
267 |     exitTarget    = (assert . AST.exitTarget . nodeWithout) functionBody
268 |     returnBlock   = translateBlockName exitTarget
269 |     translateImpl = do
270 |         -- this means a somewhat-redundant additional block will be emitted as the body, but, it works
271 |         bodyBlockName <- emitBlock "body" (BlockType []) do
272 |             translateStatements functionBody
273 |             return (Jump (Target returnBlock [Literal Unit])) -- will be discarded as dead code when not needed
274 |         emitTransfer (Jump (Target bodyBlockName []))
275 |         functionBody <- liftM assert getFinishedBlock
276 |         blockID <- getM (blockField @"blockID")
277 |         assertEqM blockID (ID 0)
278 |         return Function { functionID = ASTName (Name.name functionName), functionBody, returnBlock }
279 | 
280 | newtype Translate a = Translate {
281 |     runTranslate :: State TranslateState a
282 | } deriving (Functor, Applicative, Monad, MonadState TranslateState)
283 | 
284 | data TranslateState = TranslateState {
285 |     lastID         :: Int,
286 |     innermostBlock :: BlockState
287 | } deriving (Generic, Eq, Show)
288 | 
289 | data BlockState = BlockState {
290 |     blockID          :: ID,
291 |     blockDescription :: Text,
292 |     blockArguments   :: [Name],
293 |     statements       :: [Statement],
294 |     emittedTransfer  :: Maybe Transfer, -- this is `Just` if we have early-returned and are in dead code, or if we are currently processing the continuation of the block
295 |     enclosingBlock   :: Maybe BlockState
296 | } deriving (Generic, Eq, Show)
297 | 
298 | -- (wonder if there's any nicer solution?)
299 | blockField :: forall name inner. HasField' name BlockState inner => Lens TranslateState inner
300 | blockField = field @"innermostBlock" . field @name
301 | 
302 | getFinishedBlock :: Translate (Maybe Block)
303 | getFinishedBlock = do
304 |     BlockState { blockArguments, statements, emittedTransfer } <- getM (field @"innermostBlock")
305 |     return (fmap (Block blockArguments statements) emittedTransfer)
306 | 
307 | data IDSort = LetID | BlockID
308 | 
309 | newID :: IDSort -> Translate ID
310 | newID sort = do
311 |     -- NOTE incrementing first is significant, ID 0 is the root block!
312 |     modifyM (field @"lastID") \lastID ->
313 |         let isEven = case sort of LetID -> True; BlockID -> False
314 |         in lastID + (if isEven == (lastID % 2 == 0) then 2 else 1)
315 |     new <- getM (field @"lastID")
316 |     return (ID new)
317 | 
318 | newArgumentIDs :: BlockType -> Translate [Name]
319 | newArgumentIDs (BlockType argTypes) = do
320 |     forM argTypes \argType -> do
321 |         argID <- newID LetID
322 |         return (Name argID argType "")
323 | 
324 | deadCode :: Translate Bool
325 | deadCode = do
326 |     emittedTransfer <- getM (blockField @"emittedTransfer")
327 |     return (isJust emittedTransfer)
328 | 
329 | notDeadCode :: Translate Bool
330 | notDeadCode = liftM not deadCode
331 | 
332 | instance TranslateM Translate where
333 |     emitStatement :: Statement -> Translate ()
334 |     emitStatement statement = whenM notDeadCode do
335 |         modifyM (blockField @"statements") (++ [statement])
336 |         return ()
337 | 
338 |     emitLet :: Maybe Type.TypedName -> Expression -> Translate Name
339 |     emitLet providedName expr = do
340 |         ifM deadCode do
341 |             return (Name (ID -1) Type.Unit "deadcode")
342 |         `elseM` do
343 |             name <- case providedName of
344 |                 Just astName -> do
345 |                     let translatedName = translateName astName
346 |                     assertEqM (nameType translatedName) (typeOf expr)
347 |                     return translatedName
348 |                 Nothing -> do
349 |                     letID <- newID LetID
350 |                     return (Name letID (typeOf expr) "")
351 |             emitStatement (Let name expr)
352 |             return name
353 | 
354 |     emitBlock :: Text -> BlockType -> Translate Transfer -> Translate BlockName
355 |     emitBlock description argTypes translateBody = do
356 |         ifM deadCode do
357 |             return (Name (ID -1) (BlockType []) "deadcode")
358 |         `elseM` do
359 |             blockID <- newID BlockID
360 |             args    <- newArgumentIDs argTypes
361 |             modifyM (field @"innermostBlock") (\previouslyInnermost -> BlockState blockID description args [] Nothing (Just previouslyInnermost))
362 |             emittedBlockName <- currentBlock
363 |             transferAtEnd <- translateBody
364 |             emitTransfer transferAtEnd
365 |             whileM do
366 |                 finishedBlock    <- liftM assert getFinishedBlock
367 |                 currentBlockName <- currentBlock -- possibly a continuation of `emittedBlockName`
368 |                 modifyM (field @"innermostBlock") (assert . enclosingBlock)
369 |                 parentEmittedTransfer <- getM (blockField @"emittedTransfer")
370 |                 case parentEmittedTransfer of
371 |                     Just _ -> do
372 |                         (modifyM (blockField @"statements") . map) \case
373 |                             BlockDecl name _ | name == currentBlockName ->
374 |                                 BlockDecl currentBlockName finishedBlock
375 |                             otherStatement ->
376 |                                 otherStatement
377 |                         return True
378 |                     _ -> do
379 |                         emitStatement (BlockDecl currentBlockName finishedBlock)
380 |                         return False
381 |             return emittedBlockName
382 | 
383 |     withContinuation :: Either Type.TypedName (Text, BlockType) -> (BlockName -> Translate Transfer) -> Translate ()
384 |     withContinuation blockSpec inBetweenCode = whenM notDeadCode do
385 |         nextBlockName <- case blockSpec of
386 |             Left nextBlockAstName -> do
387 |                 return (translateBlockName nextBlockAstName)
388 |             Right (nextBlockDescription, nextBlockParams) -> do
389 |                 nextBlockID <- newID BlockID
390 |                 return (Name nextBlockID nextBlockParams nextBlockDescription)
391 |         nextBlockArgs <- newArgumentIDs (nameType nextBlockName)
392 |         let nextBlockStub = Block { arguments = nextBlockArgs, body = [], transfer = Jump (Target (Name (ID -1) (BlockType []) "") []) }
393 |         emitStatement (BlockDecl nextBlockName nextBlockStub)
394 |         transfer <- inBetweenCode nextBlockName -- TODO if any `emitTransfer` is done in here, it's a `bug`!
395 |         setM (blockField @"emittedTransfer") (Just transfer)
396 |         modifyM (field @"innermostBlock") (\previouslyInnermost -> BlockState (nameID nextBlockName) (description nextBlockName) nextBlockArgs [] Nothing (Just previouslyInnermost))
397 |         return ()
398 | 
399 |     -- this means early-escapes in the source
400 |     emitTransfer :: Transfer -> Translate ()
401 |     emitTransfer transfer = whenM notDeadCode do
402 |         setM (blockField @"emittedTransfer") (Just transfer)
403 | 
404 |     currentBlock :: Translate BlockName
405 |     currentBlock = do
406 |         blockID     <- getM (blockField @"blockID")
407 |         description <- getM (blockField @"blockDescription")
408 |         arguments   <- currentArguments
409 |         return (Name blockID (BlockType (map nameType arguments)) description)
410 | 
411 |     currentArguments :: Translate [Name]
412 |     currentArguments = do
413 |         getM (blockField @"blockArguments")
414 | 
415 | 
416 | {- EXAMPLE INPUT
417 | main1
418 | if (foo) {
419 |     body1
420 | }
421 | main2
422 | if (foo2) {
423 |     body2
424 | }
425 | main3
426 | -}
427 | 
428 | {- EXAMPLE OUTPUT
429 | block main() {
430 |     main1
431 |     block join1() {
432 |         main2
433 |         block join2() {
434 |             main3
435 |             return
436 |         }
437 |         block if2() {
438 |             body2
439 |             jump join2()
440 |         }
441 |         branch foo2 [if2, join2]
442 |     }
443 |     block if1() {
444 |         body1
445 |         jump join1()
446 |     }
447 |     branch foo [if1, join1]
448 | }
449 | -}
450 | 
451 | 
452 | 
453 | ---------------------------------------------------------------------------------------------------- VALIDATION
454 | 
455 | -- TODO think through what other new error possibilities there might be!
456 | data ValidationError
457 |     = NotInScope         ID
458 |     | Redefined          ID
459 |     | ExpectedValue      BlockName
460 |     | ExpectedBlock      Name
461 |     | Inconsistent       Name      Name
462 |     | BlockInconsistent  BlockName BlockName
463 |     | TypeMismatch       Type      Expression
464 |     | BlockTypeMismatch  BlockType Block
465 |     | BadTargetCount     Transfer
466 |     | BadTargetArgsCount Target
467 |     | BadCallArgsCount   Expression
468 |     | CallOfNonFunction  Expression
469 |     deriving (Generic, Show)
470 | 
471 | validate :: [Function] -> Either ValidationError ()
472 | validate = runExcept . evalStateT [Map.empty] . mapM_ checkFunction where
473 |     checkFunction function@Function { functionBody, returnBlock } = do
474 |         recordName (functionName function)
475 |         recordBlockName returnBlock
476 |         checkBlock functionBody
477 |     checkBlock block = do
478 |         modifyState (prepend Map.empty)
479 |         mapM_ recordName     (arguments block)
480 |         mapM_ checkStatement (body      block)
481 |         checkTransfer        (transfer  block)
482 |         modifyState (assert . tail)
483 |         return ()
484 |     checkStatement = \case
485 |         BlockDecl name block -> do
486 |             recordBlockName name -- block name is in scope for body
487 |             checkBlockType (nameType name) block
488 |             checkBlock block
489 |         Let name expr -> do
490 |             checkExpression (nameType name) expr
491 |             recordName name -- let name is not in scope for rhs
492 |         Assign name value -> do
493 |             checkName name
494 |             checkValue (nameType name) value
495 |     checkExpression expectedType expr = do
496 |         checkType expectedType expr
497 |         case expr of
498 |             Value value -> do
499 |                 -- we already checked the type, we just want to check if it's in scope
500 |                 checkValue (typeOf expr) value
501 |             UnaryOperator _ value -> do
502 |                 -- we abuse the fact that the unary ops have matching input and output types
503 |                 checkValue (typeOf expr) value
504 |             BinaryOperator value1 op value2 -> do
505 |                 mapM_ (checkValue opInputType) [value1, value2] where
506 |                     opInputType = case op of
507 |                         ArithmeticOperator _ -> Type.Int
508 |                         ComparisonOperator _ -> Type.Int
509 |                         LogicalOperator    _ -> Type.Bool
510 |             Call fn args -> case typeOf (Value fn) of
511 |                 Type.Function argTypes returnType -> do
512 |                     mapM_ checkName (match @"Named" fn)
513 |                     when (returnType != expectedType) do
514 |                         throwError (TypeMismatch expectedType expr)
515 |                     when (length args != length argTypes) do
516 |                         throwError (BadCallArgsCount expr)
517 |                     zipWithM_ checkValue argTypes args
518 |                 _ -> do
519 |                     throwError (CallOfNonFunction expr)
520 |     checkValue expectedType value = do
521 |         checkType expectedType (Value value)
522 |         mapM_ checkName (match @"Named" value)
523 |     checkTransfer = \case
524 |         Jump target -> do
525 |             checkTarget target
526 |         Branch value targets -> do
527 |             checkValue Type.Bool value
528 |             when (length targets != 2) do
529 |                 throwError (BadTargetCount (Branch value targets))
530 |             mapM_ checkTarget targets
531 |     checkTarget target@Target { targetBlock, targetArgs } = do
532 |         checkBlockName targetBlock
533 |         let expectedTypes = parameters (nameType targetBlock)
534 |         when (length expectedTypes != length targetArgs) do
535 |             throwError (BadTargetArgsCount target)
536 |         zipWithM_ checkValue expectedTypes targetArgs
537 |     checkType expectedType expr = do
538 |         when (typeOf expr != expectedType) do
539 |             throwError (TypeMismatch expectedType expr)
540 |     checkBlockType expectedType block = do
541 |         when (typeOfBlock block != expectedType) do
542 |             throwError (BlockTypeMismatch expectedType block)
543 |     checkName name@Name { nameID, nameType, description } = do
544 |         when (nameID `is` (constructor @"ASTName" . constructor @"BuiltinName")) do  -- FIXME HACK
545 |             recordName name
546 |         recordedType <- lookupType nameID
547 |         when (nameType != recordedType) do
548 |             throwError (Inconsistent (Name nameID nameType description) (Name nameID recordedType ""))
549 |     checkBlockName Name { nameID, nameType, description } = do -- FIXME deduplicate
550 |         recordedType <- lookupBlockType nameID
551 |         when (nameType != recordedType) do
552 |             throwError (BlockInconsistent (Name nameID nameType description) (Name nameID recordedType ""))
553 |     lookupType nameID = do
554 |         nameType <- lookupID nameID
555 |         case nameType of
556 |             Right valueType -> return valueType
557 |             Left  blockType -> throwError (ExpectedValue (Name nameID blockType ""))
558 |     lookupBlockType nameID = do
559 |         nameType <- lookupID nameID
560 |         case nameType of
561 |             Left  blockType -> return blockType
562 |             Right valueType -> throwError (ExpectedBlock (Name nameID valueType ""))
563 |     lookupID nameID = do
564 |         scopes <- getState
565 |         case Map.lookup nameID (Map.unions scopes) of
566 |             Just nameType -> return nameType
567 |             Nothing       -> throwError (NotInScope nameID)
568 |     recordName      = insertName . fmap Right
569 |     recordBlockName = insertName . fmap Left
570 |     insertName Name { nameID, nameType } = do
571 |         doModifyState \(names : parents) -> do
572 |             when (Map.member nameID names) do -- FIXME this should be a shallow check?
573 |                 throwError (Redefined nameID)
574 |             return (Map.insert nameID nameType names : parents)
575 |         return ()
576 | 
577 | 
578 | 
579 | ---------------------------------------------------------------------------------------------------- TRANSFORMS
580 | 
581 | eliminateTrivialBlocks :: Block -> Block
582 | eliminateTrivialBlocks = evalState Map.empty . visitBlock where
583 |     visitBlock Block { arguments, body, transfer } = do
584 |         newBody     <- liftM catMaybes (mapM visitStatement body)
585 |         newTransfer <- visitTransfer transfer
586 |         return (Block arguments newBody newTransfer)
587 |     visitStatement = \case
588 |         BlockDecl name (Block [] [] (Jump target)) | targetBlock target != name -> do
589 |             modifyState (Map.insert name target)
590 |             return Nothing
591 |         BlockDecl name nonTrivialBlock -> do
592 |             newBlock <- visitBlock nonTrivialBlock
593 |             return (Just (BlockDecl name newBlock))
594 |         otherStatement -> do
595 |             return (Just otherStatement)
596 |     visitTransfer = \case
597 |         Jump target -> do
598 |             newTarget <- getAdjustedTarget target
599 |             return (Jump newTarget)
600 |         Branch value targets -> do
601 |             newTargets <- mapM getAdjustedTarget targets
602 |             return (Branch value newTargets)
603 |     getAdjustedTarget oldTarget = do
604 |         maybeNewTarget <- liftM (Map.lookup (targetBlock oldTarget)) getState
605 |         case maybeNewTarget of
606 |             Nothing -> do
607 |                 return oldTarget
608 |             Just adjustedTarget -> do
609 |                 assertEqM (targetArgs oldTarget) [] -- if the block we're eliminating had arguments, it's not trivial!
610 |                 getAdjustedTarget adjustedTarget -- check if this block was _also_ trivial
611 | 
612 | 
613 | 
614 | 
615 | ---------------------------------------------------------------------------------------------------- PRETTY PRINTING
616 | 
617 | prettyType :: Type -> P.Type
618 | prettyType = \case
619 |     Type.Int          -> P.Int
620 |     Type.Bool         -> P.Bool
621 |     Type.Text         -> P.Text
622 |     Type.Unit         -> P.Unit
623 |     Type.Function _ _ -> P.Function
624 | 
625 | blockId :: P.DefinitionOrUse -> BlockName -> P.Document
626 | blockId defOrUse name = let info = P.IdentInfo (identText (nameID name) ++ (if description name == "" then "" else "_" ++ description name)) defOrUse P.Block False
627 |                         in  P.note (P.Sigil info) "%" ++ P.note (P.Identifier info) (render (nameID name) ++ P.pretty (if description name == "" then "" else "_" ++ description name))
628 | 
629 | -- TODO refactor `letID` and `blockId` maybe?
630 | letId :: P.DefinitionOrUse -> Name -> P.Document
631 | letId   defOrUse name = let info = P.IdentInfo (identText (nameID name)) defOrUse (prettyType (nameType name)) False
632 |                         in  P.note (P.Sigil info) "$" ++ P.note (P.Identifier info) (render (nameID name))
633 | 
634 | identText :: ID -> Text
635 | identText = \case
636 |     ASTName n -> Name.unqualifiedName n
637 |     ID      i -> showText i
638 | 
639 | renderBody :: [Statement] -> Transfer -> P.Document
640 | renderBody statements transfer = P.hardline ++ P.braces (P.nest 4 (P.hardline ++ render statements ++ P.hardline ++ render transfer) ++ P.hardline)
641 | 
642 | -- we could probably refactor all these further but...
643 | 
644 | instance Render ID where
645 |     listSeparator = ", "
646 |     render = P.pretty . identText
647 | 
648 | -- NOTE `instance Render Type` is provided by `module Type`
649 | 
650 | instance Render Name where
651 |     listSeparator = ", "
652 |     render name = letId P.Definition name ++ P.colon ++ " " ++ render (nameType name)
653 | 
654 | instance Render Function where
655 |     render function@Function { functionBody, returnBlock } =
656 |         P.keyword "function" ++ " " ++ letId P.Definition (functionName function) ++ P.parens (render (arguments functionBody)) ++ " " ++ P.keyword "returns" ++ " " ++ render returnType ++
657 |          renderBody (body functionBody) (transfer functionBody)
658 |         where returnType = assert (head (parameters (nameType returnBlock)))
659 | 
660 | instance Render Block where
661 |     render block = renderBody (body block) (transfer block)
662 | 
663 | instance Render Statement where
664 |     render = \case
665 |         BlockDecl name block -> P.keyword "block" ++ " " ++ blockId P.Definition name ++ P.parens (render (arguments block)) ++ renderBody (body block) (transfer block)
666 |         Let       name expr  -> P.keyword "let"   ++ " " ++ render name ++ " " ++ P.defineEquals ++ " " ++ render expr
667 |         Assign    name value -> letId P.Use name  ++ " " ++ P.assignEquals ++ " " ++ render value
668 | 
669 | instance Render Transfer where
670 |     render = \case
671 |         Jump         target  -> P.keyword "jump"   ++ " " ++ render target
672 |         Branch value targets -> P.keyword "branch" ++ " " ++ render value ++ " " ++ render targets
673 | 
674 | instance Render Target where
675 |     listSeparator = " "
676 |     render target = blockId P.Use (targetBlock target) ++ P.parens (render (targetArgs target))
677 | 
678 | instance Render Expression where
679 |     listSeparator = ", "
680 |     render = \case
681 |         Value          value            -> render value
682 |         UnaryOperator  op value         -> P.unaryOperator op ++ render value
683 |         BinaryOperator value1 op value2 -> render value1 ++ " " ++ P.binaryOperator op ++ " " ++ render value2
684 |         Call           fn args          -> render fn ++ P.parens (render args)
685 | 
686 | instance Render Value where
687 |     listSeparator = ", "
688 |     render = \case
689 |         Named   name    -> letId P.Use name
690 |         Literal literal -> render literal
691 | 
692 | instance Render Literal where
693 |     listSeparator = ", "
694 |     render = \case
695 |         Int  num  -> P.number num
696 |         Text text -> P.string text
697 |         Unit      -> P.note (P.Literal P.Unit) "Unit"
698 | 


--------------------------------------------------------------------------------
/src/LLVM.hs:
--------------------------------------------------------------------------------
  1 | -- we want to use `functionDefaults` and `globalVariableDefaults` and so on without warnings
  2 | -- (maybe it would be better if llvm-hs split these out into their own types so the record updates
  3 | -- would not be incomplete, but ¯\_(ツ)_/¯)
  4 | {-# OPTIONS_GHC -Wno-incomplete-record-updates #-}
  5 | 
  6 | module LLVM (translateFunctions, Module) where
  7 | 
  8 | import MyPrelude
  9 | 
 10 | import Data.List (transpose)
 11 | 
 12 | import qualified Data.Char
 13 | 
 14 | import qualified Data.Map.Strict as Map
 15 | 
 16 | import qualified LLVM.AST                   as L
 17 | import qualified LLVM.AST.CallingConvention as L
 18 | import qualified LLVM.AST.Constant          as LC
 19 | import qualified LLVM.AST.Global            as LG
 20 | import qualified LLVM.AST.IntegerPredicate  as L
 21 | 
 22 | import LLVM.AST          (Module, Definition, Global, Parameter (Parameter), BasicBlock (BasicBlock),
 23 |                           Instruction, Terminator, Named (Do, (:=)), Operand (LocalReference, ConstantOperand))
 24 | import LLVM.AST.Constant (Constant (GlobalReference))
 25 | import LLVM.AST.Type     (i1, i8, i32, i64, ptr, void)
 26 | 
 27 | import qualified Pretty
 28 | import qualified Name
 29 | import qualified Type
 30 | import qualified IR
 31 | 
 32 | 
 33 | 
 34 | ---------------------------------------------------------------------------------------------------- TRANSLATION FRONTEND
 35 | 
 36 | instance Pretty.Render Module where
 37 |     render = Pretty.pretty . showText
 38 |     outputWithStyle _ handle = hPutStr handle . showText
 39 | 
 40 | translateFunctions :: [IR.Function] -> Module
 41 | translateFunctions functions = result where
 42 |     result      = L.defaultModule { L.moduleDefinitions = definitions }
 43 |     definitions = map L.GlobalDefinition (fst (runTwoPass generate))
 44 |     generate :: LLVM m => m ()
 45 |     generate = do
 46 |         emitGlobal externPrintf
 47 |         emitGlobal externScanf
 48 |         emitBlock "return" do
 49 |             let returnArg = IR.Name (IR.ID 0) Type.Int ""
 50 |             translateArguments [returnArg]
 51 |             returnValue64 <- load returnArg
 52 |             let instr = L.Trunc {
 53 |                 L.operand0 = returnValue64,
 54 |                 L.type'    = i32,
 55 |                 L.metadata = []
 56 |             }
 57 |             returnName32 <- freshName
 58 |             emit (returnName32 := instr)
 59 |             let returnValue32 = LocalReference i32 returnName32
 60 |             return (L.Ret { L.returnOperand = Just returnValue32, L.metadata' = [] }, [])
 61 |         emitBlock "start" do -- NOTE the name "start" is significant (to `runTwoPass`)
 62 |             let block = todo
 63 |             translateBlock block
 64 |     externPrintf = L.functionDefaults {
 65 |         LG.name        = "printf",
 66 |         LG.parameters  = ([Parameter (ptr i8) "" []], True),
 67 |         LG.returnType  = i32
 68 |     }
 69 |     externScanf = L.functionDefaults {
 70 |         LG.name        = "scanf",
 71 |         LG.parameters  = ([Parameter (ptr i8) "" []], True),
 72 |         LG.returnType  = i32
 73 |     }
 74 | 
 75 | class Monad m => LLVM m where
 76 |     emitBlock    :: L.Name -> m (Terminator, [CallsBlockWith]) -> m ()
 77 |     getArguments :: m [CalledByBlockWith]
 78 |     emit         :: Named Instruction -> m ()
 79 |     emitAlloca   :: L.Type -> L.Name -> m ()
 80 |     emitGlobal   :: Global -> m ()
 81 |     freshName    :: m L.Name
 82 | 
 83 | data CallsBlockWith = CallsBlockWith {
 84 |     calledBlock     :: L.Name,
 85 |     argumentsPassed :: [Operand]
 86 | } deriving (Generic, Eq, Show)
 87 | 
 88 | data CalledByBlockWith = CalledByBlockWith {
 89 |     callingBlock      :: L.Name,
 90 |     argumentsReceived :: [Operand]
 91 | } deriving (Generic, Eq, Show)
 92 | 
 93 | translatedType :: IR.Type -> L.Type
 94 | translatedType = \case
 95 |     Type.Bool -> i1
 96 |     Type.Int  -> i64
 97 |     Type.Text -> ptr i8
 98 |     Type.Unit -> void
 99 |     Type.Function argTypes returnType ->
100 |         ptr (L.FunctionType (translatedType returnType) (map translatedType (filter (!= Type.Unit) argTypes)) False)
101 | 
102 | allocaForLet :: IR.Name -> Operand
103 | allocaForLet (IR.Name ident nameType _) = LocalReference (ptr (translatedType nameType)) (translatedID ident)
104 | 
105 | translatedID :: IR.ID -> L.Name
106 | translatedID = \case
107 |     IR.ID      num  -> L.mkName (show num)
108 |     IR.ASTName name -> L.mkName (textToString (Name.qualifiedName name))
109 | 
110 | alloca :: LLVM m => IR.Name -> m ()
111 | alloca (IR.Name ident nameType _) = emitAlloca (translatedType nameType) (translatedID ident)
112 | 
113 | load :: LLVM m => IR.Name -> m Operand
114 | load name = do
115 |     newName <- freshName
116 |     let instr = L.Load {
117 |         L.volatile       = False,
118 |         L.address        = allocaForLet name,
119 |         L.maybeAtomicity = Nothing,
120 |         L.alignment      = 0,
121 |         L.metadata       = []
122 |     }
123 |     emit (newName := instr)
124 |     return (LocalReference (translatedType (IR.nameType name)) newName)
125 | 
126 | store :: LLVM m => IR.Name -> Operand -> m ()
127 | store name operand = do
128 |     let instr = L.Store {
129 |         L.volatile       = False,
130 |         L.address        = allocaForLet name,
131 |         L.value          = operand,
132 |         L.maybeAtomicity = Nothing,
133 |         L.alignment      = 0,
134 |         L.metadata       = []
135 |     }
136 |     emit (Do instr)
137 | 
138 | translateValue :: LLVM m => IR.Value -> m Operand
139 | translateValue = \case
140 |     IR.Literal literal -> case literal of
141 |         IR.Int num -> do
142 |             return (number (Bits 64) (fromIntegral num))
143 |         IR.Text text -> do
144 |             translateStringLiteral text
145 |     IR.Named name -> do
146 |         load name
147 | 
148 | newtype Bits = Bits Word32
149 | 
150 | number :: Bits -> Integer -> Operand
151 | number (Bits bits) value = ConstantOperand (LC.Int { LC.integerBits = bits, LC.integerValue = value })
152 | 
153 | translateUnaryOp :: Operand -> UnaryOperator -> Instruction
154 | translateUnaryOp operand = \case
155 |     Not    -> L.Xor { L.operand0 = number (Bits 1)  1, L.operand1 = operand, L.metadata = [] }
156 |     Negate -> L.Sub { L.operand0 = number (Bits 64) 0, L.operand1 = operand, L.metadata = [], L.nsw = False, L.nuw = False }
157 | 
158 | translateBinaryOp :: Operand -> Operand -> BinaryOperator -> Instruction
159 | translateBinaryOp operand0 operand1 = \case
160 |     -- TODO: Handle the UB cases :(
161 |     ArithmeticOperator Add -> L.Add  { L.operand0, L.operand1, L.metadata = [], L.nsw   = False, L.nuw = False }
162 |     ArithmeticOperator Sub -> L.Sub  { L.operand0, L.operand1, L.metadata = [], L.nsw   = False, L.nuw = False }
163 |     ArithmeticOperator Mul -> L.Add  { L.operand0, L.operand1, L.metadata = [], L.nsw   = False, L.nuw = False }
164 |     ArithmeticOperator Div -> L.SDiv { L.operand0, L.operand1, L.metadata = [], L.exact = False }
165 |     ArithmeticOperator Mod -> L.SRem { L.operand0, L.operand1, L.metadata = [] } -- TODO: remainder vs. modulo semantics?
166 |     LogicalOperator    And -> L.And  { L.operand0, L.operand1, L.metadata = [] }
167 |     LogicalOperator    Or  -> L.Or   { L.operand0, L.operand1, L.metadata = [] }
168 |     ComparisonOperator op  -> L.ICmp { L.operand0, L.operand1, L.metadata = [], L.iPredicate = predicateFor op }
169 |     where predicateFor = \case
170 |             Less           -> L.SLT
171 |             LessEqual      -> L.SLE
172 |             Greater        -> L.SGT
173 |             GreaterEqual   -> L.SGE
174 |             Equal          -> L.EQ
175 |             NotEqual       -> L.NE
176 | 
177 | translateExpression :: LLVM m => IR.Expression -> m Operand
178 | translateExpression expr = let localRef = LocalReference (translatedType (IR.typeOf expr)) in case expr of
179 |     IR.Value value -> do
180 |         translateValue value
181 |     IR.UnaryOperator op value -> do
182 |         operand <- translateValue value
183 |         newName <- freshName
184 |         emit (newName := translateUnaryOp operand op)
185 |         return (localRef newName)
186 |     IR.BinaryOperator value1 op value2 -> do
187 |         operand1 <- translateValue value1
188 |         operand2 <- translateValue value2
189 |         newName  <- freshName
190 |         emit (newName := translateBinaryOp operand1 operand2 op)
191 |         return (localRef newName)
192 |     IR.Call fn args -> do
193 |         let value = todo
194 |         operand     <- translateValue value
195 |         printFormat <- translateStringLiteral "%s "
196 |         emit (Do (call printf [printFormat, operand]))
197 | 
198 |         scanFormat  <- translateStringLiteral "%26lld"
199 |         let instr = L.Alloca { -- TODO factor this out from `emitAlloca`
200 |             L.allocatedType = i64,
201 |             L.numElements   = Nothing,
202 |             L.alignment     = 0,
203 |             L.metadata      = []
204 |         }
205 |         outputName <- freshName
206 |         emit (outputName := instr)
207 |         let outputPtr = LocalReference (ptr i64) outputName
208 |         emit (Do (call scanf [scanFormat, outputPtr]))
209 | 
210 |         loadedName <- freshName
211 |         let loadInstr = L.Load { -- TODO factor this out from `load`
212 |             L.volatile       = False,
213 |             L.address        = outputPtr,
214 |             L.maybeAtomicity = Nothing,
215 |             L.alignment      = 0,
216 |             L.metadata       = []
217 |         }
218 |         emit (loadedName := loadInstr)
219 |         return (LocalReference i64 loadedName)
220 | 
221 | translateStringLiteral :: LLVM m => Text -> m Operand
222 | translateStringLiteral text = do
223 |     globalName <- freshName
224 |     let (type', global) = stringConstant globalName text
225 |     emitGlobal global
226 |     ptrName <- freshName
227 |     -- TODO: Should we use a Constant GetElementPtr instead?
228 |     --   No we shouldn't: only the string itself needs to be part of the binary, not the pointer to it.
229 |     -- If yes, can we give it the string constant directly, or do we need 2 separate globals?
230 |     --   We'd need two separate ones: it expects a constant *pointer* as argument (not an array).
231 |     let instr = L.GetElementPtr {
232 |         L.address  = ConstantOperand (GlobalReference (ptr type') globalName),
233 |         L.indices  = replicate 2 (number (Bits 32) 0),
234 |         L.inBounds = False,
235 |         L.metadata = []
236 |     }
237 |     emit (ptrName := instr)
238 |     return (LocalReference (ptr i8) ptrName)
239 | 
240 | stringConstant :: L.Name -> Text -> (L.Type, Global)
241 | stringConstant name text = (type', globalConstant name type' constant) where
242 |     type'    = L.ArrayType (fromIntegral (length charList)) i8
243 |     constant = (LC.Array i8 . map (LC.Int 8 . fromIntegral . Data.Char.ord)) charList
244 |     charList = textToString text ++ [Data.Char.chr 0]
245 | 
246 | globalConstant :: L.Name -> L.Type -> Constant -> Global
247 | globalConstant name type' value =
248 |     LG.globalVariableDefaults {
249 |         LG.name        = name,
250 |         LG.initializer = Just value,
251 |         LG.isConstant  = True,
252 |         LG.type'       = type'
253 |     }
254 | 
255 | translateStatement :: LLVM m => IR.Statement -> m ()
256 | translateStatement = \case
257 |     IR.BlockDecl (IR.Name ident _ _) body -> do
258 |         emitBlock (translatedID ident) do
259 |             translateBlock body
260 |     IR.Let name expr -> do
261 |         alloca name
262 |         operand <- translateExpression expr
263 |         store name operand
264 |     IR.Assign name value -> do
265 |         operand <- translateValue value
266 |         store name operand
267 |     {-
268 |     IR.Say value -> do
269 |         formatPtr <- translateStringLiteral "%s\n"
270 |         operand   <- translateValue value
271 |         emit (Do (call printf [formatPtr, operand]))
272 |     IR.Write value -> do
273 |         formatPtr <- translateStringLiteral "%lld\n"
274 |         operand   <- translateValue value
275 |         emit (Do (call printf [formatPtr, operand]))
276 |     -}
277 | 
278 | call :: Operand -> [Operand] -> Instruction
279 | call callee args = L.Call {
280 |     L.tailCallKind       = Nothing,
281 |     L.callingConvention  = L.C,
282 |     L.returnAttributes   = [],
283 |     L.function           = Right callee,
284 |     L.arguments          = map (\arg -> (arg, [])) args,
285 |     L.functionAttributes = [],
286 |     L.metadata           = []
287 | }
288 | 
289 | printf :: Operand
290 | printf = ConstantOperand (GlobalReference (ptr (L.FunctionType i32 [ptr i8] True)) "printf")
291 | 
292 | scanf :: Operand
293 | scanf = ConstantOperand (GlobalReference (ptr (L.FunctionType i32 [ptr i8] True)) "scanf")
294 | 
295 | translateBlock :: LLVM m => IR.Block -> m (Terminator, [CallsBlockWith])
296 | translateBlock IR.Block { IR.arguments, IR.body, IR.transfer } = do
297 |     translateArguments arguments
298 |     mapM_ translateStatement body
299 |     translateTransfer transfer
300 | 
301 | translateArguments :: LLVM m => [IR.Name] -> m ()
302 | translateArguments arguments = do
303 |     -- [For each calling block: CalledByBlockWith { callingBlock = block's name, argumentsReceived = [values it passed for arguments] }]
304 |     calledByBlocks <- getArguments
305 |     forM_ calledByBlocks \CalledByBlockWith { argumentsReceived } -> do
306 |         assertEqM (length argumentsReceived) (length arguments)
307 |     -- [For each calling block: [For each argument it passed: (the argument's value, the block's name)]]
308 |     let incomingValuesGroupedByBlock =
309 |             map (\CalledByBlockWith { callingBlock, argumentsReceived } ->
310 |                     map (\operand -> (operand, callingBlock))
311 |                         argumentsReceived)
312 |                 calledByBlocks
313 |     -- [For each argument: [For each calling block: (the argument's value, the block's name)]]
314 |     let incomingValuesGroupedByArg = transpose incomingValuesGroupedByBlock
315 |     -- `++ repeat []` so we process all the arguments even if this block is never called (which is always the case in the FirstPass!!)
316 |     forM_ (zip arguments (incomingValuesGroupedByArg ++ repeat [])) \(argument, incomingValues) -> do
317 |         phiName <- freshName
318 |         when (incomingValues != []) do
319 |             let instr = L.Phi {
320 |                 L.type'          = translatedType (IR.nameType argument),
321 |                 L.incomingValues = incomingValues,
322 |                 L.metadata       = []
323 |             }
324 |             emit (phiName := instr)
325 |             -- HACK: We make an alloca for each argument, despite them never being mutated or anything,
326 |             -- just so we can refer to them consistently using the `allocaForLet` mapped names, just like normal `let`s.
327 |             alloca argument
328 |             store argument (LocalReference (translatedType (IR.nameType argument)) phiName)
329 | 
330 | translateTransfer :: LLVM m => IR.Transfer -> m (Terminator, [CallsBlockWith])
331 | translateTransfer = \case
332 |     IR.Jump target -> do
333 |         callsBlockWith <- translateTarget target
334 |         let instr = L.Br { L.dest = calledBlock callsBlockWith, L.metadata' = [] }
335 |         return (instr, [callsBlockWith])
336 |     IR.Branch value targets -> do
337 |         operand <- translateValue value
338 |         assertEqM (length targets) 2
339 |         callsBlocksWith <- mapM translateTarget targets
340 |         let instr = L.CondBr {
341 |             L.condition = operand,
342 |             L.trueDest  = calledBlock (callsBlocksWith !! 1),
343 |             L.falseDest = calledBlock (callsBlocksWith !! 0),
344 |             L.metadata' = []
345 |         }
346 |         return (instr, callsBlocksWith)
347 | 
348 | translateTarget :: LLVM m => IR.Target -> m CallsBlockWith
349 | translateTarget IR.Target { IR.targetBlock, IR.targetArgs } = do
350 |     operands <- mapM translateValue targetArgs
351 |     return CallsBlockWith { calledBlock = translatedID (IR.nameID targetBlock), argumentsPassed = operands }
352 | 
353 | 
354 | 
355 | ---------------------------------------------------------------------------------------------------- TRANSLATION BACKEND
356 | 
357 | -- Two passes using plain state monads
358 | 
359 | runTwoPass :: (forall m. LLVM m => m a) -> ([Global], a)
360 | runTwoPass generate = result where
361 |     firstResult  = execState (FirstState 0 Map.empty)                                     (runFirstPass  generate)
362 |     secondResult = runState  (SecondState 0 (callersMap firstResult) [] [] [] dummyBlock) (runSecondPass generate)
363 |     dummyBlock   = UnfinishedBlock (L.UnName maxBound) [] Nothing
364 |     resultValue  = fst secondResult
365 |     SecondState {
366 |         globals,
367 |         allocas,
368 |         finishedBlocks,
369 |         unfinishedBlocks
370 |     }            = snd secondResult
371 |     allocaBlock  = BasicBlock "alloca" allocas (Do (L.Br { L.dest = "start", L.metadata' = [] }))
372 |     createdFn    = LG.functionDefaults { LG.name = "main", LG.returnType = i32, LG.basicBlocks = allocaBlock : finishedBlocks }
373 |     result       = if unfinishedBlocks == dummyBlock then (createdFn : globals, resultValue) else bug "The dummy block changed somehow!"
374 | 
375 | newtype FirstPass a = FirstPass {
376 |     runFirstPass :: State FirstState a
377 | } deriving (Functor, Applicative, Monad, MonadState FirstState)
378 | 
379 | data FirstState = FirstState {
380 |     freshNamer :: Word,
381 |     callersMap :: Map L.Name [CalledByBlockWith]
382 | } deriving Generic
383 | 
384 | instance LLVM FirstPass where
385 |     freshName = do
386 |         field @"freshNamer" += 1
387 |         num <- getM (field @"freshNamer")
388 |         return (L.UnName num)
389 |     emitBlock blockName bodyAction = do
390 |         (_, callsBlocksWith) <- bodyAction
391 |         forM_ callsBlocksWith \CallsBlockWith { calledBlock, argumentsPassed } -> do
392 |             when (argumentsPassed != []) do
393 |                 let calledByThisBlock = CalledByBlockWith { callingBlock = blockName, argumentsReceived = argumentsPassed }
394 |                 modifyM (field @"callersMap") (Map.alter (Just . prepend calledByThisBlock . fromMaybe []) calledBlock)
395 |                 return ()
396 |     emit       _   = return ()
397 |     emitAlloca _ _ = return ()
398 |     emitGlobal _   = return ()
399 |     getArguments   = return []
400 | 
401 | newtype SecondPass a = SecondPass {
402 |     runSecondPass :: State SecondState a
403 | } deriving (Functor, Applicative, Monad, MonadState SecondState)
404 | 
405 | data SecondState = SecondState {
406 |     secondNamer      :: Word,
407 |     callersOfBlocks  :: Map L.Name [CalledByBlockWith], -- readonly
408 |     allocas          :: [Named Instruction],
409 |     globals          :: [Global],
410 |     finishedBlocks   :: [BasicBlock],
411 |     unfinishedBlocks :: UnfinishedBlock
412 | } deriving Generic
413 | 
414 | data UnfinishedBlock = UnfinishedBlock {
415 |     blockName     :: L.Name,
416 |     instructions  :: [Named Instruction],
417 |     previousBlock :: Maybe UnfinishedBlock
418 | } deriving (Generic, Eq)
419 | 
420 | instance LLVM SecondPass where
421 |     freshName = do
422 |         field @"secondNamer" += 1
423 |         num <- getM (field @"secondNamer")
424 |         return (L.UnName num)
425 |     emit instruction = do
426 |         modifyM (field @"unfinishedBlocks" . field @"instructions") (++ [instruction])
427 |         return ()
428 |     emitAlloca type' name = do
429 |         let instr = L.Alloca {
430 |             L.allocatedType = type',
431 |             L.numElements   = Nothing,
432 |             L.alignment     = 0,
433 |             L.metadata      = []
434 |         }
435 |         modifyM (field @"allocas") (++ [name := instr])
436 |         return ()
437 |     emitGlobal global = do
438 |         modifyM (field @"globals") (prepend global)
439 |         return ()
440 |     getArguments = do
441 |         thisBlock <- getM (field @"unfinishedBlocks" . field @"blockName")
442 |         callers   <- getM (field @"callersOfBlocks")
443 |         return (Map.findWithDefault [] thisBlock callers)
444 |     emitBlock blockName bodyAction = do
445 |         modifyM (field @"unfinishedBlocks") (UnfinishedBlock blockName [] . Just)
446 |         (terminator, callsBlocksWith) <- bodyAction
447 |         -- here we just assert that the `callsBlocksWith` is the same as what we got in the first pass
448 |         forM_ callsBlocksWith \CallsBlockWith { calledBlock, argumentsPassed } -> do
449 |             -- TODO assert that the `calledBlock` is one of those in `terminator`
450 |             savedCallers <- liftM (Map.findWithDefault [] calledBlock) (getM (field @"callersOfBlocks"))
451 |             let calledByUs = filter (\CalledByBlockWith { callingBlock } -> callingBlock == blockName) savedCallers
452 |             -- TODO there is a probably a simpler way to express this?
453 |             if argumentsPassed == []
454 |                 then do
455 |                     assertEqM calledByUs []
456 |                 else do
457 |                     assertEqM calledByUs [CalledByBlockWith { callingBlock = blockName, argumentsReceived = argumentsPassed }]
458 |         doModifyM (field @"finishedBlocks") \finishedBlocks -> do
459 |             instructions <- getM (field @"unfinishedBlocks" . field @"instructions")
460 |             savedName    <- getM (field @"unfinishedBlocks" . field @"blockName")
461 |             assertEqM blockName savedName
462 |             let newBlock = BasicBlock savedName instructions (Do terminator)
463 |             return (newBlock : finishedBlocks)
464 |         modifyM (field @"unfinishedBlocks" ) (assert . previousBlock)
465 |         return ()
466 | 


--------------------------------------------------------------------------------
/src/Main.hs:
--------------------------------------------------------------------------------
  1 | module Main where
  2 | 
  3 | import MyPrelude
  4 | 
  5 | import Control.Exception  (SomeException, catch)
  6 | import Data.List          (partition, isPrefixOf)
  7 | import System.Environment (getArgs)
  8 | 
  9 | import qualified System.Directory as Directory
 10 | import qualified System.Exit      as Exit
 11 | import qualified System.Process   as Process
 12 | import qualified Foreign.Ptr      as Ptr
 13 | 
 14 | import qualified LLVM.Analysis        as L
 15 | import qualified LLVM.Context         as L
 16 | import qualified LLVM.ExecutionEngine as L
 17 | import qualified LLVM.Module          as L
 18 | import qualified LLVM.OrcJIT          as L
 19 | import qualified LLVM.PassManager     as L
 20 | import qualified LLVM.Target          as L
 21 | import qualified LLVM.Transforms      as L
 22 | 
 23 | import qualified Pretty
 24 | import qualified Token
 25 | import qualified AST
 26 | import qualified Name
 27 | import qualified Type
 28 | import qualified IR
 29 | import qualified LLVM
 30 | 
 31 | import Token (Token)
 32 | import AST   (AST)
 33 | import Name  (ResolvedName)
 34 | import Type  (TypedName)
 35 | 
 36 | newtype Command a = Command {
 37 |     runCommand :: ExceptT Text Managed a
 38 | } deriving (Functor, Applicative, Monad, MonadIO, MonadManaged, MonadError Text)
 39 | 
 40 | data Arguments = Arguments {
 41 |     command  :: Text,
 42 |     inFile   :: Maybe Text,
 43 |     outFile  :: Maybe Text,
 44 |     optimize :: Bool
 45 | } deriving Generic
 46 | 
 47 | arguments :: Command Arguments
 48 | arguments = do
 49 |     (flags, args) <- liftM (partition (isPrefixOf "-")) (liftIO getArgs)
 50 |     let optimize = elem "-O" flags -- TODO warn on unknown flags
 51 |     (command, inFile, outFile) <- case map stringToText args of
 52 |         []        -> throwError "No command given!" -- TODO print help
 53 |         [a]       -> return (a, Nothing, Nothing)
 54 |         [a, b]    -> return (a, Just b,  Nothing)
 55 |         [a, b, c] -> return (a, Just b,  Just c)
 56 |         _         -> throwError "Too many arguments!"
 57 |     return Arguments { command, inFile, outFile, optimize }
 58 | 
 59 | getInput :: Command Text
 60 | getInput = do
 61 |     Arguments { inFile } <- arguments
 62 |     case inFile of
 63 |         Nothing       -> liftIO getContents
 64 |         Just fileName -> liftIO (readFile (textToString fileName)) -- TODO catch exceptions :\
 65 | 
 66 | tokens :: Command [With Loc Token]
 67 | tokens = do
 68 |     input <- getInput
 69 |     try (mapLeft prettyShow (Token.tokenize input))
 70 | 
 71 | ast :: Command (AST Loc Text)
 72 | ast = do
 73 |     input <- tokens
 74 |     try (mapLeft prettyShow (AST.parse input))
 75 | 
 76 | names :: Command (AST Loc ResolvedName)
 77 | names = do
 78 |     input  <- ast
 79 |     result <- try (mapLeft prettyShow (Name.resolveNames  input))
 80 |     _      <- try (mapLeft prettyShow (Name.validateNames result))
 81 |     return result
 82 | 
 83 | types :: Command (AST Loc TypedName)
 84 | types = do
 85 |     input  <- names
 86 |     result <- try (mapLeft prettyShow (Type.checkTypes    input))
 87 |     _      <- try (mapLeft prettyShow (Name.validateNames result)) -- make sure types are assigned to names consistently!
 88 |     _      <- try (mapLeft prettyShow (Type.validateTypes result))
 89 |     return result
 90 | 
 91 | ir :: Command [IR.Function]
 92 | ir = do
 93 |     result <- liftM (map (IR.translateFunction . nodeWithout)) types
 94 |     _      <- try (mapLeft prettyShow (IR.validate result))
 95 |     opt    <- liftM optimize arguments
 96 |     if opt
 97 |         then do
 98 |             let optimized = map (modify (field @"functionBody") IR.eliminateTrivialBlocks) result
 99 |             _ <- try (mapLeft prettyShow (IR.validate optimized))
100 |             return optimized
101 |         else do
102 |             return result
103 | 
104 | llvmAst :: Command LLVM.Module
105 | llvmAst = liftM LLVM.translateFunctions ir
106 | 
107 | llvmContextAndModule :: Command (L.Context, L.Module)
108 | llvmContextAndModule = do
109 |     moduleAst <- llvmAst
110 |     context   <- usingManaged L.withContext
111 |     module'   <- usingManaged (L.withModuleFromAST context moduleAst)
112 |     liftIO (L.verify module') -- TODO is there an exception to catch??
113 |     whenM (liftM optimize arguments) do -- FIXME if we generate invalid LLVM, we want to print it before verifying, otherwise after!
114 |         passManager <- usingManaged (L.withPassManager (L.PassSetSpec [L.PromoteMemoryToRegister] Nothing Nothing Nothing))
115 |         _ <- liftIO (L.runPassManager passManager module')
116 |         return ()
117 |     return (context, module')
118 | 
119 | llvmModule :: Command L.Module
120 | llvmModule = liftM snd llvmContextAndModule
121 | 
122 | llvm :: Command ByteString
123 | llvm = do
124 |     module' <- llvmModule
125 |     liftIO (L.moduleLLVMAssembly module')
126 | 
127 | asm :: Command ByteString
128 | asm = do
129 |     module' <- llvmModule
130 |     target  <- usingManaged L.withHostTargetMachine
131 |     liftIO (L.moduleTargetAssembly target module')
132 | 
133 | obj :: Command ByteString
134 | obj = do
135 |     module' <- llvmModule
136 |     target  <- usingManaged L.withHostTargetMachine
137 |     liftIO (L.moduleObject target module')
138 | 
139 | build :: Command ()
140 | build = do
141 |     Arguments { inFile, outFile } <- arguments
142 |     let objFile = textToString (fromMaybe "stdin" inFile) ++ ".o"
143 |     module' <- llvmModule
144 |     target  <- usingManaged L.withHostTargetMachine
145 |     let removeOrIgnore file = liftIO (catch (Directory.removeFile file) (\(_ :: SomeException) -> return ())) -- ugh
146 |     usingManaged \body -> do
147 |         L.writeObjectToFile target (L.File objFile) module'
148 |         result <- body ()
149 |         removeOrIgnore objFile
150 |         return result
151 |     let args = objFile : maybe [] (prepend "-o" . single . textToString) outFile
152 |     (exitCode, out, err) <- liftIO (Process.readProcessWithExitCode "gcc" args "")
153 |     removeOrIgnore objFile
154 |     when (exitCode != Exit.ExitSuccess) do
155 |         throwError (stringToText ("GCC reported error:\n" ++ out ++ "\n" ++ err))
156 | 
157 | {- TODO port to new API
158 | -- OrcJIT version, fails to resolve `printf` symbol
159 | runOrcJit :: Command Text
160 | runOrcJit = do
161 |     let resolver :: L.MangledSymbol -> L.IRCompileLayer l -> L.MangledSymbol -> IO L.JITSymbol
162 |         resolver _testFunc compileLayer symbol = L.findSymbol compileLayer symbol True
163 |         nullResolver :: L.MangledSymbol -> IO L.JITSymbol
164 |         nullResolver s = return (L.JITSymbol 0 (L.JITSymbolFlags False False))
165 |     module'      <- llvmModule
166 |     target       <- usingManaged L.withHostTargetMachine
167 |     objectLayer  <- usingManaged L.withObjectLinkingLayer
168 |     compileLayer <- usingManaged (L.withIRCompileLayer objectLayer target)
169 |     testFunc     <- liftIO (L.mangleSymbol compileLayer "main")
170 |     --moduleSet    <- usingManaged (L.withModuleSet compileLayer [module'] (L.SymbolResolver (resolver testFunc compileLayer) nullResolver))
171 |     mainSymbol   <- liftIO (L.mangleSymbol compileLayer "main")
172 |     jitSymbol    <- liftIO (L.findSymbol compileLayer mainSymbol True)
173 |     result       <- (liftIO . runMainPtr . Ptr.castPtrToFunPtr . Ptr.wordPtrToPtr . L.jitSymbolAddress) jitSymbol
174 |     return ("EXIT CODE: " ++ showText result)
175 | -}
176 | 
177 | run :: Command ()
178 | run = do
179 |     Arguments { outFile } <- arguments
180 |     when (isJust outFile) do
181 |         throwError "An output file doesn't make sense for the `run` command!"
182 |     (context, module') <- llvmContextAndModule
183 |     engine             <- usingManaged (L.withMCJIT context Nothing Nothing Nothing Nothing)
184 |     compiledModule     <- usingManaged (L.withModuleInEngine engine module')
185 |     maybeMain          <- liftIO (L.getFunction compiledModule "main")
186 |     mainPtr            <- maybe (throwError "ERROR: `main` not found in JIT-compiled code!") return maybeMain
187 |     result             <- (liftIO . runMainPtr . Ptr.castFunPtr) mainPtr
188 |     liftIO (putStrLn ("EXIT CODE: " ++ showText result))
189 | 
190 | foreign import ccall "dynamic" runMainPtr :: Ptr.FunPtr (IO Int32) -> IO Int32
191 | 
192 | outputCommand :: Pretty.Render a => Command a -> Command ()
193 | outputCommand command = do
194 |     Arguments { outFile } <- arguments
195 |     handle <- case outFile of
196 |         Nothing       -> return stdout
197 |         Just fileName -> usingManaged (withFile (textToString fileName) WriteMode) -- TODO exceptions :\
198 |     result <- command
199 |     liftIO (Pretty.output handle result)
200 | 
201 | commands :: [(Text, Command ())]
202 | commands = execWriter do
203 |     let command name cmd = tell [(name, outputCommand cmd)]
204 |     command "tokens" tokens
205 |     command "ast"    ast
206 |     command "names"  names
207 |     command "types"  types
208 |     command "ir"     ir
209 |     command "llvm"   llvm
210 |     command "asm"    asm
211 |     command "obj"    obj
212 |     -- these have their own ways to handle output
213 |     tell [("build", build)]
214 |     tell [("run",   run)]
215 | 
216 | main :: IO ()
217 | main = runManaged do
218 |     result <- (runExceptT . runCommand) do
219 |         Arguments { command } <- arguments
220 |         fromMaybe (throwError "Command not recognized!") (lookup command commands)
221 |     either (liftIO . hPutStrLn stderr) return result
222 | 


--------------------------------------------------------------------------------
/src/MyPrelude.hs:
--------------------------------------------------------------------------------
  1 | module MyPrelude (module MyPrelude, module Reexports) where
  2 | 
  3 | 
  4 | 
  5 | -------------------------------------------------------------------------- reexports
  6 | 
  7 | import Prelude                          as Reexports hiding (putStr, putStrLn, getLine, getContents, interact, readFile, writeFile, appendFile, head, tail, (++), foldl, scanl, (/=), ($), ($!))
  8 | import Data.Text.IO                     as Reexports        (putStr, putStrLn, getLine, getContents, interact, readFile, writeFile, appendFile, hGetContents, hPutStr, hPutStrLn)
  9 | import System.IO                        as Reexports        (Handle, FilePath, IOMode (ReadMode, WriteMode, AppendMode, ReadWriteMode), stdin, stdout, stderr, withFile)
 10 | import Data.Foldable                    as Reexports        (foldl')
 11 | import Data.Int                         as Reexports        ( Int,  Int8,  Int16,  Int32,  Int64)
 12 | import Data.Word                        as Reexports        (Word, Word8, Word16, Word32, Word64)
 13 | import Data.Either                      as Reexports        (isLeft, isRight, fromLeft, fromRight)
 14 | import Data.Maybe                       as Reexports        (isJust, isNothing, fromMaybe, maybeToList, catMaybes, mapMaybe)
 15 | import Data.List                        as Reexports        (scanl', uncons, intercalate)
 16 | import Data.Function                    as Reexports        (on)
 17 | import Control.Applicative              as Reexports        (Alternative (empty), liftA2, liftA3)
 18 | import Control.Monad                    as Reexports        (liftM, forM, forM_, zipWithM, zipWithM_, foldM, foldM_, filterM, replicateM, forever, join, guard, when, unless)
 19 | import Control.Monad.Trans              as Reexports        (MonadTrans (lift))
 20 | import Control.Monad.IO.Class           as Reexports        (MonadIO    (liftIO))
 21 | import Control.Monad.Managed.Safe       as Reexports        (Managed, MonadManaged (using), managed, managed_, runManaged)
 22 | import Control.Monad.Except             as Reexports        (ExceptT, Except, MonadError,  throwError, catchError, runExceptT, runExcept)
 23 | import Control.Monad.Reader             as Reexports        (ReaderT, Reader, MonadReader, ask, local)
 24 | import Control.Monad.Writer.Strict      as Reexports        (WriterT, Writer, MonadWriter, tell, runWriterT, runWriter, execWriterT, execWriter)
 25 | import Control.Monad.State.Strict       as Reexports        (StateT,  State,  MonadState)
 26 | import Data.ByteString                  as Reexports        (ByteString)
 27 | import Data.Text                        as Reexports        (Text, toLower, toUpper)
 28 | import Data.Text.Prettyprint.Doc        as Reexports        (Doc)
 29 | import Data.Set                         as Reexports        (Set)
 30 | import Data.Map.Strict                  as Reexports        (Map)
 31 | import GHC.Generics                     as Reexports        (Generic)
 32 | import Data.Generics.Product.Fields     as Reexports        (HasField')
 33 | import Data.Generics.Sum.Constructors   as Reexports        (AsConstructor')
 34 | import Data.Loc                         as Reexports        (Loc)
 35 | 
 36 | 
 37 | 
 38 | -------------------------------------------------------------------------- local imports
 39 | 
 40 | import qualified Prelude
 41 | import qualified Text.Pretty.Simple
 42 | import qualified Data.Text                      as Text
 43 | import qualified Data.Text.Encoding             as Text
 44 | import qualified Data.Text.Lazy                 as LazyText
 45 | import qualified Control.Monad.Reader           as Reader      (runReaderT, runReader)
 46 | import qualified Control.Monad.State.Strict     as State       (runStateT,  runState,  evalStateT,  evalState,  execStateT,  execState, get, put)
 47 | import qualified Data.Generics.Product.Fields   as GenericLens (field')
 48 | import qualified Data.Generics.Sum.Constructors as GenericLens (_Ctor')
 49 | import Control.Applicative   (some, many, Const (Const, getConst), (<|>))
 50 | import Data.Functor.Identity (Identity (Identity, runIdentity))
 51 | import Data.Profunctor (Profunctor (lmap, rmap), Choice (right'))
 52 | 
 53 | import GHC.Stack (HasCallStack, withFrozenCallStack)
 54 | import qualified GHC.Stack as Stack
 55 | import qualified Debug.Trace
 56 | 
 57 | 
 58 | -------------------------------------------------------------------------- prelude replacements
 59 | 
 60 | head :: [a] -> Maybe a
 61 | head = \case
 62 |     []  -> Nothing
 63 |     a:_ -> Just a
 64 | 
 65 | tail :: [a] -> Maybe [a]
 66 | tail = \case
 67 |     []   -> Nothing
 68 |     _:as -> Just as
 69 | 
 70 | (++) :: Semigroup a => a -> a -> a
 71 | (++) = (Prelude.<>)
 72 | 
 73 | (!=) :: Eq a => a -> a -> Bool
 74 | (!=) = (Prelude./=)
 75 | 
 76 | strictly :: (a -> b) -> (a -> b)
 77 | strictly = (Prelude.$!)
 78 | 
 79 | 
 80 | -------------------------------------------------------------------------- other utility functions
 81 | 
 82 | (%) :: Integral num => num -> num -> num
 83 | (%) = mod
 84 | 
 85 | bool :: a -> a -> Bool -> a
 86 | bool false true b = if b then true else false
 87 | 
 88 | at :: Int -> [a] -> Maybe a
 89 | at pos = head . drop pos
 90 | 
 91 | prepend :: a -> [a] -> [a]
 92 | prepend = (:)
 93 | 
 94 | single :: a -> [a]
 95 | single = \a -> [a]
 96 | 
 97 | singleIf ::  Bool -> a -> [a]
 98 | singleIf = pureIf -- see below
 99 | 
100 | justIf :: Bool -> a -> Maybe a
101 | justIf = pureIf
102 | 
103 | asLeft :: Maybe a -> Either a ()
104 | asLeft = maybe (Right ()) Left
105 | 
106 | asRight :: Maybe a -> Either () a
107 | asRight = reflection . asLeft
108 | 
109 | left :: Either a b -> Maybe a
110 | left = either Just (const Nothing)
111 | 
112 | right :: Either a b -> Maybe b
113 | right = left . reflection
114 | 
115 | fromLeftOr :: (b -> a) -> Either a b -> a
116 | fromLeftOr f = whichever . fmap f
117 | 
118 | fromRightOr :: (a -> b) -> Either a b -> b
119 | fromRightOr f = fromLeftOr f . reflection
120 | 
121 | reflection :: Either a b -> Either b a
122 | reflection = either Right Left
123 | 
124 | whichever :: Either a a -> a
125 | whichever = either id id
126 | 
127 | mapLeft :: (a -> a') -> Either a b -> Either a' b
128 | mapLeft f = reflection . fmap f . reflection
129 | 
130 | 
131 | 
132 | -------------------------------------------------------------------------- Applicative and Alternative
133 | 
134 | liftA0 :: Applicative f => a -> f a
135 | liftA0 = pure
136 | 
137 | liftA1 :: Applicative f => (a -> b) -> f a -> f b
138 | liftA1 = fmap
139 | 
140 | pureIf :: Alternative f => Bool -> a -> f a
141 | pureIf b a = if b then pure a else empty
142 | 
143 | oneOf :: Alternative f => [f a] -> f a
144 | oneOf = foldl' (<|>) empty
145 | 
146 | -- sometimes we want the Maybe version, sometimes we want the list version...
147 | -- (`liftA1 head (zeroOrOne x)` recovers the Maybe version)
148 | zeroOrOne :: Alternative f => f a -> f [a]
149 | zeroOrOne a = oneOf [liftA0 [], liftA1 single a]
150 | 
151 | oneOrMore :: Alternative f => f a -> f [a]
152 | oneOrMore = some
153 | 
154 | zeroOrMore :: Alternative f => f a -> f [a]
155 | zeroOrMore = many
156 | 
157 | 
158 | 
159 | -------------------------------------------------------------------------- Monad
160 | 
161 | ifM :: Monad m => m Bool -> m a -> m (Maybe a)
162 | ifM condition action = do
163 |     result <- condition
164 |     if result
165 |         then liftM Just action
166 |         else return Nothing
167 | 
168 | elseM :: Monad m => m (Maybe a) -> m a -> m a
169 | elseM thenAction elseAction = do
170 |     result <- thenAction
171 |     case result of
172 |         Just a  -> return a
173 |         Nothing -> elseAction
174 | 
175 | whenM :: Monad m => m Bool -> m () -> m ()
176 | whenM condition = unused . ifM condition
177 | 
178 | whileM :: Monad m => m Bool -> m ()
179 | whileM condition = whenM condition (whileM condition)
180 | 
181 | whileJustM :: Monad m => a -> (a -> m (Maybe a)) -> m ()
182 | whileJustM a action = whileRightM a (liftM asRight . action . whichever)
183 | 
184 | whileRightM :: Monad m => a -> (Either a c -> m (Either b c)) -> m b
185 | whileRightM = impl . Left where
186 |     impl input action = do
187 |         result <- action input
188 |         case result of
189 |             Right c -> impl (Right c) action
190 |             Left  b -> return b
191 | 
192 | unfoldM :: Monad m => m (Maybe a) -> m [a]
193 | unfoldM action = do
194 |     result <- action
195 |     case result of
196 |         Just a  -> liftM (prepend a) (unfoldM action)
197 |         Nothing -> return []
198 | 
199 | try :: MonadError e m => Either e a -> m a
200 | try = either throwError return
201 | 
202 | tryM :: MonadError e m => m (Either e a) -> m a
203 | tryM action = do
204 |     result <- action
205 |     try result
206 | 
207 | usingManaged :: MonadManaged m => (forall r. (a -> IO r) -> IO r) -> m a
208 | usingManaged with = using (managed with)
209 | 
210 | unused :: Functor m => m a -> m ()
211 | unused = fmap (const ())
212 | 
213 | 
214 | 
215 | -------------------------------------------------------------------------- lenses and prisms
216 | 
217 | -- TODO
218 | -- composing a lens with a prism gives a traversal, we should probably have that too
219 | -- which functions can/should be generalized?
220 | 
221 | type Lens  outer inner = forall    f.             Functor     f  => (inner  ->  f inner) -> (outer  ->  f outer)
222 | 
223 | type Prism outer inner = forall to f. (Choice to, Applicative f) => (inner `to` f inner) -> (outer `to` f outer)
224 | 
225 | get :: Lens outer inner -> outer -> inner
226 | get lens outer = getConst (lens Const outer)
227 | 
228 | set :: Lens outer inner -> inner -> outer -> outer
229 | set lens inner outer = runIdentity (lens (const (Identity inner)) outer)
230 | 
231 | modify :: Lens outer inner -> (inner -> inner) -> (outer -> outer)
232 | modify lens f outer = set lens (f (get lens outer)) outer
233 | 
234 | getWhen :: Prism outer inner -> outer -> Maybe inner
235 | getWhen prism outer = right (snd (unPrism prism) outer)
236 | 
237 | is :: outer -> Prism outer inner -> Bool
238 | is outer prism = isJust (getWhen prism outer)
239 | 
240 | isn't :: outer -> Prism outer inner -> Bool
241 | isn't outer prism = not (is outer prism)
242 | 
243 | constructFrom :: Prism outer inner -> inner -> outer
244 | constructFrom prism inner = fst (unPrism prism) inner
245 | 
246 | modifyWhen :: Prism outer inner -> (inner -> inner) -> (outer -> outer)
247 | modifyWhen prism f outer = maybe outer (constructFrom prism . f) (getWhen prism outer)
248 | 
249 | field :: forall name inner outer. HasField' name outer inner => Lens outer inner
250 | field = GenericLens.field' @name
251 | 
252 | constructor :: forall name inner outer. AsConstructor' name outer inner => Prism outer inner
253 | constructor = GenericLens._Ctor' @name
254 | 
255 | -- for convenience
256 | match :: forall name inner outer. AsConstructor' name outer inner => outer -> Maybe inner
257 | match = getWhen (constructor @name)
258 | 
259 | {- I wanted to use `#foo` instead of `@"foo"` syntax, using OverloadedLabels, but turns out it doesn't allow uppercase labels (for constructors) :(
260 | 
261 | data Field (name :: Symbol) = Field
262 | data Case  (name :: Symbol) = Case
263 | 
264 | instance name1 ~ name2 => GHC.IsLabel name1 (Field name2) where
265 |     fromLabel _ = Field
266 | 
267 | instance name1 ~ name2 => GHC.IsLabel name1 (Case name2) where
268 |     fromLabel _ = Case
269 | 
270 | field :: forall name inner outer. GenericLens.HasField      name inner outer => Field name -> Lens  outer inner
271 | field Field = GenericLens.field @name
272 | 
273 | _Case :: forall name inner outer. GenericLens.AsConstructor name inner outer => Case  name -> Prism outer inner
274 | _Case Case  = GenericLens._Ctor @name
275 | -}
276 | 
277 | 
278 | -- copied from https://artyom.me/lens-over-tea-5
279 | -- TODO maybe rewrite these
280 | -- TODO don't export them
281 | data Market a b s t = Market (b -> t) (s -> Either t a)
282 | 
283 | instance Functor (Market a b s) where
284 |   fmap f (Market bt seta) = Market (f . bt) (either (Left . f) Right . seta)
285 | 
286 | instance Profunctor (Market a b) where
287 |   lmap f (Market bt seta) = Market bt (seta . f)
288 |   rmap f (Market bt seta) = fmap f (Market bt seta)
289 | 
290 | instance Choice (Market a b) where
291 |   right' (Market bt seta) = Market (Right . bt) \cs -> case cs of
292 |     Left c -> Left (Left c)
293 |     Right s -> case seta s of
294 |       Left t -> Left (Right t)
295 |       Right a -> Right a
296 | 
297 | unPrism :: Prism a b -> (b -> a, a -> Either a b)
298 | unPrism p =
299 |   let -- bft   :: b -> Identity t
300 |       -- setfa :: s -> Either (Identity t) a
301 |       Market bft setfa = p (Market Identity Right)
302 |       -- bt    :: b -> t
303 |       -- seta  :: s -> Either t a
304 |       bt = runIdentity . bft
305 |       seta = either (Left . runIdentity) Right . setfa
306 |   in (bt, seta)
307 | 
308 | 
309 | 
310 | -------------------------------------------------------------------------- reader & state monad
311 | 
312 | runReaderT :: r -> ReaderT r m a -> m a
313 | runReaderT = flip Reader.runReaderT
314 | 
315 | runReader :: r -> Reader r a -> a
316 | runReader = flip Reader.runReader
317 | 
318 | runStateT :: s -> StateT s m a -> m (a, s)
319 | runStateT = flip State.runStateT
320 | 
321 | runState :: s -> State s a -> (a, s)
322 | runState = flip State.runState
323 | 
324 | evalStateT :: Monad m => s -> StateT s m a -> m a
325 | evalStateT = flip State.evalStateT
326 | 
327 | evalState :: s -> State s a -> a
328 | evalState = flip State.evalState
329 | 
330 | execStateT ::  Monad m => s -> StateT s m a -> m s
331 | execStateT = flip State.execStateT
332 | 
333 | execState :: s -> State s a -> s
334 | execState = flip State.execState
335 | 
336 | -- these are renamed to avoid conflicts with lens get/set, above
337 | getState :: MonadState s m => m s
338 | getState = State.get
339 | 
340 | -- these could be defined in terms of `modifyState`/`doModifyState`, except they are the actual primitives of `MonadState`!
341 | setState :: MonadState s m => s -> m ()
342 | setState = State.put
343 | 
344 | doSetState :: MonadState s m => m s -> m ()
345 | doSetState = unused . doModifyState . const
346 | 
347 | modifyState :: MonadState s m => (s -> s) -> m s
348 | modifyState f = doModifyState (return . f)
349 | 
350 | doModifyState :: MonadState s m => (s -> m s) -> m s
351 | doModifyState modifyAction = do
352 |     oldState <- getState
353 |     newState <- modifyAction oldState
354 |     strictly setState newState
355 |     return newState
356 | 
357 | setM :: MonadState outer m => Lens outer inner -> inner -> m ()
358 | setM lens = doSetM lens . return
359 | 
360 | doSetM :: MonadState outer m => Lens outer inner -> m inner -> m ()
361 | doSetM lens = unused . doModifyM lens . const
362 | 
363 | getM :: MonadState outer m => Lens outer inner -> m inner
364 | getM lens = modifyM lens id
365 | 
366 | modifyM :: MonadState outer m => Lens outer inner -> (inner -> inner) -> m inner
367 | modifyM lens f = doModifyM lens (return . f)
368 | 
369 | doModifyM :: MonadState outer m => Lens outer inner -> (inner -> m inner) -> m inner
370 | doModifyM lens modifyAction = liftM (get lens) (doModifyState (lens modifyAction))
371 | 
372 | getWhenM :: MonadState outer m => Prism outer inner -> m (Maybe inner)
373 | getWhenM prism = modifyWhenM prism id
374 | 
375 | constructFromM :: MonadState outer m => Prism outer inner -> inner -> m ()
376 | constructFromM prism = doConstructFromM prism . return
377 | 
378 | doConstructFromM :: MonadState outer m => Prism outer inner -> m inner -> m ()
379 | doConstructFromM prism = unused . doModifyWhenM prism . const
380 | 
381 | modifyWhenM :: MonadState outer m => Prism outer inner -> (inner -> inner) -> m (Maybe inner)
382 | modifyWhenM prism f = doModifyWhenM prism (return . f)
383 | 
384 | doModifyWhenM :: MonadState outer m => Prism outer inner -> (inner -> m inner) -> m (Maybe inner)
385 | doModifyWhenM prism modifyAction = liftM (getWhen prism) (doModifyState (prism modifyAction))
386 | 
387 | -- TODO `zoom` maybe?
388 | 
389 | (+=) :: (MonadState outer m, Num inner) => Lens outer inner -> inner -> m inner
390 | lens += n = modifyM lens (+ n)
391 | 
392 | (-=) :: (MonadState outer m, Num inner) => Lens outer inner -> inner -> m inner
393 | lens -= n = modifyM lens (subtract n)
394 | 
395 | (*=) :: (MonadState outer m, Num inner) => Lens outer inner -> inner -> m inner
396 | lens *= n = modifyM lens (* n)
397 | 
398 | (/=) :: (MonadState outer m, Fractional inner) => Lens outer inner -> inner -> m inner
399 | lens /= n = modifyM lens (/ n)
400 | 
401 | infixr 4 +=, -=, *=, /=
402 | 
403 | -- we could have `%=`, but with *both* `%` and `%=` having different meanings relative to Haskell convention, it's probably too surprising
404 | 
405 | 
406 | 
407 | -------------------------------------------------------------------------- Text stuff
408 | 
409 | type LazyText = LazyText.Text
410 | 
411 | showText :: Show a => a -> Text
412 | showText = stringToText . show
413 | 
414 | prettyShow :: Show a => a -> Text
415 | prettyShow = LazyText.toStrict . Text.Pretty.Simple.pShowLightBg
416 | 
417 | prettyPrint :: Show a => a -> IO ()
418 | prettyPrint = Text.Pretty.Simple.pPrintLightBg
419 | 
420 | stringToText :: String -> Text
421 | stringToText = Text.pack
422 | 
423 | textToString :: Text -> String
424 | textToString = Text.unpack
425 | 
426 | byteStringToText :: ByteString -> Text
427 | byteStringToText = Text.decodeUtf8
428 | 
429 | textToByteString :: Text -> ByteString
430 | textToByteString = Text.encodeUtf8
431 | 
432 | 
433 | 
434 | 
435 | -------------------------------------------------------------------------- asserts and debugging
436 | 
437 | {-# WARNING todo "TODO" #-}
438 | todo :: HasCallStack => a
439 | todo = error "TODO"
440 | 
441 | bug :: HasCallStack => Text -> a
442 | bug x = error (textToString ("BUG: " ++ x))
443 | 
444 | class Assert x where
445 |     type AssertResult x
446 |     msgAssert :: HasCallStack => Text -> x -> AssertResult x
447 | 
448 | assert :: (HasCallStack, Assert x) => x -> AssertResult x
449 | assert = withFrozenCallStack
450 |     (msgAssert "")
451 | 
452 | assertM :: (HasCallStack, Assert x, Monad m) => x -> m (AssertResult x)
453 | assertM x = withFrozenCallStack
454 |     (strictly return (assert x))
455 | 
456 | msgAssertM :: (HasCallStack, Assert x, Monad m) => Text -> x -> m (AssertResult x)
457 | msgAssertM msg x = withFrozenCallStack
458 |     (strictly return (msgAssert msg x))
459 | 
460 | assertEqM :: (HasCallStack, Eq a, Show a, Monad m) => a -> a -> m ()
461 | assertEqM a b = withFrozenCallStack
462 |     (msgAssertM (showText a ++ " == " ++ showText b) (a == b))
463 | 
464 | instance Assert Bool where
465 |     type AssertResult Bool = ()
466 |     msgAssert msg = withFrozenCallStack
467 |         (bool (bug ("Failed assertion! " ++ msg)) ())
468 | 
469 | instance Assert (Maybe a) where
470 |     type AssertResult (Maybe a) = a
471 |     msgAssert msg = withFrozenCallStack
472 |         (fromMaybe (bug ("Failed assertion! " ++ msg)))
473 | 
474 | {- remove the Show constraint if it turns out to be problematic! -}
475 | instance Show e => Assert (Either e a) where
476 |     type AssertResult (Either e a) = a
477 |     msgAssert msg = withFrozenCallStack
478 |         (fromRightOr (\e -> bug ("Failed assertion! " ++ msg ++ " " ++ showText e)))
479 | 
480 | debug :: (HasCallStack, Show a) => a -> a
481 | debug a = withFrozenCallStack
482 |     (trace (showText a) a)
483 | 
484 | debugM :: (HasCallStack, Monad m, Show a) => a -> m ()
485 | debugM a = withFrozenCallStack
486 |     (traceM (showText a))
487 | 
488 | prettyDebug :: (HasCallStack, Show a) => a -> a
489 | prettyDebug a = withFrozenCallStack
490 |     (trace (prettyShow a) a)
491 | 
492 | prettyDebugM :: (HasCallStack, Monad m, Show a) => a -> m ()
493 | prettyDebugM a = withFrozenCallStack
494 |     (traceM (prettyShow a))
495 | 
496 | trace :: HasCallStack => Text -> a -> a
497 | trace text a = Debug.Trace.trace message a where
498 |     message = "DEBUG: " ++ textToString text ++ " [" ++ srcLoc ++ "]"
499 |     Stack.SrcLoc { Stack.srcLocFile, Stack.srcLocStartLine, Stack.srcLocStartCol } = snd (assert (head (Stack.getCallStack Stack.callStack)))
500 |     srcLoc = srcLocFile ++ ":" ++ show srcLocStartLine ++ ":" ++ show srcLocStartCol
501 | 
502 | traceM :: (HasCallStack, Monad m) => Text -> m ()
503 | traceM text = withFrozenCallStack
504 |     (trace text (return ()))
505 | 
506 | 
507 | 
508 | 
509 | -------------------------------------------------------------------------- widely-used project-specific definitions
510 | 
511 | class Enumerable a where
512 |     enumerate :: [a]
513 |     default enumerate :: (Enum a, Bounded a) => [a]
514 |     enumerate = [minBound..maxBound]
515 | 
516 | data ArithmeticOperator
517 |     = Add
518 |     | Sub
519 |     | Mul
520 |     | Div
521 |     | Mod
522 |     deriving (Generic, Eq, Show, Enum, Bounded)
523 | 
524 | instance Enumerable ArithmeticOperator
525 | 
526 | data ComparisonOperator
527 |     = Less
528 |     | LessEqual
529 |     | Greater
530 |     | GreaterEqual
531 |     | Equal
532 |     | NotEqual
533 |     deriving (Generic, Eq, Show, Enum, Bounded)
534 | 
535 | instance Enumerable ComparisonOperator
536 | 
537 | data LogicalOperator
538 |     = And
539 |     | Or
540 |     deriving (Generic, Eq, Show, Enum, Bounded)
541 | 
542 | instance Enumerable LogicalOperator
543 | 
544 | data BinaryOperator
545 |     = ArithmeticOperator ArithmeticOperator
546 |     | ComparisonOperator ComparisonOperator
547 |     | LogicalOperator    LogicalOperator
548 |     deriving (Generic, Eq, Show)
549 | 
550 | instance Enumerable BinaryOperator where
551 |     enumerate = map ArithmeticOperator enumerate ++ map ComparisonOperator enumerate ++ map LogicalOperator enumerate
552 | 
553 | data UnaryOperator
554 |     = Not
555 |     | Negate
556 |     deriving (Generic, Eq, Show, Enum, Bounded)
557 | 
558 | instance Enumerable UnaryOperator
559 | 
560 | 
561 | data With metadata a = With {
562 |     getMetadata :: metadata,
563 |     unWith      :: a
564 | } deriving (Generic, Show, Functor, Foldable, Traversable)
565 | 
566 | instance Eq a => Eq (With metadata a) where
567 |     (==) = (==) `on` unWith
568 | 
569 | instance Ord a => Ord (With metadata a) where
570 |     compare = compare `on` unWith
571 | 
572 | instance Monoid metadata => Applicative (With metadata) where
573 |     pure = With mempty
574 |     liftA2 f (With a1 b1) (With a2 b2) = With (a1 ++ a2) (f b1 b2)
575 | 
576 | newtype NodeWith node metadata a = NodeWith {
577 |     getNodeWith :: With metadata (node metadata a)
578 | } deriving (Generic, Eq, Ord, Show, Functor, Foldable, Traversable)
579 | 
580 | nodeWithout :: NodeWith node metadata a -> node metadata a
581 | nodeWithout = unWith . getNodeWith
582 | 
583 | nodeMetadata :: NodeWith node metadata a -> metadata
584 | nodeMetadata = getMetadata . getNodeWith
585 | 
586 | mapNode :: (node metadata a -> node metadata b) -> NodeWith node metadata a -> NodeWith node metadata b
587 | mapNode f = NodeWith . fmap f . getNodeWith
588 | 
589 | mapNodeM :: Monad m => (node metadata a -> m (node metadata b)) -> NodeWith node metadata a -> m (NodeWith node metadata b)
590 | mapNodeM f = liftM NodeWith . mapM f . getNodeWith
591 | 
592 | forNodeM :: Monad m => NodeWith node metadata a -> (node metadata a -> m (node metadata b)) -> m (NodeWith node metadata b)
593 | forNodeM = flip mapNodeM
594 | 
595 | -- Workaround for bug with the `Alternative` instance in base: https://ghc.haskell.org/trac/ghc/ticket/15992
596 | newtype Compose f g a = Compose { getCompose :: f (g a) } deriving (Generic, Eq, Ord, Show, Functor, Foldable)
597 | 
598 | instance (Traversable f, Traversable g) => Traversable (Compose f g) where
599 |     traverse f (Compose t) = Compose <$> traverse (traverse f) t
600 | 
601 | instance (Applicative f, Applicative g) => Applicative (Compose f g) where
602 |     pure x = Compose (pure (pure x))
603 |     Compose f <*> Compose x = Compose (liftA2 (<*>) f x)
604 |     liftA2 f (Compose x) (Compose y) =
605 |       Compose (liftA2 (liftA2 f) x y)
606 | 
607 | instance (Alternative f, Applicative g) => Alternative (Compose f g) where
608 |     empty = Compose empty
609 |     Compose a <|> Compose b = Compose (a <|> b)
610 |     many = Compose . liftA1 sequenceA . many . getCompose
611 |     some = Compose . liftA1 sequenceA . some . getCompose
612 | 


--------------------------------------------------------------------------------
/src/Name.hs:
--------------------------------------------------------------------------------
  1 | module Name (BuiltinName (..), LocalName (..), Name (..), qualifiedName, unqualifiedName, NameWith (..), Path (..), ResolvedName, Error (..), resolveNames, ValidationError (..), validateNames) where
  2 | 
  3 | import MyPrelude
  4 | 
  5 | import qualified Data.Map  as Map
  6 | import qualified Data.Text as Text
  7 | 
  8 | import qualified Pretty as P
  9 | import qualified AST
 10 | import AST (AST)
 11 | 
 12 | 
 13 | 
 14 | ------------------------------------------------------------------------ types
 15 | 
 16 | data BuiltinName
 17 |     = Builtin_Int
 18 |     | Builtin_Bool
 19 |     | Builtin_Text
 20 |     | Builtin_Unit
 21 |     | Builtin_true
 22 |     | Builtin_false
 23 |     | Builtin_ask
 24 |     | Builtin_say
 25 |     | Builtin_write
 26 |     deriving (Generic, Eq, Ord, Enum, Bounded, Show)
 27 | 
 28 | instance Enumerable BuiltinName
 29 | 
 30 | -- subscopes within each scope are numbered positionally, starting with 0
 31 | data Path = Path {
 32 |     function :: Text,
 33 |     scope    :: [Int]
 34 | } deriving (Generic, Eq, Ord, Show)
 35 | 
 36 | data LocalName = LocalName {
 37 |     path      :: Path,
 38 |     givenName :: Text
 39 | } deriving (Generic, Eq, Ord, Show)
 40 | 
 41 | data Name
 42 |     = BuiltinName  BuiltinName
 43 |     | FunctionName Text
 44 |     | Name         LocalName
 45 |     deriving (Generic, Eq, Ord, Show)
 46 | 
 47 | data NameWith info = NameWith {
 48 |     name :: Name,
 49 |     info :: info
 50 | } deriving (Generic, Show, Functor)
 51 | 
 52 | instance Eq (NameWith info) where
 53 |     (==) = (==) `on` name
 54 | 
 55 | instance Ord (NameWith info) where
 56 |     compare = compare `on` name
 57 | 
 58 | type ResolvedName = NameWith AST.BindingType
 59 | 
 60 | qualifiedName :: Name -> Text
 61 | qualifiedName = \case
 62 |     BuiltinName  name -> unqualifiedName (BuiltinName name)
 63 |     FunctionName name -> "." ++ name
 64 |     Name    localName -> "." ++ function (path localName) ++ "." ++ Text.intercalate "." (map showText (scope (path localName))) ++ "." ++ givenName localName
 65 | 
 66 | unqualifiedName :: Name -> Text
 67 | unqualifiedName = \case
 68 |     BuiltinName  builtin   -> Text.drop (Text.length "Builtin_" ) (showText builtin)
 69 |     FunctionName name      -> name
 70 |     Name         localName -> givenName localName
 71 | 
 72 | 
 73 | 
 74 | ------------------------------------------------------------------------ pretty-printing
 75 | 
 76 | instance AST.RenderName Name where
 77 |     renderName defOrUse = let makeName isBuiltin nameType name = P.note (P.Identifier (P.IdentInfo name defOrUse nameType isBuiltin)) (P.pretty name) in \case
 78 |         Name (LocalName Path { function, scope } given) -> renderedPath ++ renderedGiven
 79 |             where pathText      = function ++ "." ++ foldr (\a b -> showText a ++ "." ++ b) "" scope
 80 |                   renderedPath  = makeName False P.Block    pathText
 81 |                   renderedGiven = makeName False P.Unit     given
 82 |         FunctionName name    ->   makeName False P.Function name
 83 |         BuiltinName  builtin ->   makeName True  P.Unknown  (unqualifiedName (BuiltinName builtin))
 84 | 
 85 | instance AST.RenderName ResolvedName where
 86 |     renderName defOrUse (NameWith name _) = AST.renderName defOrUse name
 87 | 
 88 | 
 89 | 
 90 | ------------------------------------------------------------------------ resolution frontend
 91 | 
 92 | class (forall metadata. Monad (m metadata)) => NameResolveM m where
 93 |     lookupName    :: Text -> m metadata ResolvedName
 94 |     enterScope    :: m metadata a -> m metadata a
 95 |     bindName      :: AST.BindingType -> Text -> m metadata ResolvedName
 96 |     enterMetadata :: metadata -> m metadata a -> m metadata a
 97 | 
 98 | enterMetadataOf :: NameResolveM m => NodeWith node metadata name -> m metadata a -> m metadata a
 99 | enterMetadataOf = enterMetadata . nodeMetadata
100 | 
101 | class ResolveNamesIn node where
102 |     resolveNamesIn :: NameResolveM m => node metadata Text -> m metadata (node metadata ResolvedName)
103 | 
104 | instance ResolveNamesIn AST.Type where
105 |     resolveNamesIn = \case
106 |         AST.NamedType name -> do
107 |             resolvedName <- lookupName name
108 |             return (AST.NamedType resolvedName)
109 |         AST.FunctionType parameters returns -> do
110 |             resolvedParameters <- mapM resolveNamesIn parameters
111 |             resolvedReturns    <- resolveNamesIn returns
112 |             return (AST.FunctionType resolvedParameters resolvedReturns)
113 | 
114 | instance ResolveNamesIn AST.Function where
115 |     resolveNamesIn AST.Function { AST.functionName, AST.arguments, AST.returns, AST.body } = do
116 |         -- the argument types and return type are in global scope, must be resolved before entering any scope
117 |         argumentTypes <- forM arguments \argument -> do
118 |             enterMetadataOf argument do
119 |                 (resolveNamesIn . AST.argumentType . nodeWithout) argument
120 |         resolvedReturns <- mapM resolveNamesIn returns
121 |         resolvedFunction <- bindName AST.Let functionName
122 |         -- the argument names are in scope for the body, and may also be shadowed by it
123 |         (resolvedArguments, resolvedBody) <- enterScope do
124 |             resolvedArguments <- forM (zip argumentTypes arguments) \(resolvedType, argument) -> do
125 |                 enterMetadataOf argument do
126 |                     forNodeM argument \AST.Argument { AST.argumentName } -> do
127 |                         resolvedName <- bindName AST.Let argumentName
128 |                         return (AST.Argument resolvedName resolvedType)
129 |             resolvedBody <- resolveNamesIn body
130 |             return (resolvedArguments, resolvedBody)
131 |         return AST.Function {
132 |             AST.functionName = resolvedFunction,
133 |             AST.arguments    = resolvedArguments,
134 |             AST.returns      = resolvedReturns,
135 |             AST.body         = resolvedBody
136 |         }
137 | 
138 | instance ResolveNamesIn AST.Block where
139 |     resolveNamesIn AST.Block { AST.exitTarget, AST.statements } = enterScope do
140 |         resolvedTarget     <- mapM (bindName AST.Let) exitTarget
141 |         resolvedStatements <- mapM resolveNamesIn statements
142 |         return (AST.Block resolvedTarget resolvedStatements)
143 | 
144 | instance ResolveNamesIn AST.Statement where
145 |     resolveNamesIn = \case
146 |         AST.Binding btype name expr -> do
147 |             -- resolve the expression BEFORE binding the name:
148 |             -- the name should not be in scope for the expression!
149 |             resolvedExpr <- resolveNamesIn expr
150 |             fullName <- bindName btype name
151 |             return (AST.Binding btype fullName resolvedExpr)
152 |         AST.Assign var expr -> do
153 |             resolvedVar  <- lookupName var
154 |             resolvedExpr <- resolveNamesIn expr
155 |             return (AST.Assign resolvedVar resolvedExpr)
156 |         AST.IfThen expr body -> do
157 |             resolvedExpr <- resolveNamesIn expr
158 |             resolvedBody <- resolveNamesIn body
159 |             return (AST.IfThen resolvedExpr resolvedBody)
160 |         AST.IfThenElse expr body1 body2 -> do
161 |             resolvedExpr  <- resolveNamesIn expr
162 |             resolvedBody1 <- resolveNamesIn body1
163 |             resolvedBody2 <- resolveNamesIn body2
164 |             return (AST.IfThenElse resolvedExpr resolvedBody1 resolvedBody2)
165 |         AST.Forever body -> do
166 |             resolvedBody <- resolveNamesIn body
167 |             return (AST.Forever resolvedBody)
168 |         AST.While expr body -> do
169 |             resolvedExpr <- resolveNamesIn expr
170 |             resolvedBody <- resolveNamesIn body
171 |             return (AST.While resolvedExpr resolvedBody)
172 |         AST.Return target maybeExpr -> do
173 |             resolvedTarget <- lookupName target
174 |             resolvedExpr   <- mapM resolveNamesIn maybeExpr
175 |             return (AST.Return resolvedTarget resolvedExpr)
176 |         AST.Break target -> do
177 |             resolvedTarget <- lookupName target
178 |             return (AST.Break resolvedTarget)
179 |         AST.Expression expr -> do
180 |             resolvedExpr <- resolveNamesIn expr
181 |             return (AST.Expression resolvedExpr)
182 | 
183 | -- We used to be able to do this as just `mapM lookupName`, but that doesn't record metadata...
184 | -- Wonder if we could do anything to make it work "automatically" again...
185 | instance ResolveNamesIn AST.Expression where
186 |     resolveNamesIn = \case
187 |         AST.Named n -> do
188 |             resolvedName <- lookupName n
189 |             return (AST.Named resolvedName)
190 |         AST.UnaryOperator op expr -> do
191 |             resolvedExpr <- resolveNamesIn expr
192 |             return (AST.UnaryOperator op resolvedExpr)
193 |         AST.BinaryOperator expr1 op expr2 -> do
194 |             resolvedExpr1 <- resolveNamesIn expr1
195 |             resolvedExpr2 <- resolveNamesIn expr2
196 |             return (AST.BinaryOperator resolvedExpr1 op resolvedExpr2)
197 |         AST.Call fn args -> do
198 |             resolvedFn   <- resolveNamesIn fn
199 |             resolvedArgs <- mapM resolveNamesIn args
200 |             return (AST.Call resolvedFn resolvedArgs)
201 |         AST.NumberLiteral number -> do
202 |             return (AST.NumberLiteral number)
203 |         AST.TextLiteral text -> do
204 |             return (AST.TextLiteral text)
205 | 
206 | instance ResolveNamesIn node => ResolveNamesIn (NodeWith node) where
207 |     resolveNamesIn node = do
208 |         enterMetadataOf node do
209 |             mapNodeM resolveNamesIn node
210 | 
211 | 
212 | 
213 | ------------------------------------------------------------------------ resolution backend
214 | 
215 | data Error
216 |     = NameNotFound Text Path
217 |     | NameConflict Text Path
218 |     deriving (Generic, Show)
219 | 
220 | newtype NameResolve metadata a = NameResolve {
221 |     runNameResolve :: ExceptT Error (State (Context metadata)) a
222 | } deriving (Functor, Applicative, Monad, MonadState (Context metadata), MonadError Error)
223 | 
224 | resolveNames :: AST metadata Text -> Either (With metadata Error) (AST metadata ResolvedName)
225 | resolveNames = plumbMetadata . runState (Context [] [] []) . runExceptT . runNameResolve . mapM resolveNamesIn where
226 |     plumbMetadata = \case
227 |         (Right result, _      ) -> Right result
228 |         (Left  error,  context) -> Left (With (assert (head (metadata context))) error)
229 | 
230 | -- the stack of scopes we are currently inside
231 | -- fst: how many sub-scopes within that scope we have visited so far
232 | -- snd: the names bound within that scope
233 | type LocalContext = [(Int, Map Text AST.BindingType)]  -- TODO maybe use Natural and NonEmpty here
234 | 
235 | data Context metadata = Context {
236 |     functions :: [Text],
237 |     locals    :: LocalContext,
238 |     metadata  :: [metadata]
239 | } deriving (Generic, Show)
240 | 
241 | currentFunction :: Context metadata -> Text
242 | currentFunction = assert . head . functions
243 | 
244 | lookupLocal :: Text -> LocalContext -> Maybe ([Int], AST.BindingType)
245 | lookupLocal name = \case
246 |     [] -> Nothing
247 |     ((_, names) : parent) -> case Map.lookup name names of
248 |         Just bindingType  -> Just (map fst parent, bindingType)
249 |         Nothing           -> lookupLocal name parent
250 | 
251 | lookupInContext :: Text -> Context metadata -> Maybe ResolvedName
252 | lookupInContext givenName context@Context { functions, locals } = oneOf [tryLocal, tryFunction, tryBuiltin] where
253 |     tryLocal    = fmap makeLocalName (lookupLocal givenName locals) where
254 |         makeLocalName (scope, info) = NameWith { name = Name LocalName { path = Path { function = currentFunction context, scope }, givenName }, info }
255 |     tryFunction = justIf (elem givenName functions) NameWith { name = FunctionName givenName, info = AST.Let }
256 |     tryBuiltin  = fmap makeBuiltinName (lookup ("Builtin_" ++ givenName) builtinNames) where
257 |         builtinNames                = map (\builtinName -> (showText builtinName, builtinName)) (enumerate @BuiltinName)
258 |         makeBuiltinName builtinName = NameWith { name = BuiltinName builtinName, info = AST.Let }
259 | 
260 | instance NameResolveM NameResolve where
261 |     lookupName name = do
262 |         context <- getState
263 |         case lookupInContext name context of
264 |             Just found -> return found
265 |             Nothing    -> throwError (NameNotFound name (Path (currentFunction context) (map fst (locals context))))
266 | 
267 |     enterScope action = do
268 |         modifyM (field @"locals") (prepend (0, Map.empty))
269 |         result    <- action
270 |         newLocals <- modifyM (field @"locals") (assert . tail)
271 |         case newLocals of
272 |             (scopeID, names) : rest -> do
273 |                 assertM (scopeID >= 0)
274 |                 setM (field @"locals") ((scopeID + 1, names) : rest)
275 |                 return result
276 |             [] -> do
277 |                 return result
278 | 
279 |     bindName info name = do
280 |         context <- getState
281 |         case locals context of
282 |             [] -> do
283 |                 doModifyM (field @"functions") \functions -> do
284 |                     when (elem name functions) do
285 |                         throwError (NameConflict name (Path name [])) -- TODO should be a nil path instead...?
286 |                     return (prepend name functions)
287 |                 return NameWith { name = FunctionName name, info }
288 |             (scopeID, names) : rest -> do
289 |                 when (Map.member name names) do
290 |                     throwError (NameConflict name (Path (currentFunction context) (map fst (locals context))))
291 |                 setM (field @"locals") ((scopeID, Map.insert name info names) : rest)
292 |                 return NameWith { name = Name LocalName { path = Path { function = currentFunction context, scope = map fst rest }, givenName = name }, info }
293 | 
294 |     enterMetadata metadata action = do
295 |         modifyM (field @"metadata") (prepend metadata)
296 |         result <- action
297 |         modifyM (field @"metadata") (assert . tail)
298 |         return result
299 | 
300 | 
301 | 
302 | ------------------------------------------------------------------------ validation
303 | 
304 | data ValidationError info
305 |     = NotInScope   Name
306 |     | Redefined    Name
307 |     | InfoMismatch (NameWith info) (NameWith info)
308 |     deriving (Generic, Show)
309 | 
310 | -- This checks that:
311 | --  * Each name is in scope where it is used.
312 | --  * A name is not defined more than once by the same scope.
313 | --  * The info stored alongside the name is the same at each of its occurrences.
314 | -- This does NOT check that:
315 | --  * The `path` component of the name is correct. This is regarded as an implementation detail, subject to change.
316 | --  * The binding types are stored correctly. This is an unfortunate limitation of being polymorphic over the `info` type.
317 | validateNames :: Eq info => AST metadata (NameWith info) -> Either (ValidationError info) ()
318 | validateNames = runExcept . evalStateT [Map.empty, builtinNames] . mapM_ validate where
319 |     builtinNames = Map.fromList (zip (map BuiltinName (enumerate @BuiltinName)) (repeat Nothing))
320 | 
321 | type ValidateM info = StateT [Map Name (Maybe info)] (Except (ValidationError info))
322 | 
323 | class Validate node where
324 |     validate :: Eq info => node metadata (NameWith info) -> ValidateM info ()
325 | 
326 | instance Validate AST.Type where
327 |     validate = \case
328 |         AST.NamedType name -> do
329 |             validateName name
330 |         AST.FunctionType parameters returns -> do
331 |             mapM_ validate parameters
332 |             validate returns
333 | 
334 | instance Validate AST.Function where
335 |     validate function = do
336 |         mapM_ validate (map (AST.argumentType . nodeWithout) (AST.arguments function))
337 |         mapM_ validate (AST.returns function)
338 |         recordName (AST.functionName function)
339 |         modifyState (prepend Map.empty)
340 |         mapM_ recordName (map (AST.argumentName . nodeWithout) (AST.arguments function))
341 |         validate (AST.body function)
342 |         modifyState (assert . tail)
343 |         return ()
344 | 
345 | instance Validate AST.Block where
346 |     validate block = do
347 |         modifyState (prepend Map.empty)
348 |         mapM_ recordName (AST.exitTarget block)
349 |         mapM_ validate   (AST.statements block)
350 |         modifyState (assert . tail)
351 |         return ()
352 | 
353 | instance Validate AST.Statement where
354 |     validate = \case
355 |         AST.Binding _ name expr -> do
356 |             validate expr
357 |             recordName name
358 |         AST.Assign n expr -> do
359 |             validate expr
360 |             validateName n
361 |         AST.IfThen expr body -> do
362 |             validate expr
363 |             validate body
364 |         AST.IfThenElse expr body1 body2 -> do
365 |             validate expr
366 |             mapM_ validate [body1, body2]
367 |         AST.Forever body -> do
368 |             validate body
369 |         AST.While expr body -> do
370 |             validate expr
371 |             validate body
372 |         AST.Return target maybeExpr -> do
373 |             validateName target
374 |             mapM_ validate maybeExpr
375 |         AST.Break target -> do
376 |             validateName target
377 |         AST.Expression expr -> do
378 |             validate expr
379 | 
380 | instance Validate AST.Expression where
381 |     validate = \case
382 |         AST.Named n -> do
383 |             validateName n
384 |         AST.UnaryOperator _ expr -> do
385 |             validate expr
386 |         AST.BinaryOperator expr1 _ expr2 -> do
387 |             mapM_ validate [expr1, expr2]
388 |         AST.NumberLiteral _ -> do
389 |             return ()
390 |         AST.TextLiteral _ -> do
391 |             return ()
392 |         AST.Call fn args -> do
393 |             validate fn
394 |             mapM_ validate args
395 | 
396 | instance Validate node => Validate (NodeWith node) where
397 |     validate = validate . nodeWithout
398 | 
399 | validateName :: Eq info => NameWith info -> ValidateM info ()
400 | validateName (NameWith name info1) = do
401 |     context <- getState
402 |     case Map.lookup name (Map.unions context) of
403 |         Nothing -> do
404 |             throwError (NotInScope name)
405 |         Just Nothing -> do
406 |             return () -- builtin names have no stored info (TODO?)
407 |         Just (Just info2) -> do
408 |             when (info1 != info2) do
409 |                 throwError (InfoMismatch (NameWith name info1) (NameWith name info2))
410 | 
411 | recordName :: NameWith info -> ValidateM info ()
412 | recordName (NameWith name info) = do
413 |     doModifyState \context -> do
414 |         let scope = assert (head context)
415 |         when (Map.member name scope) do
416 |             throwError (Redefined name)
417 |         return (prepend (Map.insert name (Just info) scope) (assert (tail context)))
418 |     return ()
419 | 
420 | 


--------------------------------------------------------------------------------
/src/Pretty.hs:
--------------------------------------------------------------------------------
  1 | module Pretty (Document, Info (..), IdentInfo (..), DefinitionOrUse (..), Type (..), Style (..), Color (..), Render (..), output,
  2 |                note, keyword, dot, colon, semicolon, defineEquals, assignEquals, string, number, boolean, braces, parens, unaryOperator, binaryOperator,
  3 |                P.dquotes, P.hardline, P.hsep, P.nest, P.pretty, P.punctuate) where
  4 | 
  5 | import MyPrelude
  6 | 
  7 | import qualified Data.Text.Prettyprint.Doc                 as P
  8 | import qualified Data.Text.Prettyprint.Doc.Render.Terminal as PT
  9 | 
 10 | import qualified Data.ByteString
 11 | 
 12 | 
 13 | type Document = P.Doc Info
 14 | 
 15 | note :: Info -> Document -> Document
 16 | note = P.annotate
 17 | 
 18 | keyword :: Text -> Document
 19 | keyword = note Keyword . P.pretty
 20 | 
 21 | dot :: Document
 22 | dot = note Dot "."
 23 | 
 24 | colon :: Document
 25 | colon = note Colon ":"
 26 | 
 27 | semicolon :: Document
 28 | semicolon = note Semicolon ";"
 29 | 
 30 | defineEquals :: Document
 31 | defineEquals = note DefineEquals "="
 32 | 
 33 | assignEquals :: Document
 34 | assignEquals = note AssignEquals "="
 35 | 
 36 | string :: Text -> Document
 37 | string = note (Literal Text) . P.dquotes . P.pretty
 38 | 
 39 | number :: (P.Pretty a, Integral a) => a -> Document
 40 | number = note (Literal Int) . P.pretty . (+ 0) -- just to silence the "unused constraint" warning
 41 | 
 42 | boolean :: Bool -> Document
 43 | boolean = note (Literal Bool) . bool "false" "true"
 44 | 
 45 | braces :: Document -> Document
 46 | braces doc = note Brace "{" ++ doc ++ note Brace "}"
 47 | 
 48 | parens :: Document -> Document
 49 | parens doc = note Paren "(" ++ doc ++ note Paren ")"
 50 | 
 51 | -- FIXME: Deduplicate these with `module Token` maybe??
 52 | unaryOperator :: UnaryOperator -> Document
 53 | unaryOperator  = note UserOperator . \case
 54 |     Not                   -> "!"
 55 |     Negate                -> "-"
 56 | 
 57 | binaryOperator :: BinaryOperator -> Document
 58 | binaryOperator = note UserOperator . \case
 59 |     ArithmeticOperator op -> case op of
 60 |         Add               -> "+"
 61 |         Sub               -> "-"
 62 |         Mul               -> "*"
 63 |         Div               -> "/"
 64 |         Mod               -> "%"
 65 |     ComparisonOperator op -> case op of
 66 |         Equal             -> "=="
 67 |         NotEqual          -> "!="
 68 |         Less              -> "<"
 69 |         LessEqual         -> "<="
 70 |         Greater           -> ">"
 71 |         GreaterEqual      -> ">="
 72 |     LogicalOperator    op -> case op of
 73 |         And               -> "&&"
 74 |         Or                -> "||"
 75 | 
 76 | -- TODO bikeshed the names of all these things
 77 | 
 78 | data Info
 79 |     = Keyword
 80 |     | Brace
 81 |     | Paren
 82 |     | Bracket
 83 |     | DefineEquals
 84 |     | AssignEquals
 85 |     | Dot
 86 |     | Colon
 87 |     | Semicolon
 88 |     | UserOperator
 89 |     | Literal    Type
 90 |     | Sigil      IdentInfo
 91 |     | Identifier IdentInfo
 92 |     deriving (Generic, Eq, Show)
 93 | 
 94 | data Type
 95 |     = Int
 96 |     | Bool
 97 |     | Text
 98 |     | Unit
 99 |     | Function
100 |     | Block
101 |     | Type
102 |     | Unknown
103 |     deriving (Generic, Eq, Show)
104 | 
105 | data DefinitionOrUse
106 |     = Definition
107 |     | Use
108 |     deriving (Generic, Eq, Show)
109 | 
110 | data IdentInfo = IdentInfo {
111 |     identName :: Text,
112 |     defOrUse  :: DefinitionOrUse,
113 |     identType :: Type,
114 |     isBuiltin :: Bool
115 | } deriving (Generic, Eq, Show)
116 | 
117 | data Style = Style {
118 |     color        :: Maybe Color,
119 |     isDull       :: Bool,
120 |     isBold       :: Bool,
121 |     isItalic     :: Bool,
122 |     isUnderlined :: Bool
123 | } deriving (Generic, Eq, Show)
124 | 
125 | data Color
126 |     = Black
127 |     | White
128 |     | Red
129 |     | Green
130 |     | Blue
131 |     | Cyan
132 |     | Magenta
133 |     | Yellow
134 |     deriving (Generic, Eq, Show)
135 | 
136 | defaultStyle :: Info -> Style
137 | defaultStyle = \case
138 |     Keyword          -> plain { isBold = True }
139 |     Brace            -> plain { isBold = True }
140 |     Paren            -> plain
141 |     Bracket          -> plain
142 |     DefineEquals     -> plain { isBold = True }
143 |     AssignEquals     -> plain { color  = Just Yellow }
144 |     Dot              -> plain { isBold = True }
145 |     Colon            -> plain { isBold = True }
146 |     Semicolon        -> plain { isBold = True }
147 |     UserOperator     -> plain { color  = Just Yellow }
148 |     Literal    _     -> plain { color  = Just Red }
149 |     Sigil      info  -> plain { isUnderlined = defOrUse info == Definition }
150 |     Identifier info  -> plain { isUnderlined = defOrUse info == Definition, color = Just (identColorForType (identType info)) }
151 |         where identColorForType = \case
152 |                   Unknown  -> Cyan
153 |                   Function -> if isBuiltin info then Yellow else Green
154 |                   Block    -> Green
155 |                   Type     -> Cyan
156 |                   _        -> if isBuiltin info then Red else Magenta
157 | 
158 | plain :: Style
159 | plain = Style Nothing False False False False
160 | 
161 | class Render a where
162 |     render :: a -> Document
163 |     outputWithStyle :: (Info -> Style) -> Handle -> a -> IO ()
164 |     outputWithStyle style handle = PT.hPutDoc handle . fmap (ansiStyle . style) . render
165 |     listSeparator :: Document
166 |     listSeparator = P.hardline
167 | 
168 | output :: Render a => Handle -> a -> IO ()
169 | output = outputWithStyle defaultStyle
170 | 
171 | instance Render Text where
172 |     render            = P.pretty
173 |     outputWithStyle _ = hPutStr
174 | 
175 | instance Render ByteString where
176 |     render            = P.pretty . byteStringToText
177 |     outputWithStyle _ = Data.ByteString.hPutStr
178 | 
179 | instance forall metadata a. Render a => Render (With metadata a) where
180 |     render        = render . unWith
181 |     listSeparator = listSeparator @a
182 | 
183 | instance forall node metadata a. Render (node metadata a) => Render (NodeWith node metadata a) where
184 |     render        = render . nodeWithout
185 |     listSeparator = listSeparator @(node metadata a)
186 | 
187 | instance forall a. Render a => Render [a] where
188 |     render        = mconcat . P.punctuate (listSeparator @a) . map render
189 |     listSeparator = listSeparator @a
190 | 
191 | ansiStyle :: Style -> PT.AnsiStyle
192 | ansiStyle Style { color, isDull, isBold, isItalic, isUnderlined } = style where
193 |     style     = maybe mempty (fromColor . mapColor) color ++ fontStyle
194 |     fontStyle = mconcat (catMaybes [justIf isBold PT.bold, justIf isItalic PT.italicized, justIf isUnderlined PT.underlined])
195 |     fromColor = if isDull then PT.colorDull else PT.color
196 |     mapColor  = \case
197 |         Black   -> PT.Black
198 |         White   -> PT.White
199 |         Red     -> PT.Red
200 |         Green   -> PT.Green
201 |         Blue    -> PT.Blue
202 |         Cyan    -> PT.Cyan
203 |         Magenta -> PT.Magenta
204 |         Yellow  -> PT.Yellow
205 | 


--------------------------------------------------------------------------------
/src/Token.hs:
--------------------------------------------------------------------------------
  1 | module Token (Token (..), Keyword (..), Bracket (..), BracketKind (..), BracketDirection (..), Error (..), tokenize) where
  2 | 
  3 | import MyPrelude
  4 | 
  5 | import Data.Char (isAlpha, isAlphaNum)
  6 | 
  7 | import qualified Text.Regex.Applicative        as RE
  8 | import qualified Text.Regex.Applicative.Common as RE (signed, decimal)
  9 | import qualified Language.Lexer.Applicative    as Lex
 10 | import qualified Data.Loc                      as Loc
 11 | 
 12 | import qualified Pretty as P
 13 | 
 14 | class TextRepresentation a where
 15 |     toText :: a -> Text
 16 | 
 17 | instance TextRepresentation Text where
 18 |     toText = id
 19 | 
 20 | instance TextRepresentation Integer where
 21 |     toText = showText
 22 | 
 23 | instance TextRepresentation ArithmeticOperator where
 24 |     toText = \case
 25 |         Add -> "+"
 26 |         Sub -> "-"
 27 |         Mul -> "*"
 28 |         Div -> "/"
 29 |         Mod -> "%"
 30 | 
 31 | instance TextRepresentation ComparisonOperator where
 32 |     toText = \case
 33 |         Less         -> "<"
 34 |         LessEqual    -> "<="
 35 |         Greater      -> ">"
 36 |         GreaterEqual -> ">="
 37 |         Equal        -> "=="
 38 |         NotEqual     -> "!="
 39 | 
 40 | instance TextRepresentation LogicalOperator where
 41 |     toText = \case
 42 |         And -> "&&"
 43 |         Or  -> "||"
 44 | 
 45 | instance TextRepresentation BinaryOperator where
 46 |     toText = \case
 47 |         ArithmeticOperator op -> toText op
 48 |         ComparisonOperator op -> toText op
 49 |         LogicalOperator    op -> toText op
 50 | 
 51 | instance TextRepresentation UnaryOperator where
 52 |     toText = \case
 53 |         Not    -> "!"
 54 |         Negate -> "-"
 55 | 
 56 | data Keyword
 57 |     = K_break
 58 |     | K_else
 59 |     | K_forever
 60 |     | K_function
 61 |     | K_if
 62 |     | K_let
 63 |     | K_return
 64 |     | K_returns
 65 |     | K_var
 66 |     | K_while
 67 |     deriving (Generic, Eq, Show, Enum, Bounded)
 68 | 
 69 | instance Enumerable Keyword
 70 | 
 71 | instance TextRepresentation Keyword where
 72 |     toText = stringToText . drop 2 . show
 73 | 
 74 | data BracketKind
 75 |     = Round
 76 |     | Curly
 77 |     | Square
 78 |     deriving (Generic, Eq, Show, Enum, Bounded)
 79 | 
 80 | instance Enumerable BracketKind
 81 | 
 82 | data BracketDirection
 83 |     = Open
 84 |     | Close
 85 |     deriving (Generic, Eq, Show, Enum, Bounded)
 86 | 
 87 | instance Enumerable BracketDirection
 88 | 
 89 | data Bracket = Bracket {
 90 |     bracketKind      :: BracketKind,
 91 |     bracketDirection :: BracketDirection
 92 | } deriving (Generic, Eq, Show)
 93 | 
 94 | instance Enumerable Bracket where
 95 |     enumerate = [Bracket kind dir | kind <- enumerate, dir <- enumerate]
 96 | 
 97 | instance TextRepresentation Bracket where
 98 |     toText = \case
 99 |         Bracket Round  Open  -> "("
100 |         Bracket Round  Close -> ")"
101 |         Bracket Curly  Open  -> "{"
102 |         Bracket Curly  Close -> "}"
103 |         Bracket Square Open  -> "["
104 |         Bracket Square Close -> "]"
105 | 
106 | data Token
107 |     = Keyword        Keyword
108 |     | Name           Text
109 |     | BinaryOperator BinaryOperator
110 |     | UnaryOperator  UnaryOperator
111 |     | Bracket'       Bracket
112 |     | Number         Integer
113 |     | Text           Text
114 |     | EqualsSign
115 |     | Comma
116 |     | Colon
117 |     | Semicolon
118 |     deriving (Generic, Eq, Show)
119 | 
120 | instance TextRepresentation Token where
121 |     toText = \case
122 |         Keyword        keyword -> toText keyword
123 |         BinaryOperator binop   -> toText binop
124 |         UnaryOperator  unop    -> toText unop
125 |         Bracket'       bracket -> toText bracket
126 |         Name           name'   -> toText name'
127 |         Number         number' -> toText number'
128 |         Text           text'   -> toText text'
129 |         EqualsSign             -> "="
130 |         Comma                  -> ","
131 |         Colon                  -> ":"
132 |         Semicolon              -> ";"
133 | 
134 | instance P.Render Token where
135 |     listSeparator = " "
136 |     render = \case
137 |         Keyword        keyword -> P.keyword (toText keyword)
138 |         BinaryOperator binop   -> P.binaryOperator binop
139 |         UnaryOperator  unop    -> P.unaryOperator unop
140 |         Name           name'   -> P.note (P.Identifier (P.IdentInfo name' P.Use P.Unknown False)) (P.pretty name')
141 |         Number         number' -> P.number number'
142 |         Text           text'   -> P.string text'
143 |         EqualsSign             -> P.defineEquals
144 |         Comma                  -> ","
145 |         Colon                  -> P.colon
146 |         Semicolon              -> P.semicolon
147 |         Bracket'       bracket -> P.note (bracketInfo (bracketKind bracket)) (P.pretty (toText bracket)) where
148 |             bracketInfo = \case
149 |                 Round  -> P.Paren
150 |                 Curly  -> P.Brace
151 |                 Square -> P.Bracket
152 | 
153 | type RE = RE.RE Char
154 | 
155 | matchRepresentable :: TextRepresentation a => a -> RE a
156 | matchRepresentable a = do
157 |     (unused . RE.string . textToString . toText) a
158 |     return a
159 | 
160 | matchEnumerable :: (Enumerable a, TextRepresentation a) => (a -> Token) -> RE Token
161 | matchEnumerable toToken = liftA1 toToken (oneOf (map matchRepresentable enumerate))
162 | 
163 | literal :: Token -> RE Token
164 | literal = matchRepresentable
165 | 
166 | orUnderscore :: (Char -> Bool) -> (Char -> Bool)
167 | orUnderscore f = \c -> f c || (c == '_')
168 | 
169 | name :: RE Text
170 | name = do
171 |     first <- RE.psym (orUnderscore isAlpha)
172 |     rest  <- zeroOrMore (RE.psym (orUnderscore isAlphaNum))
173 |     return (stringToText (first : rest))
174 | 
175 | nameOrKeyword :: Text -> Token
176 | nameOrKeyword text' = case lookup text' (map (\keyword -> (toText keyword, keyword)) (enumerate @Keyword)) of
177 |     Just keyword -> Keyword keyword
178 |     Nothing      -> Name text'
179 | 
180 | number :: RE Integer
181 | number = RE.signed RE.decimal
182 | 
183 | text :: RE Text
184 | text = do
185 |     unused (RE.sym '"')
186 |     content <- zeroOrMore (RE.psym (\c -> notElem c ['"', '\n']))
187 |     unused (RE.sym '"')
188 |     return (stringToText content)
189 | 
190 | token :: RE Token
191 | token = oneOf
192 |     [liftA1 nameOrKeyword name,
193 |      liftA1 Number        number,
194 |      liftA1 Text          text,
195 |      matchEnumerable UnaryOperator,
196 |      matchEnumerable BinaryOperator,
197 |      matchEnumerable Bracket',
198 |      literal EqualsSign,
199 |      literal Comma,
200 |      literal Colon,
201 |      literal Semicolon]
202 | 
203 | whitespace :: RE Char
204 | whitespace = oneOf (map RE.sym [' ', '\n'])
205 | 
206 | tokens :: Lex.Lexer Token
207 | tokens = Lex.token (Lex.longest token) ++ Lex.whitespace (Lex.longest whitespace)
208 | 
209 | data Error = InvalidTokenAt Loc.Pos deriving (Generic, Show)
210 | 
211 | tokenize :: Text -> Either Error [With Loc Token]
212 | tokenize = fmap (map locConvert) . mapLeft errorConvert . Lex.streamToEitherList . Lex.runLexer tokens "TODO filename" . textToString where
213 |     errorConvert (Lex.LexicalError pos) = InvalidTokenAt pos
214 |     locConvert (Loc.L loc token) = With loc token
215 | 


--------------------------------------------------------------------------------
/src/Type.hs:
--------------------------------------------------------------------------------
  1 | module Type (TypeInfo (..), Type (..), TypedName, Error (..), checkTypes, typeOf, ValidationError (..), validateTypes) where
  2 | 
  3 | import MyPrelude
  4 | 
  5 | import qualified Data.Map  as Map
  6 | import qualified Data.Text as Text
  7 | 
  8 | import qualified Pretty as P
  9 | import qualified AST
 10 | import AST (AST)
 11 | import qualified Name
 12 | import Name (Name, NameWith (NameWith), ResolvedName)
 13 | 
 14 | 
 15 | 
 16 | ------------------------------------------------------------------------ types
 17 | 
 18 | data TypeInfo
 19 |     = IsType  Type -- stored for names of types in type annotations
 20 |     | HasType Type -- stored for value-level names
 21 |     deriving (Generic, Eq, Show)
 22 | 
 23 | data Type
 24 |     = Int
 25 |     | Bool
 26 |     | Text
 27 |     | Unit
 28 |     | Function [Type] Type
 29 |     deriving (Generic, Eq, Show)
 30 | 
 31 | type TypedName = NameWith TypeInfo
 32 | 
 33 | typeOf :: AST.Expression metadata TypedName -> Type
 34 | typeOf = \case
 35 |     AST.Named name ->
 36 |         case Name.info name of
 37 |             HasType ty -> ty
 38 |             IsType  _  -> bug "Expression which IsType in typed AST"
 39 |     AST.NumberLiteral _ ->
 40 |         Int
 41 |     AST.TextLiteral _ ->
 42 |         Text
 43 |     AST.UnaryOperator op _ ->
 44 |         case op of
 45 |             Not    -> Bool
 46 |             Negate -> Int
 47 |     AST.BinaryOperator _ op _ ->
 48 |         case op of
 49 |             ArithmeticOperator _ -> Int
 50 |             ComparisonOperator _ -> Bool
 51 |             LogicalOperator    _ -> Bool
 52 |     AST.Call fn _ ->
 53 |         case typeOf (nodeWithout fn) of
 54 |             Function _ returnType -> returnType
 55 |             _                     -> bug "Call of non-function in typed AST"
 56 | 
 57 | 
 58 | 
 59 | ------------------------------------------------------------------------ pretty-printing
 60 | 
 61 | instance P.Render TypeInfo where
 62 |     listSeparator = ", "
 63 |     render ty = P.note (P.Identifier (P.IdentInfo (typeText ty) P.Use P.Type True)) (P.pretty (typeText ty)) where
 64 |         typeText = \case
 65 |             HasType (Function argumentTypes returnType) ->
 66 |                 "function(" ++ Text.intercalate ", " (map (typeText . HasType) argumentTypes) ++ ")" ++ case returnType of
 67 |                     Unit      -> ""
 68 |                     otherType -> " returns " ++ typeText (HasType otherType)
 69 |             HasType otherType -> showText otherType
 70 |             IsType  _         -> "Type"
 71 | 
 72 | instance P.Render Type where
 73 |     listSeparator = ", "
 74 |     render = P.render . HasType
 75 | 
 76 | instance AST.RenderName TypedName where
 77 |     renderName defOrUse (NameWith name ty) = maybeAppendTypeAnnotation (fmap setTypeInNote (AST.renderName defOrUse name))
 78 |         where maybeAppendTypeAnnotation binding = binding ++ (if defOrUse == P.Definition then (P.colon ++ " " ++ P.render ty) else "")
 79 |               setTypeInNote = \case
 80 |                    P.Identifier info -> P.Identifier (info { P.identType })
 81 |                    _                 -> bug "Pretty-printing annotation on ResolvedName was not Identifier"
 82 |               identType = case ty of
 83 |                   IsType _ -> P.Type
 84 |                   HasType hasType -> case hasType of
 85 |                       Int          -> P.Int
 86 |                       Bool         -> P.Bool
 87 |                       Text         -> P.Text
 88 |                       Unit         -> P.Unit
 89 |                       Function _ _ -> P.Function
 90 | 
 91 | 
 92 | 
 93 | ------------------------------------------------------------------------ typechecker frontend
 94 | 
 95 | -- TODO more info in here?
 96 | data Error
 97 |     = TypeMismatch Expected TypeInfo
 98 |     | WrongNumberOfArguments -- (can we move this into Expected?)
 99 |     | FunctionWithoutReturn
100 |     | AssignToLet
101 |     | LiteralOutOfRange
102 |     deriving (Generic, Show)
103 | 
104 | data Expected
105 |     = Expected Type
106 |     | ExpectedFunction
107 |     | ExpectedExpression
108 |     | ExpectedType
109 |     deriving (Generic, Show)
110 | 
111 | class (forall metadata. Monad (m metadata)) => TypeCheckM m where
112 |     recordType    :: Type -> ResolvedName     -> m metadata () -- records `HasType`; for now, the only things which are `IsType` are builtins
113 |     lookupType    :: ResolvedName             -> m metadata TypeInfo
114 |     reportError   :: Error                    -> m metadata a
115 |     enterMetadata :: metadata -> m metadata a -> m metadata a
116 | 
117 | -- TODO deduplicate?
118 | enterMetadataOf :: TypeCheckM m => NodeWith node metadata name -> m metadata a -> m metadata a
119 | enterMetadataOf = enterMetadata . nodeMetadata
120 | 
121 | class CheckTypeOf node where
122 |     inferType :: TypeCheckM m => node metadata ResolvedName -> m metadata TypeInfo
123 |     checkUnit :: TypeCheckM m => node metadata ResolvedName -> m metadata ()
124 |     inferType node = do
125 |         checkUnit node
126 |         return (HasType Unit)
127 |     checkUnit = checkType Unit
128 | 
129 | checkType :: (TypeCheckM m, CheckTypeOf node) => Type -> node metadata ResolvedName -> m metadata ()
130 | checkType expected node = do
131 |     actual <- inferType node
132 |     when (actual != HasType expected) do
133 |         reportError (TypeMismatch (Expected expected) actual)
134 | 
135 | instance CheckTypeOf AST.Expression where
136 |     inferType = \case
137 |         AST.Named name -> do
138 |             nameType <- lookupType name
139 |             case nameType of
140 |                 HasType _ -> return nameType
141 |                 IsType  _ -> reportError (TypeMismatch ExpectedExpression nameType)
142 |         AST.NumberLiteral int -> do
143 |             when (int > fromIntegral (maxBound :: Int64) || int < fromIntegral (minBound :: Int64)) do
144 |                 reportError LiteralOutOfRange
145 |             return (HasType Int)
146 |         AST.TextLiteral _ -> do
147 |             return (HasType Text)
148 |         AST.UnaryOperator op expr -> do
149 |             let type' = case op of
150 |                     Not    -> Bool
151 |                     Negate -> Int
152 |             checkType type' expr
153 |             return (HasType type')
154 |         AST.BinaryOperator expr1 op expr2 -> do
155 |             let (inType, outType) = case op of
156 |                     ArithmeticOperator _ -> (Int,  Int)
157 |                     ComparisonOperator _ -> (Int,  Bool)
158 |                     LogicalOperator    _ -> (Bool, Bool)
159 |             checkType inType expr1
160 |             checkType inType expr2
161 |             return (HasType outType)
162 |         AST.Call function arguments -> do
163 |             functionType <- inferType function
164 |             case functionType of
165 |                 HasType (Function argumentTypes returnType) -> do
166 |                     when (length argumentTypes != length arguments) do
167 |                         reportError WrongNumberOfArguments
168 |                     zipWithM checkType argumentTypes arguments
169 |                     return (HasType returnType)
170 |                 _ -> do
171 |                     reportError (TypeMismatch ExpectedFunction functionType)
172 | 
173 | instance CheckTypeOf AST.Statement where
174 |     checkUnit = \case
175 |         AST.Binding _ name expr -> do
176 |             inferred <- inferType expr
177 |             case inferred of
178 |                 HasType hasType -> recordType hasType name
179 |                 IsType  _       -> reportError (TypeMismatch ExpectedExpression inferred)
180 |         AST.Assign name expr -> do
181 |             when (Name.info name != AST.Var) do
182 |                 reportError AssignToLet
183 |             nameType <- lookupType name
184 |             case nameType of
185 |                 HasType hasType -> checkType hasType expr
186 |                 IsType  _       -> reportError (TypeMismatch ExpectedExpression nameType)
187 |         AST.IfThen expr block -> do
188 |             checkType Bool expr
189 |             checkUnit block
190 |         AST.IfThenElse expr block1 block2 -> do
191 |             checkType Bool expr
192 |             checkUnit block1
193 |             checkUnit block2
194 |         AST.Forever block -> do
195 |             checkBlock Unit block
196 |         AST.While expr block -> do
197 |             checkType Bool expr
198 |             checkBlock Unit block
199 |         AST.Return target maybeExpr -> do
200 |             returnType <- lookupType target
201 |             mapM_ (checkType (assert (match @"HasType" returnType))) maybeExpr
202 |             when (maybeExpr == Nothing) do
203 |                 checkType Unit (AST.Named target) -- HACK?
204 |         AST.Break target -> do
205 |             breakType <- lookupType target
206 |             assertEqM breakType (HasType Unit)
207 |         AST.Expression expr -> do
208 |             unused (inferType expr)
209 | 
210 | instance CheckTypeOf AST.Block where
211 |     checkUnit AST.Block { AST.statements } = do
212 |         mapM_ checkUnit statements
213 | 
214 | checkBlock :: TypeCheckM m => Type -> NodeWith AST.Block metadata ResolvedName -> m metadata ()
215 | checkBlock exitTargetType block = do
216 |     recordType exitTargetType (assert (AST.exitTarget (nodeWithout block)))
217 |     checkUnit block
218 | 
219 | resolveAsType :: TypeCheckM m => NodeWith AST.Type metadata ResolvedName -> m metadata Type
220 | resolveAsType typeNode = enterMetadataOf typeNode do
221 |     resolved <- inferType (nodeWithout typeNode)
222 |     return (msgAssert "type annotation resolved to a non-type" (match @"IsType" resolved))
223 | 
224 | instance CheckTypeOf AST.Function where
225 |     checkUnit AST.Function { AST.functionName, AST.arguments, AST.returns, AST.body } = do
226 |         argumentTypes <- forM arguments \argument -> do
227 |             enterMetadataOf argument do
228 |                 let AST.Argument { AST.argumentName, AST.argumentType } = nodeWithout argument
229 |                 resolvedType <- resolveAsType argumentType
230 |                 recordType resolvedType argumentName
231 |                 return resolvedType
232 |         maybeReturnType <- mapM resolveAsType returns
233 |         let returnType = fromMaybe Unit maybeReturnType
234 |         recordType (Function argumentTypes returnType) functionName
235 |         checkBlock returnType body
236 |         when (returnType != Unit && not (definitelyReturns (controlFlow body))) do
237 |             reportError FunctionWithoutReturn
238 | 
239 | instance CheckTypeOf AST.Type where
240 |     inferType = \case
241 |         AST.NamedType name -> do
242 |             nameType <- lookupType name
243 |             case nameType of
244 |                 IsType  _ -> return nameType
245 |                 HasType _ -> reportError (TypeMismatch ExpectedType nameType)
246 |         AST.FunctionType parameters returns -> do
247 |             resolvedParameters <- mapM resolveAsType parameters
248 |             resolvedReturns    <- resolveAsType returns
249 |             return (IsType (Function resolvedParameters resolvedReturns))
250 | 
251 | instance CheckTypeOf node => CheckTypeOf (NodeWith node) where
252 |     inferType node = do
253 |         enterMetadataOf node do
254 |             inferType (nodeWithout node)
255 | 
256 | data ControlFlow = ControlFlow {
257 |     definitelyReturns :: Bool, -- guaranteed divergence also counts as "returning"
258 |     potentiallyBreaks :: Bool
259 | }
260 | 
261 | instance Semigroup ControlFlow where
262 |     prev <> next = ControlFlow returns breaks where
263 |         returns = definitelyReturns prev || (not (potentiallyBreaks prev) && definitelyReturns next)
264 |         breaks  = potentiallyBreaks prev || (not (definitelyReturns prev) && potentiallyBreaks next)
265 | 
266 | instance Monoid ControlFlow where
267 |     mempty = ControlFlow False False
268 | 
269 | class CheckControlFlow node where
270 |     controlFlow :: Eq name => node metadata name -> ControlFlow
271 | 
272 | instance CheckControlFlow AST.Block where
273 |     controlFlow = mconcat . map controlFlow . AST.statements
274 | 
275 | instance CheckControlFlow AST.Statement where
276 |     controlFlow = \case
277 |         AST.Return {} ->
278 |             ControlFlow True  False
279 |         AST.Break {} ->
280 |             ControlFlow False True
281 |         AST.Binding {} ->
282 |             ControlFlow False False
283 |         AST.Assign {} ->
284 |             ControlFlow False False
285 |         AST.Expression {} ->
286 |             ControlFlow False False
287 |         AST.While {} ->
288 |             ControlFlow False False -- loops can't currently break out of the /outer/ context
289 |         AST.IfThen _ block ->
290 |             ControlFlow False (potentiallyBreaks (controlFlow block))
291 |         AST.IfThenElse _ block1 block2 ->
292 |             ControlFlow (returns1 && returns2) (breaks1 || breaks2) where
293 |                 ControlFlow returns1 breaks1 = controlFlow block1
294 |                 ControlFlow returns2 breaks2 = controlFlow block2
295 |         AST.Forever blockWith ->
296 |             ControlFlow (noBreaks || doesReturn) False where
297 |                 -- we can check whether there is a `break` by whether the `exitTarget` is ever referred to
298 |                 -- (we make use of the Foldable instances for the AST)
299 |                 -- we have to make sure to leave out the `exitTarget` itself!
300 |                 noBreaks   = not (any (== (assert (AST.exitTarget block))) (block { AST.exitTarget = Nothing }))
301 |                 doesReturn = definitelyReturns (controlFlow block)
302 |                 block      = nodeWithout blockWith
303 | 
304 | instance CheckControlFlow node => CheckControlFlow (NodeWith node) where
305 |     controlFlow = controlFlow . nodeWithout
306 | 
307 | 
308 | ------------------------------------------------------------------------ typechecker backend
309 | 
310 | newtype TypeCheck metadata a = TypeCheck {
311 |     runTypeCheck :: (ExceptT Error) (State (TypeCheckState metadata)) a
312 | } deriving (Functor, Applicative, Monad, MonadState (TypeCheckState metadata), MonadError Error)
313 | 
314 | data TypeCheckState metadata = TypeCheckState {
315 |     types    :: Map Name TypeInfo,
316 |     metadata :: [metadata]
317 | } deriving (Generic, Show)
318 | 
319 | checkTypes :: AST metadata ResolvedName -> Either (With metadata Error) (AST metadata TypedName)
320 | checkTypes ast = pipeline ast where
321 |     pipeline        = constructResult . runState initialState . runExceptT . runTypeCheck . mapM_ checkUnit
322 |     initialState    = TypeCheckState (Map.fromList builtinNames) []
323 |     builtinNames    = map (\builtinName -> (Name.BuiltinName builtinName, inferBuiltin builtinName)) (enumerate @Name.BuiltinName)
324 |     constructResult = \case
325 |         (Right (),    TypeCheckState { types    }) -> Right (map (fmap (makeNameTyped types)) ast)
326 |         (Left  error, TypeCheckState { metadata }) -> Left  (With (assert (head metadata)) error)
327 |     makeNameTyped types (NameWith name _) = NameWith name (assert (Map.lookup name types))
328 | 
329 | instance TypeCheckM TypeCheck where
330 |     recordType typeOfName name = do
331 |         doModifyM (field @"types") \typeMap -> do
332 |             assertM (not (Map.member (Name.name name) typeMap))
333 |             return (Map.insert (Name.name name) (HasType typeOfName) typeMap)
334 |         return ()
335 |     lookupType name = do
336 |         typeMap <- getM (field @"types")
337 |         return (msgAssert "Name not found in types map!" (Map.lookup (Name.name name) typeMap))
338 |     reportError err = do
339 |         throwError err
340 |     enterMetadata metadata action = do -- TODO maybe deduplicate this from here and `Name`?
341 |         modifyM (field @"metadata") (prepend metadata)
342 |         result <- action
343 |         modifyM (field @"metadata") (assert . tail)
344 |         return result
345 | 
346 | inferBuiltin :: Name.BuiltinName -> TypeInfo
347 | inferBuiltin = \case
348 |     Name.Builtin_Int   -> IsType  Int
349 |     Name.Builtin_Bool  -> IsType  Bool
350 |     Name.Builtin_Text  -> IsType  Text
351 |     Name.Builtin_Unit  -> IsType  Unit
352 |     Name.Builtin_true  -> HasType Bool
353 |     Name.Builtin_false -> HasType Bool
354 |     Name.Builtin_ask   -> HasType (Function [Text] Int)
355 |     Name.Builtin_say   -> HasType (Function [Text] Unit)
356 |     Name.Builtin_write -> HasType (Function [Int]  Unit)
357 | 
358 | 
359 | 
360 | ------------------------------------------------------------------------ validation
361 | 
362 | -- TODO check presence/absence of exit targets (or in Name.validate??)
363 | data ValidationError metadata
364 |     = BadType          Expected (AST.Expression metadata TypedName)
365 |     | BadAnnotation    Type     (AST.Type       metadata TypedName)
366 |     | BadArgumentCount Int      (AST.Expression metadata TypedName)
367 |     deriving (Generic, Show)
368 | 
369 | type ValidateM metadata a = Except (ValidationError metadata) a
370 | 
371 | class Validate node where
372 |     validate :: node metadata TypedName -> ValidateM metadata ()
373 | 
374 | -- This checks that:
375 | --  * The AST is locally well-typed at each point, based on the types stored within `Name`s.
376 | --  * The explicitly-written type annotations in the AST are accurate.
377 | -- This does NOT check that:
378 | --  * Names are actually in scope, and the info stored in `Name`s is consistent. Use `Name.validate` for that!
379 | --  * Literals are within range, and assignments match their binding types.
380 | validateTypes :: AST metadata TypedName -> Either (ValidationError metadata) ()
381 | validateTypes = runExcept . mapM_ validate
382 | 
383 | validateAnnotation :: Type -> AST.Type metadata TypedName -> ValidateM metadata ()
384 | validateAnnotation = curry \case
385 |     (expected@(Function expectedParams expectedReturns), annotated@(AST.FunctionType annotatedParams annotatedReturns)) -> do
386 |         when (length expectedParams != length annotatedParams) do
387 |             throwError (BadAnnotation expected annotated)
388 |         zipWithM_ validateAnnotation expectedParams (map nodeWithout annotatedParams)
389 |         validateAnnotation expectedReturns (nodeWithout annotatedReturns)
390 |     (expectedType, AST.NamedType namedType) | Name.info namedType == IsType expectedType -> do
391 |         return ()
392 |     (expectedType, annotation) -> do
393 |         throwError (BadAnnotation expectedType annotation)
394 | 
395 | instance Validate AST.Function where
396 |     validate AST.Function { AST.functionName, AST.arguments, AST.returns, AST.body } = do
397 |         argumentTypes <- forM arguments \argument -> do
398 |             let AST.Argument { AST.argumentName, AST.argumentType } = nodeWithout argument
399 |             case Name.info argumentName of
400 |                 HasType ty -> do
401 |                     validateAnnotation ty (nodeWithout argumentType)
402 |                     return ty
403 |                 IsType _ -> do
404 |                     throwError (BadType ExpectedExpression (AST.Named argumentName))
405 |         case Name.info functionName of
406 |             HasType infoType@(Function infoParams infoReturns) -> do
407 |                 mapM_ (validateAnnotation infoReturns . nodeWithout) returns
408 |                 when (infoParams != argumentTypes) do
409 |                     throwError (BadType (Expected infoType) (AST.Named functionName)) -- ehhhhhh
410 |                 mapM_ (validateName infoReturns) ((AST.exitTarget . nodeWithout) body) -- TODO check that it's Just!
411 |             HasType _ -> do
412 |                 throwError (BadType ExpectedFunction (AST.Named functionName))
413 |             IsType _ -> do
414 |                 throwError (BadType ExpectedExpression (AST.Named functionName))
415 |         validate body
416 | 
417 | instance Validate AST.Block where
418 |     validate = mapM_ validate . AST.statements
419 | 
420 | instance Validate AST.Statement where
421 |     validate = \case
422 |         AST.Binding _ name@(NameWith _ info) expr -> do
423 |             case info of
424 |                 HasType ty -> validateExpr ty expr
425 |                 IsType  _  -> throwError (BadType ExpectedExpression (AST.Named name))
426 |         AST.Assign name@(NameWith _ info) expr -> do
427 |             case info of
428 |                 HasType ty -> validateExpr ty expr
429 |                 IsType  _  -> throwError (BadType ExpectedExpression (AST.Named name))
430 |         AST.IfThen expr body -> do
431 |             validateExpr Bool expr
432 |             validate body
433 |         AST.IfThenElse expr body1 body2 -> do
434 |             validateExpr Bool expr
435 |             mapM_ validate [body1, body2]
436 |         AST.Forever body -> do
437 |             validate body
438 |         AST.While expr body -> do
439 |             validateExpr Bool expr
440 |             validate body
441 |         AST.Return target maybeExpr -> do
442 |             mapM_ (validateExpr (typeOf (AST.Named target))) maybeExpr
443 |             when (maybeExpr == Nothing) do
444 |                 validateName Unit target
445 |         AST.Break target -> do
446 |             validateName Unit target
447 |         AST.Expression expr -> do
448 |             validate expr
449 | 
450 | instance Validate AST.Expression where
451 |     validate = \case
452 |         AST.UnaryOperator op expr -> do
453 |             validateExpr opExpectsType expr where
454 |                 opExpectsType = case op of
455 |                     Not    -> Bool
456 |                     Negate -> Int
457 |         AST.BinaryOperator expr1 op expr2 -> do
458 |             mapM_ (validateExpr opExpectsType) [expr1, expr2] where
459 |                 opExpectsType = case op of
460 |                     ArithmeticOperator _ -> Int
461 |                     ComparisonOperator _ -> Int
462 |                     LogicalOperator    _ -> Bool
463 |         AST.Named _ -> do
464 |             return ()
465 |         AST.NumberLiteral _ -> do
466 |             return ()
467 |         AST.TextLiteral _ -> do
468 |             return ()
469 |         AST.Call function args -> do
470 |             case typeOf (nodeWithout function) of
471 |                 Function argTypes _ -> do
472 |                     when (length args != length argTypes) do
473 |                         throwError (BadArgumentCount (length argTypes) (AST.Call function args))
474 |                     zipWithM_ validateExpr argTypes args
475 |                 _ -> do
476 |                     throwError (BadType ExpectedFunction (nodeWithout function))
477 | 
478 | instance Validate node => Validate (NodeWith node) where
479 |     validate = validate . nodeWithout
480 | 
481 | validateName :: Type -> TypedName -> ValidateM metadata ()
482 | validateName ty = validateExprImpl ty . AST.Named
483 | 
484 | validateExpr :: Type -> NodeWith AST.Expression metadata TypedName -> ValidateM metadata ()
485 | validateExpr ty = validateExprImpl ty . nodeWithout
486 | 
487 | validateExprImpl :: Type -> AST.Expression metadata TypedName -> ValidateM metadata ()
488 | validateExprImpl expectedType expr = do
489 |     when (typeOf expr != expectedType) do -- FIXME maybe `validate` shouldn't panic if it sees a `Type` in the wrong place, as `typeOf` does!!
490 |         throwError (BadType (Expected expectedType) expr)
491 |     validate expr
492 | 


--------------------------------------------------------------------------------
/stack.yaml:
--------------------------------------------------------------------------------
 1 | flags:             {}
 2 | extra-package-dbs: []
 3 | packages:    ['.']
 4 | extra-deps:  [lexer-applicative-2.1.0.2]
 5 | #resolver:   nightly-2017-07-25
 6 | #resolver:   lts-12.10
 7 | resolver:    nightly-2018-11-24
 8 | allow-newer: true # for intero
 9 | 
10 | nix:
11 |   enable:       true
12 |   pure:         true
13 |   add-gc-roots: true
14 | #  packages:    [llvm_7]
15 |   shell-file:   shell.nix # so we can get it from the unstable channel
16 | 


--------------------------------------------------------------------------------