├── .gitignore ├── .travis.yml ├── CMakeLists.txt ├── LICENSE ├── Makefile ├── README.md ├── Setup.hs ├── docs ├── LangsOverview.rst ├── Lasca Design.rst ├── Log of Lasca Development.md └── Syntax.rst ├── examples ├── Data.lasca ├── Either.lasca ├── Json.lasca ├── binarytrees.lasca ├── builtin.lasca ├── dynamic.lasca ├── factorial.lasca ├── hello.lasca ├── lambda.lasca ├── nbody.lasca ├── nbody2.lasca ├── nbody3.lasca ├── queen.lasca ├── regex.lasca └── ski.lasca ├── gencode └── GenBench.hs ├── lasca.cabal ├── lasca.nix ├── lascart.nix ├── libs └── base │ ├── Array.lasca │ ├── ArrayBuffer.lasca │ ├── Bits.lasca │ ├── ByteArray.lasca │ ├── List.lasca │ ├── Map.lasca │ ├── Option.lasca │ ├── Prelude.lasca │ └── String.lasca ├── make-release.sh ├── release.nix ├── rts ├── CMakeLists.txt ├── builtin.c ├── lasca.h ├── runtime.c ├── utf8proc │ ├── CMakeLists.txt │ ├── LICENSE.md │ ├── MANIFEST │ ├── Makefile │ ├── NEWS.md │ ├── README.md │ ├── bench │ │ ├── Makefile │ │ ├── bench.c │ │ ├── icu.c │ │ ├── unistring.c │ │ ├── util.c │ │ └── util.h │ ├── data │ │ ├── Makefile │ │ ├── charwidths.jl │ │ └── data_generator.rb │ ├── lump.md │ ├── test │ │ ├── case.c │ │ ├── charwidth.c │ │ ├── custom.c │ │ ├── graphemetest.c │ │ ├── iterate.c │ │ ├── normtest.c │ │ ├── printproperty.c │ │ ├── tests.c │ │ ├── tests.h │ │ └── valid.c │ ├── utf8proc.c │ ├── utf8proc.h │ ├── utf8proc_data.c │ └── utils.cmake ├── xxhash.c └── xxhash.h ├── shell.nix ├── src ├── lasca │ └── Main.hs ├── lib │ └── Lasca │ │ ├── Codegen.hs │ │ ├── Compiler.hs │ │ ├── Desugar.hs │ │ ├── Emit.hs │ │ ├── EmitCommon.hs │ │ ├── EmitDynamic.hs │ │ ├── EmitStatic.hs │ │ ├── Infer.hs │ │ ├── JIT.hs │ │ ├── Lexer.hs │ │ ├── Modules.hs │ │ ├── Namer.hs │ │ ├── Options.hs │ │ ├── Parser.hs │ │ ├── Syntax.hs │ │ └── Type.hs └── test │ ├── TestMain.hs │ └── golden │ ├── ArrayBuffer.golden │ ├── Either.golden │ ├── List.golden │ ├── Map.golden │ ├── Option.golden │ ├── String.golden │ ├── array.golden │ ├── binarytrees.golden │ ├── builtin.golden │ ├── data.golden │ ├── dynamic.golden │ ├── factorial.golden │ ├── hello.golden │ ├── lambda.golden │ ├── nbody.golden │ ├── nbody2.golden │ ├── nbody3.golden │ ├── queen.golden │ ├── regex.golden │ └── ski.golden ├── stack-shell.nix └── stack.yaml /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/* 2 | .stack* 3 | .cabal* 4 | .liquid/ 5 | .vscode/* 6 | result 7 | .history/ 8 | build/ 9 | out/ 10 | dist 11 | *.iml 12 | target/ 13 | cabal.sandbox.config 14 | hello 15 | *.dll 16 | *.dylib 17 | *.so 18 | *.js 19 | *.ll 20 | *.pdf 21 | *.prof 22 | *.o 23 | *.a 24 | *.out 25 | *.aux 26 | *.hp 27 | .DS_Store 28 | Thumbs.db 29 | 30 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # This is the simple Travis configuration, which is intended for use 2 | # on applications which do not require cross-platform and 3 | # multiple-GHC-version support. For more information and other 4 | # options, see: 5 | # 6 | # https://docs.haskellstack.org/en/stable/travis_ci/ 7 | # 8 | # Copy these contents into the root directory of your Github project in a file 9 | # named .travis.yml 10 | 11 | # Use new container infrastructure to enable caching 12 | sudo: true 13 | 14 | # Do not choose a language; we provide our own build tools. 
15 | language: nix 16 | 17 | dist: trusty 18 | 19 | script: nix-build release.nix 20 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.4.3) 2 | project (LascaRTS) 3 | set(CMAKE_POSITION_INDEPENDENT_CODE ON) 4 | add_subdirectory (rts) 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016- Alexander Nemish 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following disclaimer 13 | in the documentation and/or other materials provided with the 14 | distribution. 15 | * Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PANDOC = pandoc 2 | IFORMAT = markdown 3 | FLAGS = --standalone --toc --highlight-style pygments 4 | 5 | #TEST_RTS = +RTS -sstderr 6 | TEST_RTS = 7 | LASCA_VERSION="0.0.2" 8 | 9 | build: rts 10 | stack build --extra-lib-dirs=build/rts 11 | 12 | fast: rts 13 | stack build --fast -j 8 --extra-lib-dirs=build/rts 14 | 15 | install: build 16 | stack install --extra-lib-dirs=build/rts && stack test --extra-lib-dirs=build/rts 17 | 18 | fastinstall: fast 19 | stack install --fast --extra-lib-dirs=build/rts 20 | 21 | bench: 22 | time lasca -O2 -e examples/gen.lasca 23 | 24 | rts: 25 | mkdir -p build && cd build && cmake -DCMAKE_BUILD_TYPE=Release .. 
&& make && cp rts/liblascart* $(LASCAPATH) 26 | 27 | relink: rts 28 | rm -rf .stack-work/dist/x86_64-osx/Cabal-2.0.1.0/build/Lasca/lasca 29 | rm -rf .stack-work/install 30 | stack build --fast -j 8 --copy-bins --extra-lib-dirs=build/rts 31 | 32 | rusts: 33 | cd rts/rust && cargo build && cp target/debug/liblascarts.dylib ../../../ 34 | 35 | test: 36 | stack test --extra-lib-dirs=build/rts 37 | 38 | fasttest: 39 | stack test -j 8 --fast --extra-lib-dirs=build/rts 40 | 41 | examples: 42 | lasca -O2 -e --mode static libs/base/Array.lasca $(TEST_RTS) 43 | lasca -O2 -e --mode dynamic libs/base/Array.lasca $(TEST_RTS) 44 | lasca -O2 -e --mode static libs/base/List.lasca $(TEST_RTS) 45 | lasca -O2 -e --mode dynamic libs/base/List.lasca $(TEST_RTS) 46 | lasca -O2 -e --mode static libs/base/Option.lasca $(TEST_RTS) 47 | lasca -O2 -e --mode dynamic libs/base/Option.lasca $(TEST_RTS) 48 | lasca -O2 -e --mode static libs/base/String.lasca $(TEST_RTS) 49 | lasca -O2 -e --mode dynamic libs/base/String.lasca $(TEST_RTS) 50 | lasca -O2 -e --mode static examples/Map.lasca $(TEST_RTS) 51 | lasca -O2 -e --mode dynamic examples/Map.lasca $(TEST_RTS) 52 | lasca -O2 -e --mode static examples/Data.lasca $(TEST_RTS) 53 | lasca -O2 -e --mode dynamic examples/Data.lasca $(TEST_RTS) 54 | lasca -O2 -e --mode dynamic examples/dynamic.lasca $(TEST_RTS) 55 | lasca -O2 -e --mode static examples/factorial.lasca $(TEST_RTS) -- 15 56 | lasca -O2 -e --mode dynamic examples/factorial.lasca $(TEST_RTS) -- 15 57 | lasca -O2 -e --mode dynamic examples/hello.lasca $(TEST_RTS) 58 | lasca -O2 -e --mode static examples/hello.lasca $(TEST_RTS) 59 | lasca -O2 -e --mode static examples/lambda.lasca $(TEST_RTS) 60 | lasca -O2 -e --mode dynamic examples/lambda.lasca $(TEST_RTS) 61 | lasca -O2 -e --mode static examples/nbody.lasca $(TEST_RTS) -- 50000 62 | lasca -O2 -e --mode dynamic examples/nbody.lasca $(TEST_RTS) -- 50000 63 | lasca -O2 -e --mode static examples/nbody2.lasca $(TEST_RTS) -- 50000 64 | lasca -O2 -e --mode dynamic examples/nbody2.lasca $(TEST_RTS) -- 50000 65 | lasca -O2 -e --mode static examples/nbody3.lasca $(TEST_RTS) -- 50000 66 | lasca -O2 -e --mode dynamic examples/nbody3.lasca $(TEST_RTS) -- 50000 67 | lasca -O2 -e --mode static examples/binarytrees.lasca $(TEST_RTS) -- 10 68 | lasca -O2 -e --mode dynamic examples/binarytrees.lasca $(TEST_RTS) -- 10 69 | lasca -O2 -e --mode static examples/ski.lasca $(TEST_RTS) 70 | 71 | perf: 72 | stack install --profile -j 8 --extra-lib-dirs=build/rts 73 | time lasca examples/Map.lasca +RTS -sstderr -N4 -p -hc 74 | hp2ps -c lasca.hp 75 | ghc-prof-flamegraph lasca.prof 76 | 77 | release: build 78 | ./make-release.sh ${LASCA_VERSION} 79 | designpdf: 80 | rst2pdf -b 1 docs/Lasca\ Design.rst 81 | 82 | .PHONY: clean examples rts install 83 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Lasca Language 2 | ============== 3 | 4 | [![Build Status](https://travis-ci.org/nau/lasca-compiler.svg?branch=master)](https://travis-ci.org/nau/lasca-compiler) 5 | [![Join the chat at https://gitter.im/lasca-lang/compiler](https://badges.gitter.im/lasca-lang/Lobby.svg)](https://gitter.im/lasca-lang/compiler?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) 6 | 7 | Lasca is Scala shifted towards Haskell. 8 | 9 | Lasca is a LLVM-based statically or dynamically typed strict functional programming language. Simplified OCaml if you will. 
10 | 11 | It has a 'dynamic' compilation mode, meaning instant code generation without compile-time type checking/inference, which allows an instant compile/run cycle and the extra freedom dynamic languages give. 12 | 13 | It has full type inference, parametric polymorphism, GC, algebraic data types, and pattern matching; 14 | type classes are coming soon. 15 | 16 | Imagine 17 | 18 | - Scala with fast compilation/start time, optional dynamic typing, and without null 19 | - Go with ADTs, global type inference, and parametric polymorphism 20 | - Haskell with decent record syntax, runtime polymorphism, and string interpolation 21 | - OCaml with typeclasses, overloaded +-*/ for ints and floats, and do-notation 22 | - Rust with a garbage collector and without <::>!? 23 | - Erlang with types and fast execution 24 | - Python with multithreading, pattern matching, and multiline lambdas 25 | - TypeScript with indentation-significant syntax and LLVM 26 | - Julia with static type checking and zero-based indexing 27 | 28 | Inspired by: 29 | 30 | - Scala 31 | - Haskell, Liquid Haskell, Linear Haskell, Idris 32 | - OCaml/SML/F#/1ML 33 | - Clojure (persistent data structures, HAMT/CHAMP) 34 | - Go (CSP) 35 | - Erlang (actors, immutability, minimalism) 36 | - Python (docstrings, doctests, syntax) 37 | - Julia 38 | - Swift 39 | - Nim 40 | - Pony 41 | - [Koka](https://github.com/koka-lang/koka) (algebraic effects) 42 | 43 | Ideas 44 | --- 45 | 46 | - light, non-symbol-polluted syntax (Python) 47 | - indentation-based 48 | - readability first 49 | - fast development cycle 50 | - presentation compiler for IDE integration 51 | - IDE-friendly (intellisense 'dot-autocomplete', auto-formatting, compiler API) 52 | - type-safe 53 | - strict functional 54 | - expression-based 55 | - practical first, but as clean and concise as possible 56 | - prefer things done one way 57 | - LLVM backend 58 | - JavaScript/WebAssembly backend (native or via LLVM/emscripten) 59 | - GraalVM backend? 60 | - no OOP and data subclassing/inheritance? 61 | - syntactic sugar is ok 62 | - no null 63 | - annotations (Java/Python-style) 64 | - annotation-based extensions 65 | - macro-based metaprogramming (like Scala Macros, Template Haskell) 66 | - Concurrency Oriented Programming (Erlang). Objects are out. Concurrency is in. 67 | - [Gradual Typing](http://homes.soic.indiana.edu/jsiek/what-is-gradual-typing/) 68 | - Deferred Type Errors (runtime compilation mode, Haskell) 69 | - Linear/affine types (Rust, Linear Haskell)? 70 | - Liquid Type system (refinement types, [Leon](http://leon.epfl.ch), [Liquid Haskell](https://github.com/ucsd-progsys/liquidhaskell)) and 71 | [Z3](https://github.com/Z3Prover/z3)/[CVC4](https://cvc4.cs.stanford.edu/web/) as proof assistants. 72 | - [Algebraic Subtyping for module system](https://www.cl.cam.ac.uk/~sd601/thesis.pdf) 73 | - import features (Scala-like) 74 | - compile-time and runtime reflection 75 | - save/distribute AST (Scala TASTY). Full program optimization/reflection 76 | - important things must be greppable and googlable, call it searchability :) 77 | - compiler as a service: [Language Server Protocol](https://langserver.org/) 78 | - markdown/rst comments/docs, doctest (Julia, Python) 79 | - CPS/Actors/π-calculus/STM?, non-blocking IO, reactive 80 | 81 | Example 82 | --- 83 | 84 | The current implementation uses braces and semicolons, but I am considering indentation-based syntax or semicolon inference.
85 | 86 | ```haskell 87 | -- Algebraic data type a la Haskell 88 | data JValue 89 | = JNull 90 | | JNum(n: Float) 91 | | JString(s: String) 92 | | JBool(v: Bool) 93 | | JArray(v: [JValue]) 94 | | JObject(v: Map String JValue) 95 | 96 | -- function argument type annotations are optional, compiler infers those 97 | def jsonToString(js: JValue) = match js { 98 | JObject(m) -> 99 | if Map.isEmpty(m) then "{}" else { 100 | println(toString(m)); 101 | res = Array.makeArray(m.size, ""); 102 | var idx = 0; 103 | Map.foreachWithKey(m, { k, v -> 104 | setIndex(res, idx.readVar, "\"${k}\": ${jsonToString(v)}"); 105 | idx := idx.readVar + 1; 106 | }); 107 | s = String.join(", ", res); 108 | "{ ${s} }" 109 | } 110 | JNull -> "null" 111 | JNum(n) -> toString(n) 112 | JBool(v) -> toString(v) 113 | JString(v) -> "\"${v}\"" 114 | JArray(v) -> { 115 | values = Array.map(v, jsonToString); 116 | toString(values); 117 | } 118 | } 119 | ``` 120 | 121 | What Works Right Now 122 | --- 123 | 124 | - JIT and AOT compilation and execution (via LLVM OrcJIT) 125 | - lasca -e hello.lasca to execute 126 | - lasca hello.lasca to create a binary 127 | - type inference 128 | - dynamic typing mode (```lasca -e --mode dynamic hello.lasca```) 129 | - ADTs, inner functions, out of order function definitions 130 | - pattern matching 131 | - calling external C functions 132 | - string interpolation, UTF8 encoded immutable strings 133 | - builtin types: `String`, `Bool`, `Int`, `Byte`, `Int16`, `Int32`, `Float`, `Array`, `ByteArray`, `Var`, `FileHandle` 134 | - implemented `List`, `Option`, `Either`, `Map`, `ArrayBuffer` 135 | - regular expressions with [PCRE-2](https://www.pcre.org/) 136 | - overloaded `+` `-` `*` `/` operators 137 | 138 | Package System 139 | --- 140 | 141 | Consider [Nix](https://nixos.org/nix/) as package manager 142 | 143 | Compiler Modes 144 | ---- 145 | 146 | - Dynamic Mode, aka Prototype Mode. 147 | Syntax is checked. 148 | All types are dynamically checked. 149 | - Static Mode. 150 | Syntax is checked. 151 | Typechecking/inference, faster execution. 152 | - Hardcore 153 | Liquid types enabled. (See Liquid Haskell) 154 | Proves checked. 155 | Array bounds checks eliminated. 156 | 157 | Type System 158 | --- 159 | 160 | - Hindley-Milner by default, dependent types if needed 161 | - traits, kind of type classes 162 | - Liquid types as in Liquid Haskell 163 | 164 | Memory Management 165 | ---- 166 | 167 | GC, concurrent mark and sweep 168 | per actor/green thread GC 169 | Consider [MultiCore Ocaml GC](http://kcsrk.info/multicore/gc/2017/07/06/multicore-ocaml-gc/) 170 | 171 | for now, use [Boehm conservative GC](http://www.hboehm.info/gc/) 172 | 173 | Other 174 | --- 175 | 176 | - indentation significant (i.e. Python, Haskell) 177 | - uppercase Typenames, lowercase idents and type arguments (Haskell/Scala style) 178 | - pattern-matching 179 | - ADT, traits, type classes 180 | - easy C interoperability 181 | - no exceptions (Go/Rust panic style errors) 182 | - don't overuse `'~!@#$%^&* symbols 183 | - default immutability 184 | - string interpolation: "${ident} = ${expression}" 185 | - multiline strings 186 | - Uniform Function Call Syntax (Rust, D). 187 | For example, any function can be a method for its first argument: 188 | 189 | ```scala 190 | def toString(s: String) = ... 191 | "Hello".toString 192 | def plus(l: Num, r: Num) 193 | 1.plus(2) 194 | ``` 195 | 196 | - uniform select principle. Use (.) 
for record field selection, function calls, package name resolution, etc. 197 | - UTF-8 strings 198 | - Haskell-like application for type functions: Option Int, Either Int String, etc 199 | 200 | Install on Mac OS using Homebrew 201 | --- 202 | 203 | brew install boehmgc pcre2 204 | brew install nau/lasca/lasca-compiler 205 | 206 | Set up the LASCAPATH environment variable. Add this to your .bash_profile 207 | 208 | export LASCAPATH="$(brew --prefix lasca-compiler)/src" 209 | 210 | Try it! 211 | 212 | echo 'def main() = println("Hello Lasca!")' > hello.lasca 213 | lasca -e hello.lasca 214 | > Hello Lasca! 215 | 216 | Add bash completion config for lasca compiler options: 217 | 218 | lasca --bash-completion-script lasca > $(brew --prefix)/etc/bash_completion.d/lasca 219 | 220 | Build on Mac OS 221 | --- 222 | 223 | You need LLVM 6.0 installed, and the latest Haskell Stack. 224 | 225 | brew install cmake boehmgc pcre2 226 | 227 | brew install llvm-hs/llvm/llvm-6.0 # this compiles llvm from sources, may take some time 228 | 229 | brew install haskell-stack 230 | 231 | stack setup 232 | 233 | Set up the LASCAPATH environment variable. Add this to your .bash_profile 234 | 235 | export LASCAPATH="${lasca-compiler-src-dir}/libs/base" 236 | 237 | Add your `~/.local/bin` directory to your `PATH` 238 | 239 | export PATH=$PATH:~/.local/bin 240 | 241 | Build and install the lasca compiler 242 | 243 | make install 244 | 245 | Add bash completion config for lasca compiler options: 246 | 247 | lasca --bash-completion-script lasca > $(brew --prefix)/etc/bash_completion.d/lasca 248 | 249 | Run hello.lasca 250 | 251 | lasca --exec examples/hello.lasca 252 | 253 | Build on Ubuntu 254 | --- 255 | 256 | Requirements: Haskell Stack > 1.6, Cabal > 2.0, LLVM 6, CMake 257 | 258 | Don't install Haskell Stack from apt. [It's likely to be older than 1.6 and won't be able to upgrade](https://askubuntu.com/questions/986596/how-to-upgrade-haskell-stack-on-ubuntu-16-04) 259 | 260 | Do this instead: 261 | 262 | curl -sSL https://get.haskellstack.org/ | sh 263 | 264 | sudo apt install llvm-6.0-dev libgc-dev zlib1g-dev cmake 265 | sudo add-apt-repository universe 266 | sudo apt install libpcre2-dev 267 | export LASCAPATH="${lasca-compiler-src-dir}/libs/base" 268 | export PATH=$PATH:~/.local/bin 269 | stack setup 270 | make install 271 | lasca -e examples/hello.lasca 272 | 273 | Current n-body run 274 | --- 275 | 276 | There are several implementations of the [n-body problem]( 277 | http://benchmarksgame.alioth.debian.org/u64q/nbody.html). 278 | Currently it's quite slow due to boxing. 279 | 280 | $ time lasca -e -O2 examples/nbody.lasca -- 50000000 281 | -0.169075164 282 | -0.169059907 283 | 284 | real 7m13.261s 285 | user 7m39.476s 286 | sys 0m38.716s 287 | 288 | find src -name "*.hs" | xargs cat | wc -l 289 | 4738 290 | 291 | cat rts/runtime.c rts/builtin.c rts/lasca.h | wc -l 292 | 1324 293 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain -------------------------------------------------------------------------------- /docs/Log of Lasca Development.md: -------------------------------------------------------------------------------- 1 | # Log of Lasca Development 2 | 3 | These are mostly my rants on things in the industry I find crazy. 4 | Legacy has an overwhelming power. 5 | 6 | ## 19/02/2018 Today I learned how to print 64-bit ints in C on both Mac and Linux.
7 | 8 | This code works fine on macOS, 9 | 10 | printf("%lli", code); 11 | 12 | but gives this warning on Linux: 13 | 14 | warning: format ‘%lli’ expects argument of type ‘long long int’, 15 | but argument 2 has type ‘int64_t {aka long int}’ [-Wformat=] 16 | 17 | Apparently the right way to print an int64_t with the printf/snprintf family of functions is this: 18 | 19 | #define __STDC_FORMAT_MACROS 20 | #include <inttypes.h> 21 | 22 | int64_t i; 23 | printf("%" PRId64 "\n", i); 24 | 25 | Sigh. 26 | 27 | ## 17/08/2018 State of Unicode support in programming languages 28 | 29 | [SO overview](https://stackoverflow.com/questions/1036585/unicode-support-in-various-programming-languages) 30 | 31 | Everything is very, very sad. 32 | Only a few modern languages use UTF-8 out of the box: Rust, Julia, and Go. 33 | Others do various hacks or use UTF-16, which is even worse. 34 | Legacy has an overwhelming power. 35 | 36 | Sigh. 37 | 38 | http://utf8everywhere.org/ 39 | [UTF-16 Considered Harmful](https://softwareengineering.stackexchange.com/questions/102205/should-utf-16-be-considered-harmful) 40 | 41 | ## 08/10/2018 Default Hash Function and Hash table 42 | 43 | The choice of hash function is crucial. 44 | 45 | Most languages/platforms have changed their hashing functions to something more secure. Many chose SipHash: 46 | 47 | - Python (starting in version 3.4) 48 | - Ruby 49 | - Rust 50 | 51 | In Lasca we'll use SipHash. 52 | 53 | [SipHash: a fast short-input PRF](https://131002.net/siphash/siphash.pdf) 54 | 55 | [Which hashing algorithm is best for uniqueness and speed?](https://softwareengineering.stackexchange.com/questions/49550/which-hashing-algorithm-is-best-for-uniqueness-and-speed) 56 | 57 | Java's default String hash algorithm is awful: 58 | 59 | s[0]*31^(n-1) + s[1]*31^(n-2) + ... + s[n-1] 60 | 61 | It's easy to find collisions. 62 | 63 | Haskell uses [FNV1](http://isthe.com/chongo/tech/comp/fnv/) in [hashable](http://hackage.haskell.org/package/hashable-1.2.6.1/docs/src/Data-Hashable-Class.html#line-627) 64 | 65 | Links 66 | From this paper: 67 | https://bigdata.uni-saarland.de/publications/p249-richter.pdf 68 | 69 | we consider 70 | Mult as the best candidate to be used in practice when 71 | quality results on high throughputs is desired, but at the cost of 72 | a high variance across data distributions 73 | 74 | We can conclude that RH provides a very interesting 75 | trade-off: for a small penalty (often within 1-5%) in peak 76 | performance on the best of cases (all lookups successful), RH 77 | significantly improves on the worst-case over LP in general, up 78 | to more than a factor 4. 79 | Across the whole set of experiments, RH is always among 80 | the top performers, and even the best method for most cases. 81 | This observation holds for all data set sizes we tested. 82 | 83 | As a conclusion, 84 | in a write-heavy workload, quadratic probing looks as the best 85 | option in general. 86 | 87 | Our overall conclusion is that AoS outperforms 88 | SoA by a larger margin than the other way around. Inside 89 | caches (not shown), both methods are comparable in terms of 90 | lookup performance, with AoS performing slightly better. When 91 | using SIMD, SoA has an edge over AoS — at least on current 92 | hardware — because keys are already densely packed.
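The quoted results compare Robin Hood probing (RH) against linear probing (LP). To make that concrete, below is a minimal, hypothetical C sketch of just the insertion path of a Robin Hood open-addressing table. It is not the Lasca RTS hash table: the names (`rh_table`, `rh_put`) are made up for illustration, the hash is assumed to be precomputed (the design notes below settle on xxHash64), and growing at the chosen load factor as well as backward-shift deletion are assumed to happen elsewhere.

```c
/* Illustrative sketch of Robin Hood open addressing (insertion only). */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct {
    uint64_t hash;   /* 0 marks an empty slot in this toy version */
    int64_t  key;
    int64_t  value;
} rh_slot;

typedef struct {
    rh_slot *slots;
    size_t   capacity;   /* power of two, so masking replaces modulo */
} rh_table;

static rh_table rh_new(size_t capacity) {
    rh_table t = { calloc(capacity, sizeof(rh_slot)), capacity };
    return t;
}

/* How far a slot's current occupant sits from its ideal bucket. */
static size_t rh_probe_dist(const rh_table *t, uint64_t hash, size_t pos) {
    size_t ideal = (size_t)(hash & (t->capacity - 1));
    return (pos + t->capacity - ideal) & (t->capacity - 1);
}

static void rh_put(rh_table *t, uint64_t hash, int64_t key, int64_t value) {
    rh_slot cur = { hash ? hash : 1, key, value };  /* reserve 0 for "empty" */
    size_t pos  = (size_t)(cur.hash & (t->capacity - 1));
    size_t dist = 0;  /* how far cur has been pushed from its ideal bucket */
    for (;;) {
        rh_slot *s = &t->slots[pos];
        if (s->hash == 0) { *s = cur; return; }          /* empty: take it */
        if (s->hash == cur.hash && s->key == cur.key) {  /* same key: update */
            s->value = cur.value;
            return;
        }
        size_t resident = rh_probe_dist(t, s->hash, pos);
        if (resident < dist) {   /* resident is "richer": steal its slot */
            rh_slot tmp = *s;
            *s = cur;
            cur = tmp;           /* keep inserting the displaced entry */
            dist = resident;
        }
        pos = (pos + 1) & (t->capacity - 1);
        dist++;
    }
}

int main(void) {
    rh_table t = rh_new(16);
    /* deliberately colliding "hashes" (1..3) just to exercise probing */
    for (int64_t k = 0; k < 8; k++)
        rh_put(&t, (uint64_t)(k % 3 + 1), k, k * 10);
    for (size_t i = 0; i < t.capacity; i++)
        if (t.slots[i].hash)
            printf("slot %zu: key=%lld value=%lld\n",
                   i, (long long)t.slots[i].key, (long long)t.slots[i].value);
    free(t.slots);
    return 0;
}
```

The swap on `resident < dist` is what keeps the variance of probe lengths small, which in turn is what makes the backward-shift deletion mentioned in the design decisions below practical.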
93 | 94 | https://github.com/leo-yuriev/t1ha 95 | http://cyan4973.github.io/xxHash/ 96 | https://accidentallyquadratic.tumblr.com/post/153545455987/rust-hash-iteration-reinsertion 97 | https://github.com/google/highwayhash/issues/28 98 | https://medium.freecodecamp.org/hash-table-attack-8e4371fc5261 99 | https://rcoh.me/posts/hash-map-analysis/ 100 | http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/ 101 | https://probablydance.com/2017/02/26/i-wrote-the-fastest-hashtable/ 102 | 103 | ### Design Decisions based on research 104 | 105 | Open addressing with Robin Hood Probing and backward shift deletion 106 | Hash function [xxHash 64](http://cyan4973.github.io/xxHash/) 107 | Grow either by prime numbers or powers of 2 108 | Load factor threshold ≈ 0.6-0.7 109 | Collision count threshold ≈ log₂(n) 110 | -------------------------------------------------------------------------------- /examples/Data.lasca: -------------------------------------------------------------------------------- 1 | module Data 2 | -- Records 3 | -- Dynamic mode 4 | import Option 5 | 6 | data Test = Test(a: Int) 7 | 8 | data Point = Point(x: Int, y: Int, z: Test) 9 | 10 | data Expr = Ident(n: String) | Num(nm: Int) | No 11 | 12 | data StringList = Cons(v: String, tail: StringList) | Nil 13 | 14 | def main() = { 15 | t = Test(3); 16 | p1 = Point(12, 2, t); 17 | ident = Ident("test"); 18 | num = Num(1); 19 | no = No; 20 | some = Option.Some(1); 21 | list = Cons("1", Cons("2", Nil)); 22 | println(list.toString); 23 | println(ident.toString); 24 | println(ident.n); 25 | println(no.toString); 26 | s = p1.x + p1.y - p1.z.a; 27 | println(toString(s == 11)); 28 | println("Hello") 29 | } 30 | 31 | -------------------------------------------------------------------------------- /examples/Either.lasca: -------------------------------------------------------------------------------- 1 | module Either 2 | 3 | data Either a b = Left(left: a) | Right(right: b) 4 | 5 | def isLeft(self: Either a b) = match self { 6 | Left(l) -> true 7 | _ -> false 8 | } 9 | 10 | def isRight(self: Either a b) = match self { 11 | Left(l) -> false 12 | _ -> true 13 | } 14 | 15 | def map(self: Either a b, f: b -> c) = match self { 16 | Right(r) -> Right(f(r)) 17 | _ -> self 18 | } 19 | 20 | def main() = { 21 | l = Left("Test"); 22 | r = Right(123); 23 | r1 = map(r, { r -> r + 2 }); 24 | println("Left is ${isLeft(l)}, right is left ${r.isLeft}, right is right ${r.isRight}"); 25 | println("r1 should be 125 and is: ${r1}"); 26 | } -------------------------------------------------------------------------------- /examples/Json.lasca: -------------------------------------------------------------------------------- 1 | import Map 2 | import Array 3 | import String 4 | 5 | data JValue 6 | = JNull 7 | | JNum(n: Float) 8 | | JString(s: String) 9 | | JBool(v: Bool) 10 | | JArray(v: [JValue]) 11 | | JObject(v: Map String JValue) 12 | 13 | def jsonToString(js: JValue) = match js { 14 | JObject(m) -> 15 | if Map.isEmpty(m) then "{}" else { 16 | println(toString(m)); 17 | res = Array.makeArray(m.size, ""); 18 | var idx = 0; 19 | Map.foreachWithKey(m, { k, v -> 20 | setIndex(res, idx.readVar, "\"${k}\": ${jsonToString(v)}"); 21 | idx := idx.readVar + 1; 22 | }); 23 | s = String.join(", ", res); 24 | "{ ${s} }" 25 | } 26 | JNull -> "null" 27 | JNum(n) -> toString(n) 28 | JBool(v) -> toString(v) 29 | JString(v) -> "\"${v}\"" 30 | JArray(v) -> { 31 | values = Array.map(v, jsonToString); 32 | toString(values); 33 | } 34 | } 35 | 36 | def 
parseJson(str) = { 37 | "asdf" 38 | } 39 | 40 | def main() = { 41 | m = Map.insert(Map.single("message", JArray([JString(""), JNull])), "empty", JBool(true)); 42 | js = JArray([JNum(-3.14), JString("Pen Pineapple Apple Pen"), JObject(m), JObject(Map.empty()), JArray([])]); 43 | println(jsonToString(js)); 44 | } -------------------------------------------------------------------------------- /examples/binarytrees.lasca: -------------------------------------------------------------------------------- 1 | {- 2 | The Computer Language Benchmarks Game 3 | http://benchmarksgame.alioth.debian.org/ 4 | -} 5 | import Array 6 | 7 | data Tree a = Empty | Node(treeLeft: Tree a, treeRight: Tree a) 8 | 9 | def make(d) = 10 | if d == 0 then Node(Empty, Empty) 11 | else let d = d - 1 in Node(make(d), make(d)) 12 | 13 | def check(t) = match t { 14 | Empty -> 0 15 | Node(l, r) -> 1 + check(l) + check(r) 16 | } 17 | 18 | minDepth = 4 19 | maxDepth = { 20 | args = getArgs(); 21 | if Array.length(args) != 2 then 21 else toInt(args[1]) 22 | } 23 | stretchDepth = maxDepth + 1 24 | 25 | def pow(n) = if n == 0 then 1 else 2 * pow(n-1) 26 | 27 | def main() = { 28 | def loop1(d, i) = if i < ((maxDepth - d) / 2 + 1) then { 29 | def loop2(d, i, niter, c) = 30 | if i == niter then c else loop2(d, i + 1, niter, c + check(make(d))); 31 | dd = d + i * 2; 32 | niter = pow(maxDepth - dd + minDepth); 33 | c = loop2(dd, 0, niter, 0); 34 | println("${niter}\t trees of depth ${d}\t check: ${c}"); 35 | loop1(d, i + 1); 36 | } else 0; 37 | 38 | c = check(make(stretchDepth)); 39 | println("stretch tree of depth ${stretchDepth}\t check: ${c}"); 40 | longLivedTree = make(maxDepth); 41 | loop1(minDepth, 0); 42 | println("long lived tree of depth ${maxDepth}\t check: ${check(longLivedTree)}"); 43 | } 44 | -------------------------------------------------------------------------------- /examples/builtin.lasca: -------------------------------------------------------------------------------- 1 | import Bits 2 | import Option 3 | 4 | def testLiterals() = { 5 | a = 1234567890; 6 | b = -1234567890; 7 | c = true; 8 | d = false; 9 | e = 123.456; 10 | f = -123.45e-5; 11 | g = intToByte(127); 12 | h = intToByte(-128); 13 | i = "String"; 14 | j = (); 15 | l = [1, 2]; 16 | m = 0xDEADbeef; 17 | n = -0o755; 18 | println("${a} ${b} ${c} ${d} \$${e} ${f} ${g} ${h} ${i} ${j} ${l} ${m} ${n}"); 19 | } 20 | 21 | def bitwiseOperations() = { 22 | a = intAnd(5, 4); println(a.toString); 23 | a = intAnd(5, 2); println(a.toString); 24 | a = intOr(4, 1); println(a.toString); 25 | a = intXor(3, 3); println(a.toString); 26 | a = intShiftL(3, 3); println(a.toString); 27 | a = intShiftL(1, 63); println(a.toString); 28 | a = intShiftR(4611686018427387904, 62); println(a.toString); 29 | a = intNot(0); println(a.toString); 30 | a = intPopCount(9223372036854775807); println(a.toString); 31 | a = intPopCount(-1); println(a.toString); 32 | 33 | a = byteAnd(intToByte(5), intToByte(4)); println(a.toString); 34 | a = byteAnd(intToByte(5), intToByte(2)); println(a.toString); 35 | a = byteOr(intToByte(4), intToByte(1)); println(a.toString); 36 | a = byteXor(intToByte(3), intToByte(3)); println(a.toString); 37 | a = byteShiftL(intToByte(1), intToByte(7)); println(a.toString); 38 | a = byteNot(intToByte(0)); println(a.toString); 39 | } 40 | 41 | def logicalOperations() = { 42 | a = true and false; println(a.toString); 43 | a = true or false; println(a.toString); 44 | a = not false; println(a.toString); 45 | a = not false and false or true; println(a.toString); 46 | } 47 | 48 | 
def patternMatching() = { 49 | a = match 1 { 50 | 0 -> false 51 | 1 -> true 52 | _ -> false 53 | }; println(a.toString); 54 | a = match a { 55 | true -> 1 56 | false -> 0 57 | }; println(a.toString); 58 | a = match 3.14 { 59 | 3.14 -> 1 60 | _ -> 0 61 | }; println(a.toString); 62 | a = match "String" { 63 | "" -> "empty" 64 | "String but not this" -> "wrong string" 65 | "String" -> "Correct String" 66 | }; println(a); 67 | a = match Some(1) { 68 | None -> 0 69 | Some(0) -> 666 70 | Some(1) -> 777 71 | }; println(a.toString); 72 | } 73 | 74 | def innerFunctions() = { 75 | a = 3; 76 | def inner1(x) = x + a; -- check capturing outer vars 77 | println("${inner1(2)}"); 78 | 79 | def inner2(x) = { 80 | inner2 = 1; 81 | x + 5 + inner2; -- check shadowing 82 | }; 83 | println("${inner2(10)}"); 84 | 85 | def inner3(x) = if x == 0 then 0 else { 86 | println(toString(x)); 87 | inner3(x - 1); -- inner recursive 88 | }; 89 | inner3(3); 90 | } 91 | 92 | def main() = { 93 | __I_D_E_N_T_123_ = true; 94 | testLiterals(); 95 | bitwiseOperations(); 96 | logicalOperations(); 97 | patternMatching(); 98 | innerFunctions(); 99 | } -------------------------------------------------------------------------------- /examples/dynamic.lasca: -------------------------------------------------------------------------------- 1 | def id(x) = x 2 | 3 | def main() = { 4 | a = [1, "2"]; -- this should be an error at static mode, and work fine in dynamic mode 5 | c = 1; 6 | println(c.toString) 7 | } 8 | 9 | -------------------------------------------------------------------------------- /examples/factorial.lasca: -------------------------------------------------------------------------------- 1 | import Array 2 | 3 | def fact(n) = if n == 1 then 1 else n * fact(n - 1) 4 | 5 | def main() = { 6 | args = getArgs(); 7 | i = toInt(args[1]); 8 | println("Factorial of ${i} is: ${fact(i)}") 9 | } 10 | -------------------------------------------------------------------------------- /examples/hello.lasca: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env lasca -e 2 | def main() = println("Вітаю, Світе! 
Будь Lasca.") 3 | -------------------------------------------------------------------------------- /examples/lambda.lasca: -------------------------------------------------------------------------------- 1 | def test() = { 2 | def fact(n) = if n == 1 then 1 else n * fact(n - 1) ; 3 | println(toString(fact(3))) 4 | -- println(toString(inner() + three())) 5 | } 6 | 7 | def main() = { 8 | b = println; 9 | test(); 10 | c = "Hello"; 11 | a = { s, d -> b(c) }; 12 | a("test", 1) 13 | } 14 | -------------------------------------------------------------------------------- /examples/nbody.lasca: -------------------------------------------------------------------------------- 1 | import Array 2 | 3 | Pi = 3.141592653589793 4 | SolarMass = 4.0 * Pi * Pi 5 | DaysPerYear = 365.24 6 | 7 | Jupiter = [ 8 | 4.84143144246472090e+00, -- x 9 | -1.16032004402742839e+00, -- y 10 | -1.03622044471123109e-01, -- z 11 | 1.66007664274403694e-03 * DaysPerYear, -- vx 12 | 7.69901118419740425e-03 * DaysPerYear, -- vy 13 | -6.90460016972063023e-05 * DaysPerYear, -- vz 14 | 9.54791938424326609e-04 * SolarMass, -- mass 15 | ] 16 | 17 | Saturn = [ 18 | 8.34336671824457987e+00, 19 | 4.12479856412430479e+00, 20 | -4.03523417114321381e-01, 21 | -2.76742510726862411e-03 * DaysPerYear, 22 | 4.99852801234917238e-03 * DaysPerYear, 23 | 2.30417297573763929e-05 * DaysPerYear, 24 | 2.85885980666130812e-04 * SolarMass, 25 | ] 26 | 27 | Uranus = [ 28 | 1.28943695621391310e+01, 29 | -1.51111514016986312e+01, 30 | -2.23307578892655734e-01, 31 | 2.96460137564761618e-03 * DaysPerYear, 32 | 2.37847173959480950e-03 * DaysPerYear, 33 | -2.96589568540237556e-05 * DaysPerYear, 34 | 4.36624404335156298e-05 * SolarMass, 35 | ] 36 | 37 | Neptune = [ 38 | 1.53796971148509165e+01, 39 | -2.59193146099879641e+01, 40 | 1.79258772950371181e-01, 41 | 2.68067772490389322e-03 * DaysPerYear, 42 | 1.62824170038242295e-03 * DaysPerYear, 43 | -9.51592254519715870e-05 * DaysPerYear, 44 | 5.15138902046611451e-05 * SolarMass, 45 | ] 46 | 47 | Sun = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, SolarMass] 48 | 49 | def offsetMomentum(object: [Float], px, py, pz) = { 50 | vx = -px / SolarMass; 51 | vy = -py / SolarMass; 52 | vz = -pz / SolarMass; 53 | [ object[0], object[1], object[2], 54 | vx, vy, vz, 55 | object[6], 56 | ] 57 | } 58 | 59 | def init(bodies: [[Float]]) = { 60 | def go(bodies: [[Float]], i: Int, pxyz: [Float]) = { 61 | body = bodies[i]; 62 | updatedPs = [ 63 | pxyz[0] + body[3] * body[6], 64 | pxyz[1] + body[4] * body[6], 65 | pxyz[2] + body[5] * body[6], 66 | ]; 67 | if i == 0 then updatedPs else go(bodies, i - 1, updatedPs); 68 | }; 69 | 70 | len = length(bodies); 71 | pxyzInit = [0.0, 0.0, 0.0]; 72 | offsets = go(bodies, len - 1, pxyzInit); 73 | sun = offsetMomentum(bodies[0], offsets[0], offsets[1], offsets[2]); 74 | [sun, bodies[1], bodies[2], bodies[3], bodies[4]] 75 | } 76 | 77 | def advance(bodies: [[Float]], dt) = { 78 | def advanceInner(from: [Float], to: [Float], dt) = { 79 | dx = from[0] - to[0]; 80 | dy = from[1] - to[1]; 81 | dz = from[2] - to[2]; 82 | dSquared = dx * dx + dy * dy + dz * dz; 83 | distance = sqrt(dSquared); 84 | mag = dt / (dSquared * distance); 85 | toMass = to[6]; 86 | fromMass = from[6]; 87 | toMassMag = toMass * mag; 88 | fromMassMag = fromMass * mag; 89 | fromVx = from[3] - dx * toMassMag; 90 | fromVy = from[4] - dy * toMassMag; 91 | fromVz = from[5] - dz * toMassMag; 92 | toVx = to[3] + dx * fromMassMag; 93 | toVy = to[4] + dy * fromMassMag; 94 | toVz = to[5] + dz * fromMassMag; 95 | [ 96 | [from[0], from[1], from[2], fromVx, 
fromVy, fromVz, fromMass], 97 | [to[0], to[1], to[2], toVx, toVy, toVz, toMass], 98 | ] 99 | }; 100 | 101 | r01 = advanceInner(bodies[0], bodies[1], dt); 102 | r02 = advanceInner(r01[0], bodies[2], dt); 103 | r03 = advanceInner(r02[0], bodies[3], dt); 104 | r04 = advanceInner(r03[0], bodies[4], dt); 105 | 106 | r12 = advanceInner(r01[1], r02[1], dt); 107 | r13 = advanceInner(r12[0], r03[1], dt); 108 | r14 = advanceInner(r13[0], r04[1], dt); 109 | 110 | r23 = advanceInner(r12[1], r13[1], dt); 111 | r24 = advanceInner(r23[0], r14[1], dt); 112 | 113 | r34 = advanceInner(r23[1], r24[1], dt); 114 | 115 | sun = bodies[0]; 116 | r040 = r04[0]; 117 | sunVx = r040[3]; 118 | sunVy = r040[4]; 119 | sunVz = r040[5]; 120 | 121 | jupiter = bodies[1]; 122 | r140 = r14[0]; 123 | jupiterVx = r140[3]; 124 | jupiterVy = r140[4]; 125 | jupiterVz = r140[5]; 126 | 127 | saturn = bodies[2]; 128 | r240 = r24[0]; 129 | saturnVx = r240[3]; 130 | saturnVy = r240[4]; 131 | saturnVz = r240[5]; 132 | 133 | uranus = bodies[3]; 134 | r340 = r34[0]; 135 | uranusVx = r340[3]; 136 | uranusVy = r340[4]; 137 | uranusVz = r340[5]; 138 | 139 | neptune = bodies[4]; 140 | r341 = r34[1]; 141 | neptuneVx = r341[3]; 142 | neptuneVy = r341[4]; 143 | neptuneVz = r341[5]; 144 | 145 | [ 146 | [sun[0] + dt * sunVx, sun[1] + dt * sunVy, sun[2] + dt * sunVz, sunVx, sunVy, sunVz, sun[6]], 147 | [jupiter[0] + dt * jupiterVx, jupiter[1] + dt * jupiterVy, jupiter[2] + dt * jupiterVz, jupiterVx, jupiterVy, jupiterVz, jupiter[6]], 148 | [saturn[0] + dt * saturnVx, saturn[1] + dt * saturnVy, saturn[2] + dt * saturnVz, saturnVx, saturnVy, saturnVz, saturn[6]], 149 | [uranus[0] + dt * uranusVx, uranus[1] + dt * uranusVy, uranus[2] + dt * uranusVz, uranusVx, uranusVy, uranusVz, uranus[6]], 150 | [neptune[0] + dt * neptuneVx, neptune[1] + dt * neptuneVy, neptune[2] + dt * neptuneVz, neptuneVx, neptuneVy, neptuneVz, neptune[6]], 151 | ] 152 | 153 | } 154 | 155 | def squared(x: Float, y, z) = x * x + y * y + z * z 156 | 157 | def energy(bodies: [[Float]]) = { 158 | def energyInner(from: [Float], to: [Float]) = { 159 | dx = from[0] - to[0]; 160 | dy = from[1] - to[1]; 161 | dz = from[2] - to[2]; 162 | distance = sqrt(squared(dx, dy, dz)); 163 | from[6] * to[6] / distance; 164 | }; 165 | 166 | sun = bodies[0]; 167 | sunVx = sun[3]; 168 | sunVy = sun[4]; 169 | sunVz = sun[5]; 170 | sunMass = sun[6]; 171 | 172 | jupiter = bodies[1]; 173 | jupiterVx = jupiter[3]; 174 | jupiterVy = jupiter[4]; 175 | jupiterVz = jupiter[5]; 176 | jupiterMass = jupiter[6]; 177 | 178 | saturn = bodies[2]; 179 | saturnVx = saturn[3]; 180 | saturnVy = saturn[4]; 181 | saturnVz = saturn[5]; 182 | saturnMass = saturn[6]; 183 | 184 | uranus = bodies[3]; 185 | uranusVx = uranus[3]; 186 | uranusVy = uranus[4]; 187 | uranusVz = uranus[5]; 188 | uranusMass = uranus[6]; 189 | 190 | neptune = bodies[4]; 191 | neptuneVx = neptune[3]; 192 | neptuneVy = neptune[4]; 193 | neptuneVz = neptune[5]; 194 | neptuneMass = neptune[6]; 195 | 196 | -- Those are not variable reassignment, but shadowing instead, like in OCaml 197 | e = 0.5 * sunMass * squared(sunVx, sunVy, sunVz); 198 | e = e + 0.5 * jupiterMass * squared(jupiterVx, jupiterVy, jupiterVz); 199 | e = e + 0.5 * saturnMass * squared(saturnVx, saturnVy, saturnVz); 200 | e = e + 0.5 * uranusMass * squared(uranusVx, uranusVy, uranusVz); 201 | e = e + 0.5 * neptuneMass * squared(neptuneVx, neptuneVy, neptuneVz); 202 | 203 | e = e - energyInner(bodies[0], bodies[1]); 204 | e = e - energyInner(bodies[0], bodies[2]); 205 | e = e - 
energyInner(bodies[0], bodies[3]); 206 | e = e - energyInner(bodies[0], bodies[4]); 207 | 208 | e = e - energyInner(bodies[1], bodies[2]); 209 | e = e - energyInner(bodies[1], bodies[3]); 210 | e = e - energyInner(bodies[1], bodies[4]); 211 | 212 | e = e - energyInner(bodies[2], bodies[3]); 213 | e = e - energyInner(bodies[2], bodies[4]); 214 | 215 | e = e - energyInner(bodies[3], bodies[4]); 216 | 217 | e 218 | } 219 | 220 | def calculate(i, bodies) = if i > 0 then calculate(i - 1, advance(bodies, 0.01)) else bodies 221 | 222 | def main() = { 223 | -- set to 50000000 for real benchmark 224 | args = getArgs(); 225 | numIterations = toInt(args[1]); 226 | bodies = [Sun, Jupiter, Saturn, Uranus, Neptune]; 227 | initedBodies = init(bodies); 228 | println(toString(energy(initedBodies))); 229 | advanced = calculate(numIterations, initedBodies); 230 | println(toString(energy(advanced))); 231 | } 232 | -------------------------------------------------------------------------------- /examples/nbody2.lasca: -------------------------------------------------------------------------------- 1 | import Array 2 | 3 | Pi = 3.141592653589793 4 | SolarMass = 4.0 * Pi * Pi 5 | DaysPerYear = 365.24 6 | 7 | data Body = Body(x: Float, y: Float, z: Float, vx: Float, vy: Float, vz: Float, mass: Float) 8 | 9 | Jupiter = Body( 10 | 4.84143144246472090e+00, -- x 11 | -1.16032004402742839e+00, -- y 12 | -1.03622044471123109e-01, -- z 13 | 1.66007664274403694e-03 * DaysPerYear, -- vx 14 | 7.69901118419740425e-03 * DaysPerYear, -- vy 15 | -6.90460016972063023e-05 * DaysPerYear, -- vz 16 | 9.54791938424326609e-04 * SolarMass, -- mass 17 | ) 18 | 19 | Saturn = Body( 20 | 8.34336671824457987e+00, 21 | 4.12479856412430479e+00, 22 | -4.03523417114321381e-01, 23 | -2.76742510726862411e-03 * DaysPerYear, 24 | 4.99852801234917238e-03 * DaysPerYear, 25 | 2.30417297573763929e-05 * DaysPerYear, 26 | 2.85885980666130812e-04 * SolarMass, 27 | ) 28 | 29 | Uranus = Body( 30 | 1.28943695621391310e+01, 31 | -1.51111514016986312e+01, 32 | -2.23307578892655734e-01, 33 | 2.96460137564761618e-03 * DaysPerYear, 34 | 2.37847173959480950e-03 * DaysPerYear, 35 | -2.96589568540237556e-05 * DaysPerYear, 36 | 4.36624404335156298e-05 * SolarMass, 37 | ) 38 | 39 | Neptune = Body( 40 | 1.53796971148509165e+01, 41 | -2.59193146099879641e+01, 42 | 1.79258772950371181e-01, 43 | 2.68067772490389322e-03 * DaysPerYear, 44 | 1.62824170038242295e-03 * DaysPerYear, 45 | -9.51592254519715870e-05 * DaysPerYear, 46 | 5.15138902046611451e-05 * SolarMass, 47 | ) 48 | 49 | Sun = Body(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, SolarMass) 50 | 51 | def offsetMomentum(object, px, py, pz) = { 52 | vx = -px / SolarMass; 53 | vy = -py / SolarMass; 54 | vz = -pz / SolarMass; 55 | Body(object.x, object.y, object.z, vx, vy, vz, object.mass) 56 | } 57 | 58 | def init(bodies: [Body]) = { 59 | def go(bodies: [Body], i, pxyz: [Float]) = { 60 | body = bodies[i]; 61 | updatedPs = [ 62 | pxyz[0] + body.vx * body.mass, 63 | pxyz[1] + body.vy * body.mass, 64 | pxyz[2] + body.vz * body.mass, 65 | ]; 66 | if i == 0 then updatedPs else go(bodies, i - 1, updatedPs) 67 | }; 68 | 69 | len = length(bodies); 70 | pxyzInit = [0.0, 0.0, 0.0]; 71 | offsets = go(bodies, len - 1, pxyzInit); 72 | sun = offsetMomentum(bodies[0], offsets[0], offsets[1], offsets[2]); 73 | setIndex(bodies, 0, sun); 74 | bodies 75 | } 76 | 77 | def advanceInner(from, to, dt) = { 78 | dx = from.x - to.x; 79 | dy = from.y - to.y; 80 | dz = from.z - to.z; 81 | dSquared = dx * dx + dy * dy + dz * dz; 82 | distance = sqrt(dSquared); 83 
| mag = dt / (dSquared * distance); 84 | toMass = to.mass; 85 | fromMass = from.mass; 86 | toMassMag = toMass * mag; 87 | fromMassMag = fromMass * mag; 88 | fromVx = from.vx - dx * toMassMag; 89 | fromVy = from.vy - dy * toMassMag; 90 | fromVz = from.vz - dz * toMassMag; 91 | toVx = to.vx + dx * fromMassMag; 92 | toVy = to.vy + dy * fromMassMag; 93 | toVz = to.vz + dz * fromMassMag; 94 | [ 95 | Body(from.x, from.y, from.z, fromVx, fromVy, fromVz, fromMass), 96 | Body(to.x, to.y, to.z, toVx, toVy, toVz, toMass), 97 | ] 98 | } 99 | 100 | def updateIdx(bodies: [Body], i, j, from: Body, to: Body) = [ 101 | if i == 0 then from else if j == 0 then to else bodies[0], 102 | if i == 1 then from else if j == 1 then to else bodies[1], 103 | if i == 2 then from else if j == 2 then to else bodies[2], 104 | if i == 3 then from else if j == 3 then to else bodies[3], 105 | if i == 4 then from else if j == 4 then to else bodies[4], 106 | ] 107 | 108 | def update(body: Body, dt: Float) = Body( 109 | body.x + dt * body.vx, body.y + dt * body.vy, body.z + dt * body.vz, 110 | body.vx, body.vy, body.vz, body.mass) 111 | 112 | def advance(bodies: [Body], dt) = { 113 | def loop1(bodies, i, dt) = { 114 | def loop2(bodies: [Body], i: Int, j: Int, dt: Float) = 115 | if j < 5 then { 116 | res = advanceInner(bodies[i], bodies[j], dt); 117 | newBodies = updateIdx(bodies, i, j, res[0], res[1]); 118 | loop2(newBodies, i, j + 1, dt); 119 | } else bodies; 120 | 121 | if i < 5 then { 122 | res = loop2(bodies, i, i + 1, dt); 123 | loop1(res, i + 1, dt); 124 | } else bodies 125 | }; 126 | 127 | bodies1 = loop1(bodies, 0, dt); -- FIXME 128 | [ 129 | update(bodies1[0], dt), 130 | update(bodies1[1], dt), 131 | update(bodies1[2], dt), 132 | update(bodies1[3], dt), 133 | update(bodies1[4], dt), 134 | ] 135 | } 136 | 137 | def squared(x: Float, y: Float, z: Float) = x * x + y * y + z * z 138 | 139 | def energy(bodies: [Body]) = { 140 | def energyInner(from, to) = { 141 | dx = from.x - to.x; 142 | dy = from.y - to.y; 143 | dz = from.z - to.z; 144 | distance = sqrt(squared(dx, dy, dz)); 145 | from.mass * to.mass / distance; 146 | }; 147 | 148 | 149 | sun = bodies[0]; 150 | jupiter = bodies[1]; 151 | saturn = bodies[2]; 152 | uranus = bodies[3]; 153 | neptune = bodies[4]; 154 | 155 | -- Those are not variable reassignment, but shadowing instead, like in OCaml 156 | e = 0.5 * sun.mass * squared(sun.vx, sun.vy, sun.vz); 157 | e = e + 0.5 * jupiter.mass * squared(jupiter.vx, jupiter.vy, jupiter.vz); 158 | e = e + 0.5 * saturn.mass * squared(saturn.vx, saturn.vy, saturn.vz); 159 | e = e + 0.5 * uranus.mass * squared(uranus.vx, uranus.vy, uranus.vz); 160 | e = e + 0.5 * neptune.mass * squared(neptune.vx, neptune.vy, neptune.vz); 161 | 162 | e = e - energyInner(bodies[0], bodies[1]); 163 | e = e - energyInner(bodies[0], bodies[2]); 164 | e = e - energyInner(bodies[0], bodies[3]); 165 | e = e - energyInner(bodies[0], bodies[4]); 166 | 167 | e = e - energyInner(bodies[1], bodies[2]); 168 | e = e - energyInner(bodies[1], bodies[3]); 169 | e = e - energyInner(bodies[1], bodies[4]); 170 | 171 | e = e - energyInner(bodies[2], bodies[3]); 172 | e = e - energyInner(bodies[2], bodies[4]); 173 | 174 | e = e - energyInner(bodies[3], bodies[4]); 175 | 176 | e 177 | } 178 | 179 | def calculate(bodies: [Body], i) = if i > 0 then calculate(advance(bodies, 0.01), i - 1) else bodies 180 | 181 | def main() = { 182 | -- set to 50000000 for real benchmark 183 | args = getArgs(); 184 | numIterations = toInt(args[1]); 185 | bodies = [Sun, Jupiter, Saturn, 
Uranus, Neptune]; 186 | initedBodies = bodies.init; 187 | println(initedBodies.energy.toString); 188 | advanced = calculate(initedBodies, numIterations); 189 | println(toString(energy(advanced))); 190 | } 191 | -------------------------------------------------------------------------------- /examples/nbody3.lasca: -------------------------------------------------------------------------------- 1 | import Array 2 | 3 | Pi = 3.141592653589793 4 | SolarMass = 4.0 * Pi * Pi 5 | DaysPerYear = 365.24 6 | 7 | data Body = Body(x: Var Float, y: Var Float, z: Var Float, vx: Var Float, vy: Var Float, vz: Var Float, mass: Float) 8 | 9 | def body(x, y, z, vx, vy, vz, mass) = Body(Var(x), Var(y), Var(z), Var(vx), Var(vy), Var(vz), mass) 10 | 11 | Jupiter = body( 12 | 4.84143144246472090e+00, -- x 13 | -1.16032004402742839e+00, -- y 14 | -1.03622044471123109e-01, -- z 15 | 1.66007664274403694e-03 * DaysPerYear, -- vx 16 | 7.69901118419740425e-03 * DaysPerYear, -- vy 17 | -6.90460016972063023e-05 * DaysPerYear, -- vz 18 | 9.54791938424326609e-04 * SolarMass, -- mass 19 | ) 20 | 21 | Saturn = body( 22 | 8.34336671824457987e+00, 23 | 4.12479856412430479e+00, 24 | -4.03523417114321381e-01, 25 | -2.76742510726862411e-03 * DaysPerYear, 26 | 4.99852801234917238e-03 * DaysPerYear, 27 | 2.30417297573763929e-05 * DaysPerYear, 28 | 2.85885980666130812e-04 * SolarMass, 29 | ) 30 | 31 | Uranus = body( 32 | 1.28943695621391310e+01, 33 | -1.51111514016986312e+01, 34 | -2.23307578892655734e-01, 35 | 2.96460137564761618e-03 * DaysPerYear, 36 | 2.37847173959480950e-03 * DaysPerYear, 37 | -2.96589568540237556e-05 * DaysPerYear, 38 | 4.36624404335156298e-05 * SolarMass, 39 | ) 40 | 41 | Neptune = body( 42 | 1.53796971148509165e+01, 43 | -2.59193146099879641e+01, 44 | 1.79258772950371181e-01, 45 | 2.68067772490389322e-03 * DaysPerYear, 46 | 1.62824170038242295e-03 * DaysPerYear, 47 | -9.51592254519715870e-05 * DaysPerYear, 48 | 5.15138902046611451e-05 * SolarMass, 49 | ) 50 | 51 | Sun = body(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, SolarMass) 52 | 53 | def offsetMomentum(bodies: [Body]) = { 54 | var px = 0.0; 55 | var py = 0.0; 56 | var pz = 0.0; 57 | for(0, 5, { i -> 58 | ibody = bodies[i]; 59 | m = ibody.mass; 60 | px := px.readVar + ibody.vx.readVar * m; 61 | py := py.readVar + ibody.vy.readVar * m; 62 | pz := pz.readVar + ibody.vz.readVar * m; 63 | }); 64 | object = bodies[0]; 65 | object.vx := -px.readVar / SolarMass; 66 | object.vy := -py.readVar / SolarMass; 67 | object.vz := -pz.readVar / SolarMass; 68 | } 69 | 70 | def squared(x: Float, y: Float, z: Float) = x * x + y * y + z * z 71 | 72 | def advance(bodies: [Body], dt) = { 73 | for(0, 5 - 1, { i -> 74 | ibody = bodies[i]; 75 | imass = ibody.mass; 76 | for(i + 1, 5, { j -> 77 | jbody = bodies[j]; 78 | dx = ibody.x.readVar - jbody.x.readVar; 79 | dy = ibody.y.readVar - jbody.y.readVar; 80 | dz = ibody.z.readVar - jbody.z.readVar; 81 | dSquared = squared(dx, dy, dz); 82 | distance = sqrt(dSquared); 83 | mag = dt / (dSquared * distance); 84 | jmass = jbody.mass; 85 | ibody.vx := ibody.vx.readVar - dx * jmass * mag; 86 | ibody.vy := ibody.vy.readVar - dy * jmass * mag; 87 | ibody.vz := ibody.vz.readVar - dz * jmass * mag; 88 | 89 | jbody.vx := jbody.vx.readVar + dx * imass * mag; 90 | jbody.vy := jbody.vy.readVar + dy * imass * mag; 91 | jbody.vz := jbody.vz.readVar + dz * imass * mag; 92 | }); 93 | }); 94 | for(0, 5, { i -> 95 | body = bodies[i]; 96 | body.x := body.x.readVar + dt * body.vx.readVar; 97 | body.y := body.y.readVar + dt * body.vy.readVar; 98 | body.z := 
body.z.readVar + dt * body.vz.readVar; 99 | }); 100 | bodies 101 | } 102 | 103 | def energy(bodies: [Body]) = { 104 | var e = 0.0; 105 | for(0, 5, { i -> 106 | body = bodies[i]; 107 | e := e.readVar + 0.5 * body.mass * squared(body.vx.readVar, body.vy.readVar, body.vz.readVar); 108 | for(i + 1, 5, { j -> 109 | from = bodies[i]; 110 | to = bodies[j]; 111 | dx = from.x.readVar - to.x.readVar; 112 | dy = from.y.readVar - to.y.readVar; 113 | dz = from.z.readVar - to.z.readVar; 114 | distance = sqrt(squared(dx, dy, dz)); 115 | e := e.readVar - from.mass * to.mass / distance; 116 | }); 117 | }); 118 | e.readVar 119 | } 120 | 121 | def calculate(bodies: [Body], i) = if i > 0 then calculate(advance(bodies, 0.01), i - 1) else bodies 122 | 123 | def main() = { 124 | -- set to 50000000 for real benchmark 125 | args = getArgs(); 126 | numIterations = toInt(args[1]); 127 | bodies = [Sun, Jupiter, Saturn, Uranus, Neptune]; 128 | bodies.offsetMomentum; 129 | println(bodies.energy.toString); 130 | calculate(bodies, numIterations); 131 | println(toString(energy(bodies))); 132 | } 133 | -------------------------------------------------------------------------------- /examples/queen.lasca: -------------------------------------------------------------------------------- 1 | import List 2 | import String 3 | 4 | words = Cons("GOD", Cons("SAVE", Cons("THE", Cons("QUEEN", Nil)))) 5 | 6 | def permute(acc, words) = match words { 7 | Cons(w, ws) -> { 8 | String.foreach(w, { c -> 9 | ch = chr(c); 10 | permute([acc, ch].concat, ws); 11 | }); 12 | } 13 | Nil -> println(acc) 14 | } 15 | def main() = permute("", words) 16 | -------------------------------------------------------------------------------- /examples/regex.lasca: -------------------------------------------------------------------------------- 1 | import String 2 | 3 | def main() = { 4 | regex = compilePattern("(Scala|Haskell|Python|Rust|Ocaml|Java)"); 5 | println(toString(matchRegex(regex, "Haskell"))); 6 | replaced = regexReplace(regex, "Haskell or Python", "\$1 (consider Lasca instead of \$1)"); 7 | println(replaced); 8 | } -------------------------------------------------------------------------------- /examples/ski.lasca: -------------------------------------------------------------------------------- 1 | 2 | def i(x) = x 3 | def k(x, y) = x 4 | def s(x, y, z) = let a = x(z), b = y(z) in a(b) 5 | 6 | def main() = (k(println, 1))(i("Hello")) 7 | 8 | -------------------------------------------------------------------------------- /gencode/GenBench.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import System.IO 4 | import System.Exit 5 | import System.Environment 6 | import Control.Monad 7 | 8 | genTopLevel idx = "def test" ++ (show idx) ++ "() = let x = 1+1 in if x > 2 then 1 else 2" 9 | genLine idx = "x" ++ show idx ++ " = 1" ++ (if idx > 0 then " + x" ++ show (idx - 1) else "") ++ ";" 10 | 11 | 12 | genNLines funcId n = do 13 | putStrLn $ "def test" ++ show funcId ++ "() = {" 14 | loop "" 0 0 15 | putStrLn "true" 16 | putStrLn "}" 17 | where 18 | loop code lastPrintedLines idx = do 19 | let newline = genLine idx 20 | let updatedCode = code ++ newline 21 | putStrLn newline 22 | if (idx) >= n 23 | then return () -- hPutStrLn stderr ("Generated " ++ (show idx) ++ " lines") 24 | else if (idx - lastPrintedLines) > 1000 25 | then do 26 | let kloc = idx `div` 1000 27 | -- hPutStrLn stderr ("Generated " ++ (show kloc) ++ " klocs") 28 | loop updatedCode idx (idx + 1) 29 | else loop updatedCode 
lastPrintedLines (idx + 1) 30 | 31 | main :: IO () 32 | main = do 33 | args <- getArgs 34 | 35 | putStrLn "def main() = println(\"Done!\")" 36 | 37 | let (numFuncs, numLines) = case args of 38 | [] -> (10, 20) 39 | [numFuncs, numLines] -> (read numFuncs :: Integer, read numLines :: Integer) 40 | forM_ [1..numFuncs] $ \idx -> genNLines idx numLines 41 | return () 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /lasca.cabal: -------------------------------------------------------------------------------- 1 | name: lasca 2 | version: 0.0.2 3 | license: BSD3 4 | license-file: LICENSE 5 | author: Alexander Nemish 6 | maintainer: anemish@gmail.com 7 | build-type: Simple 8 | extra-source-files: README.md 9 | cabal-version: >= 1.10 10 | 11 | library 12 | default-extensions: OverloadedStrings 13 | RecordWildCards 14 | NamedFieldPuns 15 | FlexibleContexts 16 | PatternSynonyms 17 | StrictData 18 | build-depends: base >=4.9, 19 | text, 20 | utf8-string, 21 | haskeline, 22 | process, 23 | containers, 24 | multiset, 25 | prettyprinter >=1.1, 26 | -- fixed Lexer.float parsing or integer values 27 | megaparsec >= 6.4.0, 28 | scientific, 29 | lens >= 4.15, 30 | llvm-hs >= 5.0.0, 31 | llvm-hs-pure >= 5.0.0, 32 | -- llvm-hs-pretty >= 0.1.0.0, 33 | optparse-applicative, 34 | bytestring, 35 | murmur-hash, 36 | directory, 37 | filepath, 38 | mtl 39 | default-language: Haskell2010 40 | ghc-options: -Wincomplete-patterns 41 | -funbox-strict-fields 42 | -fhide-source-paths 43 | -fPIC 44 | 45 | hs-source-dirs: src/lib 46 | other-modules: Paths_lasca 47 | exposed-modules: 48 | Lasca.Compiler 49 | Lasca.Options 50 | Lasca.Infer 51 | Lasca.Desugar 52 | Lasca.Namer 53 | Lasca.Emit 54 | Lasca.EmitCommon 55 | Lasca.EmitDynamic 56 | Lasca.EmitStatic 57 | Lasca.Codegen 58 | Lasca.JIT 59 | Lasca.Lexer 60 | Lasca.Parser 61 | Lasca.Syntax 62 | Lasca.Type 63 | Lasca.Modules 64 | executable lasca 65 | other-extensions: GADTs 66 | build-depends: base >=4.9, 67 | lasca 68 | Extra-libraries: m, gc, pthread, pcre2-8 69 | default-language: Haskell2010 70 | ghc-options: -rtsopts 71 | -threaded 72 | -Wincomplete-patterns 73 | -funbox-strict-fields 74 | -fPIC 75 | -rdynamic 76 | if os(darwin) 77 | extra-libraries: lascartStatic 78 | ghc-options: -fwhole-archive-hs-libs 79 | else 80 | ghc-options: -optl-Wl,--whole-archive -optl-Wl,-llascartStatic -optl-Wl,--no-whole-archive 81 | hs-source-dirs: src/lasca 82 | main-is: Main.hs 83 | 84 | test-suite lasca-test 85 | type: exitcode-stdio-1.0 86 | hs-source-dirs: src/test 87 | main-is: TestMain.hs 88 | default-extensions: OverloadedStrings 89 | build-depends: base >=4.9, 90 | lasca, 91 | text, 92 | directory, 93 | bytestring, 94 | shelly, 95 | utf8-string, 96 | megaparsec >= 6.0.0, 97 | Glob, 98 | filepath, 99 | tasty, 100 | tasty-hunit, 101 | tasty-smallcheck, 102 | tasty-quickcheck, 103 | tasty-golden, 104 | tasty-program 105 | ghc-options: -threaded -rtsopts -fPIC 106 | if os(darwin) 107 | extra-libraries: lascartStatic 108 | ghc-options: -fwhole-archive-hs-libs 109 | else 110 | ghc-options: -optl-Wl,--whole-archive -optl-Wl,-llascartStatic -optl-Wl,--no-whole-archive 111 | Extra-libraries: gc, pthread, pcre2-8 112 | default-language: Haskell2010 113 | --executable gencode 114 | -- other-extensions: GADTs 115 | -- build-depends: base >=4.9 116 | -- default-language: Haskell2010 117 | -- hs-source-dirs: gencode 118 | -- main-is: GenBench.hs 119 | -------------------------------------------------------------------------------- /lasca.nix: 
-------------------------------------------------------------------------------- 1 | { mkDerivation, base, bytestring, containers, directory, filepath 2 | , boehmgc, Glob, haskeline, lascart, lens, llvm-hs, llvm-hs-pure 3 | , megaparsec, mtl, multiset, murmur-hash, optparse-applicative 4 | , pcre2, prettyprinter, process, scientific, shelly, stdenv, tasty 5 | , tasty-golden, tasty-hunit, tasty-program, tasty-quickcheck 6 | , tasty-smallcheck, text, utf8-string, pkgs 7 | }@args: 8 | mkDerivation { 9 | pname = "lasca"; 10 | version = "0.0.2"; 11 | src = ./.; 12 | isLibrary = false; 13 | isExecutable = true; 14 | libraryHaskellDepends = [ 15 | base bytestring containers directory filepath haskeline lens 16 | llvm-hs llvm-hs-pure megaparsec mtl multiset murmur-hash 17 | optparse-applicative prettyprinter process scientific text 18 | utf8-string pkgs.libffi lascart 19 | ]; 20 | executableHaskellDepends = [ base lascart ]; 21 | executableSystemDepends = [ boehmgc lascart pcre2 ]; 22 | testHaskellDepends = [ 23 | base bytestring filepath Glob megaparsec shelly tasty tasty-golden 24 | tasty-hunit tasty-program tasty-quickcheck tasty-smallcheck text 25 | utf8-string pkgs.libffi lascart 26 | ]; 27 | testSystemDepends = [ lascart pkgs.libffi ]; 28 | dontStrip = true; 29 | doCheck = true; 30 | doHaddock = false; 31 | preCheck = '' 32 | export PATH="$PATH:dist/build/lasca" 33 | export LASCAPATH="$src/libs/base:${stdenv.lib.getLib lascart}/lib" 34 | echo "New PATH = $PATH" 35 | echo "LASCAPATH = $LASCAPATH" 36 | mkdir -p "$out/libs/base" 37 | cp -r $src/libs/base/* "$out/libs/base" 38 | # cp "${stdenv.lib.getLib lascart}/lib/liblascartStatic.a" "$out/libs/base" 39 | # ld -v 2 40 | # lasca --verbose -O2 examples/hello.lasca 2>&1 41 | ''; 42 | license = stdenv.lib.licenses.bsd3; 43 | } 44 | -------------------------------------------------------------------------------- /lascart.nix: -------------------------------------------------------------------------------- 1 | {stdenv, cmake, libffi, boehmgc, pcre2, pkgconfig, zlib}: 2 | stdenv.mkDerivation rec { 3 | name = "lascart-${version}"; 4 | version = "0.0.2"; 5 | src = ./rts; 6 | nativeBuildInputs = [ pkgconfig cmake ]; 7 | buildInputs = [ pcre2 boehmgc zlib libffi ]; 8 | dontDisableStatic = true; 9 | cmakeFlags = [ 10 | "-DCMAKE_BUILD_TYPE=Debug" 11 | ]; 12 | dontStrip = true; 13 | meta = with stdenv.lib; { 14 | platforms = platforms.linux ++ platforms.darwin; 15 | license = licenses.bsd3; 16 | homepage = http://lasca-lang.org; 17 | description = "Lasca Runtime System"; 18 | }; 19 | } -------------------------------------------------------------------------------- /libs/base/Array.lasca: -------------------------------------------------------------------------------- 1 | module Array 2 | 3 | extern def unsafeCreateArray(size: Int): Array a = "createArray" 4 | extern def makeArray(size: Int, init: a): Array a = "makeArray" 5 | extern def append(first: Array a, second: Array a): Array a = "arrayAppend" 6 | extern def copy(src: Array a, srcPos: Int, dest: Array a, destPos: Int, length: Int): Unit = "arrayCopy" 7 | extern def getIndex(array: Array a, i: Int): a = "arrayGetIndex" 8 | extern def setIndex(array: Array a, i: Int, value: a): Unit = "arraySetIndex" 9 | extern def length(array: Array a): Int = "arrayLength" 10 | extern def init(n: Int, f: Int -> a): Array a = "arrayInit" 11 | 12 | def map(array, f) = { 13 | len = length(array); 14 | init(len, { idx -> f(array[idx]) }); 15 | } 16 | 17 | def foreach(array: [a], f: a -> b): Unit = { 18 | def 
foreachgo(array, f, i, len) = { 19 | if i < len then { 20 | -- no idea why, but otherwise LLVM doesn't do tail call optimization in foreachgo 21 | apply(f, array[i]); 22 | foreachgo(array, f, i + 1, len); 23 | } else () 24 | }; 25 | foreachgo(array, f, 0, length(array)); 26 | } 27 | 28 | def range(start: Int, end: Int, step: Int): Array Int = { 29 | init((end - start) / step + 1, { i -> start + i * step }) 30 | } 31 | 32 | def transform(array: Array a, f: Int -> a -> a): Array a = { 33 | len = length(array); 34 | def transformGo(array, f, i, len) = { 35 | if i < len then { 36 | setIndex(array, i, f(i, array[i])); 37 | transformGo(array, f, i + 1, len) 38 | } else () 39 | }; 40 | transformGo(array, f, 0, len) 41 | } 42 | 43 | def testArray() = { 44 | a = makeArray(10, "a"); 45 | b = makeArray(10, "b"); 46 | c = init(5, { i -> i.toString }); 47 | d = range(2, 8, 3); 48 | setIndex(a, 2, b[1]); 49 | println(toString(a)); 50 | println(toString(b)); 51 | println(toString(c)); 52 | println(toString(d)); 53 | println(toString(append(a, b))); 54 | copy(b, 0, a, 4, 5); 55 | println(toString(a)); 56 | } 57 | 58 | 59 | def main() = { 60 | testArray(); 61 | println("Hello") 62 | } 63 | -------------------------------------------------------------------------------- /libs/base/ArrayBuffer.lasca: -------------------------------------------------------------------------------- 1 | import Array 2 | 3 | data ArrayBuffer a = ArrayBuffer( 4 | array: Var (Array a), 5 | getSize: Var Int, 6 | ) 7 | 8 | emptyArray = Array.unsafeCreateArray(0) 9 | initialSize = 16 10 | 11 | def size(self: ArrayBuffer a) = self.getSize.readVar 12 | 13 | def new() = make(initialSize) 14 | 15 | def make(initialCapacity: Int) = if initialCapacity == 0 16 | then ArrayBuffer(Var(emptyArray), Var(0)) 17 | else if initialCapacity > 0 18 | then ArrayBuffer(Var(Array.unsafeCreateArray(initialCapacity)), Var(0)) 19 | else die("Illegal initial capacity ${initialCapacity}, should be >= 0") 20 | 21 | def fromArray(a: Array a) = { 22 | ab = make(Array.length(a)); 23 | appendArray(ab, a); 24 | } 25 | 26 | def isEmpty(self: ArrayBuffer a): Bool = self.getSize.readVar == 0 27 | 28 | def getIndex(self, idx) = (self.array.readVar)[idx] 29 | def setIndex(self, idx, value) = Array.setIndex(self.array.readVar, idx, value) 30 | 31 | def clear(self) = reduceToSize(self, 0) 32 | 33 | def ensureSize(self: ArrayBuffer a, n: Int): Unit = { 34 | sz = Array.length(self.array.readVar); 35 | var newSize = sz; 36 | if n > sz then { 37 | def calc(n, newSize) = if n > newSize.readVar then { 38 | newSize := newSize.readVar * 2; 39 | println("${n} ${newSize.readVar}"); 40 | calc(n, newSize); 41 | } else (); 42 | calc(n, newSize); 43 | newArray = Array.unsafeCreateArray(newSize.readVar); 44 | Array.copy(self.array.readVar, 0, newArray, 0, self.getSize.readVar); 45 | writeVar(self.array, newArray); 46 | } else () 47 | } 48 | 49 | def reduceToSize(self, sz: Int): Unit = { 50 | require(sz <= self.size); 51 | self.getSize := sz; 52 | } 53 | 54 | def append(self: ArrayBuffer a, elem) = { 55 | ensureSize(self, self.size + 1); 56 | a = self.array.readVar; 57 | Array.setIndex(a, self.getSize.readVar, elem); 58 | self.getSize := self.size + 1; 59 | self; 60 | } 61 | 62 | def prepend(self, elem) = { 63 | ensureSize(self, self.size + 1); 64 | a = self.array.readVar; 65 | Array.copy(a, 0, a, 1, self.size); 66 | Array.setIndex(a, 0, elem); 67 | self.getSize := self.size + 1; 68 | self 69 | } 70 | 71 | def appendArray(self: ArrayBuffer a, elems: Array a) = { 72 | sz = 
Array.length(elems); 73 | ensureSize(self, self.size + sz); 74 | a = self.array.readVar; 75 | var i = self.size; 76 | Array.foreach(elems, { elem -> 77 | Array.setIndex(a, i.readVar, elem); 78 | i := i.readVar + 1; 79 | }); 80 | self.getSize := self.size + sz; 81 | self; 82 | } 83 | 84 | def insertArray(self, n: Int, seq: Array a): Unit = { 85 | if n < 0 or n > self.size 86 | then die("Index out of bounds: ${n}") 87 | else { 88 | len = Array.length(seq); 89 | newSize = self.size + len; 90 | ensureSize(self, newSize); 91 | a = self.array.readVar; 92 | Array.copy(a, n, a, n + len, self.size - n); 93 | var i = n; 94 | Array.foreach(seq, { elem -> 95 | Array.setIndex(a, i.readVar, elem); 96 | i := i.readVar + 1; 97 | }); 98 | self.getSize := newSize; 99 | } 100 | } 101 | 102 | def prependArray(self: ArrayBuffer a, elems: Array a) = { 103 | insertArray(self, 0, elems); 104 | self; 105 | } 106 | 107 | def remove(self, n: Int, count: Int) = { 108 | if count < 0 then die("removing negative number of elements: ${count}") 109 | else if count == 0 then () 110 | else if n < 0 or n > self.size - count then die("at ${n} deleting ${count}") 111 | else { 112 | a = self.array.readVar; 113 | Array.copy(a, n + count, a, n, self.size - (n + count)); 114 | reduceToSize(self, self.size - count); 115 | } 116 | } 117 | 118 | def toArray(self) = { 119 | a = self.array.readVar; 120 | Array.init(self.size, {idx -> a[idx]}) 121 | } 122 | 123 | def main() = { 124 | ab = make(3); 125 | println("${ab}"); 126 | append(ab, 1); 127 | println("${ab}"); 128 | append(ab, 2); 129 | println("${ab}"); 130 | append(ab, 3); 131 | prepend(ab, 0); 132 | println("${ab}"); 133 | append(ab, 4); 134 | println("${ab}"); 135 | ensureSize(ab, 20); 136 | appendArray(ab, [5, 6, 7, 8]); 137 | println("${ab}"); 138 | remove(ab, 1, 2); 139 | println("${ab}"); 140 | insertArray(ab, 4, [7,7,7]); 141 | prependArray(ab, [-2, -1]); 142 | println("${ab}"); 143 | setIndex(ab, 1, 42); 144 | println("Test ${ab.size} ${ab.isEmpty} ${getIndex(ab, 1)}"); 145 | ab.clear; 146 | println("${ab}"); 147 | } -------------------------------------------------------------------------------- /libs/base/Bits.lasca: -------------------------------------------------------------------------------- 1 | extern def byteAnd(a: Byte, b: Byte): Byte = "byteAnd" 2 | extern def byteOr(a: Byte, b: Byte): Byte = "byteOr" 3 | extern def byteXor(a: Byte, b: Byte): Byte = "byteXor" 4 | extern def byteShiftL(a: Byte, b: Byte): Byte = "byteShiftL" 5 | -- arithmetical shift right 6 | extern def byteShiftR(a: Byte, b: Byte): Byte = "byteShiftR" 7 | extern def byteNot(a: Byte): Byte = "byteNot" 8 | 9 | extern def intAnd(a: Int, b: Int): Int = "intAnd" 10 | extern def intOr(a: Int, b: Int): Int = "intOr" 11 | extern def intXor(a: Int, b: Int): Int = "intXor" 12 | extern def intShiftL(a: Int, b: Int): Int = "intShiftL" 13 | -- arithmetical shift right 14 | extern def intShiftR(a: Int, b: Int): Int = "intShiftR" 15 | extern def intNot(a: Int): Int = "intNot" 16 | extern def intPopCount(a: Int): Int = "intPopCount" -------------------------------------------------------------------------------- /libs/base/ByteArray.lasca: -------------------------------------------------------------------------------- 1 | module ByteArray 2 | 3 | extern def create(size: Int): ByteArray = "createByteArray" 4 | --extern def make(size: Int, init: Byte): ByteArray = "makeByteArray" 5 | extern def copy(src: ByteArray, srcPos: Int, dest: ByteArray, destPos: Int, length: Int): Unit = "byteArrayCopy" 6 | extern def
getIndex(array: ByteArray, i: Int): Byte = "byteArrayGetIndex" 7 | extern def setIndex(array: ByteArray, i: Int, value: Byte): Unit = "byteArraySetIndex" 8 | extern def length(array: ByteArray): Int = "byteArrayLength" 9 | 10 | def testArray() = { 11 | a = create(10); 12 | b = create(10); 13 | setIndex(a, 2, intToByte(2)); 14 | setIndex(b, 3, intToByte(3)); 15 | setIndex(a, 3, getIndex(b, 3)); 16 | copy(a, 0, b, 1, 9); -- FIXME 17 | println(toString(a)); 18 | println(toString(b)); 19 | println(toString(b.length)); 20 | } 21 | 22 | def main() = { 23 | testArray(); 24 | println("Hello") 25 | } 26 | -------------------------------------------------------------------------------- /libs/base/List.lasca: -------------------------------------------------------------------------------- 1 | module List 2 | 3 | data List a = Nil | Cons(head: a, tail: List a) 4 | 5 | def isEmpty(l) = match l { 6 | Nil -> true 7 | Cons(_, _) -> false 8 | } 9 | 10 | def foldl(self: List a, z: b, f: a -> b -> b): b = match self { 11 | Nil -> z 12 | Cons(hd, tl) -> foldl(tl, f(hd, z), f) 13 | } 14 | 15 | def foldr(self: List a, z: b, f: a -> b -> b): b = match self { 16 | Nil -> z 17 | Cons(hd, tl) -> f(hd, foldr(tl, z, f)) 18 | } 19 | 20 | def length(l) = foldl(l, 0, { e, acc -> acc + 1}) 21 | 22 | def map(self, f) = foldl(self, Nil, { e, acc -> Cons(f(e), acc) }) 23 | 24 | def filter(self, p: a -> Bool) = match self { 25 | Nil -> Nil 26 | Cons(hd, tl) -> if p(hd) then Cons(hd, filter(tl, p)) else filter(tl, p) 27 | } 28 | 29 | 30 | def main() = { 31 | list = Cons("1", Cons("2", Nil)); 32 | len = list.length; 33 | ints = map(list, toInt); 34 | ints2 = map(ints, { i -> i + 10}); 35 | println(toString(ints2)); 36 | println("Hello world! ${list}. Is empty: ${list.isEmpty}, length = ${len.toString}"); 37 | } -------------------------------------------------------------------------------- /libs/base/Map.lasca: -------------------------------------------------------------------------------- 1 | module Map 2 | 3 | import Option 4 | 5 | data Map k a = Bin(binSize: Int, binKey: k, binValue: a, ltree: Map k a, rtree: Map k a) 6 | | Tip 7 | 8 | data View k a = View(viewKey: k, viewValue: a, viewMap: Map k a) 9 | 10 | def empty() = Tip 11 | 12 | def isEmpty(self: Map k a): Bool = match self { 13 | Tip -> true 14 | _ -> false 15 | } 16 | 17 | def size(self: Map k a): Int = match self { 18 | Tip -> 0 19 | Bin(s, _, _, _, _) -> s 20 | } 21 | 22 | def lookup(self: Map k a, key: k): a = match self { 23 | Tip -> None 24 | Bin(_, kx, x, l, r) -> match runtimeCompare(key, kx) { 25 | -1 -> lookup(l, key) 26 | 1 -> lookup(r, key) 27 | 0 -> Some(x) 28 | } 29 | } 30 | 31 | def member(self, key) = match lookup(self, key) { 32 | None -> false 33 | _ -> true 34 | } 35 | 36 | def single(key, value) = Bin(1, key, value, Tip, Tip) 37 | 38 | Delta = 3 39 | 40 | Ratio = 2 41 | 42 | def bin(k, x, l, r) = Bin(l.size + r.size + 1, k, x, l, r) 43 | 44 | def singleL(k1, x1, t1, t) = match t { 45 | Bin(_, k2, x2, t2, t3) -> bin(k2, x2, bin(k1, x1, t1, t2), t3) 46 | } 47 | 48 | def singleR(k1, x1, t, t3) = match t { 49 | Bin(_, k2, x2, t1, t2) -> bin(k2, x2, t1, bin(k1, x1, t2, t3)) 50 | } 51 | 52 | def doubleL(k1, x1, t1, t) = match t { 53 | Bin(_, k2, x2, Bin(_, k3, x3, t2, t3), t4) -> bin(k3, x3, bin(k1, x1, t1, t2), bin(k2, x2, t3, t4)) 54 | } 55 | 56 | def doubleR(k1, x1, t, t4) = match t { 57 | Bin(_, k2, x2, t1, Bin(_, k3, x3, t2, t3)) -> bin(k3, x3, bin(k2, x2, t1, t2), bin(k1, x1, t3, t4)) 58 | } 59 | 60 | 61 | def rotateL(k, x, l, r) = match r { 62 
| Bin(_, _, _, ly, ry) -> if ly.size < Ratio * ry.size then singleL(k, x, l, r) else doubleL(k, x, l, r) 63 | } 64 | 65 | def rotateR(k, x, l, r) = match l { 66 | Bin(_, _, _, ly, ry) -> if ry.size < Ratio * ly.size then singleR(k, x, l, r) else doubleR(k, x, l, r) 67 | } 68 | 69 | def balance(k, x, l, r) = { 70 | if l.size + r.size <= 1 then Bin(l.size + r.size + 1, k, x, l, r) 71 | else if r.size > Delta * l.size then rotateL(k, x, l, r) 72 | else if l.size > Delta * r.size then rotateR(k, x, l, r) 73 | else Bin(l.size + r.size + 1, k, x, l, r) 74 | } 75 | 76 | 77 | 78 | def insert(self, k, x) = { 79 | def insertGo(self, orig, kx, x) = match self { 80 | Tip -> single(orig, x) 81 | Bin(s, ky, y, l, r) -> match runtimeCompare(kx, ky) { 82 | -1 -> let l1 = insertGo(l, orig, kx, x) in balance(ky, y, l1, r) 83 | 1 -> let r1 = insertGo(r, orig, kx, x) in balance(ky, y, l, r1) 84 | 0 -> Bin(s, orig, x, l, r) 85 | } 86 | }; 87 | insertGo(self, k, k, x) 88 | } 89 | 90 | def minViewSure(k, x, l, r) = match l { 91 | Tip -> View(k, x, r) 92 | Bin(_, kx, xl, ll, lr) -> let view = minViewSure(kx, xl, ll, lr) 93 | in View(view.viewKey, view.viewValue, balance(k, x, view.viewMap, r)) 94 | } 95 | 96 | def maxViewSure(k, x, l, r) = match r { 97 | Tip -> View(k, x, l) 98 | Bin(_, kr, xr, rl, rr) -> let view = maxViewSure(kr, xr, rl, rr) 99 | in View(view.viewKey, view.viewValue, balance(k, x, l, view.viewMap)) 100 | } 101 | 102 | {- 103 | glues two trees together. 104 | Assumes that [l] and [r] are already balanced with respect to each other. 105 | -} 106 | def glue(l, r) = match l { 107 | Tip -> r 108 | Bin(sl, kl, xl, ll, lr) -> match r { 109 | Tip -> l 110 | Bin(sr, kr, xr, rl, rr) -> 111 | if sl > sr then match maxViewSure(kl, xl, ll, lr) { 112 | View(km, m, l1) -> balance(km, m, l1, r) 113 | } else match minViewSure(kr, xr, rl, rr) { 114 | View(km, m, r1) -> balance(km, m, l, r1) 115 | } 116 | } 117 | } 118 | 119 | def delete(self, k) = match self { 120 | Tip -> Tip 121 | Bin(_, kx, x, l, r) -> match runtimeCompare(k, kx) { 122 | -1 -> balance(kx, x, delete(l, k), r) 123 | 1 -> balance(kx, x, l, delete(r, k)) 124 | 0 -> glue(l, r) 125 | } 126 | } 127 | 128 | def mapWithKey(self, f) = match self { 129 | Tip -> Tip 130 | Bin(sx, kx, x, l, r) -> let x1 = f(kx, x) in Bin(sx, kx, x1, mapWithKey(l, f), mapWithKey(r, f)) 131 | } 132 | 133 | def foreachWithKey(self, f): Unit = match self { 134 | Tip -> Tip 135 | Bin(sx, kx, x, l, r) -> { 136 | f(kx, x); 137 | foreachWithKey(l, f); 138 | foreachWithKey(r, f); 139 | } 140 | } 141 | 142 | def test(i, m) = if i > 0 then test(i - 1, insert(m, i, i.toString)) else m 143 | 144 | def main() = { 145 | emp = Tip; 146 | one = single(1, "one"); 147 | two = insert(one, 2, "two"); 148 | three = insert(two, 3, "three"); 149 | four = insert(three, 4, "four"); 150 | println("runtimeCompare = ${runtimeCompare(1, 2)}"); 151 | println("Test isEmpty should be true: ${emp.isEmpty}"); 152 | println("Test size should be 0: ${emp.size}"); 153 | println("Test isEmpty should be false: ${one.isEmpty}"); 154 | println("Test isEmpty should be true: ${isEmpty(delete(one, 1))}"); 155 | 156 | println("Test size should be 1: ${one.size}"); 157 | println("Lookup should be one: ${lookup(one, 1)}"); 158 | println("Lookup should be none: ${lookup(one, 2)}"); 159 | 160 | println("Test size should be 2: ${two.size}"); 161 | println("Lookup should be two: ${lookup(two, 2)}"); 162 | println("Lookup should be none: ${lookup(two, 3)}"); 163 | 164 | println("Test size should be 3: ${three.size}"); 165 | 
println("Lookup should be three: ${lookup(three, 3)}"); 166 | println("Lookup should be none: ${lookup(three, 4)}"); 167 | 168 | println("Test size should be 4: ${four.size}"); 169 | println("Lookup should be four: ${lookup(four, 4)}"); 170 | println("4 is member of four: ${member(four, 4)}"); 171 | println("Lookup should be none: ${lookup(four, 5)}"); 172 | println(toString(four)); 173 | 174 | thou = test(1000, empty()); 175 | println(toString(thou.size)); 176 | println(toString(size(delete(thou, 1000)))); 177 | 178 | } -------------------------------------------------------------------------------- /libs/base/Option.lasca: -------------------------------------------------------------------------------- 1 | module Option 2 | 3 | data Option a = None | Some(v: a) 4 | 5 | def map(self, f: a -> b) = match self { 6 | None -> None 7 | Some(value) -> Some(f(value)) 8 | } 9 | 10 | 11 | def main() = { 12 | println("Even ${None} is ${Some(true)}") 13 | } 14 | -------------------------------------------------------------------------------- /libs/base/Prelude.lasca: -------------------------------------------------------------------------------- 1 | module Prelude 2 | 3 | extern def libcErrno(): Int = "libcErrno" 4 | extern def libcError(error: Int): String = "libcError" 5 | extern def libcCurError(): String = "libcCurError" 6 | 7 | extern def print(s: String): Unit = "print" 8 | extern def println(s: String): Unit = "println" 9 | extern def toString(a: a): String = "toString" 10 | extern def sqrt(a: Float): Float = "sqrt" 11 | extern def getArgs(): Array String = "getArgs" 12 | extern def toInt(s: String): Int = "toInt" 13 | extern def concat(strings: Array String): String = "concat" 14 | extern def exit(code: Int): a = "exit" 15 | 16 | extern def runtimeIsConstr(constr: a, name: String): Bool = "runtimeIsConstr" 17 | extern def runtimeCheckTag(value: a, tag: Int): Bool = "runtimeCheckTag" 18 | extern def runtimeCompare(lhs: a, rhs: a): Int = "runtimeCompare" 19 | 20 | extern def intToByte(i: Int): Byte = "intToByte" 21 | extern def byteToInt(i: Byte): Int = "byteToInt" 22 | extern def intToInt16(i: Int): Int16 = "intToInt16" 23 | extern def int16ToInt(i: Int16): Int = "int16ToInt" 24 | extern def intToInt32(i: Int): Int32 = "intToInt32" 25 | extern def int32ToInt(i: Int32): Int = "int32ToInt" 26 | extern def intToFloat(i: Int): Float = "intToFloat64" 27 | extern def floatToInt(i: Float): Int = "float64ToInt" 28 | 29 | extern def intRem(a: Int, b: Int): Int = "intRem" 30 | 31 | extern def hashCode(value: a): Int = "lascaHashCode" 32 | 33 | data Var a = Var(readVar: a) 34 | 35 | data FileHandle 36 | 37 | extern def openFile(filename: String, mode: String): FileHandle = "lascaOpenFile" 38 | extern def readFile(filename: String): String = "lascaReadFile" 39 | extern def writeFile(filename: String, string: String): Unit = "lascaWriteFile" 40 | 41 | extern def writeVar(ref: Var a, value: a): Var a = "writeVar" 42 | 43 | extern def getCwd(): String = "lascaGetCwd" 44 | extern def chdir(path: String): Option String = "lascaChdir" 45 | extern def getEnv(name: String): Option String = "getEnv" 46 | extern def lascaSetEnv(name: String, value: String, replace: Bool): Int = "setEnv" 47 | extern def lascaUnsetEnv(name: String): Int = "unsetEnv" 48 | 49 | def unarynot(e: Bool): Bool = if e then false else true 50 | 51 | def die(msg: String): a = { 52 | println(msg); 53 | exit(1); 54 | } 55 | 56 | def undefined() = die("undefined") 57 | 58 | def require(req: Bool) = if req then () else die("Requirement failed") 
59 | 60 | def setEnv(name: String, value: String, replace: Bool): Unit = { 61 | if lascaSetEnv(name, value, replace) == -1 then die("setEnv: ${libcCurError()}") else () 62 | } 63 | 64 | def unsetEnv(name: String): Unit = { 65 | if lascaUnsetEnv(name) == -1 then die("unsetEnv: ${libcCurError()}") else () 66 | } 67 | 68 | def apply(f, arg) = f(arg) 69 | def apply2(f, arg1, arg2) = f(arg1, arg2) 70 | 71 | def for(start: Int, end: Int, f: Int -> a) = { 72 | if start < end then { 73 | apply(f, start); -- no idea why, but without apply LLVM doesn't do tail call optimization 74 | for(start + 1, end, f); 75 | } else (); 76 | } 77 | -------------------------------------------------------------------------------- /libs/base/String.lasca: -------------------------------------------------------------------------------- 1 | import Array 2 | 3 | {- 4 | Strings are UTF-8 encoded. 5 | `Char` and `Code Point` mean valid Unicode scalar values, i.e. 0-0xd7ff, 0xe000-0x10ffff 6 | http://unicode.org/glossary/#unicode_scalar_value 7 | -} 8 | 9 | -- length of UTF-8 encoded byte string 10 | extern def bytesCount(s: String): Int = "bytesLength" 11 | extern def chr(codePoint: Int32): String = "codePointToString" 12 | extern def fromCharArray(chars: Array Int32): String = "codePointsToString" 13 | extern def charToLower(codePoint: Int32): Int32 = "utf8proc_tolower" 14 | extern def charToUpper(codePoint: Int32): Int32 = "utf8proc_toupper" 15 | extern def charToTitle(codePoint: Int32): Int32 = "utf8proc_totitle" 16 | extern def isValidUnicodeScalar(codePoint: Int32): Bool = "utf8proc_codepoint_valid" 17 | extern def iterate(s: String, f: Int32 -> Bool): Unit = "codePointsIterate" 18 | extern def graphemeIterate(s: String, f: String -> Bool): Unit = "graphemesIterate" 19 | extern def utf8procCategory(c: Int32): Int = "utf8proc_category" 20 | 21 | data GeneralCategory 22 | = UppercaseLetter -- ^ Lu: Letter, Uppercase 23 | | LowercaseLetter -- ^ Ll: Letter, Lowercase 24 | | TitlecaseLetter -- ^ Lt: Letter, Titlecase 25 | | ModifierLetter -- ^ Lm: Letter, Modifier 26 | | OtherLetter -- ^ Lo: Letter, Other 27 | | NonSpacingMark -- ^ Mn: Mark, Non-Spacing 28 | | SpacingCombiningMark -- ^ Mc: Mark, Spacing Combining 29 | | EnclosingMark -- ^ Me: Mark, Enclosing 30 | | DecimalNumber -- ^ Nd: Number, Decimal 31 | | LetterNumber -- ^ Nl: Number, Letter 32 | | OtherNumber -- ^ No: Number, Other 33 | | ConnectorPunctuation -- ^ Pc: Punctuation, Connector 34 | | DashPunctuation -- ^ Pd: Punctuation, Dash 35 | | OpenPunctuation -- ^ Ps: Punctuation, Open 36 | | ClosePunctuation -- ^ Pe: Punctuation, Close 37 | | InitialQuote -- ^ Pi: Punctuation, Initial quote 38 | | FinalQuote -- ^ Pf: Punctuation, Final quote 39 | | OtherPunctuation -- ^ Po: Punctuation, Other 40 | | MathSymbol -- ^ Sm: Symbol, Math 41 | | CurrencySymbol -- ^ Sc: Symbol, Currency 42 | | ModifierSymbol -- ^ Sk: Symbol, Modifier 43 | | OtherSymbol -- ^ So: Symbol, Other 44 | | Space -- ^ Zs: Separator, Space 45 | | LineSeparator -- ^ Zl: Separator, Line 46 | | ParagraphSeparator -- ^ Zp: Separator, Paragraph 47 | | Control -- ^ Cc: Other, Control 48 | | Format -- ^ Cf: Other, Format 49 | | Surrogate -- ^ Cs: Other, Surrogate 50 | | PrivateUse -- ^ Co: Other, Private Use 51 | | NotAssigned -- ^ Cn: Other, Not Assigned 52 | 53 | def generalCategory(char: Int32): GeneralCategory = match utf8procCategory(char) { 54 | 0 -> NotAssigned 55 | 1 -> UppercaseLetter 56 | 2 -> LowercaseLetter 57 | 3 -> TitlecaseLetter 58 | 4 -> ModifierLetter 59 | 5 -> OtherLetter 60 | 6 -> 
NonSpacingMark 61 | 7 -> SpacingCombiningMark 62 | 8 -> EnclosingMark 63 | 9 -> DecimalNumber 64 | 10 -> LetterNumber 65 | 11 -> OtherNumber 66 | 12 -> ConnectorPunctuation 67 | 13 -> DashPunctuation 68 | 14 -> OpenPunctuation 69 | 15 -> ClosePunctuation 70 | 16 -> InitialQuote 71 | 17 -> FinalQuote 72 | 18 -> OtherPunctuation 73 | 19 -> MathSymbol 74 | 20 -> CurrencySymbol 75 | 21 -> ModifierSymbol 76 | 22 -> OtherSymbol 77 | 23 -> Space 78 | 24 -> LineSeparator 79 | 25 -> ParagraphSeparator 80 | 26 -> Control 81 | 27 -> Format 82 | 28 -> Surrogate 83 | 29 -> PrivateUse 84 | } 85 | 86 | {- 87 | Regular Expression stuff. Lasca uses PCRE2 library internally. 88 | -} 89 | data Pattern 90 | 91 | extern def compilePattern(pattern: String): Pattern = "lascaCompileRegex" 92 | extern def matchRegex(pattern: Pattern, str: String): Bool = "lascaMatchRegex" 93 | extern def regexReplace(pattern: Pattern, str: String, replacement: String): String = "lascaRegexReplace" 94 | 95 | def replace(heystack: String, needle: String, replacement: String) = { 96 | p = compilePattern(needle); 97 | regexReplace(p, heystack, replacement) 98 | } 99 | 100 | def startsWith(s: String, prefix: String): Bool = { 101 | -- FIXME: write decent implementation 102 | p = compilePattern("^${prefix}.*"); 103 | matchRegex(p, s); 104 | } 105 | 106 | def endsWith(s: String, suffix: String): Bool = { 107 | -- FIXME: write decent implementation 108 | p = compilePattern(".*${suffix}\$"); 109 | matchRegex(p, s); 110 | } 111 | 112 | def foreach(s: String, f: Int32 -> a): Unit = iterate(s, { char -> f(char); true }) 113 | 114 | def codePointAt(s: String, index: Int): Int32 = { 115 | var i = 0; 116 | var result = -1.intToInt32; 117 | iterate(s, { char -> 118 | if i.readVar < index then { 119 | i := i.readVar + 1; 120 | true; 121 | } 122 | else { 123 | result := char; 124 | false; 125 | } 126 | }); 127 | if result.readVar == -1.intToInt32 then die("Index is out of range: ${index}") else result.readVar; 128 | } 129 | 130 | def ord(s: String) = codePointAt(s, 0) 131 | 132 | def foldl(s: String, zero: a, f: a -> Int32 -> a): a = { 133 | var acc = zero; 134 | iterate(s, { char -> acc := f(acc.readVar, char); true }); 135 | acc.readVar 136 | } 137 | 138 | def codePointCount(s: String): Int = foldl(s, 0, { len, c -> len + 1}) 139 | 140 | def graphemeCount(s: String): Int = { 141 | var count = 0; 142 | graphemeIterate(s, { g -> 143 | count := count.readVar + 1; 144 | true 145 | }); 146 | count.readVar 147 | } 148 | 149 | def compareLength(s: String, length: Int): Int = { 150 | var i = 0; 151 | var result = 0; 152 | iterate(s, { char -> 153 | i := i.readVar + 1; 154 | i.readVar <= length 155 | }); 156 | runtimeCompare(i.readVar, length); 157 | } 158 | 159 | def length(s) = codePointCount(s) 160 | 161 | def codePoints(s: String): Array Int32 = { 162 | array = makeArray(s.codePointCount, 0.intToInt32); 163 | foldl(s, 0, { idx, char -> setIndex(array, idx, char); idx + 1 }); 164 | array; 165 | } 166 | 167 | def map(s: String, f: Int32 -> Int32): String = { 168 | array = s.codePoints; 169 | transform(array, { i, cp -> f(cp) }); 170 | fromCharArray(array); 171 | } 172 | 173 | def toLower(s: String): String = if compareLength(s, 1) == 0 then chr(charToLower(ord(s))) else map(s, charToLower) 174 | 175 | def toUpper(s: String): String = if compareLength(s, 1) == 0 then chr(charToUpper(ord(s))) else map(s, charToUpper) 176 | 177 | def toTitle(s: String): String = if compareLength(s, 1) == 0 then chr(charToTitle(ord(s))) else map(s, charToTitle) 178 | 179 | 
def capitalize(s: String): String = match compareLength(s, 1) { 180 | -1 -> s -- empty string 181 | 0 -> chr(charToUpper(ord(s))) -- toUpper a single code point 182 | _ -> { 183 | array = s.codePoints; 184 | setIndex(array, 0, charToUpper(array[0])); 185 | fromCharArray(array); 186 | } 187 | } 188 | 189 | def joinGo(i: Int, arr: [String], len: Int, separator: String, strings: [String]) = { 190 | if i < len then { 191 | j = 2 * i; 192 | setIndex(arr, j - 1, separator); 193 | setIndex(arr, j, strings[i]); 194 | joinGo(i + 1, arr, len, separator, strings); 195 | } else (); 196 | } 197 | 198 | def join(separator: String, strings: [String]): String = match Array.length(strings) { 199 | 0 -> "" 200 | 1 -> strings[0] 201 | len -> { 202 | arr = Array.makeArray(2 * len - 1, ""); 203 | setIndex(arr, 0, strings[0]); 204 | joinGo(1, arr, len, separator, strings); 205 | concat(arr); 206 | } 207 | } 208 | 209 | def isDigit(char: Int32) = (char - 48.intToInt32) <= 9.intToInt32 210 | def isLetter(char: Int32) = let cat = utf8procCategory(char) in 1 <= cat and cat <= 5 -- Letters 211 | def isNumeric(char: Int32) = let cat = utf8procCategory(char) in 9 <= cat and cat <= 11 -- Numbers 212 | def isSpace(char: Int32) = char == 32.intToInt32 213 | or 9.intToInt32 <= char and char <= 13.intToInt32 214 | or char == 133.intToInt32 215 | or 160.intToInt32 <= char and runtimeCompare(generalCategory(char), Space) == 0 216 | 217 | def main() = { 218 | empty = ""; 219 | test = "Teástuͤ"; 220 | symbol = "uͤ"; 221 | upperT = "T"; 222 | lowerA = "å"; 223 | asdf = "aßdƒ"; 224 | println(toString(codePointAt(test, 0))); 225 | println(toString(codePointAt(test, 6))); 226 | String.foreach(test, { cp -> println(cp.toString) }); 227 | println(toString(codePoints(test))); 228 | println(toString(symbol.ord)); 229 | println("length in codepoints = ${codePointCount(test)}, length in bytes = ${bytesCount(test)}, length in graphemes = ${graphemeCount(test)}"); 230 | res = foldl("12345", 0, { acc, c -> acc + toInt(chr(c)); }); 231 | println(res.toString); 232 | println("toLower T = ${chr(charToLower(upperT.ord))} ${toLower(test)}"); 233 | println("toUpper å = ${chr(charToUpper(lowerA.ord))} ${toUpper(test)}"); 234 | println("toTitle å = ${chr(charToTitle(lowerA.ord))} ${toTitle(test)}"); 235 | println("capitalize ${capitalize(empty)} ${capitalize(lowerA)} ${capitalize(test)} ${capitalize(asdf)}"); 236 | println("compare ${compareLength(empty, 1)} ${compareLength(empty, 0)} ${compareLength(empty, -1)} ${compareLength(test, 10)} ${compareLength(test, 7)} ${compareLength(test, 0)}"); 237 | println("replace ${replace(test, symbol, upperT)}"); 238 | println("${test} startsWith ${upperT}: ${startsWith(test, upperT)}, endsWith ${symbol}: ${endsWith(test, symbol)}"); 239 | println("${test} startsWith ${symbol}: ${startsWith(test, symbol)}, endsWith ${upperT}: ${endsWith(test, upperT)}"); 240 | println("Code point 123 is valid Unicode Scalar: ${isValidUnicodeScalar(123.intToInt32)}"); 241 | println("Surrogate code point 55296 is valid Unicode Scalar: ${isValidUnicodeScalar(55296.intToInt32)}"); 242 | println("Code point 1114112 is valid Unicode Scalar: ${isValidUnicodeScalar(1114112.intToInt32)}"); 243 | println("0 is digit: ${isDigit("0".ord)}, 9 is digit: ${isDigit("9".ord)}, 'a' is digit: ${isDigit("a".ord)}"); 244 | gc = { s -> generalCategory(ord(s)) }; 245 | println("${gc("1")} ${gc("a")} ${gc("A")} ${gc(" ")} ${gc("≈")} ${gc("€")}"); 246 | println("å is letter ${isLetter("å".ord)}, 1 is letter ${isLetter("1".ord)}"); 247 | println("å is 
numeric ${isNumeric("å".ord)}, 1 is numeric ${isNumeric("1".ord)}, ¾ is numeric ${isNumeric("¾".ord)}"); 248 | sp = { s -> isSpace(s.ord) }; 249 | println("' ' is space ${sp(" ")}, '\\t' is space ${sp("\t")}, '\\r' is space ${sp("\r")}, '\\n' is space ${sp("\n")}, 'U+0085' is space ${isSpace(133.intToInt32)}"); 250 | println(String.join(", ", ["1", "2"])); 251 | } 252 | -------------------------------------------------------------------------------- /make-release.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -eux 4 | 5 | makeRelease() { 6 | local version=$1 7 | echo "Making release of version $version" 8 | 9 | rm -rf dist 10 | mkdir -p dist/{bin,src,lib,bash_completion} 11 | cp "$(stack path --dist-dir)/build/lasca/lasca" dist/bin 12 | 13 | LLVM_PATH="/usr/local/opt/llvm-6.0/lib/llvm-6.0/lib" 14 | echo "${LLVM_PATH}" 15 | cp "${LLVM_PATH}/libLLVM.dylib" dist/lib 16 | cp "${LLVM_PATH}/libc++.1.0.dylib" dist/lib 17 | install_name_tool -add_rpath @executable_path/../lib dist/bin/lasca 18 | install_name_tool -change "${LLVM_PATH}/libLLVM.dylib" @rpath/libLLVM.dylib dist/bin/lasca 19 | install_name_tool -change "${LLVM_PATH}/libc++.1.0.dylib" @rpath/libc++.1.0.dylib dist/bin/lasca 20 | 21 | chmod 0644 dist/lib/* 22 | install_name_tool -id @rpath/libLLVM.dylib dist/lib/libLLVM.dylib 23 | install_name_tool -id @rpath/libc++.1.0.dylib dist/lib/libc++.1.0.dylib 24 | cp build/rts/liblascartStatic.a dist/src 25 | cp libs/base/*.lasca dist/src 26 | lasca --bash-completion-script lasca > dist/bash_completion/lasca 27 | (cd dist; tar -czf "../lasca-${version}.tar.gz" .) 28 | shasum -a 256 "lasca-${version}.tar.gz" 29 | # sed -E -e 's/sha256 "[a-zA-Z0-9]+"/sha256 $(SUM)/' ../homebrew-lasca/lasca-compiler.rb 30 | } 31 | 32 | makeRelease $1 -------------------------------------------------------------------------------- /release.nix: -------------------------------------------------------------------------------- 1 | let 2 | pkgs = import (builtins.fetchGit { 3 | name = "nixpkgs"; 4 | url = "https://github.com/nixos/nixpkgs.git"; 5 | rev = "6ec64973bc3a48b0c54d11c782e8b88b550a8eab"; 6 | ref = "release-18.09";}) {}; 7 | lascart = pkgs.callPackage ./lascart.nix {}; 8 | in 9 | pkgs.haskellPackages.callPackage ./lasca.nix { inherit lascart; } -------------------------------------------------------------------------------- /rts/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(objlib OBJECT runtime.c builtin.c lasca.h utf8proc/utf8proc.c utf8proc/utf8proc.h xxhash.h) 2 | add_library (lascart SHARED $<TARGET_OBJECTS:objlib>) 3 | add_library (lascartStatic $<TARGET_OBJECTS:objlib>) 4 | # set_target_properties(lascartStatic PROPERTIES OUTPUT_NAME lascart) 5 | set(CMAKE_SHARED_LIBRARY_SUFFIX ".so") 6 | 7 | find_path(FFI_INCLUDE_PATH ffi.h PATHS ${FFI_INCLUDE_DIR}) 8 | 9 | if( EXISTS "${FFI_INCLUDE_PATH}/ffi.h" ) 10 | message("Found lib ffi in ${FFI_INCLUDE_PATH}") 11 | else() 12 | find_path(FFI_INCLUDE_PATH ffi/ffi.h PATHS ${FFI_INCLUDE_DIR}) 13 | if( EXISTS "${FFI_INCLUDE_PATH}/ffi/ffi.h" ) 14 | message("Searching ffi.h ${FFI_INCLUDE_PATH}") 15 | set(FFI_INCLUDE_PATH "${FFI_INCLUDE_PATH}/ffi" CACHE INTERNAL "") 16 | endif() 17 | endif() 18 | 19 | message("Found ffi.h in ${FFI_INCLUDE_PATH}") 20 | 21 | find_library(FFI_LIBRARY ffi PATHS ${FFI_LIBRARY_DIR}) 22 | if( NOT FFI_LIBRARY ) 23 | message(FATAL_ERROR "libffi is not found.") 24 | endif() 25 | 26 | 27 | find_path(GC_INCLUDE_PATH gc.h PATHS ${GC_INCLUDE_DIR}) 28 |
find_library(GC_LIBRARY gc PATHS ${GC_LIBRARY_DIR}) 29 | message("Found gc.h in ${GC_INCLUDE_PATH}") 30 | 31 | find_library(PCRE2_LIBRARY pcre2-8 PATHS ${PCRE2_LIBRARY_DIR}) 32 | message("Found pcre in ${PCRE2_LIBRARY}") 33 | 34 | target_include_directories(objlib PRIVATE ${GC_INCLUDE_PATH} ${FFI_INCLUDE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/utf8proc) 35 | 36 | target_link_libraries(lascart m ${GC_LIBRARY} ${FFI_LIBRARY} ${PCRE2_LIBRARY}) 37 | target_link_libraries(lascartStatic m ${GC_LIBRARY} ${FFI_LIBRARY} ${PCRE2_LIBRARY}) 38 | 39 | install(TARGETS lascart LIBRARY DESTINATION lib) 40 | install(TARGETS lascartStatic LIBRARY ARCHIVE DESTINATION lib) -------------------------------------------------------------------------------- /rts/lasca.h: -------------------------------------------------------------------------------- 1 | #ifndef LASCA_H 2 | #define LASCA_H 3 | #define PCRE2_CODE_UNIT_WIDTH 8 4 | #include <pcre2.h> 5 | #define XXH_ACCEPT_NULL_INPUT_POINTER 1 6 | #define XXH_INLINE_ALL 7 | #include "xxhash.h" 8 | 9 | 10 | // Operators 11 | static const int64_t ADD = 10; 12 | static const int64_t SUB = 11; // x - y 13 | static const int64_t MUL = 12; 14 | static const int64_t DIV = 13; // x / y 15 | static const int64_t MOD = 14; // x % y 16 | 17 | static const int64_t EQ = 42; // x == y 18 | static const int64_t NE = 43; // x != y 19 | static const int64_t LT = 44; // x < y 20 | static const int64_t LE = 45; // x <= y 21 | static const int64_t GE = 46; // x >= y 22 | static const int64_t GT = 47; // x > y 23 | // Boolean unary operations 24 | static const int64_t ZNOT = 50; // !x 25 | 26 | // Boolean binary operations 27 | static const int64_t ZOR = 60; // x || y 28 | static const int64_t ZAND = 61; // x && y 29 | 30 | typedef struct { 31 | const char* name; 32 | } LaType; 33 | 34 | typedef struct { 35 | const LaType* type; 36 | void* fields[]; 37 | } Box; 38 | 39 | typedef Box Unit; 40 | 41 | typedef struct { 42 | const LaType* type; 43 | int8_t num; 44 | } Byte; 45 | 46 | typedef struct { 47 | const LaType* type; 48 | int64_t num; 49 | } Int; 50 | 51 | typedef struct { 52 | const LaType* type; 53 | int16_t num; 54 | } Int16; 55 | 56 | typedef struct { 57 | const LaType* type; 58 | int32_t num; 59 | } Int32; 60 | 61 | typedef Byte Bool; 62 | 63 | typedef struct { 64 | const LaType* type; 65 | double num; 66 | } Float64; 67 | 68 | typedef struct { 69 | const LaType* type; 70 | int64_t length; 71 | char bytes[]; 72 | } String; 73 | 74 | typedef struct { 75 | const LaType* type; 76 | int64_t funcIdx; 77 | int64_t argc; 78 | Box** argv; 79 | } Closure; 80 | 81 | typedef struct { 82 | const LaType* type; 83 | int64_t length; 84 | Box* data[]; 85 | } Array; 86 | 87 | typedef struct { 88 | const LaType* type; 89 | int64_t tag; 90 | Box* values[]; 91 | } DataValue; 92 | 93 | typedef DataValue Option; 94 | 95 | typedef struct { 96 | const LaType* type; 97 | String* error; 98 | } Unknown; 99 | 100 | typedef struct { 101 | const LaType* type; 102 | pcre2_code *re; 103 | } Pattern; 104 | 105 | typedef struct { 106 | String* name; 107 | void * funcPtr; 108 | int64_t arity; 109 | } Function; 110 | 111 | typedef struct { 112 | int64_t size; 113 | Function functions[]; 114 | } Functions; 115 | 116 | typedef struct { 117 | LaType* type; 118 | // int64_t tag; // it's not set now.
Not sure we need this 119 | String* name; 120 | int64_t numFields; 121 | String* fields[]; 122 | } Struct; 123 | 124 | typedef struct { 125 | LaType* type; 126 | String* name; 127 | int64_t numValues; 128 | Struct* constructors[]; 129 | } Data; 130 | 131 | typedef struct { 132 | int64_t size; 133 | Data* data[]; 134 | } Types; 135 | 136 | typedef struct { 137 | int64_t argc; 138 | Box* argv; 139 | } Environment; 140 | 141 | typedef struct { 142 | Functions* functions; 143 | Types* types; 144 | int8_t verbose; 145 | } Runtime; 146 | 147 | typedef struct { 148 | int64_t line; 149 | int64_t column; 150 | } Position; 151 | 152 | #define asBool(ptr) ((Bool*)ptr) 153 | #define asByte(ptr) ((Byte*)ptr) 154 | #define asInt(ptr) ((Int*)ptr) 155 | #define asInt16(ptr) ((Int16*)ptr) 156 | #define asInt32(ptr) ((Int32*)ptr) 157 | #define asFloat(ptr) ((Float64*)ptr) 158 | #define asString(ptr) ((String*)ptr) 159 | #define asDataValue(ptr) ((DataValue*)ptr) 160 | #define asClosure(ptr) ((Closure*)ptr) 161 | #define asArray(ptr) ((Array*)ptr) 162 | #define asByteArray(ptr) ((String*)ptr) 163 | 164 | extern Unit UNIT_SINGLETON; 165 | extern Bool TRUE_SINGLETON; 166 | extern Bool FALSE_SINGLETON; 167 | extern DataValue NONE; 168 | // Primitive Types 169 | extern const LaType* LAUNIT ; 170 | extern const LaType* LABOOL ; 171 | extern const LaType* LABYTE ; 172 | extern const LaType* LAINT16 ; 173 | extern const LaType* LAINT32 ; 174 | extern const LaType* LAINT ; 175 | extern const LaType* LAFLOAT64; 176 | extern const LaType* LASTRING ; 177 | extern const LaType* LACLOSURE; 178 | extern const LaType* LAARRAY ; 179 | extern const LaType* LABYTEARRAY; 180 | extern const LaType* LAFILE_HANDLE; 181 | extern const LaType* LAPATTERN; 182 | extern const LaType* LAOPTION; 183 | extern unsigned long long xxHashSeed; 184 | 185 | bool eqTypes(const LaType* lhs, const LaType* rhs); 186 | void *gcMalloc(size_t s); 187 | String* __attribute__ ((pure)) makeString(const char * str); 188 | Box *box(const LaType* type_id, void *value); 189 | Int* boxInt(int64_t i); 190 | Int16* boxInt16(int16_t i); 191 | Int32* boxInt32(int32_t i); 192 | void * unbox(const LaType* expected, const Box* ti); 193 | int64_t runtimeCompare(Box* lhs, Box* rhs); 194 | Box* runtimeApply(Box* val, int64_t argc, Box* argv[], Position pos); 195 | String* toString(const Box* value); 196 | Box* println(const Box* val); 197 | Box* boxArray(size_t size, ...); 198 | Array* createArray(size_t size); 199 | const char * __attribute__ ((const)) typeIdToName(const LaType* typeId); 200 | DataValue* some(Box* value); 201 | 202 | #endif -------------------------------------------------------------------------------- /rts/utf8proc/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 2.8) 2 | 3 | include (utils.cmake) 4 | 5 | disallow_intree_builds() 6 | 7 | project (utf8proc C) 8 | 9 | # This is the ABI version number, which may differ from the 10 | # API version number (defined in utf8proc.h). 11 | # Be sure to also update these in Makefile and MANIFEST! 
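# Concretely (cf. the MANIFEST file alongside this CMakeLists): with SO_MAJOR=2, SO_MINOR=1,
# SO_PATCH=0 the build produces libutf8proc.so.2.1.0 whose soname is libutf8proc.so.2
# (the Makefile builds libutf8proc.2.dylib on macOS); per the Makefile's note, SO_MAJOR is
# bumped whenever binary compatibility is broken, independently of the API version in utf8proc.h.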
12 | set(SO_MAJOR 2) 13 | set(SO_MINOR 1) 14 | set(SO_PATCH 0) 15 | 16 | add_definitions ( 17 | -DUTF8PROC_EXPORTS 18 | ) 19 | 20 | if (NOT MSVC) 21 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2 -std=c99 -pedantic -Wall") 22 | endif () 23 | 24 | add_library (utf8proc 25 | utf8proc.c 26 | utf8proc.h 27 | ) 28 | 29 | set_target_properties (utf8proc PROPERTIES 30 | POSITION_INDEPENDENT_CODE ON 31 | VERSION "${SO_MAJOR}.${SO_MINOR}.${SO_PATCH}" 32 | SOVERSION ${SO_MAJOR} 33 | ) 34 | -------------------------------------------------------------------------------- /rts/utf8proc/LICENSE.md: -------------------------------------------------------------------------------- 1 | ## utf8proc license ## 2 | 3 | **utf8proc** is a software package originally developed 4 | by Jan Behrens and the rest of the Public Software Group, who 5 | deserve nearly all of the credit for this library, that is now maintained by the Julia-language developers. Like the original utf8proc, 6 | whose copyright and license statements are reproduced below, all new 7 | work on the utf8proc library is licensed under the [MIT "expat" 8 | license](http://opensource.org/licenses/MIT): 9 | 10 | *Copyright © 2014-2015 by Steven G. Johnson, Jiahao Chen, Tony Kelman, Jonas Fonseca, and other contributors listed in the git history.* 11 | 12 | Permission is hereby granted, free of charge, to any person obtaining a 13 | copy of this software and associated documentation files (the "Software"), 14 | to deal in the Software without restriction, including without limitation 15 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 16 | and/or sell copies of the Software, and to permit persons to whom the 17 | Software is furnished to do so, subject to the following conditions: 18 | 19 | The above copyright notice and this permission notice shall be included in 20 | all copies or substantial portions of the Software. 21 | 22 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 24 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 25 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 26 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 27 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 28 | DEALINGS IN THE SOFTWARE. 29 | 30 | ## Original utf8proc license ## 31 | 32 | *Copyright (c) 2009, 2013 Public Software Group e. V., Berlin, Germany* 33 | 34 | Permission is hereby granted, free of charge, to any person obtaining a 35 | copy of this software and associated documentation files (the "Software"), 36 | to deal in the Software without restriction, including without limitation 37 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 38 | and/or sell copies of the Software, and to permit persons to whom the 39 | Software is furnished to do so, subject to the following conditions: 40 | 41 | The above copyright notice and this permission notice shall be included in 42 | all copies or substantial portions of the Software. 43 | 44 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 45 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 46 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 47 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 48 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 49 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 50 | DEALINGS IN THE SOFTWARE. 51 | 52 | ## Unicode data license ## 53 | 54 | This software distribution contains derived data from a modified version of 55 | the Unicode data files. The following license applies to that data: 56 | 57 | **COPYRIGHT AND PERMISSION NOTICE** 58 | 59 | *Copyright (c) 1991-2007 Unicode, Inc. All rights reserved. Distributed 60 | under the Terms of Use in http://www.unicode.org/copyright.html.* 61 | 62 | Permission is hereby granted, free of charge, to any person obtaining a 63 | copy of the Unicode data files and any associated documentation (the "Data 64 | Files") or Unicode software and any associated documentation (the 65 | "Software") to deal in the Data Files or Software without restriction, 66 | including without limitation the rights to use, copy, modify, merge, 67 | publish, distribute, and/or sell copies of the Data Files or Software, and 68 | to permit persons to whom the Data Files or Software are furnished to do 69 | so, provided that (a) the above copyright notice(s) and this permission 70 | notice appear with all copies of the Data Files or Software, (b) both the 71 | above copyright notice(s) and this permission notice appear in associated 72 | documentation, and (c) there is clear notice in each modified Data File or 73 | in the Software as well as in the documentation associated with the Data 74 | File(s) or Software that the data or software has been modified. 75 | 76 | THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY 77 | KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 78 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF 79 | THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS 80 | INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR 81 | CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF 82 | USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER 83 | TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 84 | PERFORMANCE OF THE DATA FILES OR SOFTWARE. 85 | 86 | Except as contained in this notice, the name of a copyright holder shall 87 | not be used in advertising or otherwise to promote the sale, use or other 88 | dealings in these Data Files or Software without prior written 89 | authorization of the copyright holder. 90 | 91 | Unicode and the Unicode logo are trademarks of Unicode, Inc., and may be 92 | registered in some jurisdictions. All other trademarks and registered 93 | trademarks mentioned herein are the property of their respective owners. 
94 | -------------------------------------------------------------------------------- /rts/utf8proc/MANIFEST: -------------------------------------------------------------------------------- 1 | include/ 2 | include/utf8proc.h 3 | lib/ 4 | lib/libutf8proc.a 5 | lib/libutf8proc.so -> libutf8proc.so.2.1.0 6 | lib/libutf8proc.so.2 -> libutf8proc.so.2.1.0 7 | lib/libutf8proc.so.2.1.0 8 | -------------------------------------------------------------------------------- /rts/utf8proc/Makefile: -------------------------------------------------------------------------------- 1 | # libutf8proc Makefile 2 | 3 | # programs 4 | AR?=ar 5 | CC?=gcc 6 | INSTALL=install 7 | FIND=find 8 | 9 | # compiler settings 10 | CFLAGS ?= -O2 11 | PICFLAG = -fPIC 12 | C99FLAG = -std=c99 13 | WCFLAGS = -Wall -pedantic 14 | UCFLAGS = $(CFLAGS) $(PICFLAG) $(C99FLAG) $(WCFLAGS) -DUTF8PROC_EXPORTS 15 | 16 | # shared-library version MAJOR.MINOR.PATCH ... this may be *different* 17 | # from the utf8proc version number because it indicates ABI compatibility, 18 | # not API compatibility: MAJOR should be incremented whenever *binary* 19 | # compatibility is broken, even if the API is backward-compatible. 20 | # The API version number is defined in utf8proc.h. 21 | # Be sure to also update these ABI versions in MANIFEST and CMakeLists.txt! 22 | MAJOR=2 23 | MINOR=1 24 | PATCH=0 25 | 26 | OS := $(shell uname) 27 | ifeq ($(OS),Darwin) # MacOS X 28 | SHLIB_EXT = dylib 29 | SHLIB_VERS_EXT = $(MAJOR).dylib 30 | else # GNU/Linux, at least (Windows should probably use cmake) 31 | SHLIB_EXT = so 32 | SHLIB_VERS_EXT = so.$(MAJOR).$(MINOR).$(PATCH) 33 | endif 34 | 35 | # installation directories (for 'make install') 36 | prefix=/usr/local 37 | libdir=$(prefix)/lib 38 | includedir=$(prefix)/include 39 | 40 | # meta targets 41 | 42 | .PHONY: all clean data update manifest install 43 | 44 | all: libutf8proc.a libutf8proc.$(SHLIB_EXT) 45 | 46 | clean: 47 | rm -f utf8proc.o libutf8proc.a libutf8proc.$(SHLIB_VERS_EXT) libutf8proc.$(SHLIB_EXT) 48 | ifneq ($(OS),Darwin) 49 | rm -f libutf8proc.so.$(MAJOR) 50 | endif 51 | rm -f test/tests.o test/normtest test/graphemetest test/printproperty test/charwidth test/valid test/iterate test/case test/custom 52 | rm -rf MANIFEST.new tmp 53 | $(MAKE) -C bench clean 54 | $(MAKE) -C data clean 55 | 56 | data: data/utf8proc_data.c.new 57 | 58 | update: data/utf8proc_data.c.new 59 | cp -f data/utf8proc_data.c.new utf8proc_data.c 60 | 61 | manifest: MANIFEST.new 62 | 63 | # real targets 64 | 65 | data/utf8proc_data.c.new: libutf8proc.$(SHLIB_EXT) data/data_generator.rb data/charwidths.jl 66 | $(MAKE) -C data utf8proc_data.c.new 67 | 68 | utf8proc.o: utf8proc.h utf8proc.c utf8proc_data.c 69 | $(CC) $(UCFLAGS) -c -o utf8proc.o utf8proc.c 70 | 71 | libutf8proc.a: utf8proc.o 72 | rm -f libutf8proc.a 73 | $(AR) rs libutf8proc.a utf8proc.o 74 | 75 | libutf8proc.so.$(MAJOR).$(MINOR).$(PATCH): utf8proc.o 76 | $(CC) $(LDFLAGS) -shared -o $@ -Wl,-soname -Wl,libutf8proc.so.$(MAJOR) utf8proc.o 77 | chmod a-x $@ 78 | 79 | libutf8proc.so: libutf8proc.so.$(MAJOR).$(MINOR).$(PATCH) 80 | ln -f -s libutf8proc.so.$(MAJOR).$(MINOR).$(PATCH) $@ 81 | ln -f -s libutf8proc.so.$(MAJOR).$(MINOR).$(PATCH) $@.$(MAJOR) 82 | 83 | libutf8proc.$(MAJOR).dylib: utf8proc.o 84 | $(CC) -dynamiclib -o $@ $^ -install_name $(libdir)/$@ -Wl,-compatibility_version -Wl,$(MAJOR) -Wl,-current_version -Wl,$(MAJOR).$(MINOR).$(PATCH) 85 | 86 | libutf8proc.dylib: libutf8proc.$(MAJOR).dylib 87 | ln -f -s libutf8proc.$(MAJOR).dylib $@ 88 | 89 | install: 
libutf8proc.a libutf8proc.$(SHLIB_EXT) libutf8proc.$(SHLIB_VERS_EXT) 90 | mkdir -m 755 -p $(DESTDIR)$(includedir) 91 | $(INSTALL) -m 644 utf8proc.h $(DESTDIR)$(includedir) 92 | mkdir -m 755 -p $(DESTDIR)$(libdir) 93 | $(INSTALL) -m 644 libutf8proc.a $(DESTDIR)$(libdir) 94 | $(INSTALL) -m 755 libutf8proc.$(SHLIB_VERS_EXT) $(DESTDIR)$(libdir) 95 | ln -f -s libutf8proc.$(SHLIB_VERS_EXT) $(DESTDIR)$(libdir)/libutf8proc.$(SHLIB_EXT) 96 | ifneq ($(OS),Darwin) 97 | ln -f -s libutf8proc.$(SHLIB_VERS_EXT) $(DESTDIR)$(libdir)/libutf8proc.so.$(MAJOR) 98 | endif 99 | 100 | MANIFEST.new: 101 | rm -rf tmp 102 | $(MAKE) install prefix=/usr DESTDIR=$(PWD)/tmp 103 | $(FIND) tmp/usr -mindepth 1 -type l -printf "%P -> %l\n" -or -type f -printf "%P\n" -or -type d -printf "%P/\n" | LC_ALL=C sort > $@ 104 | rm -rf tmp 105 | 106 | # Test programs 107 | 108 | data/NormalizationTest.txt: 109 | $(MAKE) -C data NormalizationTest.txt 110 | 111 | data/GraphemeBreakTest.txt: 112 | $(MAKE) -C data GraphemeBreakTest.txt 113 | 114 | test/tests.o: test/tests.c test/tests.h utf8proc.h 115 | $(CC) $(UCFLAGS) -c -o test/tests.o test/tests.c 116 | 117 | test/normtest: test/normtest.c test/tests.o utf8proc.o utf8proc.h test/tests.h 118 | $(CC) $(UCFLAGS) test/normtest.c test/tests.o utf8proc.o -o $@ 119 | 120 | test/graphemetest: test/graphemetest.c test/tests.o utf8proc.o utf8proc.h test/tests.h 121 | $(CC) $(UCFLAGS) test/graphemetest.c test/tests.o utf8proc.o -o $@ 122 | 123 | test/printproperty: test/printproperty.c test/tests.o utf8proc.o utf8proc.h test/tests.h 124 | $(CC) $(UCFLAGS) test/printproperty.c test/tests.o utf8proc.o -o $@ 125 | 126 | test/charwidth: test/charwidth.c test/tests.o utf8proc.o utf8proc.h test/tests.h 127 | $(CC) $(UCFLAGS) test/charwidth.c test/tests.o utf8proc.o -o $@ 128 | 129 | test/valid: test/valid.c test/tests.o utf8proc.o utf8proc.h test/tests.h 130 | $(CC) $(UCFLAGS) test/valid.c test/tests.o utf8proc.o -o $@ 131 | 132 | test/iterate: test/iterate.c test/tests.o utf8proc.o utf8proc.h test/tests.h 133 | $(CC) $(UCFLAGS) test/iterate.c test/tests.o utf8proc.o -o $@ 134 | 135 | test/case: test/case.c test/tests.o utf8proc.o utf8proc.h test/tests.h 136 | $(CC) $(UCFLAGS) test/case.c test/tests.o utf8proc.o -o $@ 137 | 138 | test/custom: test/custom.c test/tests.o utf8proc.o utf8proc.h test/tests.h 139 | $(CC) $(UCFLAGS) test/custom.c test/tests.o utf8proc.o -o $@ 140 | 141 | check: test/normtest data/NormalizationTest.txt test/graphemetest data/GraphemeBreakTest.txt test/printproperty test/case test/custom test/charwidth test/valid test/iterate bench/bench.c bench/util.c bench/util.h utf8proc.o 142 | $(MAKE) -C bench 143 | test/normtest data/NormalizationTest.txt 144 | test/graphemetest data/GraphemeBreakTest.txt 145 | test/charwidth 146 | test/valid 147 | test/iterate 148 | test/case 149 | test/custom 150 | -------------------------------------------------------------------------------- /rts/utf8proc/NEWS.md: -------------------------------------------------------------------------------- 1 | # utf8proc release history # 2 | 3 | ## Version 2.1 ## 4 | 5 | 2016-12-26: 6 | 7 | - New functions `utf8proc_map_custom` and `utf8proc_decompose_custom` 8 | to allow user-supplied transformations of codepoints, in conjunction 9 | with other transformations ([#89]). 10 | 11 | - New function `utf8proc_normalize_utf32` to apply normalizations 12 | directly to UTF-32 data (not just UTF-8) ([#88]). 
13 | 14 | - Fixed stack overflow that could occur due to incorrect definition 15 | of `UINT16_MAX` with some compilers ([#84]). 16 | 17 | - Fixed conflict with `stdbool.h` in Visual Studio ([#90]). 18 | 19 | - Updated font metrics to use Unifont 9.0.04. 20 | 21 | ## Version 2.0.2 ## 22 | 23 | 2016-07-27: 24 | 25 | - Move `-Wmissing-prototypes` warning flag from `Makefile` to `.travis.yml` 26 | since MSVC does not understand this flag and it is occasionally useful to 27 | build using MSVC through the `Makefile` ([#79]). 28 | 29 | - Use a different variable name for a nested loop in `bench/bench.c`, and 30 | declare it in a C89 way rather than inside the `for` to avoid "error: 31 | 'for' loop initial declarations are only allowed in C99 mode" ([#80]). 32 | 33 | ## Version 2.0.1 ## 34 | 35 | 2016-07-13: 36 | 37 | - Bug fix in `utf8proc_grapheme_break_stateful` ([#77]). 38 | 39 | - Tests now use versioned Unicode files, so they will no longer 40 | break when a new version of Unicode is released ([#78]). 41 | 42 | ## Version 2.0 ## 43 | 44 | 2016-07-13: 45 | 46 | - Updated for Unicode 9.0 ([#70]). 47 | 48 | - New `utf8proc_grapheme_break_stateful` to handle the complicated 49 | grapheme-breaking rules in Unicode 9. The old `utf8proc_grapheme_break` 50 | is still provided, but may incorrectly identify grapheme breaks 51 | in some Unicode-9 sequences. 52 | 53 | - Smaller Unicode tables ([#62], [#68]). This required changes 54 | in the `utf8proc_property_t` structure, which breaks backward 55 | compatibility if you access this `struct` directly. The 56 | functions in the API remain backward-compatible, however. 57 | 58 | - Buffer overrun fix ([#66]). 59 | 60 | ## Version 1.3.1 ## 61 | 62 | 2015-11-02: 63 | 64 | - Do not export symbol for internal function `unsafe_encode_char()` ([#55]). 65 | 66 | - Install relative symbolic links for shared libraries ([#58]). 67 | 68 | - Enable and fix compiler warnings ([#55], [#58]). 69 | 70 | - Add missing files to `make clean` ([#58]). 71 | 72 | ## Version 1.3 ## 73 | 74 | 2015-07-06: 75 | 76 | - Updated for Unicode 8.0 ([#45]). 77 | 78 | - New `utf8proc_tolower` and `utf8proc_toupper` functions, portable 79 | replacements for `towlower` and `towupper` in the C library ([#40]). 80 | 81 | - Don't treat Unicode "non-characters" as invalid, and improved 82 | validity checking in general ([#35]). 83 | 84 | - Prefix all typedefs with `utf8proc_`, e.g. `utf8proc_int32_t`, 85 | to avoid collisions with other libraries ([#32]). 86 | 87 | - Rename `DLLEXPORT` to `UTF8PROC_DLLEXPORT` to prevent collisions. 88 | 89 | - Fix build breakage in the benchmark routines. 90 | 91 | - More fine-grained Makefile variables (`PICFLAG` etcetera), so that 92 | compilation flags can be selectively overridden, and in particular 93 | so that `CFLAGS` can be changed without accidentally eliminating 94 | necessary flags like `-fPIC` and `-std=c99` ([#43]). 95 | 96 | - Updated character-width tables based on Unifont 8.0.01 ([#51]) and 97 | the Unicode 8 character categories ([#47]). 98 | 99 | ## Version 1.2 ## 100 | 101 | 2015-03-28: 102 | 103 | - Updated for Unicode 7.0 ([#6]). 104 | 105 | - New function `utf8proc_grapheme_break(c1,c2)` that returns whether 106 | there is a grapheme break between `c1` and `c2` ([#20]). 107 | 108 | - New function `utf8proc_charwidth(c)` that returns the number of 109 | column-positions that should be required for `c`; essentially a 110 | portable replacment for `wcwidth(c)` ([#27]). 
111 | 112 | - New function `utf8proc_category(c)` that returns the Unicode 113 | category of `c` (as one of the constants `UTF8PROC_CATEGORY_xx`). 114 | Also, a function `utf8proc_category_string(c)` that returns the Unicode 115 | category of `c` as a two-character string. 116 | 117 | - `cmake` script `CMakeLists.txt`, in addition to `Makefile`, for 118 | easier compilation on Windows ([#28]). 119 | 120 | - Various `Makefile` improvements: a `make check` target to perform 121 | tests ([#13]), `make install`, a rule to automate updating the Unicode 122 | tables, etcetera. 123 | 124 | - The shared library is now versioned (e.g. has a soname on GNU/Linux) ([#24]). 125 | 126 | - C++/MSVC compatibility ([#17]). 127 | 128 | - Most `#defined` constants are now `enums` ([#29]). 129 | 130 | - New preprocessor constants `UTF8PROC_VERSION_MAJOR`, 131 | `UTF8PROC_VERSION_MINOR`, and `UTF8PROC_VERSION_PATCH` for compile-time 132 | detection of the API version. 133 | 134 | - Doxygen-formatted documentation ([#29]). 135 | 136 | - The Ruby and PostgreSQL plugins have been removed due to lack of testing ([#22]). 137 | 138 | ## Version 1.1.6 ## 139 | 140 | 2013-11-27: 141 | 142 | - PostgreSQL 9.2 and 9.3 compatibility (lowercase `c` language name) 143 | 144 | ## Version 1.1.5 ## 145 | 146 | 2009-08-20: 147 | 148 | - Use `RSTRING_PTR()` and `RSTRING_LEN()` instead of `RSTRING()->ptr` and 149 | `RSTRING()->len` for ruby1.9 compatibility (and `#define` them, if not 150 | existent) 151 | 152 | 2009-10-02: 153 | 154 | - Patches for compatibility with Microsoft Visual Studio 155 | 156 | 2009-10-08: 157 | 158 | - Fixes to make utf8proc usable in C++ programs 159 | 160 | 2009-10-16: 161 | 162 | ## Version 1.1.4 ## 163 | 164 | 2009-06-14: 165 | 166 | - replaced C++ style comments for compatibility reasons 167 | - added typecasts to suppress compiler warnings 168 | - removed redundant source files for ruby-gemfile generation 169 | 170 | 2009-08-19: 171 | 172 | - Changed copyright notice for Public Software Group e. V. 173 | - Minor changes in the `README` file 174 | 175 | ## Version 1.1.3 ## 176 | 177 | 2008-10-04: 178 | 179 | - Added a function `utf8proc_version` returning a string containing the version 180 | number of the library. 181 | - Included a target `libutf8proc.dylib` for MacOSX. 182 | 183 | 2009-05-01: 184 | - PostgreSQL 8.3 compatibility (use of `SET_VARSIZE` macro) 185 | 186 | ## Version 1.1.2 ## 187 | 188 | 2007-07-25: 189 | 190 | - Fixed a serious bug in the data file generator, which caused characters 191 | being treated incorrectly, when stripping default ignorable characters or 192 | calculating grapheme cluster boundaries. 193 | 194 | ## Version 1.1.1 ## 195 | 196 | 2007-06-25: 197 | 198 | - Added a new PostgreSQL function `unistrip`, which behaves like `unifold`, 199 | but also removes all character marks (e.g. accents). 200 | 201 | 2007-07-22: 202 | 203 | - Changed license from BSD to MIT style. 204 | - Added a new function `utf8proc_codepoint_valid` to the C library. 205 | - Changed compiler flags in `Makefile` from `-g -O0` to `-O2` 206 | - The ruby script, which was used to build the `utf8proc_data.c` file, is now 207 | included in the distribution. 208 | 209 | ## Version 1.0.3 ## 210 | 211 | 2007-03-16: 212 | 213 | - Fixed a bug in the ruby library, which caused an error, when splitting an 214 | empty string at grapheme cluster boundaries (method `String#utf8chars`). 
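
For the category functions described in the Version 1.2 entry above, a small hypothetical usage sketch (the call shape matches `test/printproperty.c`; the sample codepoints are arbitrary):

```c
#include <stdio.h>
#include "utf8proc.h"

int main(void)
{
    /* e.g. 'A' -> Lu (uppercase letter), U+2028 -> Zl (line separator) */
    utf8proc_int32_t samples[] = { 0x41, 0x2028 };
    for (int i = 0; i < 2; ++i) {
        printf("U+%04X: category %d (%s)\n",
               (unsigned) samples[i],
               (int) utf8proc_category(samples[i]),
               utf8proc_category_string(samples[i]));
    }
    return 0;
}
```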
215 | 216 | ## Version 1.0.2 ## 217 | 218 | 2006-09-21: 219 | 220 | - included a check in `Integer#utf8`, which raises an exception, if the given 221 | code-point is invalid because of being too high (this was missing yet) 222 | 223 | 2006-12-26: 224 | 225 | - added support for PostgreSQL version 8.2 226 | 227 | ## Version 1.0.1 ## 228 | 229 | 2006-09-20: 230 | 231 | - included a gem file for the ruby version of the library 232 | 233 | Release of version 1.0.1 234 | 235 | ## Version 1.0 ## 236 | 237 | 2006-09-17: 238 | 239 | - added the `LUMP` option, which lumps certain characters together (see `lump.md`) (also used for the PostgreSQL `unifold` function) 240 | - added the `STRIPMARK` option, which strips marking characters (or marks of composed characters) 241 | - deprecated ruby method `String#char_ary` in favour of `String#utf8chars` 242 | 243 | ## Version 0.3 ## 244 | 245 | 2006-07-18: 246 | 247 | - changed normalization from NFC to NFKC for postgresql unifold function 248 | 249 | 2006-08-04: 250 | 251 | - added support to mark the beginning of a grapheme cluster with 0xFF (option: `CHARBOUND`) 252 | - added the ruby method `String#chars`, which is returning an array of UTF-8 encoded grapheme clusters 253 | - added `NLF2LF` transformation in postgresql `unifold` function 254 | - added the `DECOMPOSE` option, if you neither use `COMPOSE` or `DECOMPOSE`, no normalization will be performed (different from previous versions) 255 | - using integer constants rather than C-strings for character properties 256 | - fixed (hopefully) a problem with the ruby library on Mac OS X, which occurred when compiler optimization was switched on 257 | 258 | ## Version 0.2 ## 259 | 260 | 2006-06-05: 261 | 262 | - changed behaviour of PostgreSQL function to return NULL in case of invalid input, rather than raising an exceptional condition 263 | - improved efficiency of PostgreSQL function (no transformation to C string is done) 264 | 265 | 2006-06-20: 266 | 267 | - added -fpic compiler flag in Makefile 268 | - fixed bug in the C code for the ruby library (usage of non-existent function) 269 | 270 | ## Version 0.1 ## 271 | 272 | 2006-06-02: initial release of version 0.1 273 | 274 | [#6]: https://github.com/JuliaLang/utf8proc/issues/6 275 | [#13]: https://github.com/JuliaLang/utf8proc/issues/13 276 | [#17]: https://github.com/JuliaLang/utf8proc/issues/17 277 | [#20]: https://github.com/JuliaLang/utf8proc/issues/20 278 | [#22]: https://github.com/JuliaLang/utf8proc/issues/22 279 | [#24]: https://github.com/JuliaLang/utf8proc/issues/24 280 | [#27]: https://github.com/JuliaLang/utf8proc/issues/27 281 | [#28]: https://github.com/JuliaLang/utf8proc/issues/28 282 | [#29]: https://github.com/JuliaLang/utf8proc/issues/29 283 | [#32]: https://github.com/JuliaLang/utf8proc/issues/32 284 | [#35]: https://github.com/JuliaLang/utf8proc/issues/35 285 | [#40]: https://github.com/JuliaLang/utf8proc/issues/40 286 | [#43]: https://github.com/JuliaLang/utf8proc/issues/43 287 | [#45]: https://github.com/JuliaLang/utf8proc/issues/45 288 | [#47]: https://github.com/JuliaLang/utf8proc/issues/47 289 | [#51]: https://github.com/JuliaLang/utf8proc/issues/51 290 | [#55]: https://github.com/JuliaLang/utf8proc/issues/55 291 | [#58]: https://github.com/JuliaLang/utf8proc/issues/58 292 | [#62]: https://github.com/JuliaLang/utf8proc/issues/62 293 | [#66]: https://github.com/JuliaLang/utf8proc/issues/66 294 | [#68]: https://github.com/JuliaLang/utf8proc/issues/68 295 | [#70]: https://github.com/JuliaLang/utf8proc/issues/70 296 | [#77]: 
https://github.com/JuliaLang/utf8proc/issues/77 297 | [#78]: https://github.com/JuliaLang/utf8proc/issues/78 298 | [#79]: https://github.com/JuliaLang/utf8proc/issues/79 299 | [#80]: https://github.com/JuliaLang/utf8proc/issues/80 300 | [#84]: https://github.com/JuliaLang/utf8proc/pull/84 301 | [#88]: https://github.com/JuliaLang/utf8proc/pull/88 302 | [#89]: https://github.com/JuliaLang/utf8proc/pull/89 303 | [#90]: https://github.com/JuliaLang/utf8proc/issues/90 304 | -------------------------------------------------------------------------------- /rts/utf8proc/README.md: -------------------------------------------------------------------------------- 1 | # utf8proc 2 | [![Travis CI Status](https://travis-ci.org/JuliaLang/utf8proc.png)](https://travis-ci.org/JuliaLang/utf8proc) 3 | [![AppVeyor Status](https://ci.appveyor.com/api/projects/status/aou20lfkyhj8xbwq/branch/master?svg=true)](https://ci.appveyor.com/project/tkelman/utf8proc/branch/master) 4 | 5 | 6 | [utf8proc](http://julialang.org/utf8proc/) is a small, clean C 7 | library that provides Unicode normalization, case-folding, and other 8 | operations for data in the [UTF-8 9 | encoding](http://en.wikipedia.org/wiki/UTF-8). It was [initially 10 | developed](http://www.public-software-group.org/utf8proc) by Jan 11 | Behrens and the rest of the [Public Software 12 | Group](http://www.public-software-group.org/), who deserve *nearly all 13 | of the credit* for this package. With the blessing of the Public 14 | Software Group, the [Julia developers](http://julialang.org/) have 15 | taken over development of utf8proc, since the original developers have 16 | moved to other projects. 17 | 18 | (utf8proc is used for basic Unicode 19 | support in the [Julia language](http://julialang.org/), and the Julia 20 | developers became involved because they wanted to add Unicode 7 support and other features.) 21 | 22 | (The original utf8proc package also includes Ruby and PostgreSQL plug-ins. 23 | We removed those from utf8proc in order to focus exclusively on the C 24 | library for the time being, but plan to add them back in or release them as separate packages.) 25 | 26 | The utf8proc package is licensed under the 27 | free/open-source [MIT "expat" 28 | license](http://opensource.org/licenses/MIT) (plus certain Unicode 29 | data governed by the similarly permissive [Unicode data 30 | license](http://www.unicode.org/copyright.html#Exhibit1)); please see 31 | the included `LICENSE.md` file for more detailed information. 32 | 33 | ## Quick Start 34 | 35 | For compilation of the C library run `make`. 36 | 37 | ## General Information 38 | 39 | The C library is found in this directory after successful compilation 40 | and is named `libutf8proc.a` (for the static library) and 41 | `libutf8proc.so` (for the dynamic library). 42 | 43 | The Unicode version supported is 9.0.0. 44 | 45 | For Unicode normalizations, the following options are used: 46 | 47 | * Normalization Form C: `STABLE`, `COMPOSE` 48 | * Normalization Form D: `STABLE`, `DECOMPOSE` 49 | * Normalization Form KC: `STABLE`, `COMPOSE`, `COMPAT` 50 | * Normalization Form KD: `STABLE`, `DECOMPOSE`, `COMPAT` 51 | 52 | ## C Library 53 | 54 | The documentation for the C library is found in the `utf8proc.h` header file. 55 | `utf8proc_map` is function you will most likely be using for mapping UTF-8 56 | strings, unless you want to allocate memory yourself. 57 | 58 | ## To Do 59 | 60 | See the Github [issues list](https://github.com/JuliaLang/utf8proc/issues). 
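
A minimal sketch of the common case described in the C Library section above: NFC-normalizing a NUL-terminated string with `utf8proc_map`, letting the library allocate the result. The option flags follow the Normalization Form C options listed above, and the call shape matches `bench/bench.c`; the input string is illustrative.

```c
#include <stdio.h>
#include <stdlib.h>
#include "utf8proc.h"

int main(void)
{
    const utf8proc_uint8_t *input = (const utf8proc_uint8_t *) "Café";
    utf8proc_uint8_t *nfc = NULL;

    /* NFC = STABLE + COMPOSE; NULLTERM means the length argument (0) is ignored */
    utf8proc_ssize_t n = utf8proc_map(
        input, 0, &nfc,
        UTF8PROC_NULLTERM | UTF8PROC_STABLE | UTF8PROC_COMPOSE);
    if (n < 0) {
        fprintf(stderr, "normalization failed: %s\n", utf8proc_errmsg(n));
        return 1;
    }
    printf("%s (%ld bytes)\n", (char *) nfc, (long) n);
    free(nfc); /* utf8proc_map allocates the output; the caller frees it */
    return 0;
}
```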
61 | 62 | ## Contact 63 | 64 | Bug reports, feature requests, and other queries can be filed at 65 | the [utf8proc issues page on Github](https://github.com/JuliaLang/utf8proc/issues). 66 | 67 | ## See also 68 | 69 | An independent Lua translation of this library, [lua-mojibake](https://github.com/differentprogramming/lua-mojibake), is also available. 70 | -------------------------------------------------------------------------------- /rts/utf8proc/bench/Makefile: -------------------------------------------------------------------------------- 1 | CURL=curl 2 | 3 | CC = cc 4 | CFLAGS = -O2 -std=c99 -pedantic -Wall 5 | 6 | all: bench 7 | 8 | LIBUTF8PROC = ../utf8proc.o 9 | 10 | bench: bench.o util.o $(LIBUTF8PROC) 11 | $(CC) $(CFLAGS) $(LDFLAGS) -o $@ bench.o util.o $(LIBUTF8PROC) 12 | 13 | DATAURL = https://raw.githubusercontent.com/duerst/eprun/master/benchmark 14 | DATAFILES = Deutsch_.txt Japanese_.txt Korean_.txt Vietnamese_.txt 15 | 16 | $(DATAFILES): 17 | $(CURL) -O $(DATAURL)/$@ 18 | 19 | bench.out: $(DATAFILES) bench 20 | ./bench -nfkc $(DATAFILES) > $@ 21 | 22 | # you may need make CPPFLAGS=... LDFLAGS=... to help it find ICU 23 | icu: icu.o util.o 24 | $(CC) $(CFLAGS) $(LDFLAGS) -o $@ icu.o util.o -licuuc 25 | 26 | icu.out: $(DATAFILES) icu 27 | ./icu $(DATAFILES) > $@ 28 | 29 | unistring: unistring.o util.o 30 | $(CC) $(CFLAGS) $(LDFLAGS) -o $@ unistring.o util.o -lunistring 31 | 32 | unistring.out: $(DATAFILES) unistring 33 | ./unistring $(DATAFILES) > $@ 34 | 35 | .c.o: 36 | $(CC) $(CPPFLAGS) -I.. $(CFLAGS) -c -o $@ $< 37 | 38 | clean: 39 | rm -rf *.o *.txt bench *.out icu unistring 40 | -------------------------------------------------------------------------------- /rts/utf8proc/bench/bench.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "utf8proc.h" 6 | #include "util.h" 7 | 8 | int main(int argc, char **argv) 9 | { 10 | int i, j; 11 | int options = 0; 12 | 13 | for (i = 1; i < argc; ++i) { 14 | if (!strcmp(argv[i], "-nfkc")) { 15 | options |= UTF8PROC_STABLE|UTF8PROC_COMPOSE|UTF8PROC_COMPAT; 16 | continue; 17 | } 18 | if (!strcmp(argv[i], "-nfkd")) { 19 | options |= UTF8PROC_STABLE|UTF8PROC_DECOMPOSE|UTF8PROC_COMPAT; 20 | continue; 21 | } 22 | if (!strcmp(argv[i], "-nfc")) { 23 | options |= UTF8PROC_STABLE|UTF8PROC_COMPOSE; 24 | continue; 25 | } 26 | if (!strcmp(argv[i], "-nfd")) { 27 | options |= UTF8PROC_STABLE|UTF8PROC_DECOMPOSE; 28 | continue; 29 | } 30 | if (!strcmp(argv[i], "-casefold")) { 31 | options |= UTF8PROC_CASEFOLD; 32 | continue; 33 | } 34 | if (argv[i][0] == '-') { 35 | fprintf(stderr, "unrecognized option: %s\n", argv[i]); 36 | return EXIT_FAILURE; 37 | } 38 | 39 | size_t len; 40 | uint8_t *src = readfile(argv[i], &len); 41 | if (!src) { 42 | fprintf(stderr, "error reading %s\n", argv[i]); 43 | return EXIT_FAILURE; 44 | } 45 | uint8_t *dest; 46 | mytime start = gettime(); 47 | for (j = 0; j < 100; ++j) { 48 | utf8proc_map(src, len, &dest, options); 49 | free(dest); 50 | } 51 | printf("%s: %g\n", argv[i], elapsed(gettime(), start) / 100); 52 | free(src); 53 | } 54 | 55 | return EXIT_SUCCESS; 56 | } 57 | -------------------------------------------------------------------------------- /rts/utf8proc/bench/icu.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | /* ICU4C */ 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "util.h" 11 | 12 | int main(int argc, char **argv) 13 | { 14 | 
int i; 15 | 16 | UErrorCode err; 17 | UConverter *uc = ucnv_open("UTF8", &err); 18 | if (U_FAILURE(err)) return EXIT_FAILURE; 19 | 20 | const UNormalizer2 *NFKC = unorm2_getNFKCInstance(&err); 21 | if (U_FAILURE(err)) return EXIT_FAILURE; 22 | 23 | for (i = 1; i < argc; ++i) { 24 | if (argv[i][0] == '-') { 25 | fprintf(stderr, "unrecognized option: %s\n", argv[i]); 26 | return EXIT_FAILURE; 27 | } 28 | 29 | size_t len; 30 | uint8_t *src = readfile(argv[i], &len); 31 | if (!src) { 32 | fprintf(stderr, "error reading %s\n", argv[i]); 33 | return EXIT_FAILURE; 34 | } 35 | 36 | /* convert UTF8 data to ICU's UTF16 */ 37 | UChar *usrc = (UChar*) malloc(2*len * sizeof(UChar)); 38 | ucnv_toUChars(uc, usrc, 2*len, (char*) src, len, &err); 39 | if (U_FAILURE(err)) return EXIT_FAILURE; 40 | size_t ulen = u_strlen(usrc); 41 | 42 | /* ICU's insane normalization API requires you to 43 | know the size of the destination buffer in advance, 44 | or alternatively to repeatly try normalizing and 45 | double the buffer size until it succeeds. Here, I just 46 | allocate a huge destination buffer to avoid the issue. */ 47 | UChar *udest = (UChar*) malloc(10*ulen * sizeof(UChar)); 48 | 49 | mytime start = gettime(); 50 | for (int i = 0; i < 100; ++i) { 51 | unorm2_normalize(NFKC, usrc, ulen, udest, 10*ulen, &err); 52 | if (U_FAILURE(err)) return EXIT_FAILURE; 53 | } 54 | printf("%s: %g\n", argv[i], elapsed(gettime(), start) / 100); 55 | free(udest); 56 | free(usrc); 57 | free(src); 58 | } 59 | 60 | return EXIT_SUCCESS; 61 | } 62 | -------------------------------------------------------------------------------- /rts/utf8proc/bench/unistring.c: -------------------------------------------------------------------------------- 1 | /* comparitive benchmark of GNU libunistring */ 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | /* libunistring */ 8 | #include 9 | #include 10 | 11 | #include "util.h" 12 | 13 | int main(int argc, char **argv) 14 | { 15 | int i; 16 | uninorm_t nf = UNINORM_NFKC; 17 | 18 | for (i = 1; i < argc; ++i) { 19 | if (!strcmp(argv[i], "-nfkc")) { 20 | nf = UNINORM_NFKC; 21 | continue; 22 | } 23 | if (!strcmp(argv[i], "-nfkd")) { 24 | nf = UNINORM_NFKD; 25 | continue; 26 | } 27 | if (!strcmp(argv[i], "-nfc")) { 28 | nf = UNINORM_NFC; 29 | continue; 30 | } 31 | if (!strcmp(argv[i], "-nfd")) { 32 | nf = UNINORM_NFD; 33 | continue; 34 | } 35 | if (argv[i][0] == '-') { 36 | fprintf(stderr, "unrecognized option: %s\n", argv[i]); 37 | return EXIT_FAILURE; 38 | } 39 | 40 | size_t len; 41 | uint8_t *src = readfile(argv[i], &len); 42 | if (!src) { 43 | fprintf(stderr, "error reading %s\n", argv[i]); 44 | return EXIT_FAILURE; 45 | } 46 | 47 | size_t destlen; 48 | uint8_t *dest; 49 | mytime start = gettime(); 50 | for (int i = 0; i < 100; ++i) { 51 | dest = u8_normalize(nf, src, len, NULL, &destlen); 52 | if (!dest) return EXIT_FAILURE; 53 | free(dest); 54 | } 55 | printf("%s: %g\n", argv[i], elapsed(gettime(), start) / 100); 56 | free(src); 57 | } 58 | 59 | return EXIT_SUCCESS; 60 | } 61 | -------------------------------------------------------------------------------- /rts/utf8proc/bench/util.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "util.h" 6 | 7 | /* read file named FILENAME into an array of *len bytes, 8 | returning NULL on error */ 9 | uint8_t *readfile(const char *filename, size_t *len) 10 | { 11 | *len = 0; 12 | struct stat st; 13 | if (0 != stat(filename, &st)) return NULL; 14 | *len = st.st_size; 15 
| FILE *f = fopen(filename, "r"); 16 | if (!f) return NULL; 17 | uint8_t *s = (uint8_t *) malloc(sizeof(uint8_t) * *len); 18 | if (!s) return NULL; 19 | if (fread(s, 1, *len, f) != *len) { 20 | free(s); 21 | s = NULL; 22 | } 23 | fclose(f); 24 | return s; 25 | } 26 | 27 | mytime gettime(void) { 28 | mytime t; 29 | gettimeofday(&t, NULL); 30 | return t; 31 | } 32 | 33 | /* time difference in seconds */ 34 | double elapsed(mytime t1, mytime t0) 35 | { 36 | return (double)(t1.tv_sec - t0.tv_sec) + 37 | (double)(t1.tv_usec - t0.tv_usec) * 1.0E-6; 38 | } 39 | 40 | -------------------------------------------------------------------------------- /rts/utf8proc/bench/util.h: -------------------------------------------------------------------------------- 1 | #ifndef UTIL_H 2 | #define UTIL_H 1 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #ifdef __cplusplus 9 | extern "C" { 10 | #endif 11 | 12 | uint8_t *readfile(const char *filename, size_t *len); 13 | 14 | typedef struct timeval mytime; 15 | mytime gettime(void); 16 | double elapsed(mytime t1, mytime t0); 17 | 18 | #ifdef __cplusplus 19 | } 20 | #endif 21 | 22 | #endif /* UTIL_H */ 23 | -------------------------------------------------------------------------------- /rts/utf8proc/data/Makefile: -------------------------------------------------------------------------------- 1 | # Unicode data generation rules. Except for the test data files, most 2 | # users will not use these Makefile rules, which are primarily to re-generate 3 | # unicode_data.c when we get a new Unicode version or charwidth data; they 4 | # require ruby, fontforge, and julia to be installed. 5 | 6 | # programs 7 | CURL=curl 8 | RUBY=ruby 9 | PERL=perl 10 | MAKE=make 11 | JULIA=julia 12 | FONTFORGE=fontforge 13 | CURLFLAGS = --retry 5 --location 14 | 15 | .PHONY: clean 16 | 17 | .DELETE_ON_ERROR: 18 | 19 | utf8proc_data.c.new: data_generator.rb UnicodeData.txt GraphemeBreakProperty.txt DerivedCoreProperties.txt CompositionExclusions.txt CaseFolding.txt CharWidths.txt 20 | $(RUBY) data_generator.rb < UnicodeData.txt > $@ 21 | 22 | # GNU Unifont version for font metric calculations: 23 | UNIFONT_VERSION=9.0.04 24 | 25 | unifont.ttf: 26 | $(CURL) $(CURLFLAGS) -o $@ $(URLCACHE)https://mirrors.kernel.org/gnu/unifont/unifont-$(UNIFONT_VERSION)/unifont-$(UNIFONT_VERSION).ttf 27 | 28 | unifont_upper.ttf: 29 | $(CURL) $(CURLFLAGS) -o $@ $(URLCACHE)https://mirrors.kernel.org/gnu/unifont/unifont-$(UNIFONT_VERSION)/unifont_upper-$(UNIFONT_VERSION).ttf 30 | 31 | %.sfd: %.ttf 32 | $(FONTFORGE) -lang=ff -c "Open(\"$<\");Save(\"$@\");Quit(0);" 33 | 34 | CharWidths.txt: charwidths.jl unifont.sfd unifont_upper.sfd EastAsianWidth.txt 35 | $(JULIA) charwidths.jl > $@ 36 | 37 | # Unicode data version 38 | UNICODE_VERSION=9.0.0 39 | 40 | UnicodeData.txt: 41 | $(CURL) $(CURLFLAGS) -o $@ -O http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/UnicodeData.txt 42 | 43 | EastAsianWidth.txt: 44 | $(CURL) $(CURLFLAGS) -o $@ -O $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/EastAsianWidth.txt 45 | 46 | GraphemeBreakProperty.txt: 47 | $(CURL) $(CURLFLAGS) -o $@ -O $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/GraphemeBreakProperty.txt 48 | 49 | DerivedCoreProperties.txt: 50 | $(CURL) $(CURLFLAGS) -o $@ -O $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/DerivedCoreProperties.txt 51 | 52 | CompositionExclusions.txt: 53 | $(CURL) $(CURLFLAGS) -o $@ -O $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/CompositionExclusions.txt 54 | 
55 | CaseFolding.txt: 56 | $(CURL) $(CURLFLAGS) -o $@ -O $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/CaseFolding.txt 57 | 58 | NormalizationTest.txt: 59 | $(CURL) $(CURLFLAGS) -o $@ -O $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/NormalizationTest.txt 60 | 61 | GraphemeBreakTest.txt: 62 | $(CURL) $(CURLFLAGS) $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/GraphemeBreakTest.txt | $(PERL) -pe 's,÷,/,g;s,×,+,g' > $@ 63 | 64 | clean: 65 | rm -f UnicodeData.txt EastAsianWidth.txt GraphemeBreakProperty.txt DerivedCoreProperties.txt CompositionExclusions.txt CaseFolding.txt NormalizationTest.txt GraphemeBreakTest.txt CharWidths.txt unifont*.ttf unifont*.sfd 66 | rm -f utf8proc_data.c.new 67 | -------------------------------------------------------------------------------- /rts/utf8proc/data/charwidths.jl: -------------------------------------------------------------------------------- 1 | # Following work by @jiahao, we compute character widths using a combination of 2 | # * advance widths from GNU Unifont (advance width 512 = 1 en) 3 | # * UAX 11: East Asian Width 4 | # * a few exceptions as needed 5 | # Adapted from http://nbviewer.ipython.org/gist/jiahao/07e8b08bf6d8671e9734 6 | # 7 | # Requires Julia (obviously) and FontForge. 8 | 9 | ############################################################################# 10 | # Julia 0.3/0.4 compatibility (taken from Compat package) 11 | if VERSION < v"0.4.0-dev+1387" 12 | typealias AbstractString String 13 | end 14 | if VERSION < v"0.4.0-dev+1419" 15 | const UInt32 = Uint32 16 | end 17 | if VERSION < v"0.4.0-dev+3874" 18 | Base.parse{T<:Integer}(::Type{T}, s::AbstractString) = parseint(T, s) 19 | end 20 | 21 | CharWidths = Dict{Int,Int}() 22 | 23 | ############################################################################# 24 | # Use ../libutf8proc for category codes, rather than the one in Julia, 25 | # to minimize bootstrapping complexity when a new version of Unicode comes out. 26 | catcode(c) = ccall((:utf8proc_category,"../libutf8proc"), Cint, (Int32,), c) 27 | 28 | # use Base.UTF8proc module to get category codes constants, since 29 | # we won't change these in utf8proc. 30 | import Base.UTF8proc 31 | 32 | ############################################################################# 33 | # Use a default width of 1 for all character categories that are 34 | # letter/symbol/number-like. This can be overriden by Unifont or UAX 11 35 | # below, but provides a useful nonzero fallback for new codepoints when 36 | # a new Unicode version has been released but Unifont hasn't been updated yet. 
37 | 38 | zerowidth = Set{Int}() # categories that may contain zero-width chars 39 | push!(zerowidth, UTF8proc.UTF8PROC_CATEGORY_CN) 40 | push!(zerowidth, UTF8proc.UTF8PROC_CATEGORY_MN) 41 | push!(zerowidth, UTF8proc.UTF8PROC_CATEGORY_MC) 42 | push!(zerowidth, UTF8proc.UTF8PROC_CATEGORY_ME) 43 | push!(zerowidth, UTF8proc.UTF8PROC_CATEGORY_SK) 44 | push!(zerowidth, UTF8proc.UTF8PROC_CATEGORY_ZS) 45 | push!(zerowidth, UTF8proc.UTF8PROC_CATEGORY_ZL) 46 | push!(zerowidth, UTF8proc.UTF8PROC_CATEGORY_ZP) 47 | push!(zerowidth, UTF8proc.UTF8PROC_CATEGORY_CC) 48 | push!(zerowidth, UTF8proc.UTF8PROC_CATEGORY_CF) 49 | push!(zerowidth, UTF8proc.UTF8PROC_CATEGORY_CS) 50 | push!(zerowidth, UTF8proc.UTF8PROC_CATEGORY_CO) 51 | for c in 0x0000:0x110000 52 | if catcode(c) ∉ zerowidth 53 | CharWidths[c] = 1 54 | end 55 | end 56 | 57 | ############################################################################# 58 | # Widths from GNU Unifont 59 | 60 | #Read sfdfile for character widths 61 | function parsesfd(filename::AbstractString, CharWidths::Dict{Int,Int}=Dict{Int,Int}()) 62 | state=:seekchar 63 | lineno = 0 64 | codepoint = width = nothing 65 | for line in readlines(open(filename)) 66 | lineno += 1 67 | if state==:seekchar #StartChar: nonmarkingreturn 68 | if contains(line, "StartChar: ") 69 | codepoint = nothing 70 | width = nothing 71 | state = :readdata 72 | end 73 | elseif state==:readdata #Encoding: 65538 -1 2, Width: 1024 74 | contains(line, "Encoding:") && (codepoint = parse(Int, split(line)[3])) 75 | contains(line, "Width:") && (width = parse(Int, split(line)[2])) 76 | if codepoint!=nothing && width!=nothing && codepoint >= 0 77 | w=div(width, 512) # 512 units to the en 78 | if w > 0 79 | # only add nonzero widths, since (1) the default is zero 80 | # and (2) this circumvents some apparent bugs in Unifont 81 | # (https://savannah.gnu.org/bugs/index.php?45395) 82 | CharWidths[codepoint] = w 83 | end 84 | state = :seekchar 85 | end 86 | end 87 | end 88 | CharWidths 89 | end 90 | CharWidths=parsesfd("unifont.sfd", CharWidths) 91 | CharWidths=parsesfd("unifont_upper.sfd", CharWidths) 92 | 93 | ############################################################################# 94 | # Widths from UAX #11: East Asian Width 95 | # .. these take precedence over the Unifont width for all codepoints 96 | # listed explicitly as wide/full/narrow/half-width 97 | 98 | for line in readlines(open("EastAsianWidth.txt")) 99 | #Strip comments 100 | line[1] == '#' && continue 101 | precomment = split(line, '#')[1] 102 | #Parse code point range and width code 103 | tokens = split(precomment, ';') 104 | length(tokens) >= 2 || continue 105 | charrange = tokens[1] 106 | width = strip(tokens[2]) 107 | #Parse code point range into Julia UnitRange 108 | rangetokens = split(charrange, "..") 109 | charstart = parse(UInt32, "0x"*rangetokens[1]) 110 | charend = parse(UInt32, "0x"*rangetokens[length(rangetokens)>1 ? 2 : 1]) 111 | 112 | #Assign widths 113 | for c in charstart:charend 114 | if width=="W" || width=="F" # wide or full 115 | CharWidths[c]=2 116 | elseif width=="Na"|| width=="H" # narrow or half 117 | CharWidths[c]=1 118 | end 119 | end 120 | end 121 | 122 | ############################################################################# 123 | # A few exceptions to the above cases, found by manual comparison 124 | # to other wcwidth functions and similar checks. 
125 | 126 | for c in keys(CharWidths) 127 | cat = catcode(c) 128 | 129 | # make sure format control character (category Cf) have width 0, 130 | # except for the Arabic characters 0x06xx (see unicode std 6.2, sec. 8.2) 131 | if cat==UTF8proc.UTF8PROC_CATEGORY_CF && c ∉ [0x0601,0x0602,0x0603,0x06dd] 132 | CharWidths[c]=0 133 | end 134 | 135 | # Unifont has nonzero width for a number of non-spacing combining 136 | # characters, e.g. (in 7.0.06): f84,17b4,17b5,180b,180d,2d7f, and 137 | # the variation selectors 138 | if cat==UTF8proc.UTF8PROC_CATEGORY_MN 139 | CharWidths[c]=0 140 | end 141 | 142 | # We also assign width of zero to unassigned and private-use 143 | # codepoints (Unifont includes ConScript Unicode Registry PUA fonts, 144 | # but since these are nonstandard it seems questionable to recognize them). 145 | if cat==UTF8proc.UTF8PROC_CATEGORY_CO || cat==UTF8proc.UTF8PROC_CATEGORY_CN 146 | CharWidths[c]=0 147 | end 148 | 149 | # for some reason, Unifont has width-2 glyphs for ASCII control chars 150 | if cat==UTF8proc.UTF8PROC_CATEGORY_CC 151 | CharWidths[c]=0 152 | end 153 | end 154 | 155 | #By definition, should have zero width (on the same line) 156 | #0x002028 '
' category: Zl name: LINE SEPARATOR/ 157 | #0x002029 '
' category: Zp name: PARAGRAPH SEPARATOR/ 158 | CharWidths[0x2028]=0 159 | CharWidths[0x2029]=0 160 | 161 | #By definition, should be narrow = width of 1 en space 162 | #0x00202f ' ' category: Zs name: NARROW NO-BREAK SPACE/ 163 | CharWidths[0x202f]=1 164 | 165 | #By definition, should be wide = width of 1 em space 166 | #0x002001 ' ' category: Zs name: EM QUAD/ 167 | #0x002003 ' ' category: Zs name: EM SPACE/ 168 | CharWidths[0x2001]=2 169 | CharWidths[0x2003]=2 170 | 171 | ############################################################################# 172 | # Output (to a file or pipe) for processing by data_generator.rb 173 | # ... don't bother to output zero widths since that will be the default. 174 | 175 | firstc = 0x000000 176 | lastv = 0 177 | uhex(c) = uppercase(hex(c,4)) 178 | for c in 0x0000:0x110000 179 | v = get(CharWidths, c, 0) 180 | if v != lastv || c == 0x110000 181 | v < 4 || error("invalid charwidth $v for $c") 182 | if firstc+1 < c 183 | println(uhex(firstc), "..", uhex(c-1), "; ", lastv) 184 | else 185 | println(uhex(firstc), "; ", lastv) 186 | end 187 | firstc = c 188 | lastv = v 189 | end 190 | end 191 | -------------------------------------------------------------------------------- /rts/utf8proc/lump.md: -------------------------------------------------------------------------------- 1 | ``` 2 | U+0020 <-- all space characters (general category Zs) 3 | U+0027 ' <-- left/right single quotation mark U+2018..2019, 4 | modifier letter apostrophe U+02BC, 5 | modifier letter vertical line U+02C8 6 | U+002D - <-- all dash characters (general category Pd), 7 | minus U+2212 8 | U+002F / <-- fraction slash U+2044, 9 | division slash U+2215 10 | U+003A : <-- ratio U+2236 11 | U+003C < <-- single left-pointing angle quotation mark U+2039, 12 | left-pointing angle bracket U+2329, 13 | left angle bracket U+3008 14 | U+003E > <-- single right-pointing angle quotation mark U+203A, 15 | right-pointing angle bracket U+232A, 16 | right angle bracket U+3009 17 | U+005C \ <-- set minus U+2216 18 | U+005E ^ <-- modifier letter up arrowhead U+02C4, 19 | modifier letter circumflex accent U+02C6, 20 | caret U+2038, 21 | up arrowhead U+2303 22 | U+005F _ <-- all connector characters (general category Pc), 23 | modifier letter low macron U+02CD 24 | U+0060 ` <-- modifier letter grave accent U+02CB 25 | U+007C | <-- divides U+2223 26 | U+007E ~ <-- tilde operator U+223C 27 | ``` 28 | -------------------------------------------------------------------------------- /rts/utf8proc/test/case.c: -------------------------------------------------------------------------------- 1 | #include "tests.h" 2 | #include 3 | 4 | int main(int argc, char **argv) 5 | { 6 | int error = 0, better = 0; 7 | utf8proc_int32_t c; 8 | 9 | (void) argc; /* unused */ 10 | (void) argv; /* unused */ 11 | 12 | /* some simple sanity tests of the character widths */ 13 | for (c = 0; c <= 0x110000; ++c) { 14 | utf8proc_int32_t l = utf8proc_tolower(c); 15 | utf8proc_int32_t u = utf8proc_toupper(c); 16 | 17 | check(l == c || utf8proc_codepoint_valid(l), "invalid tolower"); 18 | check(u == c || utf8proc_codepoint_valid(u), "invalid toupper"); 19 | 20 | if (sizeof(wint_t) > 2 || c < (1<<16)) { 21 | wint_t l0 = towlower(c), u0 = towupper(c); 22 | 23 | /* OS unicode tables may be out of date. But if they 24 | do have a lower/uppercase mapping, hopefully it 25 | is correct? 
*/ 26 | if (l0 != c && l0 != l) { 27 | fprintf(stderr, "MISMATCH %x != towlower(%x) == %x\n", 28 | l, c, l0); 29 | ++error; 30 | } 31 | else if (l0 != l) { /* often true for out-of-date OS unicode */ 32 | ++better; 33 | /* printf("%x != towlower(%x) == %x\n", l, c, l0); */ 34 | } 35 | if (u0 != c && u0 != u) { 36 | fprintf(stderr, "MISMATCH %x != towupper(%x) == %x\n", 37 | u, c, u0); 38 | ++error; 39 | } 40 | else if (u0 != u) { /* often true for out-of-date OS unicode */ 41 | ++better; 42 | /* printf("%x != towupper(%x) == %x\n", u, c, u0); */ 43 | } 44 | } 45 | } 46 | check(!error, "utf8proc case conversion FAILED %d tests.", error); 47 | printf("More up-to-date than OS unicode tables for %d tests.\n", better); 48 | printf("utf8proc case conversion tests SUCCEEDED.\n"); 49 | return 0; 50 | } 51 | -------------------------------------------------------------------------------- /rts/utf8proc/test/charwidth.c: -------------------------------------------------------------------------------- 1 | #include "tests.h" 2 | #include 3 | #include 4 | 5 | static int my_isprint(int c) { 6 | int cat = utf8proc_get_property(c)->category; 7 | return (UTF8PROC_CATEGORY_LU <= cat && cat <= UTF8PROC_CATEGORY_ZS) || 8 | (c == 0x0601 || c == 0x0602 || c == 0x0603 || c == 0x06dd); 9 | } 10 | 11 | int main(int argc, char **argv) 12 | { 13 | int c, error = 0, updates = 0; 14 | 15 | (void) argc; /* unused */ 16 | (void) argv; /* unused */ 17 | 18 | /* some simple sanity tests of the character widths */ 19 | for (c = 0; c <= 0x110000; ++c) { 20 | int cat = utf8proc_get_property(c)->category; 21 | int w = utf8proc_charwidth(c); 22 | if ((cat == UTF8PROC_CATEGORY_MN || cat == UTF8PROC_CATEGORY_ME) && 23 | w > 0) { 24 | fprintf(stderr, "nonzero width %d for combining char %x\n", w, c); 25 | error = 1; 26 | } 27 | if (w == 0 && 28 | ((cat >= UTF8PROC_CATEGORY_LU && cat <= UTF8PROC_CATEGORY_LO) || 29 | (cat >= UTF8PROC_CATEGORY_ND && cat <= UTF8PROC_CATEGORY_SC) || 30 | (cat >= UTF8PROC_CATEGORY_SO && cat <= UTF8PROC_CATEGORY_ZS))) { 31 | fprintf(stderr, "zero width for symbol-like char %x\n", c); 32 | error = 1; 33 | } 34 | if (c <= 127 && ((!isprint(c) && w > 0) || 35 | (isprint(c) && wcwidth(c) != w))) { 36 | fprintf(stderr, "wcwidth %d mismatch %d for %s ASCII %x\n", 37 | wcwidth(c), w, 38 | isprint(c) ? "printable" : "non-printable", c); 39 | error = 1; 40 | } 41 | if (!my_isprint(c) && w > 0) { 42 | fprintf(stderr, "non-printing %x had width %d\n", c, w); 43 | error = 1; 44 | } 45 | } 46 | check(!error, "utf8proc_charwidth FAILED tests."); 47 | 48 | /* print some other information by compariing with system wcwidth */ 49 | printf("Mismatches with system wcwidth (not necessarily errors):\n"); 50 | for (c = 0; c <= 0x110000; ++c) { 51 | int w = utf8proc_charwidth(c); 52 | int wc = wcwidth(c); 53 | if (sizeof(wchar_t) == 2 && c >= (1<<16)) continue; 54 | /* lots of these errors for out-of-date system unicode tables */ 55 | if (wc == -1 && my_isprint(c) && w > 0) { 56 | updates += 1; 57 | #if 0 58 | printf(" wcwidth(%x) = -1 for printable char\n", c); 59 | #endif 60 | } 61 | if (wc == -1 && !my_isprint(c) && w > 0) 62 | printf(" wcwidth(%x) = -1 for non-printable width-%d char\n", c, w); 63 | if (wc >= 0 && wc != w) 64 | printf(" wcwidth(%x) = %d != charwidth %d\n", c, wc, w); 65 | } 66 | printf(" ... 
(positive widths for %d chars unknown to wcwidth) ...\n", 67 | updates); 68 | printf("Character-width tests SUCCEEDED.\n"); 69 | 70 | return 0; 71 | } 72 | -------------------------------------------------------------------------------- /rts/utf8proc/test/custom.c: -------------------------------------------------------------------------------- 1 | #include "tests.h" 2 | 3 | static int thunk_test = 1; 4 | 5 | static utf8proc_int32_t custom(utf8proc_int32_t codepoint, void *thunk) 6 | { 7 | check(((int *) thunk) == &thunk_test, "unexpected thunk passed"); 8 | if (codepoint == 'a') 9 | return 'b'; 10 | if (codepoint == 'S') 11 | return 0x00df; /* ß */ 12 | return codepoint; 13 | } 14 | 15 | int main(void) 16 | { 17 | utf8proc_uint8_t input[] = {0x41,0x61,0x53,0x62,0xef,0xbd,0x81,0x00}; /* "AaSb\uff41" */ 18 | utf8proc_uint8_t correct[] = {0x61,0x62,0x73,0x73,0x62,0x61,0x00}; /* "abssba" */ 19 | utf8proc_uint8_t *output; 20 | utf8proc_map_custom(input, 0, &output, UTF8PROC_CASEFOLD | UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_NULLTERM, 21 | custom, &thunk_test); 22 | printf("mapped \"%s\" -> \"%s\"\n", (char*)input, (char*)output); 23 | check(strlen((char*) output) == 6, "incorrect output length"); 24 | check(!memcmp(correct, output, 7), "incorrect output data"); 25 | free(output); 26 | return 0; 27 | } 28 | -------------------------------------------------------------------------------- /rts/utf8proc/test/graphemetest.c: -------------------------------------------------------------------------------- 1 | #include "tests.h" 2 | 3 | int main(int argc, char **argv) 4 | { 5 | char *buf = NULL; 6 | size_t bufsize = 0; 7 | FILE *f = argc > 1 ? fopen(argv[1], "r") : NULL; 8 | utf8proc_uint8_t src[1024]; 9 | int len; 10 | 11 | check(f != NULL, "error opening GraphemeBreakTest.txt"); 12 | while (getline(&buf, &bufsize, f) > 0) { 13 | size_t bi = 0, si = 0; 14 | lineno += 1; 15 | 16 | if (lineno % 100 == 0) 17 | printf("checking line %zd...\n", lineno); 18 | 19 | if (buf[0] == '#') continue; 20 | 21 | while (buf[bi]) { 22 | bi = skipspaces(buf, bi); 23 | if (buf[bi] == '/') { /* grapheme break */ 24 | src[si++] = '/'; 25 | bi++; 26 | } 27 | else if (buf[bi] == '+') { /* no break */ 28 | bi++; 29 | } 30 | else if (buf[bi] == '#') { /* start of comments */ 31 | break; 32 | } 33 | else { /* hex-encoded codepoint */ 34 | len = encode((char*) (src + si), buf + bi) - 1; 35 | while (src[si]) ++si; /* advance to NUL termination */ 36 | bi += len; 37 | } 38 | } 39 | if (si && src[si-1] == '/') 40 | --si; /* no break after final grapheme */ 41 | src[si] = 0; /* NUL-terminate */ 42 | 43 | if (si) { 44 | utf8proc_uint8_t utf8[1024]; /* copy src without 0xff grapheme separators */ 45 | size_t i = 0, j = 0; 46 | utf8proc_ssize_t glen; 47 | utf8proc_uint8_t *g; /* utf8proc_map grapheme results */ 48 | while (i < si) { 49 | if (src[i] != '/') 50 | utf8[j++] = src[i++]; 51 | else 52 | i++; 53 | } 54 | glen = utf8proc_map(utf8, j, &g, UTF8PROC_CHARBOUND); 55 | if (glen == UTF8PROC_ERROR_INVALIDUTF8) { 56 | /* the test file contains surrogate codepoints, which are only for UTF-16 */ 57 | printf("line %zd: ignoring invalid UTF-8 codepoints\n", lineno); 58 | } 59 | else { 60 | check(glen >= 0, "utf8proc_map error = %s", 61 | utf8proc_errmsg(glen)); 62 | for (i = 0; i <= glen; ++i) 63 | if (g[i] == 0xff) 64 | g[i] = '/'; /* easier-to-read output (/ is not in test strings) */ 65 | check(!strcmp((char*)g, (char*)src), 66 | "grapheme mismatch: \"%s\" instead of \"%s\"", (char*)g, (char*)src); 67 | } 68 | free(g); 69 | } 70 
| } 71 | fclose(f); 72 | printf("Passed tests after %zd lines!\n", lineno); 73 | return 0; 74 | } 75 | -------------------------------------------------------------------------------- /rts/utf8proc/test/iterate.c: -------------------------------------------------------------------------------- 1 | #include "tests.h" 2 | #include 3 | #include 4 | 5 | static int tests; 6 | static int error; 7 | 8 | #define CHECKVALID(pos, val, len) buf[pos] = val; testbytes(buf,len,len,__LINE__) 9 | #define CHECKINVALID(pos, val, len) buf[pos] = val; testbytes(buf,len,UTF8PROC_ERROR_INVALIDUTF8,__LINE__) 10 | 11 | static void testbytes(unsigned char *buf, int len, utf8proc_ssize_t retval, int line) 12 | { 13 | utf8proc_int32_t out[16]; 14 | utf8proc_ssize_t ret; 15 | 16 | /* Make a copy to ensure that memory is left uninitialized after "len" 17 | * bytes. This way, Valgrind can detect overreads. 18 | */ 19 | unsigned char tmp[16]; 20 | memcpy(tmp, buf, len); 21 | 22 | tests++; 23 | if ((ret = utf8proc_iterate(tmp, len, out)) != retval) { 24 | fprintf(stderr, "Failed (%d):", line); 25 | for (int i = 0; i < len ; i++) { 26 | fprintf(stderr, " 0x%02x", tmp[i]); 27 | } 28 | fprintf(stderr, " -> %zd\n", ret); 29 | error++; 30 | } 31 | } 32 | 33 | int main(int argc, char **argv) 34 | { 35 | uint32_t byt; 36 | unsigned char buf[16]; 37 | 38 | tests = error = 0; 39 | 40 | // Check valid sequences that were considered valid erroneously before 41 | buf[0] = 0xef; 42 | buf[1] = 0xb7; 43 | for (byt = 0x90; byt < 0xa0; byt++) { 44 | CHECKVALID(2, byt, 3); 45 | } 46 | // Check 0xfffe and 0xffff 47 | buf[1] = 0xbf; 48 | CHECKVALID(2, 0xbe, 3); 49 | CHECKVALID(2, 0xbf, 3); 50 | // Check 0x??fffe & 0x??ffff 51 | for (byt = 0x1fffe; byt < 0x110000; byt += 0x10000) { 52 | buf[0] = 0xf0 | (byt >> 18); 53 | buf[1] = 0x80 | ((byt >> 12) & 0x3f); 54 | CHECKVALID(3, 0xbe, 4); 55 | CHECKVALID(3, 0xbf, 4); 56 | } 57 | 58 | // Continuation byte not after lead 59 | for (byt = 0x80; byt < 0xc0; byt++) { 60 | CHECKINVALID(0, byt, 1); 61 | } 62 | 63 | // Continuation byte not after lead 64 | for (byt = 0x80; byt < 0xc0; byt++) { 65 | CHECKINVALID(0, byt, 1); 66 | } 67 | 68 | // Test lead bytes 69 | for (byt = 0xc0; byt <= 0xff; byt++) { 70 | // Single lead byte at end of string 71 | CHECKINVALID(0, byt, 1); 72 | // Lead followed by non-continuation character < 0x80 73 | CHECKINVALID(1, 65, 2); 74 | // Lead followed by non-continuation character > 0xbf 75 | CHECKINVALID(1, 0xc0, 2); 76 | } 77 | 78 | // Test overlong 2-byte 79 | buf[0] = 0xc0; 80 | for (byt = 0x81; byt <= 0xbf; byt++) { 81 | CHECKINVALID(1, byt, 2); 82 | } 83 | buf[0] = 0xc1; 84 | for (byt = 0x80; byt <= 0xbf; byt++) { 85 | CHECKINVALID(1, byt, 2); 86 | } 87 | 88 | // Test overlong 3-byte 89 | buf[0] = 0xe0; 90 | buf[2] = 0x80; 91 | for (byt = 0x80; byt <= 0x9f; byt++) { 92 | CHECKINVALID(1, byt, 3); 93 | } 94 | 95 | // Test overlong 4-byte 96 | buf[0] = 0xf0; 97 | buf[2] = 0x80; 98 | buf[3] = 0x80; 99 | for (byt = 0x80; byt <= 0x8f; byt++) { 100 | CHECKINVALID(1, byt, 4); 101 | } 102 | 103 | // Test 4-byte > 0x10ffff 104 | buf[0] = 0xf4; 105 | buf[2] = 0x80; 106 | buf[3] = 0x80; 107 | for (byt = 0x90; byt <= 0xbf; byt++) { 108 | CHECKINVALID(1, byt, 4); 109 | } 110 | buf[1] = 0x80; 111 | for (byt = 0xf5; byt <= 0xf7; byt++) { 112 | CHECKINVALID(0, byt, 4); 113 | } 114 | 115 | // Test 5-byte 116 | buf[4] = 0x80; 117 | for (byt = 0xf8; byt <= 0xfb; byt++) { 118 | CHECKINVALID(0, byt, 5); 119 | } 120 | 121 | // Test 6-byte 122 | buf[5] = 0x80; 123 | for (byt = 0xfc; byt <= 
0xfd; byt++) { 124 | CHECKINVALID(0, byt, 6); 125 | } 126 | 127 | // Test 7-byte 128 | buf[6] = 0x80; 129 | CHECKINVALID(0, 0xfe, 7); 130 | 131 | // Three and above byte sequences 132 | for (byt = 0xe0; byt < 0xf0; byt++) { 133 | // Lead followed by only 1 continuation byte 134 | CHECKINVALID(0, byt, 2); 135 | // Lead ended by non-continuation character < 0x80 136 | CHECKINVALID(2, 65, 3); 137 | // Lead ended by non-continuation character > 0xbf 138 | CHECKINVALID(2, 0xc0, 3); 139 | } 140 | 141 | // 3-byte encoded surrogate character(s) 142 | buf[0] = 0xed; buf[2] = 0x80; 143 | // Single surrogate 144 | CHECKINVALID(1, 0xa0, 3); 145 | // Trailing surrogate first 146 | CHECKINVALID(1, 0xb0, 3); 147 | 148 | // Four byte sequences 149 | buf[1] = 0x80; 150 | for (byt = 0xf0; byt < 0xf5; byt++) { 151 | // Lead followed by only 1 continuation bytes 152 | CHECKINVALID(0, byt, 2); 153 | // Lead followed by only 2 continuation bytes 154 | CHECKINVALID(0, byt, 3); 155 | // Lead followed by non-continuation character < 0x80 156 | CHECKINVALID(3, 65, 4); 157 | // Lead followed by non-continuation character > 0xbf 158 | CHECKINVALID(3, 0xc0, 4); 159 | 160 | } 161 | 162 | check(!error, "utf8proc_iterate FAILED %d tests out of %d", error, tests); 163 | printf("utf8proc_iterate tests SUCCEEDED, (%d) tests passed.\n", tests); 164 | 165 | return 0; 166 | } 167 | -------------------------------------------------------------------------------- /rts/utf8proc/test/normtest.c: -------------------------------------------------------------------------------- 1 | #include "tests.h" 2 | 3 | #define CHECK_NORM(NRM, norm, src) { \ 4 | char *src_norm = (char*) utf8proc_ ## NRM((utf8proc_uint8_t*) src); \ 5 | check(!strcmp(norm, src_norm), \ 6 | "normalization failed for %s -> %s", src, norm); \ 7 | free(src_norm); \ 8 | } 9 | 10 | int main(int argc, char **argv) 11 | { 12 | char *buf = NULL; 13 | size_t bufsize = 0; 14 | FILE *f = argc > 1 ? 
fopen(argv[1], "r") : NULL; 15 | char source[1024], NFC[1024], NFD[1024], NFKC[1024], NFKD[1024]; 16 | 17 | check(f != NULL, "error opening NormalizationTest.txt"); 18 | while (getline(&buf, &bufsize, f) > 0) { 19 | size_t offset; 20 | lineno += 1; 21 | 22 | if (buf[0] == '@') { 23 | printf("line %zd: %s", lineno, buf + 1); 24 | continue; 25 | } 26 | else if (lineno % 1000 == 0) 27 | printf("checking line %zd...\n", lineno); 28 | 29 | if (buf[0] == '#') continue; 30 | 31 | offset = encode(source, buf); 32 | offset += encode(NFC, buf + offset); 33 | offset += encode(NFD, buf + offset); 34 | offset += encode(NFKC, buf + offset); 35 | offset += encode(NFKD, buf + offset); 36 | 37 | CHECK_NORM(NFC, NFC, source); 38 | CHECK_NORM(NFC, NFC, NFC); 39 | CHECK_NORM(NFC, NFC, NFD); 40 | CHECK_NORM(NFC, NFKC, NFKC); 41 | CHECK_NORM(NFC, NFKC, NFKD); 42 | 43 | CHECK_NORM(NFD, NFD, source); 44 | CHECK_NORM(NFD, NFD, NFC); 45 | CHECK_NORM(NFD, NFD, NFD); 46 | CHECK_NORM(NFD, NFKD, NFKC); 47 | CHECK_NORM(NFD, NFKD, NFKD); 48 | 49 | CHECK_NORM(NFKC, NFKC, source); 50 | CHECK_NORM(NFKC, NFKC, NFC); 51 | CHECK_NORM(NFKC, NFKC, NFD); 52 | CHECK_NORM(NFKC, NFKC, NFKC); 53 | CHECK_NORM(NFKC, NFKC, NFKD); 54 | 55 | CHECK_NORM(NFKD, NFKD, source); 56 | CHECK_NORM(NFKD, NFKD, NFC); 57 | CHECK_NORM(NFKD, NFKD, NFD); 58 | CHECK_NORM(NFKD, NFKD, NFKC); 59 | CHECK_NORM(NFKD, NFKD, NFKD); 60 | } 61 | fclose(f); 62 | printf("Passed tests after %zd lines!\n", lineno); 63 | return 0; 64 | } 65 | -------------------------------------------------------------------------------- /rts/utf8proc/test/printproperty.c: -------------------------------------------------------------------------------- 1 | /* simple test program to print out the utf8proc properties for a codepoint */ 2 | 3 | #include "tests.h" 4 | 5 | int main(int argc, char **argv) 6 | { 7 | int i; 8 | 9 | for (i = 1; i < argc; ++i) { 10 | unsigned int c; 11 | if (!strcmp(argv[i], "-V")) { 12 | printf("utf8proc version %s\n", utf8proc_version()); 13 | continue; 14 | } 15 | check(sscanf(argv[i],"%x",&c) == 1, "invalid hex input %s", argv[i]); 16 | const utf8proc_property_t *p = utf8proc_get_property(c); 17 | printf("U+%s:\n" 18 | " category = %s\n" 19 | " combining_class = %d\n" 20 | " bidi_class = %d\n" 21 | " decomp_type = %d\n" 22 | " uppercase_mapping = %x\n" 23 | " lowercase_mapping = %x\n" 24 | " titlecase_mapping = %x\n" 25 | " comb_index = %d\n" 26 | " bidi_mirrored = %d\n" 27 | " comp_exclusion = %d\n" 28 | " ignorable = %d\n" 29 | " control_boundary = %d\n" 30 | " boundclass = %d\n" 31 | " charwidth = %d\n", 32 | argv[i], 33 | utf8proc_category_string(c), 34 | p->combining_class, 35 | p->bidi_class, 36 | p->decomp_type, 37 | utf8proc_toupper(c), 38 | utf8proc_tolower(c), 39 | utf8proc_totitle(c), 40 | p->comb_index, 41 | p->bidi_mirrored, 42 | p->comp_exclusion, 43 | p->ignorable, 44 | p->control_boundary, 45 | p->boundclass, 46 | utf8proc_charwidth(c)); 47 | } 48 | return 0; 49 | } 50 | -------------------------------------------------------------------------------- /rts/utf8proc/test/tests.c: -------------------------------------------------------------------------------- 1 | /* Common functions for our test programs. */ 2 | 3 | #include "tests.h" 4 | 5 | size_t lineno = 0; 6 | 7 | void check(int cond, const char *format, ...) 
8 | { 9 | if (!cond) { 10 | va_list args; 11 | fprintf(stderr, "line %zd: ", lineno); 12 | va_start(args, format); 13 | vfprintf(stderr, format, args); 14 | va_end(args); 15 | fprintf(stderr, "\n"); 16 | exit(1); 17 | } 18 | } 19 | 20 | size_t skipspaces(const char *buf, size_t i) 21 | { 22 | while (isspace(buf[i])) ++i; 23 | return i; 24 | } 25 | 26 | /* if buf points to a sequence of codepoints encoded as hexadecimal strings, 27 | separated by whitespace, and terminated by any character not in 28 | [0-9a-fA-F] or whitespace, then stores the corresponding utf8 string 29 | in dest, returning the number of bytes read from buf */ 30 | size_t encode(char *dest, const char *buf) 31 | { 32 | size_t i = 0, j, d = 0; 33 | for (;;) { 34 | int c; 35 | i = skipspaces(buf, i); 36 | for (j=i; buf[j] && strchr("0123456789abcdef", tolower(buf[j])); ++j) 37 | ; /* find end of hex input */ 38 | if (j == i) { /* no codepoint found */ 39 | dest[d] = 0; /* NUL-terminate destination string */ 40 | return i + 1; 41 | } 42 | check(sscanf(buf + i, "%x", (unsigned int *)&c) == 1, "invalid hex input %s", buf+i); 43 | i = j; /* skip to char after hex input */ 44 | d += utf8proc_encode_char(c, (utf8proc_uint8_t *) (dest + d)); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /rts/utf8proc/test/tests.h: -------------------------------------------------------------------------------- 1 | /* Common functions and includes for our test programs. */ 2 | 3 | /* 4 | * Set feature macro to enable getline() and wcwidth(). 5 | * 6 | * Please refer to section 2.2.1 of POSIX.1-2008: 7 | * http://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_02_01_02 8 | */ 9 | #define _XOPEN_SOURCE 700 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include "../utf8proc.h" 18 | 19 | extern size_t lineno; 20 | 21 | void check(int cond, const char *format, ...); 22 | size_t skipspaces(const char *buf, size_t i); 23 | size_t encode(char *dest, const char *buf); 24 | -------------------------------------------------------------------------------- /rts/utf8proc/test/valid.c: -------------------------------------------------------------------------------- 1 | #include "tests.h" 2 | #include 3 | #include 4 | 5 | int main(int argc, char **argv) 6 | { 7 | int c, error = 0; 8 | 9 | (void) argc; /* unused */ 10 | (void) argv; /* unused */ 11 | 12 | /* some simple sanity tests of */ 13 | for (c = 0; c < 0xd800; c++) { 14 | if (!utf8proc_codepoint_valid(c)) { 15 | fprintf(stderr, "Failed: codepoint_valid(%04x) -> false\n", c); 16 | error++; 17 | } 18 | } 19 | for (;c < 0xe000; c++) { 20 | if (utf8proc_codepoint_valid(c)) { 21 | fprintf(stderr, "Failed: codepoint_valid(%04x) -> true\n", c); 22 | error++; 23 | } 24 | } 25 | for (;c < 0x110000; c++) { 26 | if (!utf8proc_codepoint_valid(c)) { 27 | fprintf(stderr, "Failed: codepoint_valid(%06x) -> false\n", c); 28 | error++; 29 | } 30 | } 31 | for (;c < 0x110010; c++) { 32 | if (utf8proc_codepoint_valid(c)) { 33 | fprintf(stderr, "Failed: codepoint_valid(%06x) -> true\n", c); 34 | error++; 35 | } 36 | } 37 | check(!error, "utf8proc_codepoint_valid FAILED %d tests.", error); 38 | printf("Validity tests SUCCEEDED.\n"); 39 | 40 | return 0; 41 | } 42 | -------------------------------------------------------------------------------- /rts/utf8proc/utils.cmake: -------------------------------------------------------------------------------- 1 | 2 | function (disallow_intree_builds) 3 | # Adapted from 
LLVM's toplevel CMakeLists.txt file 4 | if( CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR AND NOT MSVC_IDE ) 5 | message(FATAL_ERROR " 6 | In-source builds are not allowed. CMake would overwrite the 7 | makefiles distributed with utf8proc. Please create a directory 8 | and run cmake from there. Building in a subdirectory is 9 | fine, e.g.: 10 | 11 | mkdir build 12 | cd build 13 | cmake .. 14 | 15 | This process created the file `CMakeCache.txt' and the 16 | directory `CMakeFiles'. Please delete them. 17 | 18 | ") 19 | endif() 20 | endfunction() 21 | -------------------------------------------------------------------------------- /shell.nix: -------------------------------------------------------------------------------- 1 | (import ./release.nix).env -------------------------------------------------------------------------------- /src/lasca/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Strict #-} 2 | module Main where 3 | 4 | import qualified Lasca.Compiler 5 | 6 | main :: IO () 7 | main = Lasca.Compiler.main 8 | -------------------------------------------------------------------------------- /src/lib/Lasca/Compiler.hs: -------------------------------------------------------------------------------- 1 | module Lasca.Compiler where 2 | 3 | import Lasca.Namer 4 | import Lasca.Desugar 5 | import Lasca.Codegen 6 | import Lasca.EmitCommon 7 | import Lasca.Emit 8 | import qualified Lasca.EmitStatic as EmitStatic 9 | import qualified Lasca.EmitDynamic as EmitDynamic 10 | import Lasca.JIT 11 | import Lasca.Infer 12 | import Lasca.Type 13 | import Lasca.Syntax 14 | import Lasca.Options 15 | import Lasca.Modules 16 | 17 | 18 | import Control.Monad 19 | import Data.Maybe 20 | import Text.Printf 21 | import qualified Data.Text as T 22 | import qualified Data.ByteString.Char8 as Char8 23 | 24 | import System.Info 25 | import System.Environment 26 | import System.Exit 27 | import System.Process 28 | import System.Directory 29 | import System.FilePath 30 | import System.IO ( hGetContents ) 31 | import System.IO.Error 32 | import Data.List 33 | import qualified Data.Map.Strict as Map 34 | import Debug.Trace as Debug 35 | import Control.Applicative 36 | 37 | import qualified LLVM.Module as LLVM 38 | import qualified LLVM.Target as LLVM 39 | import qualified LLVM.Relocation as Reloc 40 | import qualified LLVM.Target.Options as TO 41 | import qualified LLVM.CodeModel as CodeModel 42 | import qualified LLVM.CodeGenOpt as CodeGenOpt 43 | 44 | 45 | parsePhase opts filename = do 46 | exists <- doesFileExist filename 47 | if exists then do 48 | absoluteFilePath <- canonicalizePath filename 49 | searchPaths <- moduleSearchPaths 50 | (imported, mainModule) <- loadModule searchPaths Map.empty [] absoluteFilePath (Name $ T.pack filename) 51 | let linearized = linearizeIncludes mainModule 52 | let ex = foldr (\m exprs -> moduleExprs m ++ exprs) [] linearized 53 | -- Debug.traceM $ printf "AAA %s\n%s" (show exprs1) (show ex) 54 | when (verboseMode opts) $ putStrLn $ printf "Parsed OK, imported %s, linearized: %s" (show imported) (show linearized) 55 | when (printAst opts) $ mapM_ print ex 56 | when (verboseMode opts) $ putStrLn ("Compiler mode is " ++ show (mode opts)) 57 | return ex 58 | else error $ printf "Couldn't open file %s" (show filename) 59 | 60 | runPhases opts filename = do 61 | exprs <- parsePhase opts filename 62 | let (named, state) = namerPhase opts exprs 63 | let ctx = _context state 64 | let mainModule = _currentModule state 65 | let 
mainFunctionName = NS mainModule "main" 66 | let desugared = desugarPhase ctx named 67 | typed <- if mode opts == Static 68 | then typerPhase opts ctx filename desugared 69 | else return desugared 70 | let desugared2 = patmatPhase ctx typed 71 | let desugared3 = lambdaLiftPhase ctx desugared2 -- must be after typechecking 72 | let !desugared4 = delambdafyPhase ctx desugared3 -- must be after typechecking 73 | when (printAst opts) $ putStrLn $ intercalate "\n" (map printExprWithType desugared4) 74 | let mod = codegenPhase ctx filename desugared4 mainFunctionName 75 | if exec opts then do 76 | when (verboseMode opts) $ putStrLn "Running JIT" 77 | runJIT opts mod 78 | else compileExecutable opts filename mod 79 | 80 | typerPhase opts ctx filename exprs = do 81 | result <- typeCheck ctx exprs 82 | case result of 83 | Right (env, typedExprs) -> do 84 | when (verboseMode opts) $ putStrLn "typechecked OK" 85 | when (printTypes opts) $ putStrLn (showPretty env) 86 | return typedExprs 87 | Left e -> do 88 | dir <- getCurrentDirectory 89 | let source = dir filename 90 | die (source ++ ":" ++ showTypeError e) 91 | 92 | codegenPhase context filename exprs mainFunctionName = do 93 | let opts = _lascaOpts context 94 | let modo = emptyModule filename 95 | let cgen = if mode opts == Static then EmitStatic.cgen else EmitDynamic.cgen 96 | let ctx = collectGlobals context exprs 97 | runLLVM modo $ do 98 | declareStdFuncs 99 | fmt <- genFunctionMap exprs 100 | let defs = reverse (_dataDefs ctx) 101 | tst <- genTypesStruct ctx defs 102 | genRuntime opts fmt tst 103 | forM_ exprs $ \expr -> do 104 | defineStringConstants expr 105 | codegenTop ctx cgen expr 106 | codegenStartFunc ctx cgen mainFunctionName 107 | 108 | processMainFile :: LascaOpts -> String -> IO () 109 | processMainFile opts filename = runPhases opts filename 110 | 111 | findCCompiler = do 112 | ccEnv <- lookupEnv "CC" 113 | cl5 <- findExecutable "clang-5" 114 | clang <- findExecutable "clang" 115 | gcc <- findExecutable "gcc" 116 | return $ ccEnv <|> cl5 <|> clang <|> gcc 117 | 118 | withHostTargetMachine :: (LLVM.TargetMachine -> IO a) -> IO a 119 | withHostTargetMachine f = do 120 | LLVM.initializeAllTargets 121 | triple <- LLVM.getProcessTargetTriple 122 | cpu <- LLVM.getHostCPUName 123 | features <- LLVM.getHostCPUFeatures 124 | (target, _) <- LLVM.lookupTarget Nothing triple 125 | LLVM.withTargetOptions $ \options -> 126 | LLVM.withTargetMachine target triple cpu features options Reloc.PIC CodeModel.Default CodeGenOpt.Default f 127 | 128 | 129 | compileExecutable opts fname mod = do 130 | withOptimizedModule opts mod $ \context m -> do 131 | ll <- LLVM.moduleLLVMAssembly m 132 | let asm = Char8.unpack ll 133 | writeFile (fname ++ ".ll") asm 134 | withHostTargetMachine $ \tm -> LLVM.writeObjectToFile tm (LLVM.File (fname ++ ".o")) m 135 | let outputPath = case outputFile opts of 136 | [] -> dropExtension fname 137 | path -> path 138 | let optLevel = optimization opts 139 | let optimizationOpts = ["-O" ++ show optLevel | optLevel > 0] 140 | result <- findCCompiler 141 | lascaPathEnv <- lookupEnv "LASCAPATH" 142 | let lascaPath = fromMaybe "." lascaPathEnv 143 | absLascaPathEnv <- mapM canonicalizePath (splitSearchPath lascaPath) 144 | let cc = fromMaybe (error "Did find C compiler. Install Clang or GCC, or define CC environment variable") result 145 | lascartStaticLink = ["-llascartStatic"] 146 | lascartDynamicLink = ["-llascart"] 147 | libLascaLink = ["-rdynamic"] 148 | -- passes --export-dynamic to the linker. 
149 | -- Needed for OrcJit to to able to dynamicly load generated `main` function 150 | ++ lascartStaticLink 151 | -- ++ lascartDynamicLink 152 | libDirs = fmap (\p -> "-L" ++ p) absLascaPathEnv 153 | links = ["-lgc", "-lffi", "-lm", "-lpcre2-8"] 154 | -- object files must be specified before libraries for successful static linking 155 | let args = optimizationOpts ++ [ "-o", outputPath, fname ++ ".o"] ++ libDirs ++ ["-fPIC", "-g"] ++ libLascaLink ++ links 156 | let command = unwords $ cc : args 157 | when (verboseMode opts) $ putStrLn command 158 | (output, errCode) <- getProcessOutput command 159 | -- putStrLn output 160 | when (errCode /= ExitSuccess) $ die output 161 | -- return () 162 | 163 | getProcessOutput :: String -> IO (String, ExitCode) 164 | getProcessOutput command = 165 | -- Create the process 166 | do (_pIn, pOut, pErr, handle) <- runInteractiveCommand command 167 | -- Wait for the process to finish and store its exit code 168 | exitCode <- waitForProcess handle 169 | -- Get the standard output. 170 | output <- hGetContents pOut 171 | stderr <- hGetContents pErr 172 | -- return both the output and the exit code. 173 | return (output ++ stderr, exitCode) 174 | 175 | runLasca :: LascaOpts -> IO () 176 | runLasca opts = do 177 | if null (lascaFiles opts) 178 | then die ("need file") -- TODO show help 179 | else do 180 | let file = head (lascaFiles opts) 181 | processMainFile opts file 182 | 183 | main :: IO () 184 | main = do 185 | opts <- parseOptions 186 | runLasca opts 187 | -------------------------------------------------------------------------------- /src/lib/Lasca/Emit.hs: -------------------------------------------------------------------------------- 1 | module Lasca.Emit (codegenTop, collectGlobals) where 2 | 3 | import Text.Printf 4 | import Data.String 5 | import Data.Text (Text) 6 | import qualified Data.Text as T 7 | import qualified Data.Text.Encoding as Encoding 8 | import Data.Map.Strict (Map) 9 | import qualified Data.Map.Strict as Map 10 | import Data.Set (Set) 11 | import qualified Data.Set as Set 12 | import qualified Debug.Trace as Debug 13 | 14 | import Control.Monad.State 15 | import Control.Lens.Operators 16 | 17 | import Lasca.Codegen 18 | import Lasca.Type 19 | import Lasca.EmitCommon 20 | import qualified Lasca.EmitStatic as EmitStatic 21 | import Lasca.Syntax 22 | 23 | genExternalFuncWrapper ctx f@(Let True meta name returnType lam _) = do 24 | modState <- get 25 | let codeGenResult = codeGen modState 26 | blocks = createBlocks codeGenResult 27 | retType = typeMapping returnType 28 | 29 | define retType (nameToSBS name) (toSig externArgs) blocks 30 | where 31 | (externArgs, Literal _ (StringLit externName)) = uncurryLambda lam 32 | codeGen modState = execCodegen [] modState $ do 33 | entry <- addBlock entryBlockName 34 | setBlock entry 35 | let argTypes = map (\(Arg n t) -> t) externArgs 36 | largs <- forM externArgs $ \(Arg n tpe) -> do 37 | let argName = nameToSBS n 38 | let ref = typeToLaTypeRef tpe 39 | when (isDynamicMode ctx && tpe `Set.member` autoBoxedTypes) $ do 40 | r <- callBuiltin "unbox" [constOp ref, localPtr argName] -- check primitive types 41 | return () 42 | EmitStatic.resolveBoxing EmitStatic.anyTypeVar tpe (localPtr argName) 43 | let retType = externalTypeMapping returnType 44 | -- Debug.traceM $ printf "%s genExternalFuncWrapper %s, retType %s" (show name) (show $ externFuncLLvmType f) (show retType) 45 | res <- call (externFuncLLvmType f) (textToSBS externName) largs 46 | wrapped <- EmitStatic.resolveBoxing returnType 
EmitStatic.anyTypeVar res 47 | ret wrapped 48 | genExternalFuncWrapper ctx other = error $ "genExternalFuncWrapper got " ++ (show other) 49 | 50 | 51 | collectGlobals ctx exprs = do 52 | execState (mapM toplevel exprs) ctx 53 | where 54 | toplevel expr = case expr of 55 | Let False meta name _ expr EmptyExpr -> globalVals %= Map.insert name expr 56 | Let True meta name _ lam EmptyExpr -> globalFunctions %= Map.insert name expr 57 | _ -> return () 58 | 59 | codegenTop ctx cgen topExpr = case topExpr of 60 | this@(Let False meta name _ expr _) -> do 61 | modify (\s -> s { _globalValsInit = _globalValsInit s ++ [(name, expr)] }) 62 | let valType = llvmTypeOf this 63 | -- Debug.traceM $ printf "Cons %s: %s" (show name) (show valType) 64 | defineGlobal (nameToSBS name) valType (Just $ defaultValueForType valType) 65 | 66 | f@(Let True meta name tpe lam _) -> do 67 | if meta ^. isExternal then do 68 | let (Literal _ (StringLit externName)) = body 69 | external (externalTypeMapping tpe) (textToSBS externName) (externArgsToSig args) False [] 70 | genExternalFuncWrapper ctx f 71 | else do 72 | modState <- get 73 | let codeGenResult = codeGen modState 74 | let blocks = createBlocks codeGenResult 75 | mapM_ defineStringLit (generatedStrings codeGenResult) 76 | let retType = mappedReturnType args funcType 77 | define retType (nameToSBS name) largs blocks 78 | where 79 | (args, body) = uncurryLambda lam 80 | 81 | funcType = typeOf lam 82 | largs = map (\(n, t) -> (nameToSBS n, t)) argsWithTypes 83 | 84 | funcTypeToLlvm (Arg name _) (TypeFunc a b, acc) = (b, (name, typeMapping a) : acc) 85 | funcTypeToLlvm arg t = error $ "AAA3" ++ show arg ++ show t 86 | 87 | argsWithTypes = do 88 | -- Debug.traceM $ printf "codegenTop %s(%s): %s" (show name) (show args) (show funcType) 89 | reverse $ snd $ foldr funcTypeToLlvm (funcType, []) (reverse args) 90 | 91 | codeGen modState = execCodegen [] modState $ do 92 | -- Debug.traceM $ printf "argsWithTypes %s" (show argsWithTypes) 93 | entry <- addBlock entryBlockName 94 | setBlock entry 95 | forM_ argsWithTypes $ \(n, t) -> do 96 | var <- alloca t 97 | store var (local t (nameToSBS n)) 98 | -- Debug.traceM $ printf "assign %s: %s = %s" n (show t) (show var) 99 | assign n var 100 | cgen ctx body >>= ret 101 | 102 | (Data _ name tvars constructors) -> return () 103 | Module{} -> return () 104 | Import{} -> return () 105 | _ -> error $ printf "Expression of this kind should not get to codegenTop. It's a bug. 
%s at %s" 106 | (show topExpr) (show $ exprPosition topExpr) -------------------------------------------------------------------------------- /src/lib/Lasca/EmitDynamic.hs: -------------------------------------------------------------------------------- 1 | module Lasca.EmitDynamic where 2 | 3 | import LLVM.Module 4 | import LLVM.Context 5 | import LLVM.Analysis 6 | import LLVM.PassManager 7 | 8 | import qualified LLVM.AST as AST 9 | import qualified LLVM.AST.Global 10 | import qualified LLVM.AST.Type as T 11 | import qualified LLVM.AST.Instruction as I 12 | import qualified LLVM.AST.Constant as C 13 | import qualified LLVM.AST.Float as F 14 | import qualified LLVM.AST.IntegerPredicate as IP 15 | import qualified LLVM.AST.FloatingPointPredicate as FP 16 | import qualified LLVM.AST.FunctionAttribute as FA 17 | import qualified LLVM.AST.IntegerPredicate as IPred 18 | 19 | -- import qualified Data.Text as Text 20 | import qualified Data.ByteString as ByteString 21 | import qualified Data.Text.Encoding as Encoding 22 | import Text.Printf 23 | import qualified Data.ByteString.UTF8 as UTF8 24 | import Data.String 25 | import qualified Data.ByteString.Char8 as Char8 26 | import qualified Data.ByteString as BS 27 | import qualified Data.ByteString.Short as SBS 28 | 29 | import LLVM.ExecutionEngine ( withMCJIT, withModuleInEngine, getFunction ) 30 | 31 | import qualified Data.Text 32 | import qualified Data.ByteString 33 | import qualified Data.Text.Encoding 34 | import Data.Digest.Murmur32 35 | import Data.Maybe 36 | import qualified Data.List as List 37 | import Data.Word 38 | import Data.Int 39 | import Control.Monad.State 40 | import Control.Monad.Except 41 | import Control.Applicative 42 | import qualified Control.Lens as Lens 43 | import Control.Lens.Operators 44 | import Data.Map.Strict (Map) 45 | import qualified Data.Map.Strict as Map 46 | import Data.Set (Set) 47 | import qualified Data.Set as Set 48 | import qualified Data.Sequence as Seq 49 | import qualified Debug.Trace as Debug 50 | import System.Exit 51 | import System.Directory 52 | import System.FilePath 53 | 54 | import Lasca.Codegen 55 | import Lasca.Type 56 | import Lasca.EmitCommon 57 | import Lasca.Infer 58 | import qualified Lasca.Syntax as S 59 | import Lasca.Syntax (Ctx) 60 | import qualified Lasca.Options as Opts 61 | 62 | cgen :: Ctx -> S.Expr -> Codegen AST.Operand 63 | cgen ctx (S.Let False meta a _ b c) = do 64 | i <- alloca $ llvmTypeOf b 65 | val <- cgen ctx b 66 | store i val 67 | assign a i 68 | cgen ctx c 69 | cgen ctx (S.Ident meta name) = do 70 | syms <- gets symtab 71 | modState <- gets moduleState 72 | let mapping = functions modState 73 | case lookup name syms of 74 | Just x -> 75 | -- Debug.trace ("Local " ++ show name) 76 | load x 77 | Nothing | name `Map.member` S._globalFunctions ctx -> boxClosure name mapping [] 78 | | name `Map.member` S._globalVals ctx -> load (globalOp ptrType (nameToSBS name)) 79 | | otherwise -> boxError (nameToText name) 80 | cgen ctx (S.Literal meta l) = do 81 | -- Debug.traceM $ "Generating literal " ++ show l ++ " on " ++ show (S.pos meta) 82 | boxLit l meta 83 | cgen ctx this@(S.Array meta exprs) = do 84 | vs <- sequence [cgen ctx e | e <- exprs] 85 | boxArray vs 86 | cgen ctx this@(S.Select meta tree expr) = cgenSelect ctx this 87 | 88 | cgen ctx this@(S.Apply meta (S.Ident _ "unary-") [expr]) = cgenApplyUnOp ctx this 89 | cgen ctx this@(S.Apply meta (S.Ident _ fn) [lhs, rhs]) | fn `Map.member` binops = cgenApplyBinOp ctx this 90 | cgen ctx (S.Apply meta expr args) = cgenApply 
ctx meta expr args 91 | cgen ctx (S.Closure _ funcName enclosedVars) = do 92 | modState <- gets moduleState 93 | let mapping = functions modState 94 | boxClosure funcName mapping enclosedVars 95 | cgen ctx m@S.Match{} = 96 | error $ printf "Match expressions should be already desugared! %s at: %s" (show m) (show $ S.exprPosition m) 97 | cgen ctx (S.If meta cond tr fl) = cgenIfDynamic ctx meta cond tr fl 98 | cgen ctx e = error ("cgen shit " ++ show e) 99 | 100 | cgenIfDynamic ctx meta cond tr fl = do 101 | let resultType = llvmTypeOf tr 102 | let test = do 103 | cond <- cgen ctx cond 104 | -- unbox Bool 105 | bool <- unboxBoolDynamically cond 106 | instr (I.ICmp IP.EQ bool constTrue []) 107 | cgenIf resultType test (cgen ctx tr) (cgen ctx fl) 108 | 109 | cgenSelect ctx this@(S.Select meta tree expr) = do 110 | tree <- cgen ctx tree 111 | e <- cgen ctx expr 112 | let pos = createPosition $ S.pos meta 113 | callBuiltin "runtimeSelect" [tree, e, constOp pos] 114 | cgenSelect ctx e = error ("cgenSelect should only be called on Select, but called on" ++ show e) 115 | 116 | cgenApplyUnOp ctx this@(S.Apply meta op@(S.Ident _ "unary-") [expr]) = do 117 | lexpr <- cgen ctx expr 118 | callBuiltin "runtimeUnaryOp" [constIntOp 1, lexpr] 119 | cgenApplyUnOp ctx e = error ("cgenApplyUnOp should only be called on Apply, but called on" ++ show e) 120 | 121 | cgenApplyBinOp ctx (S.Apply meta (S.Ident _ fn) [lhs, rhs]) = do 122 | llhs <- cgen ctx lhs 123 | lrhs <- cgen ctx rhs 124 | let code = fromMaybe (error ("Couldn't find binop " ++ show fn)) (Map.lookup fn binops) 125 | let codeOp = constIntOp code 126 | callBuiltin "runtimeBinOp" [codeOp, llhs, lrhs] 127 | cgenApplyBinOp ctx e = error ("cgenApplyBinOp should only be called on Apply, but called on" ++ show e) 128 | 129 | cgenApply ctx meta expr args = do 130 | syms <- gets symtab 131 | let symMap = Map.fromList syms 132 | let isGlobal fn = (fn `Map.member` S._globalFunctions ctx) && not (fn `Map.member` symMap) 133 | case expr of 134 | -- TODO Here are BUGZZZZ!!!! :) 135 | -- TODO check arguments! 136 | -- this is done to speed-up calls if you `a global function 137 | S.Ident _ fn | isGlobal fn -> do 138 | let f = S._globalFunctions ctx Map.! fn 139 | -- Debug.traceM $ printf "Calling %s" fn 140 | largs <- forM args $ \arg -> cgen ctx arg 141 | call (funcLLvmType f) (nameToSBS fn) largs 142 | 143 | expr -> do 144 | modState <- gets moduleState 145 | e <- cgen ctx expr 146 | largs <- mapM (cgen ctx) args 147 | let argc = constIntOp (length largs) 148 | sargsPtr <- allocaSize ptrType argc 149 | let asdf (idx, arg) = do 150 | p <- getelementptr sargsPtr [idx] 151 | store p arg 152 | sargs <- bitcast sargsPtr ptrType -- runtimeApply accepts i8*, so need to bitcast. Remove when possible 153 | -- cdecl calling convension, arguments passed right to left 154 | sequence_ [asdf (constIntOp i, a) | (i, a) <- zip [0..] 
largs] 155 | let pos = createPosition $ S.pos meta 156 | callBuiltin "runtimeApply" [e, argc, sargs, constOp pos] 157 | -------------------------------------------------------------------------------- /src/lib/Lasca/JIT.hs: -------------------------------------------------------------------------------- 1 | module Lasca.JIT ( 2 | runJIT, 3 | withOptimizedModule 4 | ) where 5 | 6 | import Data.Int 7 | import Data.Word 8 | import qualified Data.Text.IO as T 9 | import qualified Data.Text.Lazy as LT 10 | import qualified Data.Text.IO as TIO 11 | import System.IO 12 | import Foreign.Ptr 13 | import Foreign.C.String 14 | import Foreign.C.Types 15 | import Foreign.Marshal.Array 16 | import Lasca.Syntax 17 | import Lasca.Options 18 | 19 | import Control.Monad.Except 20 | 21 | import qualified LLVM.AST as AST 22 | import LLVM.CodeModel 23 | import LLVM.Context 24 | import LLVM.Module as Mod 25 | import LLVM.Target hiding (withHostTargetMachine) 26 | 27 | import LLVM.Analysis 28 | import LLVM.PassManager 29 | import LLVM.Transforms 30 | import LLVM.OrcJIT 31 | import LLVM.OrcJIT.CompileLayer 32 | import LLVM.Linking (loadLibraryPermanently, getSymbolAddressInProcess) 33 | import qualified LLVM.CodeGenOpt as CodeGenOpt 34 | import qualified LLVM.CodeModel as CodeModel 35 | import qualified LLVM.Relocation as Reloc 36 | --import LLVM.Pretty (ppllvm) 37 | 38 | import qualified Data.ByteString as BS 39 | import qualified Data.ByteString.Char8 as Char8 40 | 41 | foreign import ccall "dynamic" mainFun :: FunPtr (Int -> Ptr CString -> IO ()) -> Int -> Ptr CString -> IO () 42 | 43 | passes :: Int -> PassSetSpec 44 | passes level = defaultCuratedPassSetSpec { optLevel = Just (fromIntegral level) } 45 | 46 | withHostTargetMachine :: (TargetMachine -> IO a) -> IO a 47 | withHostTargetMachine f = do 48 | initializeAllTargets 49 | triple <- getProcessTargetTriple 50 | cpu <- getHostCPUName 51 | features <- getHostCPUFeatures 52 | (target, _) <- lookupTarget Nothing triple 53 | withTargetOptions $ \options -> 54 | -- Make it PIC, otherwise it won't work with shared libraries 55 | withTargetMachine target triple cpu features options Reloc.PIC CodeModel.Default CodeGenOpt.Default f 56 | 57 | 58 | resolver :: IRCompileLayer l -> SymbolResolver 59 | resolver compileLayer = 60 | SymbolResolver 61 | (\s -> findSymbol compileLayer s True) 62 | (\s -> 63 | fmap (\a -> Right $ JITSymbol a (JITSymbolFlags False True False True)) (getSymbolAddressInProcess s) 64 | ) 65 | 66 | {- 67 | Read https://purelyfunctional.org/posts/2018-04-02-llvm-hs-jit-external-function.html 68 | for explanation. 
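Roughly: the SymbolResolver defined below resolves symbols that live in the JIT-compiled module itself via findSymbol on the compile layer, while symbols it does not define (the lascart runtime, libgc, libc, ...) are looked up with getSymbolAddressInProcess against the running lasca process. That in-process lookup only works because runJIT first calls loadLibraryPermanently Nothing, which makes the executable's own (dynamically exported) symbols visible to the JIT -- hence the -rdynamic link flag used in Lasca.Compiler.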
69 | -} 70 | runJIT :: LascaOpts -> AST.Module -> IO () 71 | runJIT opts mod = do 72 | -- putStrLn $ LT.unpack $ ppllvm mod 73 | b <- loadLibraryPermanently Nothing 74 | unless (not b) (error "Couldn’t load library") 75 | withOptimizedModule opts mod $ \context m -> 76 | withHostTargetMachine $ \tm -> 77 | withObjectLinkingLayer $ \linkingLayer -> 78 | withIRCompileLayer linkingLayer tm $ \compileLayer -> 79 | withModule compileLayer m 80 | (resolver compileLayer) $ \moduleHandle -> do 81 | mainSymbol <- mangleSymbol compileLayer "main" 82 | (Right (JITSymbol mainFn _)) <- findSymbol compileLayer mainSymbol True 83 | let args = lascaFiles opts 84 | let len = length args 85 | cargs <- mapM newCString args 86 | array <- mallocArray len 87 | pokeArray array cargs 88 | result <- mainFun (castPtrToFunPtr (wordPtrToPtr mainFn)) len array 89 | return () 90 | 91 | withOptimizedModule opts mod f = withContext $ \context -> 92 | withModuleFromAST context mod $ \m -> 93 | withPassManager (passes (optimization opts)) $ \pm -> do 94 | -- Optimization Pass 95 | -- linkModules m stdModule 96 | runPassManager pm m 97 | optmod <- moduleAST m 98 | when (printLLVMAsm opts) $ do 99 | s <- moduleLLVMAssembly m 100 | Char8.putStrLn s 101 | f context m 102 | -------------------------------------------------------------------------------- /src/lib/Lasca/Lexer.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TypeFamilies #-} 2 | {-# LANGUAGE FlexibleContexts #-} 3 | module Lasca.Lexer where 4 | 5 | import Data.Void 6 | import Data.Text (Text) 7 | import qualified Data.Text as T 8 | import Data.Scientific 9 | import Data.Char 10 | import qualified Data.List.NonEmpty as NonEmpty 11 | import Text.Megaparsec 12 | import Control.Monad (void, when) 13 | import Text.Megaparsec.Char 14 | import qualified Text.Megaparsec.Char.Lexer as L 15 | 16 | type Parser = Parsec Void Text 17 | 18 | ops = ["+","*","-","/",";", "==", ":=", "=",",",".","<",">","|",":"] 19 | keywords = ["module", "import", "data", "def", "extern", 20 | "if", "then", "else", "in", "let", "true", "false", "match", "do", "lazy", "var", "and", "not", "or" 21 | ] 22 | 23 | sc :: Parser () -- ‘sc’ stands for “space consumer” 24 | sc = L.space (void space1) lineComment blockComment 25 | where lineComment = (string "--" <|> string "#") *> void (takeWhileP (Just "character") (/= '\n')) 26 | blockComment = L.skipBlockComment "{-" "-}" 27 | 28 | identChar = alphaNumChar 29 | 30 | lexeme = L.lexeme sc 31 | 32 | symbol = L.symbol sc 33 | 34 | integer = lexeme (try (char '0' *> char' 'x' *> L.hexadecimal) 35 | <|> try (char '0' *> char' 'o' *> L.octal) 36 | <|> try L.decimal) 37 | 38 | stringLiteral :: Parser Text 39 | stringLiteral = do 40 | char '"' 41 | l <- manyTill L.charLiteral (char '"') 42 | return $ T.pack l 43 | 44 | float = lexeme L.float 45 | signedInteger = L.signed sc integer 46 | signedFloat = L.signed sc float 47 | parens = between (symbol "(") (symbol ")") 48 | brackets = between (symbol "[") (symbol "]") 49 | braces = between (symbol "{") (symbol "}") 50 | comma = symbol "," 51 | semi = symbol ";" 52 | commaSep p = p `sepBy` comma 53 | trailCommaSep p = p `sepEndBy` comma 54 | semiSep p = p `sepBy` semi 55 | 56 | reserved :: Text -> Parser () 57 | reserved w = string w *> notFollowedBy identChar *> sc 58 | 59 | reservedOp :: Text -> Parser () 60 | reservedOp w = string w *> notFollowedBy opChar *> sc 61 | 62 | identOp = lexeme $ some opChar 63 | 64 | upperIdentifier = lexeme $ try (do 65 | c <- 
upperChar 66 | T.cons c <$> idrest 67 | "uppercase identifier") 68 | 69 | identifier :: Parser Text 70 | identifier = lexeme $ try $ do 71 | ident <- identifierOrReserved 72 | when (ident `elem` keywords) $ unexpected . Label . NonEmpty.fromList $ "reserved " ++ (T.unpack ident) 73 | when (ident == "_") $ unexpected . Label . NonEmpty.fromList $ "wildcard" 74 | return ident 75 | 76 | idrest = takeWhileP Nothing (\ch -> isAlphaNum ch || ch == '_' || ch == '$') 77 | 78 | identifierOrReserved = lexeme $ try $ do 79 | c <- satisfy (\ch -> isAlpha ch || ch == '_' || ch == '$') 80 | T.cons c <$> idrest 81 | 82 | 83 | opChar :: Parser Char 84 | opChar = oneOf ("!$%&*+./<=>?@\\^|-~" :: String) 85 | 86 | operator :: Parser Text 87 | operator = do 88 | op <- some opChar 89 | lexeme $ return $ T.pack op 90 | -------------------------------------------------------------------------------- /src/lib/Lasca/Modules.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE TemplateHaskell #-} 3 | module Lasca.Modules where 4 | 5 | import Data.Maybe 6 | import qualified Data.Text as T 7 | import qualified Data.Text.IO as TIO 8 | import Text.Printf 9 | 10 | import Control.Monad.State 11 | import Control.Lens hiding ((<.>)) 12 | 13 | import Data.List 14 | import Data.IntMap.Strict ( IntMap ) 15 | import qualified Data.IntMap.Strict as IntMap 16 | import Data.Map.Strict ( Map ) 17 | import qualified Data.Map.Strict as Map 18 | 19 | import System.Environment 20 | import System.Exit 21 | import System.Directory 22 | import System.FilePath 23 | 24 | import Debug.Trace as Debug 25 | import qualified Text.Megaparsec as Megaparsec 26 | 27 | import Lasca.Syntax 28 | import Lasca.Parser 29 | import Lasca.Type 30 | 31 | data LascaModule = LascaModule { 32 | imports :: [LascaModule], 33 | moduleExprs :: [Expr], 34 | modName :: Name 35 | } 36 | 37 | instance Show LascaModule where 38 | show m = show (modName m) 39 | 40 | instance Eq LascaModule where 41 | lhs == rhs = modName lhs == modName rhs 42 | 43 | instance Ord LascaModule where 44 | compare lhs rhs = compare (modName lhs) (modName rhs) 45 | 46 | data Dependencies = Dependencies { 47 | _modsByLevel :: IntMap [LascaModule], 48 | _modLevel :: Map LascaModule Int 49 | } deriving (Show) 50 | makeLenses 'Dependencies 51 | 52 | calcModulesDependencies :: LascaModule -> Dependencies 53 | calcModulesDependencies lascaModule = execState 54 | (getMaxLevel lascaModule) 55 | (Dependencies {_modsByLevel = IntMap.empty, _modLevel = Map.empty}) 56 | where 57 | getMaxLevel :: LascaModule -> State Dependencies Int 58 | getMaxLevel m = do 59 | s <- get 60 | case Map.lookup m (s ^. modLevel) of 61 | Just l -> return l 62 | Nothing -> do 63 | let mods = imports m 64 | levels <- forM mods getMaxLevel 65 | let level = case levels of 66 | [] -> 0 67 | levels -> 1 + maximum levels 68 | modLevel %= Map.insert m level 69 | modsByLevel %= IntMap.alter (joinModules m) level 70 | return level 71 | where 72 | joinModules new Nothing = Just [new] 73 | joinModules new (Just mods) = Just $ new : mods 74 | 75 | linearizeIncludes :: LascaModule -> [LascaModule] 76 | linearizeIncludes lascaModule = do 77 | let all = calcModulesDependencies lascaModule 78 | let pathes = snd <$> IntMap.toList (all ^. 
modsByLevel) 79 | foldr (\mods path -> sort mods ++ path) [] pathes 80 | 81 | fixModuleAndImportPrelude :: FilePath -> [Expr] -> IO [Expr] 82 | fixModuleAndImportPrelude filename exprs = case exprs of 83 | (mod@(Module _ name): exprs) -> do 84 | when (takeBaseName filename /= T.unpack (last $ nameToList name)) $ 85 | die $ printf "Wrong module name in file %s. Module name should match file name, but was %s)" filename (show name) 86 | return $ mod : insertImportPrelude name exprs 87 | _ -> do 88 | let name = Name $ T.pack $ takeBaseName filename 89 | let mod = Module emptyMeta name 90 | return $ mod : insertImportPrelude name exprs 91 | 92 | insertImportPrelude :: Name -> [Expr] -> [Expr] 93 | insertImportPrelude name exprs = if name == Name "Prelude" then exprs else Import emptyMeta "Prelude" : exprs 94 | 95 | moduleSearchPaths :: IO [FilePath] 96 | moduleSearchPaths = do 97 | dir <- getCurrentDirectory 98 | lascaPathEnv <- lookupEnv "LASCAPATH" 99 | let lascaPaths = splitSearchPath $ fromMaybe "" lascaPathEnv 100 | absPaths <- mapM canonicalizePath lascaPaths 101 | existingPaths <- filterM doesDirectoryExist absPaths 102 | -- TODO add XDB paths 103 | return $ nub $ dir : existingPaths 104 | 105 | findModulePath :: [FilePath] -> Name -> IO FilePath 106 | findModulePath searchPaths name = do 107 | let relPath = path name <.> "lasca" 108 | result <- findFile searchPaths relPath 109 | case result of 110 | Just file -> return file 111 | Nothing -> error $ printf "Couldn't find module %s. Search path: %s" (show name) (show $ intercalate "," searchPaths) 112 | where 113 | path (Name n) = T.unpack n 114 | path (NS prefix n) = path prefix path n 115 | 116 | type Mapping = Map Name LascaModule 117 | 118 | loadModule :: [FilePath] -> Mapping -> [Name] -> FilePath -> Name -> IO (Mapping, LascaModule) 119 | loadModule searchPaths imported importPath absoluteFilePath name = do 120 | file <- TIO.readFile absoluteFilePath 121 | case parseToplevelFilename absoluteFilePath file of 122 | Left err -> die $ Megaparsec.parseErrorPretty err 123 | Right exprs -> do 124 | canonizedExprs <- fixModuleAndImportPrelude absoluteFilePath exprs 125 | let imports = getImports canonizedExprs 126 | (newImported, modules) <- loadImports searchPaths imported (name : importPath) imports 127 | let thisModule = LascaModule { modName = name, imports = modules, moduleExprs = canonizedExprs } 128 | return (Map.insert name thisModule newImported, thisModule) 129 | 130 | loadImports :: [FilePath] -> Mapping -> [Name] -> [Name] -> IO (Mapping, [LascaModule]) 131 | loadImports searchPaths imported importPath imports = do 132 | -- Debug.traceM $ printf "loadImports %s %s %s" (show imported) (show importPath) (show $ imports) 133 | foldM (\(imported, modules) name -> do 134 | (newImported, lascaModule) <- loadImport searchPaths imported importPath name 135 | return (Map.union imported newImported, lascaModule : modules) 136 | ) (imported, []) imports 137 | 138 | loadImport :: [FilePath] -> Mapping -> [Name] -> Name -> IO (Mapping, LascaModule) 139 | loadImport searchPaths imported importPath name = do 140 | -- Debug.traceM $ printf "loadImport %s %s %s" (show imported) (show importPath) (show name) 141 | when (name `elem` importPath) $ die (printf "Circular dependency in %s -> %s" (show importPath) (show name)) 142 | case name `Map.lookup` imported of 143 | Just lascaModule -> return (imported, lascaModule) 144 | Nothing -> do 145 | absoluteFilePath <- findModulePath searchPaths name 146 | loadModule searchPaths imported importPath 
absoluteFilePath name 147 | 148 | getImports :: [Expr] -> [Name] 149 | getImports exprs = foldl' folder [] exprs 150 | where 151 | folder imports (Import _ name) = name : imports 152 | folder imports _ = imports 153 | -------------------------------------------------------------------------------- /src/lib/Lasca/Options.hs: -------------------------------------------------------------------------------- 1 | module Lasca.Options ( 2 | LascaOpts(..), 3 | TypingMode(..), 4 | parseOptions, 5 | emptyLascaOpts 6 | ) where 7 | 8 | import Options.Applicative 9 | import Data.Semigroup ((<>)) 10 | import Data.Version 11 | import Paths_lasca (version) 12 | 13 | data TypingMode = Static | Dynamic deriving (Eq) 14 | instance Show TypingMode where 15 | show Static = "static" 16 | show Dynamic = "dynamic" 17 | 18 | instance Read TypingMode where 19 | readsPrec _ "static" = [(Static, "")] 20 | readsPrec _ "dynamic" = [(Dynamic, "")] 21 | readsPrec _ _ = [] 22 | 23 | data LascaOpts = LascaOpts 24 | { lascaFiles :: [String] 25 | , mode :: TypingMode 26 | , outputFile :: String 27 | , exec :: Bool 28 | , verboseMode :: Bool 29 | , printLLVMAsm :: Bool 30 | , printAst :: Bool 31 | , printTypes :: Bool 32 | , optimization :: Int 33 | } deriving (Show, Eq) 34 | 35 | emptyLascaOpts = LascaOpts { 36 | lascaFiles = [], 37 | mode = Static, 38 | outputFile = "", 39 | exec = False, 40 | verboseMode = False, 41 | printLLVMAsm = False, 42 | printAst = False, 43 | printTypes = False, 44 | optimization = 1 -- default to O1 to enable tail call optimization 45 | } 46 | 47 | optimizeOpt :: Parser Int 48 | optimizeOpt = option auto 49 | ( long "optimization-level" 50 | <> short 'O' 51 | <> value 0 52 | <> help "Optimization level for LLVM" ) 53 | 54 | lascaOptsParser :: Parser LascaOpts 55 | lascaOptsParser = LascaOpts 56 | <$> some (argument str (metavar "FILES...")) 57 | <*> option auto 58 | ( long "mode" 59 | <> short 'm' 60 | <> value Static 61 | <> help "Compiler mode. Options are [dynamic | static]. Static by default." 62 | ) 63 | <*> strOption 64 | ( short 'o' 65 | <> value "" 66 | <> help "Write output to FILE" 67 | ) 68 | <*> switch 69 | ( long "exec" 70 | <> short 'e' 71 | <> help "Execute immediately" ) 72 | <*> switch 73 | ( long "verbose" 74 | <> help "Verbose mode" ) 75 | <*> switch 76 | ( long "print-llvm" 77 | <> help "Print LLVM IR" ) 78 | <*> switch 79 | ( long "print-ast" 80 | <> help "Print AST" ) 81 | <*> switch 82 | ( long "print-types" 83 | <> help "Print inferred types" ) 84 | <*> optimizeOpt 85 | 86 | 87 | parseOptions = execParser opts 88 | where opts = info (helper <*> lascaOptsParser) 89 | ( fullDesc 90 | <> progDesc ("Lasca Compiler version " ++ v) 91 | <> header ("Lasca Compiler v" ++ v)) 92 | v = showVersion version -------------------------------------------------------------------------------- /src/lib/Lasca/Type.hs: -------------------------------------------------------------------------------- 1 | module Lasca.Type where 2 | 3 | import Data.List 4 | import Data.String 5 | import Data.Text (Text) 6 | import qualified Data.Text as T 7 | import qualified Data.Text.Encoding as Encoding 8 | import qualified Data.ByteString as BS 9 | import qualified Data.ByteString.Short as SBS 10 | import Data.Text.Prettyprint.Doc 11 | 12 | data Name = Name Text | NS Name Name deriving (Eq, Ord) 13 | 14 | instance IsString Name where 15 | fromString = Name . 
T.pack 16 | 17 | instance Show Name where 18 | show n = case n of 19 | Name s -> T.unpack s 20 | NS prefix n -> show prefix ++ "_" ++ show n 21 | 22 | nameToText n = case n of 23 | Name n -> n 24 | NS prefix n -> T.append (nameToText prefix) (T.cons '_' (nameToText n)) 25 | 26 | qualify mod name = if mod == defaultModuleQName then name else NS mod name 27 | 28 | qnameToString n = show n 29 | 30 | qname = Name 31 | 32 | textToSBS :: Text -> SBS.ShortByteString 33 | textToSBS = SBS.toShort . Encoding.encodeUtf8 34 | 35 | nameToSBS :: Name -> SBS.ShortByteString 36 | nameToSBS = textToSBS . nameToText 37 | 38 | nameToBS :: Name -> BS.ByteString 39 | nameToBS = Encoding.encodeUtf8 . nameToText 40 | 41 | nameToList (Name n) = [n] 42 | nameToList (NS prefix n) = nameToList prefix ++ nameToList n 43 | 44 | defaultModuleName = "Main" 45 | defaultModuleQName = Name defaultModuleName 46 | 47 | newtype TVar = TV Text 48 | deriving (Eq, Ord) 49 | 50 | instance Show TVar where 51 | show (TV s) = T.unpack s 52 | 53 | data Type 54 | = TVar TVar 55 | | TypeIdent Name 56 | | TypeFunc Type Type 57 | | TypeApply Type [Type] 58 | | Forall [TVar] Type 59 | deriving (Eq, Ord) 60 | 61 | instance Show Type where 62 | show (TVar (TV n)) = T.unpack n 63 | show (TypeIdent s) = show s 64 | show (TypeFunc l r) = "(" ++ show l ++ " -> " ++ show r ++ ")" 65 | show (TypeApply t args) = "(" ++ show t ++ foldl (\acc a -> acc ++ " " ++ show a) "" args ++ ")" 66 | show (Forall targs t) = "∀(" ++ intercalate "," (map show targs) ++ ") => " ++ show t 67 | 68 | instance Pretty Name where 69 | pretty n = case n of 70 | Name s -> pretty s 71 | NS prefix n -> pretty prefix <+> "_" <+> pretty n 72 | 73 | instance Pretty TVar where 74 | pretty (TV s) = pretty s 75 | 76 | instance Pretty Type where 77 | pretty t = case t of 78 | (TVar (TV n)) -> pretty n 79 | (TypeIdent s) -> pretty s 80 | (TypeFunc l r) -> parens $ pretty l <+> "->" <+> pretty r 81 | (TypeApply t args) -> parens $pretty t <+> foldl (\acc a -> acc <+> pretty a) "" args 82 | (Forall targs t) -> "∀" <> parens (hsep (punctuate comma (map pretty targs))) <+> "=>" <+> pretty t 83 | 84 | 85 | typeName tpe = case tpe of 86 | TypeIdent n -> n 87 | TypeApply t _ -> typeName t 88 | Forall _ t -> typeName t 89 | _ -> error $ "Should not happen. 
Type name can't be " ++ show tpe 90 | 91 | infixr `TypeFunc` 92 | 93 | pattern TypeByte = TypeIdent "Byte" 94 | pattern TypeInt = TypeIdent "Int" 95 | pattern TypeInt16 = TypeIdent "Int16" 96 | pattern TypeInt32 = TypeIdent "Int32" 97 | pattern TypeFloat = TypeIdent "Float" 98 | pattern TypeBool = TypeIdent "Bool" 99 | pattern TypeAny = TypeIdent "Any" 100 | pattern TypeString = TypeIdent "String" 101 | pattern TypeUnit = TypeIdent "Unit" 102 | pattern TypeArray t = TypeApply (TypeIdent "Array") [t] 103 | pattern TypeByteArray t = TypeApply (TypeIdent "ByteArray") [t] 104 | pattern TypeArrayInt = TypeArray TypeInt 105 | pattern TypeRef a = TypeApply (TypeIdent "Var") [a] 106 | 107 | isIntegralType (TypeIdent t) | t `elem` ["Byte", "Int", "Int16", "Int32"] = True 108 | isIntegralType _ = False 109 | 110 | 111 | isAny (TypeIdent "Any") = True 112 | isAny _ = False 113 | 114 | typeToList tpe = reverse $ go tpe [] 115 | where go (TypeFunc a b) acc = go b (a : acc) 116 | go (Forall tvars tpe) acc = go tpe acc 117 | go a acc = a : acc 118 | 119 | funcTypeArity this@(TypeFunc a b) = (length $ typeToList this) - 1 120 | funcTypeArity _ = 0 -------------------------------------------------------------------------------- /src/test/TestMain.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE ExtendedDefaultRules #-} 3 | {-# LANGUAGE TemplateHaskell #-} 4 | {-# OPTIONS_GHC -fno-warn-type-defaults #-} 5 | import Test.Tasty 6 | import Test.Tasty.SmallCheck as SC 7 | import Test.Tasty.QuickCheck as QC 8 | import Test.Tasty.HUnit 9 | import Test.Tasty.Golden as G 10 | import Test.Tasty.Program 11 | import System.FilePath 12 | import System.FilePath.Glob 13 | import System.Directory 14 | import Control.Exception as X 15 | import qualified Text.Megaparsec as Megaparsec 16 | import Shelly (shelly, run) 17 | 18 | import Data.Text (Text) 19 | import qualified Data.Text as T 20 | import qualified Data.Text.IO as TIO 21 | import Data.Text.Encoding as E 22 | import qualified Data.ByteString as BS 23 | import qualified Data.ByteString.Lazy as LBS 24 | 25 | import Lasca.Parser 26 | import Lasca.Syntax 27 | import Lasca.Infer 28 | import Lasca.Type 29 | import Lasca.Options 30 | import Lasca.Modules 31 | 32 | import Data.List 33 | import Data.Ord 34 | import Data.Foldable 35 | 36 | default (T.Text) 37 | 38 | main :: IO () 39 | main = do 40 | goldens <- foldMap mkGoldenTests examples 41 | defaultMain (testGroup "Tests" ([parserTests, parserTests2, modulesTests, typerTests] ++ goldens ++ compileTests)) 42 | 43 | modul n is = LascaModule { imports = is, moduleExprs = [], modName = n } 44 | 45 | prelude = modul "Prelude" [] 46 | arr = modul "Array" [prelude] 47 | opt = modul "Option" [prelude] 48 | lst = modul "List" [opt, prelude] 49 | test1 = modul "Test1" [test2] 50 | test2 = modul "Test2" [] 51 | queen = modul "Queen" [arr, lst, test1, test2] 52 | 53 | fromRight (Right a) = a 54 | 55 | modulesTests = testGroup "Module dependency tests" 56 | [ testCase "Linearize includes" $ linearizeIncludes queen @?= [prelude, test2, arr, opt, test1, lst, queen] ] 57 | 58 | parseOK s expected = testCase s $ case parseToplevel (T.pack s) of 59 | Right e -> assertEqual "" expected e 60 | Left e -> assertFailure $ (show expected) ++ " but got " ++ Megaparsec.parseErrorPretty e 61 | 62 | parseError s expected = testCase ("Error on " ++ s) $ case parseToplevel (T.pack s) of 63 | Right e -> assertFailure $ expected ++ " but got " ++ show e 64 | Left 
e -> assertEqual "" expected $ Megaparsec.parseErrorTextPretty e 65 | 66 | parseOkMatch s f = testCase s $ case parseToplevel (T.pack s) of 67 | Right p -> X.catch (f p) $ printErr (show p) 68 | Left e -> assertFailure $ Megaparsec.parseErrorPretty e 69 | 70 | printErr :: String -> SomeException -> IO () 71 | printErr got e = case fromException e of 72 | Just (PatternMatchFail s) -> assertFailure (s ++ "got " ++ got) 73 | nothing -> return () 74 | 75 | defaultModule = Module (withMetaPos 1 1) "" 76 | 77 | parseTypeOK s t = testCase s (parseType (T.pack s) @?= Right t) 78 | 79 | parserTests2 = testGroup "Parser tests" [ 80 | testCase "empty" $ parseToplevel "" @?= Right [] 81 | , parseOkMatch "module Test_1 . Test2 ; " 82 | (\ [Module _ (NS (Name "Test_1") (Name "Test2")) ] -> return ()) 83 | , parseOkMatch "import Test_1.Test2 ; import Test3" 84 | (\ [Import _ (NS (Name "Test_1") (Name "Test2")), Import _ "Test3" ] -> return ()) 85 | , parseError "module Test module Another" 86 | "unexpected 'm'\nexpecting end of input or import statement\n" 87 | , parseError "import Asdf. " 88 | "unexpected '.'\nexpecting end of input, import statement, or top-level declaration\n" 89 | , parseError "import ._$#@ " 90 | "unexpected '.'\nexpecting qualified identifier (like My.Qualified.Name or SomeName)\n" 91 | , parseOkMatch "data Void" (\ [Data _ "Void" _ _] -> return ()) 92 | , parseOkMatch "data Bool= |True|False" (\ [Data _ "Bool" [] [DataConst "True" [], DataConst "False" []]] -> return ()) 93 | , parseOkMatch "data User = U(n: String, f)" (\ [Data _ "User" [] [DataConst "U" _]] -> return ()) 94 | , parseError "data lower" "unexpected 'l'\nexpecting uppercase identifier\n" 95 | , parseError "data UppER=lower" "unexpected 'l'\nexpecting '|' or uppercase identifier\n" 96 | , parseTypeOK "String" (TypeIdent "String") 97 | , parseTypeOK "[Int]" (TypeApply (TypeIdent "Array") [TypeIdent "Int"]) 98 | , parseTypeOK "(a -> B) -> C" (TypeFunc (TypeFunc (TVar $ TV "a") (TypeIdent "B")) (TypeIdent "C")) 99 | ] 100 | 101 | parserTests = testGroup "Parser tests" 102 | [ testCase "Parse true" $ 103 | parseExpr "true" @?= Right (Literal emptyMeta (BoolLit True)) 104 | , testCase "Empty String" $ 105 | parseExpr "\"\"" @?= Right (Literal emptyMeta (StringLit "")) 106 | , testCase "Character Escaping" $ 107 | parseExpr "\"String\n\"" @?= Right (Literal emptyMeta (StringLit "String\n")) 108 | , testCase "String Interpolation" $ 109 | parseExpr "\"Hello\\t \\\\\\$${ test123 + 1 }\"" @?= Right (Apply emptyMeta (Ident emptyMeta (NS "Prelude" "concat")) [ 110 | Array emptyMeta [Literal emptyMeta $ StringLit "Hello\t \\$", 111 | Apply emptyMeta (Ident emptyMeta "toString") [Apply (withMetaPos 1 25) (Ident emptyMeta "+") [Ident emptyMeta "test123", Literal (withMetaPos 1 27) (IntLit 1)]]] 112 | ]) 113 | , testCase "Pattern matching" $ 114 | parseExpr "match true { true -> 1 }" @?= Right (Match emptyMeta (Literal emptyMeta (BoolLit True)) [ 115 | Case (LitPattern (BoolLit True)) (Literal (withMetaPos 1 24) (IntLit 1))]) 116 | , testCase "Pattern matching" $ 117 | parseExpr "match foo { Person(0, name, \"God\", None, _) -> 1 _ -> match false { true -> 4 } }" @?= Right ( 118 | Match emptyMeta (Ident emptyMeta "foo") [ 119 | Case (ConstrPattern "Person" [LitPattern (IntLit 0),VarPattern "name",LitPattern (StringLit "God"), 120 | ConstrPattern "None" [],WildcardPattern]) (Literal (withMetaPos 1 50) (IntLit 1)), 121 | Case WildcardPattern ( 122 | Match emptyMeta (Literal emptyMeta (BoolLit False)) [ 123 | Case (LitPattern 
(BoolLit True)) (Literal (withMetaPos 1 83) (IntLit 4))]) 124 | ]) 125 | ] 126 | 127 | typerTests = testGroup "Typer tests" 128 | [ 129 | testCase "Pattern matching" $ 130 | parseAndInferExpr "match true { true -> 1 false -> 2 }" @?= TypeInt 131 | ] 132 | 133 | data Mode = Dyn | Stat | Both 134 | data Config = Script { name :: String, compMode :: Mode, arguments :: [T.Text] } 135 | 136 | examples = [ 137 | Script "builtin.lasca" Both [], 138 | Script "Array.lasca" Both [], 139 | Script "ArrayBuffer.lasca" Both [], 140 | Script "String.lasca" Both [], 141 | Script "List.lasca" Both [], 142 | Script "binarytrees.lasca" Both ["10"], 143 | Script "Data.lasca" Both [], 144 | Script "dynamic.lasca" Dyn [], 145 | Script "Either.lasca" Both [], 146 | Script "factorial.lasca" Both ["15"], 147 | Script "hello.lasca" Both [], 148 | Script "lambda.lasca" Both [], 149 | Script "Map.lasca" Both [], 150 | Script "Option.lasca" Both [], 151 | Script "regex.lasca" Both [], 152 | Script "queen.lasca" Both [], 153 | Script "ski.lasca" Both [], 154 | Script "nbody.lasca" Both ["50000"], 155 | Script "nbody2.lasca" Both ["50000"], 156 | Script "nbody3.lasca" Both ["50000"] 157 | ] 158 | 159 | prependPath path script = script { name = path (name script) } 160 | withMode s m = s { compMode = m } 161 | 162 | mkGoldenTests s@(Script path mode args) = do 163 | let testName = takeBaseName path 164 | let goldenPath = "src" "test" "golden" replaceExtension path ".golden" 165 | let example = prependPath "examples" s 166 | let base = prependPath "libs/base" s 167 | e <- doesFileExist ("examples" path) 168 | let script = if e then example else base 169 | let tests = case mode of 170 | Both -> [ goldenVsString testName goldenPath (action (script `withMode` Stat)), 171 | goldenVsString testName goldenPath (action (script `withMode` Dyn))] 172 | _ -> [goldenVsString testName goldenPath (action script)] 173 | return tests 174 | where 175 | action (Script path mode args) = do 176 | let txtPath = T.pack path 177 | actual <- runLasca txtPath mode args 178 | let bs = E.encodeUtf8 actual 179 | return (LBS.fromStrict bs) 180 | 181 | runLasca path mode args = shelly $ do 182 | let extraArgs = case args of 183 | [] -> [] 184 | ars -> "--" : args 185 | case mode of 186 | Stat -> run "lasca" (["-e", "-O2", "--mode", "static", path] ++ extraArgs) 187 | Dyn -> run "lasca" (["-e", "-O2", "--mode", "dynamic", path] ++ extraArgs) 188 | Both -> do 189 | run "lasca" (["-e", "-O2", "--mode", "static", "--verbose", path] ++ extraArgs) 190 | run "lasca" (["-e", "-O2", "--mode", "dynamic", path] ++ extraArgs) 191 | 192 | compileTests = [ 193 | testProgram "Compile hello.lasca" "lasca" ["-O2", "-o", "hello", "examples/hello.lasca"] Nothing 194 | ] 195 | 196 | benchTests = testGroup "Bench" [ 197 | testCase "2 KLOC" $ parseAndInferFile "examples/gen.lasca", 198 | testCase "10 KLOC" $ parseAndInferFile "examples/gen10k.lasca" 199 | ] 200 | 201 | parseAndInferExpr str = let 202 | expr = fromRight $ parseExpr str 203 | Right (infered, _) = inferExpr (emptyCtx emptyLascaOpts) defaultTyenv expr 204 | in infered 205 | 206 | parseAndInferFile fname = do 207 | p <- TIO.readFile "libs/base/Prelude.lasca" 208 | let preludeExprs = fromRight $ parseToplevel p 209 | file <- TIO.readFile fname 210 | case parseToplevel file of 211 | Left err -> error $ Megaparsec.parseErrorPretty err 212 | Right ex -> do 213 | let exprs = preludeExprs ++ ex 214 | typeEnv <- typeCheck (emptyCtx emptyLascaOpts) exprs 215 | print typeEnv 216 | True @?= True 217 | 
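For reference, adding a script to the golden suite above only requires a new entry in the examples list plus a matching golden file. A minimal sketch follows; the script name and argument are hypothetical and not part of this repository:

-- Hypothetical entry for the `examples` list. mkGoldenTests would look for
-- examples/fizzbuzz.lasca (falling back to libs/base/fizzbuzz.lasca) and compare
-- the output of `lasca -e -O2` in both static and dynamic mode against
-- src/test/golden/fizzbuzz.golden.
fizzbuzzExample :: Config
fizzbuzzExample = Script "fizzbuzz.lasca" Both ["100"]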
-------------------------------------------------------------------------------- /src/test/golden/ArrayBuffer.golden: -------------------------------------------------------------------------------- 1 | ArrayBuffer_ArrayBuffer([, , ], 0) 2 | ArrayBuffer_ArrayBuffer([1, , ], 1) 3 | ArrayBuffer_ArrayBuffer([1, 2, ], 2) 4 | 4 6 5 | ArrayBuffer_ArrayBuffer([0, 1, 2, 3, , ], 4) 6 | ArrayBuffer_ArrayBuffer([0, 1, 2, 3, 4, ], 5) 7 | 20 12 8 | 20 24 9 | ArrayBuffer_ArrayBuffer([0, 1, 2, 3, 4, 5, 6, 7, 8, , , , , , , , , , , , , , , ], 9) 10 | ArrayBuffer_ArrayBuffer([0, 3, 4, 5, 6, 7, 8, 7, 8, , , , , , , , , , , , , , , ], 7) 11 | ArrayBuffer_ArrayBuffer([-2, -1, 0, 3, 4, 5, 7, 7, 7, 6, 7, 8, , , , , , , , , , , , ], 12) 12 | Test 12 false 42 13 | ArrayBuffer_ArrayBuffer([-2, 42, 0, 3, 4, 5, 7, 7, 7, 6, 7, 8, , , , , , , , , , , , ], 0) 14 | -------------------------------------------------------------------------------- /src/test/golden/Either.golden: -------------------------------------------------------------------------------- 1 | Left is true, right is left false, right is right true 2 | r1 should be 125 and is: Either_Right(125) 3 | -------------------------------------------------------------------------------- /src/test/golden/List.golden: -------------------------------------------------------------------------------- 1 | List_Cons(11, List_Cons(12, List_Nil)) 2 | Hello world! List_Cons(1, List_Cons(2, List_Nil)). Is empty: false, length = 2 3 | -------------------------------------------------------------------------------- /src/test/golden/Map.golden: -------------------------------------------------------------------------------- 1 | runtimeCompare = -1 2 | Test isEmpty should be true: true 3 | Test size should be 0: 0 4 | Test isEmpty should be false: false 5 | Test isEmpty should be true: true 6 | Test size should be 1: 1 7 | Lookup should be one: Option_Some(one) 8 | Lookup should be none: Option_None 9 | Test size should be 2: 2 10 | Lookup should be two: Option_Some(two) 11 | Lookup should be none: Option_None 12 | Test size should be 3: 3 13 | Lookup should be three: Option_Some(three) 14 | Lookup should be none: Option_None 15 | Test size should be 4: 4 16 | Lookup should be four: Option_Some(four) 17 | 4 is member of four: true 18 | Lookup should be none: Option_None 19 | Map_Bin(4, 2, two, Map_Bin(1, 1, one, Map_Tip, Map_Tip), Map_Bin(2, 3, three, Map_Tip, Map_Bin(1, 4, four, Map_Tip, Map_Tip))) 20 | 1000 21 | 999 22 | -------------------------------------------------------------------------------- /src/test/golden/Option.golden: -------------------------------------------------------------------------------- 1 | Even Option_None is Option_Some(true) 2 | -------------------------------------------------------------------------------- /src/test/golden/String.golden: -------------------------------------------------------------------------------- 1 | 84 2 | 868 3 | 84 4 | 101 5 | 225 6 | 115 7 | 116 8 | 117 9 | 868 10 | [84, 101, 225, 115, 116, 117, 868] 11 | 117 12 | length in codepoints = 7, length in bytes = 9, length in graphemes = 6 13 | 15 14 | toLower T = t teástuͤ 15 | toUpper å = Å TEÁSTUͤ 16 | toTitle å = Å TEÁSTUͤ 17 | capitalize Å Teástuͤ Aßdƒ 18 | compare -1 0 1 -1 0 1 19 | replace TeástT 20 | Teástuͤ startsWith T: true, endsWith uͤ: true 21 | Teástuͤ startsWith uͤ: false, endsWith T: false 22 | Code point 123 is valid Unicode Scalar: true 23 | Surrogate code point 55296 is valid Unicode Scalar: false 24 | Code point 1114112 is valid Unicode Scalar: false 25 | 0 
is digit: true, 9 is digit: true, 'a' is digit: false 26 | String_DecimalNumber String_LowercaseLetter String_UppercaseLetter String_Space String_MathSymbol String_CurrencySymbol 27 | å is letter true, 1 is letter false 28 | å is numeric false, 1 is numeric true, ¾ is numeric true 29 | ' ' is space true, '\t' is space true, '\r' is space true, '\n' is space true, 'U+0085' is space true 30 | 1, 2 31 | -------------------------------------------------------------------------------- /src/test/golden/array.golden: -------------------------------------------------------------------------------- 1 | [a, a, b, a, a, a, a, a, a, a] 2 | [b, b, b, b, b, b, b, b, b, b] 3 | [0, 1, 2, 3, 4] 4 | [2, 5, 8] 5 | [a, a, b, a, a, a, a, a, a, a, b, b, b, b, b, b, b, b, b, b] 6 | [a, a, b, a, b, b, b, b, b, a] 7 | Hello 8 | -------------------------------------------------------------------------------- /src/test/golden/binarytrees.golden: -------------------------------------------------------------------------------- 1 | stretch tree of depth 11 check: 4095 2 | 1024 trees of depth 4 check: 31744 3 | 256 trees of depth 4 check: 32512 4 | 64 trees of depth 4 check: 32704 5 | 16 trees of depth 4 check: 32752 6 | long lived tree of depth 10 check: 2047 7 | -------------------------------------------------------------------------------- /src/test/golden/builtin.golden: -------------------------------------------------------------------------------- 1 | 1234567890 -1234567890 true false $123.456000000 -0.001234500 127 -128 String () [1, 2] 3735928559 -493 2 | 4 3 | 0 4 | 5 5 | 0 6 | 24 7 | -9223372036854775808 8 | 1 9 | -1 10 | 63 11 | 64 12 | 4 13 | 0 14 | 5 15 | 0 16 | -128 17 | -1 18 | false 19 | true 20 | true 21 | true 22 | true 23 | 1 24 | 1 25 | Correct String 26 | 777 27 | 5 28 | 16 29 | 3 30 | 2 31 | 1 32 | -------------------------------------------------------------------------------- /src/test/golden/data.golden: -------------------------------------------------------------------------------- 1 | Data_Cons(1, Data_Cons(2, Data_Nil)) 2 | Data_Ident(test) 3 | test 4 | Data_No 5 | true 6 | Hello 7 | -------------------------------------------------------------------------------- /src/test/golden/dynamic.golden: -------------------------------------------------------------------------------- 1 | 1 2 | -------------------------------------------------------------------------------- /src/test/golden/factorial.golden: -------------------------------------------------------------------------------- 1 | Factorial of 15 is: 1307674368000 2 | -------------------------------------------------------------------------------- /src/test/golden/hello.golden: -------------------------------------------------------------------------------- 1 | Вітаю, Світе! Будь Lasca. 
2 | -------------------------------------------------------------------------------- /src/test/golden/lambda.golden: -------------------------------------------------------------------------------- 1 | 6 2 | Hello 3 | -------------------------------------------------------------------------------- /src/test/golden/nbody.golden: -------------------------------------------------------------------------------- 1 | -0.169075164 2 | -0.169078071 3 | -------------------------------------------------------------------------------- /src/test/golden/nbody2.golden: -------------------------------------------------------------------------------- 1 | -0.169075164 2 | -0.169078071 3 | -------------------------------------------------------------------------------- /src/test/golden/nbody3.golden: -------------------------------------------------------------------------------- 1 | -0.169075164 2 | -0.169078071 3 | -------------------------------------------------------------------------------- /src/test/golden/queen.golden: -------------------------------------------------------------------------------- 1 | GSTQ 2 | GSTU 3 | GSTE 4 | GSTE 5 | GSTN 6 | GSHQ 7 | GSHU 8 | GSHE 9 | GSHE 10 | GSHN 11 | GSEQ 12 | GSEU 13 | GSEE 14 | GSEE 15 | GSEN 16 | GATQ 17 | GATU 18 | GATE 19 | GATE 20 | GATN 21 | GAHQ 22 | GAHU 23 | GAHE 24 | GAHE 25 | GAHN 26 | GAEQ 27 | GAEU 28 | GAEE 29 | GAEE 30 | GAEN 31 | GVTQ 32 | GVTU 33 | GVTE 34 | GVTE 35 | GVTN 36 | GVHQ 37 | GVHU 38 | GVHE 39 | GVHE 40 | GVHN 41 | GVEQ 42 | GVEU 43 | GVEE 44 | GVEE 45 | GVEN 46 | GETQ 47 | GETU 48 | GETE 49 | GETE 50 | GETN 51 | GEHQ 52 | GEHU 53 | GEHE 54 | GEHE 55 | GEHN 56 | GEEQ 57 | GEEU 58 | GEEE 59 | GEEE 60 | GEEN 61 | OSTQ 62 | OSTU 63 | OSTE 64 | OSTE 65 | OSTN 66 | OSHQ 67 | OSHU 68 | OSHE 69 | OSHE 70 | OSHN 71 | OSEQ 72 | OSEU 73 | OSEE 74 | OSEE 75 | OSEN 76 | OATQ 77 | OATU 78 | OATE 79 | OATE 80 | OATN 81 | OAHQ 82 | OAHU 83 | OAHE 84 | OAHE 85 | OAHN 86 | OAEQ 87 | OAEU 88 | OAEE 89 | OAEE 90 | OAEN 91 | OVTQ 92 | OVTU 93 | OVTE 94 | OVTE 95 | OVTN 96 | OVHQ 97 | OVHU 98 | OVHE 99 | OVHE 100 | OVHN 101 | OVEQ 102 | OVEU 103 | OVEE 104 | OVEE 105 | OVEN 106 | OETQ 107 | OETU 108 | OETE 109 | OETE 110 | OETN 111 | OEHQ 112 | OEHU 113 | OEHE 114 | OEHE 115 | OEHN 116 | OEEQ 117 | OEEU 118 | OEEE 119 | OEEE 120 | OEEN 121 | DSTQ 122 | DSTU 123 | DSTE 124 | DSTE 125 | DSTN 126 | DSHQ 127 | DSHU 128 | DSHE 129 | DSHE 130 | DSHN 131 | DSEQ 132 | DSEU 133 | DSEE 134 | DSEE 135 | DSEN 136 | DATQ 137 | DATU 138 | DATE 139 | DATE 140 | DATN 141 | DAHQ 142 | DAHU 143 | DAHE 144 | DAHE 145 | DAHN 146 | DAEQ 147 | DAEU 148 | DAEE 149 | DAEE 150 | DAEN 151 | DVTQ 152 | DVTU 153 | DVTE 154 | DVTE 155 | DVTN 156 | DVHQ 157 | DVHU 158 | DVHE 159 | DVHE 160 | DVHN 161 | DVEQ 162 | DVEU 163 | DVEE 164 | DVEE 165 | DVEN 166 | DETQ 167 | DETU 168 | DETE 169 | DETE 170 | DETN 171 | DEHQ 172 | DEHU 173 | DEHE 174 | DEHE 175 | DEHN 176 | DEEQ 177 | DEEU 178 | DEEE 179 | DEEE 180 | DEEN 181 | -------------------------------------------------------------------------------- /src/test/golden/regex.golden: -------------------------------------------------------------------------------- 1 | true 2 | Haskell (consider Lasca instead of Haskell) or Python (consider Lasca instead of Python) 3 | -------------------------------------------------------------------------------- /src/test/golden/ski.golden: -------------------------------------------------------------------------------- 1 | Hello 2 | 
-------------------------------------------------------------------------------- /stack-shell.nix: -------------------------------------------------------------------------------- 1 | with (import <nixpkgs> {}); 2 | {ghc ? haskell.compiler.ghc822}: 3 | let 4 | lascart = pkgs.callPackage ./lascart.nix {}; 5 | in haskell.lib.buildStackProject { 6 | name = "lasca"; 7 | buildInputs = [ lascart boehmgc pcre2 ]; 8 | src = ./.; 9 | } -------------------------------------------------------------------------------- /stack.yaml: -------------------------------------------------------------------------------- 1 | resolver: lts-12.9 2 | extra-deps: 3 | - multiset-0.3.4 4 | nix: 5 | enable: false 6 | shell-file: stack-shell.nix --------------------------------------------------------------------------------
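Finally, as an illustration only (not a file in the repository), the compiler phases defined in Lasca.Compiler can also be driven programmatically, e.g. from GHCi; the option values below are illustrative:

import Lasca.Options (LascaOpts(..), emptyLascaOpts)
import Lasca.Compiler (processMainFile)

-- Compile examples/hello.lasca and run it immediately via the OrcJIT,
-- mirroring what `lasca -e` does on the command line.
demo :: IO ()
demo = processMainFile opts "examples/hello.lasca"
  where
    opts = emptyLascaOpts
        { lascaFiles = ["examples/hello.lasca"]
        , exec = True
        , verboseMode = True
        , optimization = 2
        }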