├── .gitignore ├── .travis.yml ├── CMakeLists.txt ├── LICENSE ├── Makefile ├── README.md ├── Setup.hs ├── docs ├── LangsOverview.rst ├── Lasca Design.rst ├── Log of Lasca Development.md └── Syntax.rst ├── examples ├── Data.lasca ├── Either.lasca ├── Json.lasca ├── binarytrees.lasca ├── builtin.lasca ├── dynamic.lasca ├── factorial.lasca ├── hello.lasca ├── lambda.lasca ├── nbody.lasca ├── nbody2.lasca ├── nbody3.lasca ├── queen.lasca ├── regex.lasca └── ski.lasca ├── gencode └── GenBench.hs ├── lasca.cabal ├── lasca.nix ├── lascart.nix ├── libs └── base │ ├── Array.lasca │ ├── ArrayBuffer.lasca │ ├── Bits.lasca │ ├── ByteArray.lasca │ ├── List.lasca │ ├── Map.lasca │ ├── Option.lasca │ ├── Prelude.lasca │ └── String.lasca ├── make-release.sh ├── release.nix ├── rts ├── CMakeLists.txt ├── builtin.c ├── lasca.h ├── runtime.c ├── utf8proc │ ├── CMakeLists.txt │ ├── LICENSE.md │ ├── MANIFEST │ ├── Makefile │ ├── NEWS.md │ ├── README.md │ ├── bench │ │ ├── Makefile │ │ ├── bench.c │ │ ├── icu.c │ │ ├── unistring.c │ │ ├── util.c │ │ └── util.h │ ├── data │ │ ├── Makefile │ │ ├── charwidths.jl │ │ └── data_generator.rb │ ├── lump.md │ ├── test │ │ ├── case.c │ │ ├── charwidth.c │ │ ├── custom.c │ │ ├── graphemetest.c │ │ ├── iterate.c │ │ ├── normtest.c │ │ ├── printproperty.c │ │ ├── tests.c │ │ ├── tests.h │ │ └── valid.c │ ├── utf8proc.c │ ├── utf8proc.h │ ├── utf8proc_data.c │ └── utils.cmake ├── xxhash.c └── xxhash.h ├── shell.nix ├── src ├── lasca │ └── Main.hs ├── lib │ └── Lasca │ │ ├── Codegen.hs │ │ ├── Compiler.hs │ │ ├── Desugar.hs │ │ ├── Emit.hs │ │ ├── EmitCommon.hs │ │ ├── EmitDynamic.hs │ │ ├── EmitStatic.hs │ │ ├── Infer.hs │ │ ├── JIT.hs │ │ ├── Lexer.hs │ │ ├── Modules.hs │ │ ├── Namer.hs │ │ ├── Options.hs │ │ ├── Parser.hs │ │ ├── Syntax.hs │ │ └── Type.hs └── test │ ├── TestMain.hs │ └── golden │ ├── ArrayBuffer.golden │ ├── Either.golden │ ├── List.golden │ ├── Map.golden │ ├── Option.golden │ ├── String.golden │ ├── array.golden │ ├── binarytrees.golden │ ├── builtin.golden │ ├── data.golden │ ├── dynamic.golden │ ├── factorial.golden │ ├── hello.golden │ ├── lambda.golden │ ├── nbody.golden │ ├── nbody2.golden │ ├── nbody3.golden │ ├── queen.golden │ ├── regex.golden │ └── ski.golden ├── stack-shell.nix └── stack.yaml /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/* 2 | .stack* 3 | .cabal* 4 | .liquid/ 5 | .vscode/* 6 | result 7 | .history/ 8 | build/ 9 | out/ 10 | dist 11 | *.iml 12 | target/ 13 | cabal.sandbox.config 14 | hello 15 | *.dll 16 | *.dylib 17 | *.so 18 | *.js 19 | *.ll 20 | *.pdf 21 | *.prof 22 | *.o 23 | *.a 24 | *.out 25 | *.aux 26 | *.hp 27 | .DS_Store 28 | Thumbs.db 29 | 30 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # This is the simple Travis configuration, which is intended for use 2 | # on applications which do not require cross-platform and 3 | # multiple-GHC-version support. For more information and other 4 | # options, see: 5 | # 6 | # https://docs.haskellstack.org/en/stable/travis_ci/ 7 | # 8 | # Copy these contents into the root directory of your Github project in a file 9 | # named .travis.yml 10 | 11 | # Use new container infrastructure to enable caching 12 | sudo: true 13 | 14 | # Do not choose a language; we provide our own build tools. 
15 | language: nix 16 | 17 | dist: trusty 18 | 19 | script: nix-build release.nix 20 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.4.3) 2 | project (LascaRTS) 3 | set(CMAKE_POSITION_INDEPENDENT_CODE ON) 4 | add_subdirectory (rts) 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016- Alexander Nemish 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following disclaimer 13 | in the documentation and/or other materials provided with the 14 | distribution. 15 | * Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PANDOC = pandoc 2 | IFORMAT = markdown 3 | FLAGS = --standalone --toc --highlight-style pygments 4 | 5 | #TEST_RTS = +RTS -sstderr 6 | TEST_RTS = 7 | LASCA_VERSION="0.0.2" 8 | 9 | build: rts 10 | stack build --extra-lib-dirs=build/rts 11 | 12 | fast: rts 13 | stack build --fast -j 8 --extra-lib-dirs=build/rts 14 | 15 | install: build 16 | stack install --extra-lib-dirs=build/rts && stack test --extra-lib-dirs=build/rts 17 | 18 | fastinstall: fast 19 | stack install --fast --extra-lib-dirs=build/rts 20 | 21 | bench: 22 | time lasca -O2 -e examples/gen.lasca 23 | 24 | rts: 25 | mkdir -p build && cd build && cmake -DCMAKE_BUILD_TYPE=Release .. 
&& make && cp rts/liblascart* $(LASCAPATH) 26 | 27 | relink: rts 28 | rm -rf .stack-work/dist/x86_64-osx/Cabal-2.0.1.0/build/Lasca/lasca 29 | rm -rf .stack-work/install 30 | stack build --fast -j 8 --copy-bins --extra-lib-dirs=build/rts 31 | 32 | rusts: 33 | cd rts/rust && cargo build && cp target/debug/liblascarts.dylib ../../../ 34 | 35 | test: 36 | stack test --extra-lib-dirs=build/rts 37 | 38 | fasttest: 39 | stack test -j 8 --fast --extra-lib-dirs=build/rts 40 | 41 | examples: 42 | lasca -O2 -e --mode static libs/base/Array.lasca $(TEST_RTS) 43 | lasca -O2 -e --mode dynamic libs/base/Array.lasca $(TEST_RTS) 44 | lasca -O2 -e --mode static libs/base/List.lasca $(TEST_RTS) 45 | lasca -O2 -e --mode dynamic libs/base/List.lasca $(TEST_RTS) 46 | lasca -O2 -e --mode static libs/base/Option.lasca $(TEST_RTS) 47 | lasca -O2 -e --mode dynamic libs/base/Option.lasca $(TEST_RTS) 48 | lasca -O2 -e --mode static libs/base/String.lasca $(TEST_RTS) 49 | lasca -O2 -e --mode dynamic libs/base/String.lasca $(TEST_RTS) 50 | lasca -O2 -e --mode static examples/Map.lasca $(TEST_RTS) 51 | lasca -O2 -e --mode dynamic examples/Map.lasca $(TEST_RTS) 52 | lasca -O2 -e --mode static examples/Data.lasca $(TEST_RTS) 53 | lasca -O2 -e --mode dynamic examples/Data.lasca $(TEST_RTS) 54 | lasca -O2 -e --mode dynamic examples/dynamic.lasca $(TEST_RTS) 55 | lasca -O2 -e --mode static examples/factorial.lasca $(TEST_RTS) -- 15 56 | lasca -O2 -e --mode dynamic examples/factorial.lasca $(TEST_RTS) -- 15 57 | lasca -O2 -e --mode dynamic examples/hello.lasca $(TEST_RTS) 58 | lasca -O2 -e --mode static examples/hello.lasca $(TEST_RTS) 59 | lasca -O2 -e --mode static examples/lambda.lasca $(TEST_RTS) 60 | lasca -O2 -e --mode dynamic examples/lambda.lasca $(TEST_RTS) 61 | lasca -O2 -e --mode static examples/nbody.lasca $(TEST_RTS) -- 50000 62 | lasca -O2 -e --mode dynamic examples/nbody.lasca $(TEST_RTS) -- 50000 63 | lasca -O2 -e --mode static examples/nbody2.lasca $(TEST_RTS) -- 50000 64 | lasca -O2 -e --mode dynamic examples/nbody2.lasca $(TEST_RTS) -- 50000 65 | lasca -O2 -e --mode static examples/nbody3.lasca $(TEST_RTS) -- 50000 66 | lasca -O2 -e --mode dynamic examples/nbody3.lasca $(TEST_RTS) -- 50000 67 | lasca -O2 -e --mode static examples/binarytrees.lasca $(TEST_RTS) -- 10 68 | lasca -O2 -e --mode dynamic examples/binarytrees.lasca $(TEST_RTS) -- 10 69 | lasca -O2 -e --mode static examples/ski.lasca $(TEST_RTS) 70 | 71 | perf: 72 | stack install --profile -j 8 --extra-lib-dirs=build/rts 73 | time lasca examples/Map.lasca +RTS -sstderr -N4 -p -hc 74 | hp2ps -c lasca.hp 75 | ghc-prof-flamegraph lasca.prof 76 | 77 | release: build 78 | ./make-release.sh ${LASCA_VERSION} 79 | designpdf: 80 | rst2pdf -b 1 docs/Lasca\ Design.rst 81 | 82 | .PHONY: clean examples rts install 83 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Lasca Language 2 | ============== 3 | 4 | [![Build Status](https://travis-ci.org/nau/lasca-compiler.svg?branch=master)](https://travis-ci.org/nau/lasca-compiler) 5 | [![Join the chat at https://gitter.im/lasca-lang/compiler](https://badges.gitter.im/lasca-lang/Lobby.svg)](https://gitter.im/lasca-lang/compiler?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) 6 | 7 | Lasca is Scala shifted towards Haskell. 8 | 9 | Lasca is a LLVM-based statically or dynamically typed strict functional programming language. Simplified OCaml if you will. 
10 | 11 | It has a 'dynamic' compilation mode, meaning instant code generation without compile-time type checking/inference, which allows an instant compile/run cycle and the extra freedom dynamic languages give. 12 | 13 | It has full type inference, parametric polymorphism, GC, algebraic data types, and pattern matching; 14 | type classes are coming soon. 15 | 16 | Imagine 17 | 18 | - Scala with fast compilation/start time, optional dynamic typing, and without null 19 | - Go with ADTs, global type inference, and parametric polymorphism 20 | - Haskell with decent record syntax, runtime polymorphism, and string interpolation 21 | - OCaml with typeclasses, overloaded +-*/ for ints and floats, and do-notation 22 | - Rust with a garbage collector and without <::>!? 23 | - Erlang with types and fast execution 24 | - Python with multithreading, pattern matching, and multiline lambdas 25 | - TypeScript with indentation-significant syntax and LLVM 26 | - Julia with static type checking and zero-based indexing 27 | 28 | Inspired by: 29 | 30 | - Scala 31 | - Haskell, Liquid Haskell, Linear Haskell, Idris 32 | - OCaml/SML/F#/1ML 33 | - Clojure (persistent data structures, HAMT/CHAMP) 34 | - Go (CSP) 35 | - Erlang (actors, immutability, minimalism) 36 | - Python (docstrings, doctests, syntax) 37 | - Julia 38 | - Swift 39 | - Nim 40 | - Pony 41 | - [Koka](https://github.com/koka-lang/koka) (algebraic effects) 42 | 43 | Ideas 44 | --- 45 | 46 | - light, non-symbol-polluted syntax (Python) 47 | - indentation-based 48 | - readability first 49 | - fast development cycle 50 | - presentation compiler for IDE integration 51 | - IDE-friendly (intellisense 'dot-autocomplete', auto-formatting, compiler API) 52 | - type-safe 53 | - strict functional 54 | - expression-based 55 | - practical first, but as clean and concise as possible 56 | - prefer things done one way 57 | - LLVM backend 58 | - JavaScript/WebAssembly backend (native or via LLVM/emscripten) 59 | - GraalVM backend? 60 | - no OOP and data subclassing/inheritance? 61 | - syntactic sugar is ok 62 | - no null 63 | - annotations (Java/Python-style) 64 | - annotation-based extensions 65 | - macro-based metaprogramming (like Scala Macros, Template Haskell) 66 | - Concurrency Oriented Programming (Erlang). Objects are out. Concurrency is in. 67 | - [Gradual Typing](http://homes.soic.indiana.edu/jsiek/what-is-gradual-typing/) 68 | - Deferred Type Errors (runtime compilation mode, Haskell) 69 | - Linear/affine types (Rust, Linear Haskell)? 70 | - Liquid Type system (refinement types, [Leon](http://leon.epfl.ch), [Liquid Haskell](https://github.com/ucsd-progsys/liquidhaskell)) and 71 | [Z3](https://github.com/Z3Prover/z3)/[CVC4](https://cvc4.cs.stanford.edu/web/) as proof assistants. 72 | - [Algebraic Subtyping for module system](https://www.cl.cam.ac.uk/~sd601/thesis.pdf) 73 | - import features (Scala-like) 74 | - compile-time and runtime reflection 75 | - save/distribute AST (Scala TASTY). Full program optimization/reflection 76 | - important things must be greppable and googlable, call it searchability :) 77 | - compiler as a service: [Language Server Protocol](https://langserver.org/) 78 | - markdown/rst comments/docs, doctest (Julia, Python) 79 | - CPS/Actors/π-calculus/STM?, non-blocking IO, reactive 80 | 81 | Example 82 | --- 83 | 84 | The current implementation uses braces and semicolons, but I am considering indentation-based syntax or semicolon inference.
85 | 86 | ```haskell 87 | -- Algebraic data type a la Haskell 88 | data JValue 89 | = JNull 90 | | JNum(n: Float) 91 | | JString(s: String) 92 | | JBool(v: Bool) 93 | | JArray(v: [JValue]) 94 | | JObject(v: Map String JValue) 95 | 96 | -- function argument type annotations are optional, compiler infers those 97 | def jsonToString(js: JValue) = match js { 98 | JObject(m) -> 99 | if Map.isEmpty(m) then "{}" else { 100 | println(toString(m)); 101 | res = Array.makeArray(m.size, ""); 102 | var idx = 0; 103 | Map.foreachWithKey(m, { k, v -> 104 | setIndex(res, idx.readVar, "\"${k}\": ${jsonToString(v)}"); 105 | idx := idx.readVar + 1; 106 | }); 107 | s = String.join(", ", res); 108 | "{ ${s} }" 109 | } 110 | JNull -> "null" 111 | JNum(n) -> toString(n) 112 | JBool(v) -> toString(v) 113 | JString(v) -> "\"${v}\"" 114 | JArray(v) -> { 115 | values = Array.map(v, jsonToString); 116 | toString(values); 117 | } 118 | } 119 | ``` 120 | 121 | What Works Right Now 122 | --- 123 | 124 | - JIT and AOT compilation and execution (via LLVM OrcJIT) 125 | - lasca -e hello.lasca to execute 126 | - lasca hello.lasca to create a binary 127 | - type inference 128 | - dynamic typing mode (```lasca -e --mode dynamic hello.lasca```) 129 | - ADTs, inner functions, out of order function definitions 130 | - pattern matching 131 | - calling external C functions 132 | - string interpolation, UTF8 encoded immutable strings 133 | - builtin types: `String`, `Bool`, `Int`, `Byte`, `Int16`, `Int32`, `Float`, `Array`, `ByteArray`, `Var`, `FileHandle` 134 | - implemented `List`, `Option`, `Either`, `Map`, `ArrayBuffer` 135 | - regular expressions with [PCRE-2](https://www.pcre.org/) 136 | - overloaded `+` `-` `*` `/` operators 137 | 138 | Package System 139 | --- 140 | 141 | Consider [Nix](https://nixos.org/nix/) as package manager 142 | 143 | Compiler Modes 144 | ---- 145 | 146 | - Dynamic Mode, aka Prototype Mode. 147 | Syntax is checked. 148 | All types are dynamically checked. 149 | - Static Mode. 150 | Syntax is checked. 151 | Typechecking/inference, faster execution. 152 | - Hardcore 153 | Liquid types enabled. (See Liquid Haskell) 154 | Proves checked. 155 | Array bounds checks eliminated. 156 | 157 | Type System 158 | --- 159 | 160 | - Hindley-Milner by default, dependent types if needed 161 | - traits, kind of type classes 162 | - Liquid types as in Liquid Haskell 163 | 164 | Memory Management 165 | ---- 166 | 167 | GC, concurrent mark and sweep 168 | per actor/green thread GC 169 | Consider [MultiCore Ocaml GC](http://kcsrk.info/multicore/gc/2017/07/06/multicore-ocaml-gc/) 170 | 171 | for now, use [Boehm conservative GC](http://www.hboehm.info/gc/) 172 | 173 | Other 174 | --- 175 | 176 | - indentation significant (i.e. Python, Haskell) 177 | - uppercase Typenames, lowercase idents and type arguments (Haskell/Scala style) 178 | - pattern-matching 179 | - ADT, traits, type classes 180 | - easy C interoperability 181 | - no exceptions (Go/Rust panic style errors) 182 | - don't overuse `'~!@#$%^&* symbols 183 | - default immutability 184 | - string interpolation: "${ident} = ${expression}" 185 | - multiline strings 186 | - Uniform Function Call Syntax (Rust, D). 187 | For example, any function can be a method for its first argument: 188 | 189 | ```scala 190 | def toString(s: String) = ... 191 | "Hello".toString 192 | def plus(l: Num, r: Num) 193 | 1.plus(2) 194 | ``` 195 | 196 | - uniform select principle. Use (.) 
for record field selection, function calls, package name resolution, etc. 197 | - UTF-8 strings 198 | - Haskell-like application for type functions: Option Int, Either Int String, etc 199 | 200 | Install on Mac OS using Homebrew 201 | --- 202 | 203 | brew install boehmgc pcre2 204 | brew install nau/lasca/lasca-compiler 205 | 206 | Set up the LASCAPATH environment variable. Add this to your .bash_profile 207 | 208 | export LASCAPATH="$(brew --prefix lasca-compiler)/src" 209 | 210 | Try it! 211 | 212 | echo 'def main() = println("Hello Lasca!")' > hello.lasca 213 | lasca -e hello.lasca 214 | > Hello Lasca! 215 | 216 | Add bash completion config for lasca compiler options: 217 | 218 | lasca --bash-completion-script lasca > $(brew --prefix)/etc/bash_completion.d/lasca 219 | 220 | Build on Mac OS 221 | --- 222 | 223 | You need LLVM 6.0 installed, and the latest Haskell Stack. 224 | 225 | brew install cmake boehmgc pcre2 226 | 227 | brew install llvm-hs/llvm/llvm-6.0 # this compiles llvm from sources, may take some time 228 | 229 | brew install haskell-stack 230 | 231 | stack setup 232 | 233 | Set up the LASCAPATH environment variable. Add this to your .bash_profile 234 | 235 | export LASCAPATH="${lasca-compiler-src-dir}/libs/base" 236 | 237 | Add your `~/.local/bin` directory to your `PATH` 238 | 239 | export PATH=$PATH:~/.local/bin 240 | 241 | Build and install the lasca compiler 242 | 243 | make install 244 | 245 | Add bash completion config for lasca compiler options: 246 | 247 | lasca --bash-completion-script lasca > $(brew --prefix)/etc/bash_completion.d/lasca 248 | 249 | Run hello.lasca 250 | 251 | lasca --exec examples/hello.lasca 252 | 253 | Build on Ubuntu 254 | --- 255 | 256 | Requirements: Haskell Stack > 1.6, Cabal > 2.0, LLVM 6, CMake 257 | 258 | Don't install Haskell Stack from apt. [It's likely to be older than 1.6 and won't be able to upgrade](https://askubuntu.com/questions/986596/how-to-upgrade-haskell-stack-on-ubuntu-16-04) 259 | 260 | Do this instead: 261 | 262 | curl -sSL https://get.haskellstack.org/ | sh 263 | 264 | sudo apt install llvm-6.0-dev libgc-dev zlib1g-dev cmake 265 | sudo add-apt-repository universe 266 | sudo apt install libpcre2-dev 267 | export LASCAPATH="${lasca-compiler-src-dir}/libs/base" 268 | export PATH=$PATH:~/.local/bin 269 | stack setup 270 | make install 271 | lasca -e examples/hello.lasca 272 | 273 | Current n-body run 274 | --- 275 | 276 | There are several implementations of the [n-body problem]( 277 | http://benchmarksgame.alioth.debian.org/u64q/nbody.html). 278 | Currently it's quite slow due to boxing. 279 | 280 | $ time lasca -e -O2 examples/nbody.lasca -- 50000000 281 | -0.169075164 282 | -0.169059907 283 | 284 | real 7m13.261s 285 | user 7m39.476s 286 | sys 0m38.716s 287 | 288 | find src -name "*.hs" | xargs cat | wc -l 289 | 4738 290 | 291 | cat rts/runtime.c rts/builtin.c rts/lasca.h | wc -l 292 | 1324 293 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain -------------------------------------------------------------------------------- /docs/Log of Lasca Development.md: -------------------------------------------------------------------------------- 1 | # Log of Lasca Development 2 | 3 | These are mostly my rants on things in the industry I find crazy. 4 | Legacy has an overwhelming power. 5 | 6 | ## 19/02/2018 Today I learned how to print 64-bit ints in C on both Mac and Linux.
7 | 8 | This code works fine on macOS, 9 | 10 | printf("%lli", code); 11 | 12 | but gives this warning on Linux: 13 | 14 | warning: format ‘%lli’ expects argument of type ‘long long int’, 15 | but argument 2 has type ‘int64_t {aka long int}’ [-Wformat=] 16 | 17 | Apparently the right way to print an int64_t with the printf/snprintf family of functions is this: 18 | 19 | #define __STDC_FORMAT_MACROS 20 | #include <inttypes.h> 21 | 22 | int64_t i; 23 | printf("%" PRId64 "\n", i); 24 | 25 | Sigh. 26 | 27 | ## 17/08/2018 State of Unicode support in programming languages 28 | 29 | [SO overview](https://stackoverflow.com/questions/1036585/unicode-support-in-various-programming-languages) 30 | 31 | Everything is very, very sad. 32 | Only a few modern languages use UTF-8 out of the box: Rust, Julia, and Go. 33 | Others do various hacks or use UTF-16, which is even worse. 34 | Legacy has an overwhelming power. 35 | 36 | Sigh. 37 | 38 | http://utf8everywhere.org/ 39 | [UTF-16 Considered Harmful](https://softwareengineering.stackexchange.com/questions/102205/should-utf-16-be-considered-harmful) 40 | 41 | ## 08/10/2018 Default Hash Function and Hash table 42 | 43 | The choice of hash function is crucial. 44 | 45 | Most languages/platforms have changed their hashing functions to something more secure. Many chose SipHash: 46 | 47 | - Python (starting in version 3.4) 48 | - Ruby 49 | - Rust 50 | 51 | In Lasca we'll use SipHash. 52 | 53 | [SipHash: a fast short-input PRF](https://131002.net/siphash/siphash.pdf) 54 | 55 | [Which hashing algorithm is best for uniqueness and speed?](https://softwareengineering.stackexchange.com/questions/49550/which-hashing-algorithm-is-best-for-uniqueness-and-speed) 56 | 57 | Java's default String hash algorithm is awful: 58 | 59 | s[0]*31^(n-1) + s[1]*31^(n-2) + ... + s[n-1] 60 | 61 | It's easy to find collisions. 62 | 63 | Haskell uses [FNV1](http://isthe.com/chongo/tech/comp/fnv/) in [hashable](http://hackage.haskell.org/package/hashable-1.2.6.1/docs/src/Data-Hashable-Class.html#line-627) 64 | 65 | Links 66 | From this paper: 67 | https://bigdata.uni-saarland.de/publications/p249-richter.pdf 68 | 69 | we consider 70 | Mult as the best candidate to be used in practice when 71 | quality results on high throughputs is desired, but at the cost of 72 | a high variance across data distributions 73 | 74 | We can conclude that RH provides a very interesting 75 | trade-off: for a small penalty (often within 1-5%) in peak 76 | performance on the best of cases (all lookups successful), RH 77 | significantly improves on the worst-case over LP in general, up 78 | to more than a factor 4. 79 | Across the whole set of experiments, RH is always among 80 | the top performers, and even the best method for most cases. 81 | This observation holds for all data set sizes we tested. 82 | 83 | As a conclusion, 84 | in a write-heavy workload, quadratic probing looks as the best 85 | option in general. 86 | 87 | Our overall conclusion is that AoS outperforms 88 | SoA by a larger margin than the other way around. Inside 89 | caches (not shown), both methods are comparable in terms of 90 | lookup performance, with AoS performing slightly better. When 91 | using SIMD, SoA has an edge over AoS — at least on current 92 | hardware — because keys are already densely packed.
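The quoted results compare Robin Hood probing (RH) against linear probing (LP). To make that concrete, below is a minimal, hypothetical C sketch of just the insertion path of a Robin Hood open-addressing table. It is not the Lasca RTS hash table: the names (`rh_table`, `rh_put`) are made up for illustration, the hash is assumed to be precomputed (the design notes below settle on xxHash64), and growing at the chosen load factor as well as backward-shift deletion are assumed to happen elsewhere.

```c
/* Illustrative sketch of Robin Hood open addressing (insertion only). */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct {
    uint64_t hash;   /* 0 marks an empty slot in this toy version */
    int64_t  key;
    int64_t  value;
} rh_slot;

typedef struct {
    rh_slot *slots;
    size_t   capacity;   /* power of two, so masking replaces modulo */
} rh_table;

static rh_table rh_new(size_t capacity) {
    rh_table t = { calloc(capacity, sizeof(rh_slot)), capacity };
    return t;
}

/* How far a slot's current occupant sits from its ideal bucket. */
static size_t rh_probe_dist(const rh_table *t, uint64_t hash, size_t pos) {
    size_t ideal = (size_t)(hash & (t->capacity - 1));
    return (pos + t->capacity - ideal) & (t->capacity - 1);
}

static void rh_put(rh_table *t, uint64_t hash, int64_t key, int64_t value) {
    rh_slot cur = { hash ? hash : 1, key, value };  /* reserve 0 for "empty" */
    size_t pos  = (size_t)(cur.hash & (t->capacity - 1));
    size_t dist = 0;  /* how far cur has been pushed from its ideal bucket */
    for (;;) {
        rh_slot *s = &t->slots[pos];
        if (s->hash == 0) { *s = cur; return; }          /* empty: take it */
        if (s->hash == cur.hash && s->key == cur.key) {  /* same key: update */
            s->value = cur.value;
            return;
        }
        size_t resident = rh_probe_dist(t, s->hash, pos);
        if (resident < dist) {   /* resident is "richer": steal its slot */
            rh_slot tmp = *s;
            *s = cur;
            cur = tmp;           /* keep inserting the displaced entry */
            dist = resident;
        }
        pos = (pos + 1) & (t->capacity - 1);
        dist++;
    }
}

int main(void) {
    rh_table t = rh_new(16);
    /* deliberately colliding "hashes" (1..3) just to exercise probing */
    for (int64_t k = 0; k < 8; k++)
        rh_put(&t, (uint64_t)(k % 3 + 1), k, k * 10);
    for (size_t i = 0; i < t.capacity; i++)
        if (t.slots[i].hash)
            printf("slot %zu: key=%lld value=%lld\n",
                   i, (long long)t.slots[i].key, (long long)t.slots[i].value);
    free(t.slots);
    return 0;
}
```

The swap on `resident < dist` is what keeps the variance of probe lengths small, which in turn is what makes the backward-shift deletion mentioned in the design decisions below practical.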
93 | 94 | https://github.com/leo-yuriev/t1ha 95 | http://cyan4973.github.io/xxHash/ 96 | https://accidentallyquadratic.tumblr.com/post/153545455987/rust-hash-iteration-reinsertion 97 | https://github.com/google/highwayhash/issues/28 98 | https://medium.freecodecamp.org/hash-table-attack-8e4371fc5261 99 | https://rcoh.me/posts/hash-map-analysis/ 100 | http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/ 101 | https://probablydance.com/2017/02/26/i-wrote-the-fastest-hashtable/ 102 | 103 | ### Design Decisions based on research 104 | 105 | Open addressing with Robin Hood Probing and backward shift deletion 106 | Hash function [xxHash 64](http://cyan4973.github.io/xxHash/) 107 | Grow either by prime numbers or powers of 2 108 | Load factor threshold ≈ 0.6-0.7 109 | Collision count threshold ≈ log₂(n) 110 | -------------------------------------------------------------------------------- /examples/Data.lasca: -------------------------------------------------------------------------------- 1 | module Data 2 | -- Records 3 | -- Dynamic mode 4 | import Option 5 | 6 | data Test = Test(a: Int) 7 | 8 | data Point = Point(x: Int, y: Int, z: Test) 9 | 10 | data Expr = Ident(n: String) | Num(nm: Int) | No 11 | 12 | data StringList = Cons(v: String, tail: StringList) | Nil 13 | 14 | def main() = { 15 | t = Test(3); 16 | p1 = Point(12, 2, t); 17 | ident = Ident("test"); 18 | num = Num(1); 19 | no = No; 20 | some = Option.Some(1); 21 | list = Cons("1", Cons("2", Nil)); 22 | println(list.toString); 23 | println(ident.toString); 24 | println(ident.n); 25 | println(no.toString); 26 | s = p1.x + p1.y - p1.z.a; 27 | println(toString(s == 11)); 28 | println("Hello") 29 | } 30 | 31 | -------------------------------------------------------------------------------- /examples/Either.lasca: -------------------------------------------------------------------------------- 1 | module Either 2 | 3 | data Either a b = Left(left: a) | Right(right: b) 4 | 5 | def isLeft(self: Either a b) = match self { 6 | Left(l) -> true 7 | _ -> false 8 | } 9 | 10 | def isRight(self: Either a b) = match self { 11 | Left(l) -> false 12 | _ -> true 13 | } 14 | 15 | def map(self: Either a b, f: b -> c) = match self { 16 | Right(r) -> Right(f(r)) 17 | _ -> self 18 | } 19 | 20 | def main() = { 21 | l = Left("Test"); 22 | r = Right(123); 23 | r1 = map(r, { r -> r + 2 }); 24 | println("Left is ${isLeft(l)}, right is left ${r.isLeft}, right is right ${r.isRight}"); 25 | println("r1 should be 125 and is: ${r1}"); 26 | } -------------------------------------------------------------------------------- /examples/Json.lasca: -------------------------------------------------------------------------------- 1 | import Map 2 | import Array 3 | import String 4 | 5 | data JValue 6 | = JNull 7 | | JNum(n: Float) 8 | | JString(s: String) 9 | | JBool(v: Bool) 10 | | JArray(v: [JValue]) 11 | | JObject(v: Map String JValue) 12 | 13 | def jsonToString(js: JValue) = match js { 14 | JObject(m) -> 15 | if Map.isEmpty(m) then "{}" else { 16 | println(toString(m)); 17 | res = Array.makeArray(m.size, ""); 18 | var idx = 0; 19 | Map.foreachWithKey(m, { k, v -> 20 | setIndex(res, idx.readVar, "\"${k}\": ${jsonToString(v)}"); 21 | idx := idx.readVar + 1; 22 | }); 23 | s = String.join(", ", res); 24 | "{ ${s} }" 25 | } 26 | JNull -> "null" 27 | JNum(n) -> toString(n) 28 | JBool(v) -> toString(v) 29 | JString(v) -> "\"${v}\"" 30 | JArray(v) -> { 31 | values = Array.map(v, jsonToString); 32 | toString(values); 33 | } 34 | } 35 | 36 | def 
parseJson(str) = { 37 | "asdf" 38 | } 39 | 40 | def main() = { 41 | m = Map.insert(Map.single("message", JArray([JString(""), JNull])), "empty", JBool(true)); 42 | js = JArray([JNum(-3.14), JString("Pen Pineapple Apple Pen"), JObject(m), JObject(Map.empty()), JArray([])]); 43 | println(jsonToString(js)); 44 | } -------------------------------------------------------------------------------- /examples/binarytrees.lasca: -------------------------------------------------------------------------------- 1 | {- 2 | The Computer Language Benchmarks Game 3 | http://benchmarksgame.alioth.debian.org/ 4 | -} 5 | import Array 6 | 7 | data Tree a = Empty | Node(treeLeft: Tree a, treeRight: Tree a) 8 | 9 | def make(d) = 10 | if d == 0 then Node(Empty, Empty) 11 | else let d = d - 1 in Node(make(d), make(d)) 12 | 13 | def check(t) = match t { 14 | Empty -> 0 15 | Node(l, r) -> 1 + check(l) + check(r) 16 | } 17 | 18 | minDepth = 4 19 | maxDepth = { 20 | args = getArgs(); 21 | if Array.length(args) != 2 then 21 else toInt(args[1]) 22 | } 23 | stretchDepth = maxDepth + 1 24 | 25 | def pow(n) = if n == 0 then 1 else 2 * pow(n-1) 26 | 27 | def main() = { 28 | def loop1(d, i) = if i < ((maxDepth - d) / 2 + 1) then { 29 | def loop2(d, i, niter, c) = 30 | if i == niter then c else loop2(d, i + 1, niter, c + check(make(d))); 31 | dd = d + i * 2; 32 | niter = pow(maxDepth - dd + minDepth); 33 | c = loop2(dd, 0, niter, 0); 34 | println("${niter}\t trees of depth ${d}\t check: ${c}"); 35 | loop1(d, i + 1); 36 | } else 0; 37 | 38 | c = check(make(stretchDepth)); 39 | println("stretch tree of depth ${stretchDepth}\t check: ${c}"); 40 | longLivedTree = make(maxDepth); 41 | loop1(minDepth, 0); 42 | println("long lived tree of depth ${maxDepth}\t check: ${check(longLivedTree)}"); 43 | } 44 | -------------------------------------------------------------------------------- /examples/builtin.lasca: -------------------------------------------------------------------------------- 1 | import Bits 2 | import Option 3 | 4 | def testLiterals() = { 5 | a = 1234567890; 6 | b = -1234567890; 7 | c = true; 8 | d = false; 9 | e = 123.456; 10 | f = -123.45e-5; 11 | g = intToByte(127); 12 | h = intToByte(-128); 13 | i = "String"; 14 | j = (); 15 | l = [1, 2]; 16 | m = 0xDEADbeef; 17 | n = -0o755; 18 | println("${a} ${b} ${c} ${d} \$${e} ${f} ${g} ${h} ${i} ${j} ${l} ${m} ${n}"); 19 | } 20 | 21 | def bitwiseOperations() = { 22 | a = intAnd(5, 4); println(a.toString); 23 | a = intAnd(5, 2); println(a.toString); 24 | a = intOr(4, 1); println(a.toString); 25 | a = intXor(3, 3); println(a.toString); 26 | a = intShiftL(3, 3); println(a.toString); 27 | a = intShiftL(1, 63); println(a.toString); 28 | a = intShiftR(4611686018427387904, 62); println(a.toString); 29 | a = intNot(0); println(a.toString); 30 | a = intPopCount(9223372036854775807); println(a.toString); 31 | a = intPopCount(-1); println(a.toString); 32 | 33 | a = byteAnd(intToByte(5), intToByte(4)); println(a.toString); 34 | a = byteAnd(intToByte(5), intToByte(2)); println(a.toString); 35 | a = byteOr(intToByte(4), intToByte(1)); println(a.toString); 36 | a = byteXor(intToByte(3), intToByte(3)); println(a.toString); 37 | a = byteShiftL(intToByte(1), intToByte(7)); println(a.toString); 38 | a = byteNot(intToByte(0)); println(a.toString); 39 | } 40 | 41 | def logicalOperations() = { 42 | a = true and false; println(a.toString); 43 | a = true or false; println(a.toString); 44 | a = not false; println(a.toString); 45 | a = not false and false or true; println(a.toString); 46 | } 47 | 48 | 
def patternMatching() = { 49 | a = match 1 { 50 | 0 -> false 51 | 1 -> true 52 | _ -> false 53 | }; println(a.toString); 54 | a = match a { 55 | true -> 1 56 | false -> 0 57 | }; println(a.toString); 58 | a = match 3.14 { 59 | 3.14 -> 1 60 | _ -> 0 61 | }; println(a.toString); 62 | a = match "String" { 63 | "" -> "empty" 64 | "String but not this" -> "wrong string" 65 | "String" -> "Correct String" 66 | }; println(a); 67 | a = match Some(1) { 68 | None -> 0 69 | Some(0) -> 666 70 | Some(1) -> 777 71 | }; println(a.toString); 72 | } 73 | 74 | def innerFunctions() = { 75 | a = 3; 76 | def inner1(x) = x + a; -- check capturing outer vars 77 | println("${inner1(2)}"); 78 | 79 | def inner2(x) = { 80 | inner2 = 1; 81 | x + 5 + inner2; -- check shadowing 82 | }; 83 | println("${inner2(10)}"); 84 | 85 | def inner3(x) = if x == 0 then 0 else { 86 | println(toString(x)); 87 | inner3(x - 1); -- inner recursive 88 | }; 89 | inner3(3); 90 | } 91 | 92 | def main() = { 93 | __I_D_E_N_T_123_ = true; 94 | testLiterals(); 95 | bitwiseOperations(); 96 | logicalOperations(); 97 | patternMatching(); 98 | innerFunctions(); 99 | } -------------------------------------------------------------------------------- /examples/dynamic.lasca: -------------------------------------------------------------------------------- 1 | def id(x) = x 2 | 3 | def main() = { 4 | a = [1, "2"]; -- this should be an error at static mode, and work fine in dynamic mode 5 | c = 1; 6 | println(c.toString) 7 | } 8 | 9 | -------------------------------------------------------------------------------- /examples/factorial.lasca: -------------------------------------------------------------------------------- 1 | import Array 2 | 3 | def fact(n) = if n == 1 then 1 else n * fact(n - 1) 4 | 5 | def main() = { 6 | args = getArgs(); 7 | i = toInt(args[1]); 8 | println("Factorial of ${i} is: ${fact(i)}") 9 | } 10 | -------------------------------------------------------------------------------- /examples/hello.lasca: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env lasca -e 2 | def main() = println("Вітаю, Світе! 
Будь Lasca.") 3 | -------------------------------------------------------------------------------- /examples/lambda.lasca: -------------------------------------------------------------------------------- 1 | def test() = { 2 | def fact(n) = if n == 1 then 1 else n * fact(n - 1) ; 3 | println(toString(fact(3))) 4 | -- println(toString(inner() + three())) 5 | } 6 | 7 | def main() = { 8 | b = println; 9 | test(); 10 | c = "Hello"; 11 | a = { s, d -> b(c) }; 12 | a("test", 1) 13 | } 14 | -------------------------------------------------------------------------------- /examples/nbody.lasca: -------------------------------------------------------------------------------- 1 | import Array 2 | 3 | Pi = 3.141592653589793 4 | SolarMass = 4.0 * Pi * Pi 5 | DaysPerYear = 365.24 6 | 7 | Jupiter = [ 8 | 4.84143144246472090e+00, -- x 9 | -1.16032004402742839e+00, -- y 10 | -1.03622044471123109e-01, -- z 11 | 1.66007664274403694e-03 * DaysPerYear, -- vx 12 | 7.69901118419740425e-03 * DaysPerYear, -- vy 13 | -6.90460016972063023e-05 * DaysPerYear, -- vz 14 | 9.54791938424326609e-04 * SolarMass, -- mass 15 | ] 16 | 17 | Saturn = [ 18 | 8.34336671824457987e+00, 19 | 4.12479856412430479e+00, 20 | -4.03523417114321381e-01, 21 | -2.76742510726862411e-03 * DaysPerYear, 22 | 4.99852801234917238e-03 * DaysPerYear, 23 | 2.30417297573763929e-05 * DaysPerYear, 24 | 2.85885980666130812e-04 * SolarMass, 25 | ] 26 | 27 | Uranus = [ 28 | 1.28943695621391310e+01, 29 | -1.51111514016986312e+01, 30 | -2.23307578892655734e-01, 31 | 2.96460137564761618e-03 * DaysPerYear, 32 | 2.37847173959480950e-03 * DaysPerYear, 33 | -2.96589568540237556e-05 * DaysPerYear, 34 | 4.36624404335156298e-05 * SolarMass, 35 | ] 36 | 37 | Neptune = [ 38 | 1.53796971148509165e+01, 39 | -2.59193146099879641e+01, 40 | 1.79258772950371181e-01, 41 | 2.68067772490389322e-03 * DaysPerYear, 42 | 1.62824170038242295e-03 * DaysPerYear, 43 | -9.51592254519715870e-05 * DaysPerYear, 44 | 5.15138902046611451e-05 * SolarMass, 45 | ] 46 | 47 | Sun = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, SolarMass] 48 | 49 | def offsetMomentum(object: [Float], px, py, pz) = { 50 | vx = -px / SolarMass; 51 | vy = -py / SolarMass; 52 | vz = -pz / SolarMass; 53 | [ object[0], object[1], object[2], 54 | vx, vy, vz, 55 | object[6], 56 | ] 57 | } 58 | 59 | def init(bodies: [[Float]]) = { 60 | def go(bodies: [[Float]], i: Int, pxyz: [Float]) = { 61 | body = bodies[i]; 62 | updatedPs = [ 63 | pxyz[0] + body[3] * body[6], 64 | pxyz[1] + body[4] * body[6], 65 | pxyz[2] + body[5] * body[6], 66 | ]; 67 | if i == 0 then updatedPs else go(bodies, i - 1, updatedPs); 68 | }; 69 | 70 | len = length(bodies); 71 | pxyzInit = [0.0, 0.0, 0.0]; 72 | offsets = go(bodies, len - 1, pxyzInit); 73 | sun = offsetMomentum(bodies[0], offsets[0], offsets[1], offsets[2]); 74 | [sun, bodies[1], bodies[2], bodies[3], bodies[4]] 75 | } 76 | 77 | def advance(bodies: [[Float]], dt) = { 78 | def advanceInner(from: [Float], to: [Float], dt) = { 79 | dx = from[0] - to[0]; 80 | dy = from[1] - to[1]; 81 | dz = from[2] - to[2]; 82 | dSquared = dx * dx + dy * dy + dz * dz; 83 | distance = sqrt(dSquared); 84 | mag = dt / (dSquared * distance); 85 | toMass = to[6]; 86 | fromMass = from[6]; 87 | toMassMag = toMass * mag; 88 | fromMassMag = fromMass * mag; 89 | fromVx = from[3] - dx * toMassMag; 90 | fromVy = from[4] - dy * toMassMag; 91 | fromVz = from[5] - dz * toMassMag; 92 | toVx = to[3] + dx * fromMassMag; 93 | toVy = to[4] + dy * fromMassMag; 94 | toVz = to[5] + dz * fromMassMag; 95 | [ 96 | [from[0], from[1], from[2], fromVx, 
fromVy, fromVz, fromMass], 97 | [to[0], to[1], to[2], toVx, toVy, toVz, toMass], 98 | ] 99 | }; 100 | 101 | r01 = advanceInner(bodies[0], bodies[1], dt); 102 | r02 = advanceInner(r01[0], bodies[2], dt); 103 | r03 = advanceInner(r02[0], bodies[3], dt); 104 | r04 = advanceInner(r03[0], bodies[4], dt); 105 | 106 | r12 = advanceInner(r01[1], r02[1], dt); 107 | r13 = advanceInner(r12[0], r03[1], dt); 108 | r14 = advanceInner(r13[0], r04[1], dt); 109 | 110 | r23 = advanceInner(r12[1], r13[1], dt); 111 | r24 = advanceInner(r23[0], r14[1], dt); 112 | 113 | r34 = advanceInner(r23[1], r24[1], dt); 114 | 115 | sun = bodies[0]; 116 | r040 = r04[0]; 117 | sunVx = r040[3]; 118 | sunVy = r040[4]; 119 | sunVz = r040[5]; 120 | 121 | jupiter = bodies[1]; 122 | r140 = r14[0]; 123 | jupiterVx = r140[3]; 124 | jupiterVy = r140[4]; 125 | jupiterVz = r140[5]; 126 | 127 | saturn = bodies[2]; 128 | r240 = r24[0]; 129 | saturnVx = r240[3]; 130 | saturnVy = r240[4]; 131 | saturnVz = r240[5]; 132 | 133 | uranus = bodies[3]; 134 | r340 = r34[0]; 135 | uranusVx = r340[3]; 136 | uranusVy = r340[4]; 137 | uranusVz = r340[5]; 138 | 139 | neptune = bodies[4]; 140 | r341 = r34[1]; 141 | neptuneVx = r341[3]; 142 | neptuneVy = r341[4]; 143 | neptuneVz = r341[5]; 144 | 145 | [ 146 | [sun[0] + dt * sunVx, sun[1] + dt * sunVy, sun[2] + dt * sunVz, sunVx, sunVy, sunVz, sun[6]], 147 | [jupiter[0] + dt * jupiterVx, jupiter[1] + dt * jupiterVy, jupiter[2] + dt * jupiterVz, jupiterVx, jupiterVy, jupiterVz, jupiter[6]], 148 | [saturn[0] + dt * saturnVx, saturn[1] + dt * saturnVy, saturn[2] + dt * saturnVz, saturnVx, saturnVy, saturnVz, saturn[6]], 149 | [uranus[0] + dt * uranusVx, uranus[1] + dt * uranusVy, uranus[2] + dt * uranusVz, uranusVx, uranusVy, uranusVz, uranus[6]], 150 | [neptune[0] + dt * neptuneVx, neptune[1] + dt * neptuneVy, neptune[2] + dt * neptuneVz, neptuneVx, neptuneVy, neptuneVz, neptune[6]], 151 | ] 152 | 153 | } 154 | 155 | def squared(x: Float, y, z) = x * x + y * y + z * z 156 | 157 | def energy(bodies: [[Float]]) = { 158 | def energyInner(from: [Float], to: [Float]) = { 159 | dx = from[0] - to[0]; 160 | dy = from[1] - to[1]; 161 | dz = from[2] - to[2]; 162 | distance = sqrt(squared(dx, dy, dz)); 163 | from[6] * to[6] / distance; 164 | }; 165 | 166 | sun = bodies[0]; 167 | sunVx = sun[3]; 168 | sunVy = sun[4]; 169 | sunVz = sun[5]; 170 | sunMass = sun[6]; 171 | 172 | jupiter = bodies[1]; 173 | jupiterVx = jupiter[3]; 174 | jupiterVy = jupiter[4]; 175 | jupiterVz = jupiter[5]; 176 | jupiterMass = jupiter[6]; 177 | 178 | saturn = bodies[2]; 179 | saturnVx = saturn[3]; 180 | saturnVy = saturn[4]; 181 | saturnVz = saturn[5]; 182 | saturnMass = saturn[6]; 183 | 184 | uranus = bodies[3]; 185 | uranusVx = uranus[3]; 186 | uranusVy = uranus[4]; 187 | uranusVz = uranus[5]; 188 | uranusMass = uranus[6]; 189 | 190 | neptune = bodies[4]; 191 | neptuneVx = neptune[3]; 192 | neptuneVy = neptune[4]; 193 | neptuneVz = neptune[5]; 194 | neptuneMass = neptune[6]; 195 | 196 | -- Those are not variable reassignment, but shadowing instead, like in OCaml 197 | e = 0.5 * sunMass * squared(sunVx, sunVy, sunVz); 198 | e = e + 0.5 * jupiterMass * squared(jupiterVx, jupiterVy, jupiterVz); 199 | e = e + 0.5 * saturnMass * squared(saturnVx, saturnVy, saturnVz); 200 | e = e + 0.5 * uranusMass * squared(uranusVx, uranusVy, uranusVz); 201 | e = e + 0.5 * neptuneMass * squared(neptuneVx, neptuneVy, neptuneVz); 202 | 203 | e = e - energyInner(bodies[0], bodies[1]); 204 | e = e - energyInner(bodies[0], bodies[2]); 205 | e = e - 
energyInner(bodies[0], bodies[3]); 206 | e = e - energyInner(bodies[0], bodies[4]); 207 | 208 | e = e - energyInner(bodies[1], bodies[2]); 209 | e = e - energyInner(bodies[1], bodies[3]); 210 | e = e - energyInner(bodies[1], bodies[4]); 211 | 212 | e = e - energyInner(bodies[2], bodies[3]); 213 | e = e - energyInner(bodies[2], bodies[4]); 214 | 215 | e = e - energyInner(bodies[3], bodies[4]); 216 | 217 | e 218 | } 219 | 220 | def calculate(i, bodies) = if i > 0 then calculate(i - 1, advance(bodies, 0.01)) else bodies 221 | 222 | def main() = { 223 | -- set to 50000000 for real benchmark 224 | args = getArgs(); 225 | numIterations = toInt(args[1]); 226 | bodies = [Sun, Jupiter, Saturn, Uranus, Neptune]; 227 | initedBodies = init(bodies); 228 | println(toString(energy(initedBodies))); 229 | advanced = calculate(numIterations, initedBodies); 230 | println(toString(energy(advanced))); 231 | } 232 | -------------------------------------------------------------------------------- /examples/nbody2.lasca: -------------------------------------------------------------------------------- 1 | import Array 2 | 3 | Pi = 3.141592653589793 4 | SolarMass = 4.0 * Pi * Pi 5 | DaysPerYear = 365.24 6 | 7 | data Body = Body(x: Float, y: Float, z: Float, vx: Float, vy: Float, vz: Float, mass: Float) 8 | 9 | Jupiter = Body( 10 | 4.84143144246472090e+00, -- x 11 | -1.16032004402742839e+00, -- y 12 | -1.03622044471123109e-01, -- z 13 | 1.66007664274403694e-03 * DaysPerYear, -- vx 14 | 7.69901118419740425e-03 * DaysPerYear, -- vy 15 | -6.90460016972063023e-05 * DaysPerYear, -- vz 16 | 9.54791938424326609e-04 * SolarMass, -- mass 17 | ) 18 | 19 | Saturn = Body( 20 | 8.34336671824457987e+00, 21 | 4.12479856412430479e+00, 22 | -4.03523417114321381e-01, 23 | -2.76742510726862411e-03 * DaysPerYear, 24 | 4.99852801234917238e-03 * DaysPerYear, 25 | 2.30417297573763929e-05 * DaysPerYear, 26 | 2.85885980666130812e-04 * SolarMass, 27 | ) 28 | 29 | Uranus = Body( 30 | 1.28943695621391310e+01, 31 | -1.51111514016986312e+01, 32 | -2.23307578892655734e-01, 33 | 2.96460137564761618e-03 * DaysPerYear, 34 | 2.37847173959480950e-03 * DaysPerYear, 35 | -2.96589568540237556e-05 * DaysPerYear, 36 | 4.36624404335156298e-05 * SolarMass, 37 | ) 38 | 39 | Neptune = Body( 40 | 1.53796971148509165e+01, 41 | -2.59193146099879641e+01, 42 | 1.79258772950371181e-01, 43 | 2.68067772490389322e-03 * DaysPerYear, 44 | 1.62824170038242295e-03 * DaysPerYear, 45 | -9.51592254519715870e-05 * DaysPerYear, 46 | 5.15138902046611451e-05 * SolarMass, 47 | ) 48 | 49 | Sun = Body(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, SolarMass) 50 | 51 | def offsetMomentum(object, px, py, pz) = { 52 | vx = -px / SolarMass; 53 | vy = -py / SolarMass; 54 | vz = -pz / SolarMass; 55 | Body(object.x, object.y, object.z, vx, vy, vz, object.mass) 56 | } 57 | 58 | def init(bodies: [Body]) = { 59 | def go(bodies: [Body], i, pxyz: [Float]) = { 60 | body = bodies[i]; 61 | updatedPs = [ 62 | pxyz[0] + body.vx * body.mass, 63 | pxyz[1] + body.vy * body.mass, 64 | pxyz[2] + body.vz * body.mass, 65 | ]; 66 | if i == 0 then updatedPs else go(bodies, i - 1, updatedPs) 67 | }; 68 | 69 | len = length(bodies); 70 | pxyzInit = [0.0, 0.0, 0.0]; 71 | offsets = go(bodies, len - 1, pxyzInit); 72 | sun = offsetMomentum(bodies[0], offsets[0], offsets[1], offsets[2]); 73 | setIndex(bodies, 0, sun); 74 | bodies 75 | } 76 | 77 | def advanceInner(from, to, dt) = { 78 | dx = from.x - to.x; 79 | dy = from.y - to.y; 80 | dz = from.z - to.z; 81 | dSquared = dx * dx + dy * dy + dz * dz; 82 | distance = sqrt(dSquared); 83 
| mag = dt / (dSquared * distance); 84 | toMass = to.mass; 85 | fromMass = from.mass; 86 | toMassMag = toMass * mag; 87 | fromMassMag = fromMass * mag; 88 | fromVx = from.vx - dx * toMassMag; 89 | fromVy = from.vy - dy * toMassMag; 90 | fromVz = from.vz - dz * toMassMag; 91 | toVx = to.vx + dx * fromMassMag; 92 | toVy = to.vy + dy * fromMassMag; 93 | toVz = to.vz + dz * fromMassMag; 94 | [ 95 | Body(from.x, from.y, from.z, fromVx, fromVy, fromVz, fromMass), 96 | Body(to.x, to.y, to.z, toVx, toVy, toVz, toMass), 97 | ] 98 | } 99 | 100 | def updateIdx(bodies: [Body], i, j, from: Body, to: Body) = [ 101 | if i == 0 then from else if j == 0 then to else bodies[0], 102 | if i == 1 then from else if j == 1 then to else bodies[1], 103 | if i == 2 then from else if j == 2 then to else bodies[2], 104 | if i == 3 then from else if j == 3 then to else bodies[3], 105 | if i == 4 then from else if j == 4 then to else bodies[4], 106 | ] 107 | 108 | def update(body: Body, dt: Float) = Body( 109 | body.x + dt * body.vx, body.y + dt * body.vy, body.z + dt * body.vz, 110 | body.vx, body.vy, body.vz, body.mass) 111 | 112 | def advance(bodies: [Body], dt) = { 113 | def loop1(bodies, i, dt) = { 114 | def loop2(bodies: [Body], i: Int, j: Int, dt: Float) = 115 | if j < 5 then { 116 | res = advanceInner(bodies[i], bodies[j], dt); 117 | newBodies = updateIdx(bodies, i, j, res[0], res[1]); 118 | loop2(newBodies, i, j + 1, dt); 119 | } else bodies; 120 | 121 | if i < 5 then { 122 | res = loop2(bodies, i, i + 1, dt); 123 | loop1(res, i + 1, dt); 124 | } else bodies 125 | }; 126 | 127 | bodies1 = loop1(bodies, 0, dt); -- FIXME 128 | [ 129 | update(bodies1[0], dt), 130 | update(bodies1[1], dt), 131 | update(bodies1[2], dt), 132 | update(bodies1[3], dt), 133 | update(bodies1[4], dt), 134 | ] 135 | } 136 | 137 | def squared(x: Float, y: Float, z: Float) = x * x + y * y + z * z 138 | 139 | def energy(bodies: [Body]) = { 140 | def energyInner(from, to) = { 141 | dx = from.x - to.x; 142 | dy = from.y - to.y; 143 | dz = from.z - to.z; 144 | distance = sqrt(squared(dx, dy, dz)); 145 | from.mass * to.mass / distance; 146 | }; 147 | 148 | 149 | sun = bodies[0]; 150 | jupiter = bodies[1]; 151 | saturn = bodies[2]; 152 | uranus = bodies[3]; 153 | neptune = bodies[4]; 154 | 155 | -- Those are not variable reassignment, but shadowing instead, like in OCaml 156 | e = 0.5 * sun.mass * squared(sun.vx, sun.vy, sun.vz); 157 | e = e + 0.5 * jupiter.mass * squared(jupiter.vx, jupiter.vy, jupiter.vz); 158 | e = e + 0.5 * saturn.mass * squared(saturn.vx, saturn.vy, saturn.vz); 159 | e = e + 0.5 * uranus.mass * squared(uranus.vx, uranus.vy, uranus.vz); 160 | e = e + 0.5 * neptune.mass * squared(neptune.vx, neptune.vy, neptune.vz); 161 | 162 | e = e - energyInner(bodies[0], bodies[1]); 163 | e = e - energyInner(bodies[0], bodies[2]); 164 | e = e - energyInner(bodies[0], bodies[3]); 165 | e = e - energyInner(bodies[0], bodies[4]); 166 | 167 | e = e - energyInner(bodies[1], bodies[2]); 168 | e = e - energyInner(bodies[1], bodies[3]); 169 | e = e - energyInner(bodies[1], bodies[4]); 170 | 171 | e = e - energyInner(bodies[2], bodies[3]); 172 | e = e - energyInner(bodies[2], bodies[4]); 173 | 174 | e = e - energyInner(bodies[3], bodies[4]); 175 | 176 | e 177 | } 178 | 179 | def calculate(bodies: [Body], i) = if i > 0 then calculate(advance(bodies, 0.01), i - 1) else bodies 180 | 181 | def main() = { 182 | -- set to 50000000 for real benchmark 183 | args = getArgs(); 184 | numIterations = toInt(args[1]); 185 | bodies = [Sun, Jupiter, Saturn, 
Uranus, Neptune]; 186 | initedBodies = bodies.init; 187 | println(initedBodies.energy.toString); 188 | advanced = calculate(initedBodies, numIterations); 189 | println(toString(energy(advanced))); 190 | } 191 | -------------------------------------------------------------------------------- /examples/nbody3.lasca: -------------------------------------------------------------------------------- 1 | import Array 2 | 3 | Pi = 3.141592653589793 4 | SolarMass = 4.0 * Pi * Pi 5 | DaysPerYear = 365.24 6 | 7 | data Body = Body(x: Var Float, y: Var Float, z: Var Float, vx: Var Float, vy: Var Float, vz: Var Float, mass: Float) 8 | 9 | def body(x, y, z, vx, vy, vz, mass) = Body(Var(x), Var(y), Var(z), Var(vx), Var(vy), Var(vz), mass) 10 | 11 | Jupiter = body( 12 | 4.84143144246472090e+00, -- x 13 | -1.16032004402742839e+00, -- y 14 | -1.03622044471123109e-01, -- z 15 | 1.66007664274403694e-03 * DaysPerYear, -- vx 16 | 7.69901118419740425e-03 * DaysPerYear, -- vy 17 | -6.90460016972063023e-05 * DaysPerYear, -- vz 18 | 9.54791938424326609e-04 * SolarMass, -- mass 19 | ) 20 | 21 | Saturn = body( 22 | 8.34336671824457987e+00, 23 | 4.12479856412430479e+00, 24 | -4.03523417114321381e-01, 25 | -2.76742510726862411e-03 * DaysPerYear, 26 | 4.99852801234917238e-03 * DaysPerYear, 27 | 2.30417297573763929e-05 * DaysPerYear, 28 | 2.85885980666130812e-04 * SolarMass, 29 | ) 30 | 31 | Uranus = body( 32 | 1.28943695621391310e+01, 33 | -1.51111514016986312e+01, 34 | -2.23307578892655734e-01, 35 | 2.96460137564761618e-03 * DaysPerYear, 36 | 2.37847173959480950e-03 * DaysPerYear, 37 | -2.96589568540237556e-05 * DaysPerYear, 38 | 4.36624404335156298e-05 * SolarMass, 39 | ) 40 | 41 | Neptune = body( 42 | 1.53796971148509165e+01, 43 | -2.59193146099879641e+01, 44 | 1.79258772950371181e-01, 45 | 2.68067772490389322e-03 * DaysPerYear, 46 | 1.62824170038242295e-03 * DaysPerYear, 47 | -9.51592254519715870e-05 * DaysPerYear, 48 | 5.15138902046611451e-05 * SolarMass, 49 | ) 50 | 51 | Sun = body(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, SolarMass) 52 | 53 | def offsetMomentum(bodies: [Body]) = { 54 | var px = 0.0; 55 | var py = 0.0; 56 | var pz = 0.0; 57 | for(0, 5, { i -> 58 | ibody = bodies[i]; 59 | m = ibody.mass; 60 | px := px.readVar + ibody.vx.readVar * m; 61 | py := py.readVar + ibody.vy.readVar * m; 62 | pz := pz.readVar + ibody.vz.readVar * m; 63 | }); 64 | object = bodies[0]; 65 | object.vx := -px.readVar / SolarMass; 66 | object.vy := -py.readVar / SolarMass; 67 | object.vz := -pz.readVar / SolarMass; 68 | } 69 | 70 | def squared(x: Float, y: Float, z: Float) = x * x + y * y + z * z 71 | 72 | def advance(bodies: [Body], dt) = { 73 | for(0, 5 - 1, { i -> 74 | ibody = bodies[i]; 75 | imass = ibody.mass; 76 | for(i + 1, 5, { j -> 77 | jbody = bodies[j]; 78 | dx = ibody.x.readVar - jbody.x.readVar; 79 | dy = ibody.y.readVar - jbody.y.readVar; 80 | dz = ibody.z.readVar - jbody.z.readVar; 81 | dSquared = squared(dx, dy, dz); 82 | distance = sqrt(dSquared); 83 | mag = dt / (dSquared * distance); 84 | jmass = jbody.mass; 85 | ibody.vx := ibody.vx.readVar - dx * jmass * mag; 86 | ibody.vy := ibody.vy.readVar - dy * jmass * mag; 87 | ibody.vz := ibody.vz.readVar - dz * jmass * mag; 88 | 89 | jbody.vx := jbody.vx.readVar + dx * imass * mag; 90 | jbody.vy := jbody.vy.readVar + dy * imass * mag; 91 | jbody.vz := jbody.vz.readVar + dz * imass * mag; 92 | }); 93 | }); 94 | for(0, 5, { i -> 95 | body = bodies[i]; 96 | body.x := body.x.readVar + dt * body.vx.readVar; 97 | body.y := body.y.readVar + dt * body.vy.readVar; 98 | body.z := 
body.z.readVar + dt * body.vz.readVar; 99 | }); 100 | bodies 101 | } 102 | 103 | def energy(bodies: [Body]) = { 104 | var e = 0.0; 105 | for(0, 5, { i -> 106 | body = bodies[i]; 107 | e := e.readVar + 0.5 * body.mass * squared(body.vx.readVar, body.vy.readVar, body.vz.readVar); 108 | for(i + 1, 5, { j -> 109 | from = bodies[i]; 110 | to = bodies[j]; 111 | dx = from.x.readVar - to.x.readVar; 112 | dy = from.y.readVar - to.y.readVar; 113 | dz = from.z.readVar - to.z.readVar; 114 | distance = sqrt(squared(dx, dy, dz)); 115 | e := e.readVar - from.mass * to.mass / distance; 116 | }); 117 | }); 118 | e.readVar 119 | } 120 | 121 | def calculate(bodies: [Body], i) = if i > 0 then calculate(advance(bodies, 0.01), i - 1) else bodies 122 | 123 | def main() = { 124 | -- set to 50000000 for real benchmark 125 | args = getArgs(); 126 | numIterations = toInt(args[1]); 127 | bodies = [Sun, Jupiter, Saturn, Uranus, Neptune]; 128 | bodies.offsetMomentum; 129 | println(bodies.energy.toString); 130 | calculate(bodies, numIterations); 131 | println(toString(energy(bodies))); 132 | } 133 | -------------------------------------------------------------------------------- /examples/queen.lasca: -------------------------------------------------------------------------------- 1 | import List 2 | import String 3 | 4 | words = Cons("GOD", Cons("SAVE", Cons("THE", Cons("QUEEN", Nil)))) 5 | 6 | def permute(acc, words) = match words { 7 | Cons(w, ws) -> { 8 | String.foreach(w, { c -> 9 | ch = chr(c); 10 | permute([acc, ch].concat, ws); 11 | }); 12 | } 13 | Nil -> println(acc) 14 | } 15 | def main() = permute("", words) 16 | -------------------------------------------------------------------------------- /examples/regex.lasca: -------------------------------------------------------------------------------- 1 | import String 2 | 3 | def main() = { 4 | regex = compilePattern("(Scala|Haskell|Python|Rust|Ocaml|Java)"); 5 | println(toString(matchRegex(regex, "Haskell"))); 6 | replaced = regexReplace(regex, "Haskell or Python", "\$1 (consider Lasca instead of \$1)"); 7 | println(replaced); 8 | } -------------------------------------------------------------------------------- /examples/ski.lasca: -------------------------------------------------------------------------------- 1 | 2 | def i(x) = x 3 | def k(x, y) = x 4 | def s(x, y, z) = let a = x(z), b = y(z) in a(b) 5 | 6 | def main() = (k(println, 1))(i("Hello")) 7 | 8 | -------------------------------------------------------------------------------- /gencode/GenBench.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import System.IO 4 | import System.Exit 5 | import System.Environment 6 | import Control.Monad 7 | 8 | genTopLevel idx = "def test" ++ (show idx) ++ "() = let x = 1+1 in if x > 2 then 1 else 2" 9 | genLine idx = "x" ++ show idx ++ " = 1" ++ (if idx > 0 then " + x" ++ show (idx - 1) else "") ++ ";" 10 | 11 | 12 | genNLines funcId n = do 13 | putStrLn $ "def test" ++ show funcId ++ "() = {" 14 | loop "" 0 0 15 | putStrLn "true" 16 | putStrLn "}" 17 | where 18 | loop code lastPrintedLines idx = do 19 | let newline = genLine idx 20 | let updatedCode = code ++ newline 21 | putStrLn newline 22 | if (idx) >= n 23 | then return () -- hPutStrLn stderr ("Generated " ++ (show idx) ++ " lines") 24 | else if (idx - lastPrintedLines) > 1000 25 | then do 26 | let kloc = idx `div` 1000 27 | -- hPutStrLn stderr ("Generated " ++ (show kloc) ++ " klocs") 28 | loop updatedCode idx (idx + 1) 29 | else loop updatedCode 
lastPrintedLines (idx + 1) 30 | 31 | main :: IO () 32 | main = do 33 | args <- getArgs 34 | 35 | putStrLn "def main() = println(\"Done!\")" 36 | 37 | let (numFuncs, numLines) = case args of 38 | [] -> (10, 20) 39 | [numFuncs, numLines] -> (read numFuncs :: Integer, read numLines :: Integer) 40 | forM_ [1..numFuncs] $ \idx -> genNLines idx numLines 41 | return () 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /lasca.cabal: -------------------------------------------------------------------------------- 1 | name: lasca 2 | version: 0.0.2 3 | license: BSD3 4 | license-file: LICENSE 5 | author: Alexander Nemish 6 | maintainer: anemish@gmail.com 7 | build-type: Simple 8 | extra-source-files: README.md 9 | cabal-version: >= 1.10 10 | 11 | library 12 | default-extensions: OverloadedStrings 13 | RecordWildCards 14 | NamedFieldPuns 15 | FlexibleContexts 16 | PatternSynonyms 17 | StrictData 18 | build-depends: base >=4.9, 19 | text, 20 | utf8-string, 21 | haskeline, 22 | process, 23 | containers, 24 | multiset, 25 | prettyprinter >=1.1, 26 | -- fixed Lexer.float parsing or integer values 27 | megaparsec >= 6.4.0, 28 | scientific, 29 | lens >= 4.15, 30 | llvm-hs >= 5.0.0, 31 | llvm-hs-pure >= 5.0.0, 32 | -- llvm-hs-pretty >= 0.1.0.0, 33 | optparse-applicative, 34 | bytestring, 35 | murmur-hash, 36 | directory, 37 | filepath, 38 | mtl 39 | default-language: Haskell2010 40 | ghc-options: -Wincomplete-patterns 41 | -funbox-strict-fields 42 | -fhide-source-paths 43 | -fPIC 44 | 45 | hs-source-dirs: src/lib 46 | other-modules: Paths_lasca 47 | exposed-modules: 48 | Lasca.Compiler 49 | Lasca.Options 50 | Lasca.Infer 51 | Lasca.Desugar 52 | Lasca.Namer 53 | Lasca.Emit 54 | Lasca.EmitCommon 55 | Lasca.EmitDynamic 56 | Lasca.EmitStatic 57 | Lasca.Codegen 58 | Lasca.JIT 59 | Lasca.Lexer 60 | Lasca.Parser 61 | Lasca.Syntax 62 | Lasca.Type 63 | Lasca.Modules 64 | executable lasca 65 | other-extensions: GADTs 66 | build-depends: base >=4.9, 67 | lasca 68 | Extra-libraries: m, gc, pthread, pcre2-8 69 | default-language: Haskell2010 70 | ghc-options: -rtsopts 71 | -threaded 72 | -Wincomplete-patterns 73 | -funbox-strict-fields 74 | -fPIC 75 | -rdynamic 76 | if os(darwin) 77 | extra-libraries: lascartStatic 78 | ghc-options: -fwhole-archive-hs-libs 79 | else 80 | ghc-options: -optl-Wl,--whole-archive -optl-Wl,-llascartStatic -optl-Wl,--no-whole-archive 81 | hs-source-dirs: src/lasca 82 | main-is: Main.hs 83 | 84 | test-suite lasca-test 85 | type: exitcode-stdio-1.0 86 | hs-source-dirs: src/test 87 | main-is: TestMain.hs 88 | default-extensions: OverloadedStrings 89 | build-depends: base >=4.9, 90 | lasca, 91 | text, 92 | directory, 93 | bytestring, 94 | shelly, 95 | utf8-string, 96 | megaparsec >= 6.0.0, 97 | Glob, 98 | filepath, 99 | tasty, 100 | tasty-hunit, 101 | tasty-smallcheck, 102 | tasty-quickcheck, 103 | tasty-golden, 104 | tasty-program 105 | ghc-options: -threaded -rtsopts -fPIC 106 | if os(darwin) 107 | extra-libraries: lascartStatic 108 | ghc-options: -fwhole-archive-hs-libs 109 | else 110 | ghc-options: -optl-Wl,--whole-archive -optl-Wl,-llascartStatic -optl-Wl,--no-whole-archive 111 | Extra-libraries: gc, pthread, pcre2-8 112 | default-language: Haskell2010 113 | --executable gencode 114 | -- other-extensions: GADTs 115 | -- build-depends: base >=4.9 116 | -- default-language: Haskell2010 117 | -- hs-source-dirs: gencode 118 | -- main-is: GenBench.hs 119 | -------------------------------------------------------------------------------- /lasca.nix: 
-------------------------------------------------------------------------------- 1 | { mkDerivation, base, bytestring, containers, directory, filepath 2 | , boehmgc, Glob, haskeline, lascart, lens, llvm-hs, llvm-hs-pure 3 | , megaparsec, mtl, multiset, murmur-hash, optparse-applicative 4 | , pcre2, prettyprinter, process, scientific, shelly, stdenv, tasty 5 | , tasty-golden, tasty-hunit, tasty-program, tasty-quickcheck 6 | , tasty-smallcheck, text, utf8-string, pkgs 7 | }@args: 8 | mkDerivation { 9 | pname = "lasca"; 10 | version = "0.0.2"; 11 | src = ./.; 12 | isLibrary = false; 13 | isExecutable = true; 14 | libraryHaskellDepends = [ 15 | base bytestring containers directory filepath haskeline lens 16 | llvm-hs llvm-hs-pure megaparsec mtl multiset murmur-hash 17 | optparse-applicative prettyprinter process scientific text 18 | utf8-string pkgs.libffi lascart 19 | ]; 20 | executableHaskellDepends = [ base lascart ]; 21 | executableSystemDepends = [ boehmgc lascart pcre2 ]; 22 | testHaskellDepends = [ 23 | base bytestring filepath Glob megaparsec shelly tasty tasty-golden 24 | tasty-hunit tasty-program tasty-quickcheck tasty-smallcheck text 25 | utf8-string pkgs.libffi lascart 26 | ]; 27 | testSystemDepends = [ lascart pkgs.libffi ]; 28 | dontStrip = true; 29 | doCheck = true; 30 | doHaddock = false; 31 | preCheck = '' 32 | export PATH="$PATH:dist/build/lasca" 33 | export LASCAPATH="$src/libs/base:${stdenv.lib.getLib lascart}/lib" 34 | echo "New PATH = $PATH" 35 | echo "LASCAPATH = $LASCAPATH" 36 | mkdir -p "$out/libs/base" 37 | cp -r $src/libs/base/* "$out/libs/base" 38 | # cp "${stdenv.lib.getLib lascart}/lib/liblascartStatic.a" "$out/libs/base" 39 | # ld -v 2 40 | # lasca --verbose -O2 examples/hello.lasca 2>&1 41 | ''; 42 | license = stdenv.lib.licenses.bsd3; 43 | } 44 | -------------------------------------------------------------------------------- /lascart.nix: -------------------------------------------------------------------------------- 1 | {stdenv, cmake, libffi, boehmgc, pcre2, pkgconfig, zlib}: 2 | stdenv.mkDerivation rec { 3 | name = "lascart-${version}"; 4 | version = "0.0.2"; 5 | src = ./rts; 6 | nativeBuildInputs = [ pkgconfig cmake ]; 7 | buildInputs = [ pcre2 boehmgc zlib libffi ]; 8 | dontDisableStatic = true; 9 | cmakeFlags = [ 10 | "-DCMAKE_BUILD_TYPE=Debug" 11 | ]; 12 | dontStrip = true; 13 | meta = with stdenv.lib; { 14 | platforms = platforms.linux ++ platforms.darwin; 15 | license = licenses.bsd3; 16 | homepage = http://lasca-lang.org; 17 | description = "Lasca Runtime System"; 18 | }; 19 | } -------------------------------------------------------------------------------- /libs/base/Array.lasca: -------------------------------------------------------------------------------- 1 | module Array 2 | 3 | extern def unsafeCreateArray(size: Int): Array a = "createArray" 4 | extern def makeArray(size: Int, init: a): Array a = "makeArray" 5 | extern def append(first: Array a, second: Array a): Array a = "arrayAppend" 6 | extern def copy(src: Array a, srcPos: Int, dest: Array a, destPos: Int, length: Int): Unit = "arrayCopy" 7 | extern def getIndex(array: Array a, i: Int): a = "arrayGetIndex" 8 | extern def setIndex(array: Array a, i: Int, value: a): Unit = "arraySetIndex" 9 | extern def length(array: Array a): Int = "arrayLength" 10 | extern def init(n: Int, f: Int -> a): Array a = "arrayInit" 11 | 12 | def map(array, f) = { 13 | len = length(array); 14 | init(len, { idx -> f(array[idx]) }); 15 | } 16 | 17 | def foreach(array: [a], f: a -> b): Unit = { 18 | def 
foreachgo(array, f, i, len) = { 19 | if i < len then { 20 | -- no idea why, but otherwise LLVM doesn't do tail call optimization in foreachgo 21 | apply(f, array[i]); 22 | foreachgo(array, f, i + 1, len); 23 | } else () 24 | }; 25 | foreachgo(array, f, 0, length(array)); 26 | } 27 | 28 | def range(start: Int, end: Int, step: Int): Array Int = { 29 | init((end - start) / step + 1, { i -> start + i * step }) 30 | } 31 | 32 | def transform(array: Array a, f: Int -> a -> a): Array a = { 33 | len = length(array); 34 | def transformGo(array, f, i, len) = { 35 | if i < len then { 36 | setIndex(array, i, f(i, array[i])); 37 | transformGo(array, f, i + 1, len) 38 | } else () 39 | }; 40 | transformGo(array, f, 0, len) 41 | } 42 | 43 | def testArray() = { 44 | a = makeArray(10, "a"); 45 | b = makeArray(10, "b"); 46 | c = init(5, { i -> i.toString }); 47 | d = range(2, 8, 3); 48 | setIndex(a, 2, b[1]); 49 | println(toString(a)); 50 | println(toString(b)); 51 | println(toString(c)); 52 | println(toString(d)); 53 | println(toString(append(a, b))); 54 | copy(b, 0, a, 4, 5); 55 | println(toString(a)); 56 | } 57 | 58 | 59 | def main() = { 60 | testArray(); 61 | println("Hello") 62 | } 63 | -------------------------------------------------------------------------------- /libs/base/ArrayBuffer.lasca: -------------------------------------------------------------------------------- 1 | import Array 2 | 3 | data ArrayBuffer a = ArrayBuffer( 4 | array: Var (Array a), 5 | getSize: Var Int, 6 | ) 7 | 8 | emptyArray = Array.unsafeCreateArray(0) 9 | initialSize = 16 10 | 11 | def size(self: ArrayBuffer a) = self.getSize.readVar 12 | 13 | def new() = make(initialSize) 14 | 15 | def make(initialCapacity: Int) = if initialCapacity == 0 16 | then ArrayBuffer(Var(emptyArray), Var(0)) 17 | else if initialCapacity > 0 18 | then ArrayBuffer(Var(Array.unsafeCreateArray(initialCapacity)), Var(0)) 19 | else die("Illegal initial capacity ${initialCapacity}, should be >= 0") 20 | 21 | def fromArray(a: Array a) = { 22 | ab = make(Array.length(a)); 23 | appendArray(ab, a); 24 | } 25 | 26 | def isEmpty(self: ArrayBuffer a): Bool = self.getSize.readVar == 0 27 | 28 | def getIndex(self, idx) = (self.array.readVar)[idx] 29 | def setIndex(self, idx, value) = Array.setIndex(self.array.readVar, idx, value) 30 | 31 | def clear(self) = reduceToSize(self, 0) 32 | 33 | def ensureSize(self: ArrayBuffer a, n: Int): Unit = { 34 | sz = Array.length(self.array.readVar); 35 | var newSize = sz; 36 | if n > sz then { 37 | def calc(n, newSize) = if n > newSize.readVar then { 38 | newSize := newSize.readVar * 2; 39 | println("${n} ${newSize.readVar}"); 40 | calc(n, newSize); 41 | } else (); 42 | calc(n, newSize); 43 | newArray = Array.unsafeCreateArray(newSize.readVar); 44 | Array.copy(self.array.readVar, 0, newArray, 0, self.getSize.readVar); 45 | writeVar(self.array, newArray); 46 | } else () 47 | } 48 | 49 | def reduceToSize(self, sz: Int): Unit = { 50 | require(sz <= self.size); 51 | self.getSize := sz; 52 | } 53 | 54 | def append(self: ArrayBuffer a, elem) = { 55 | ensureSize(self, self.size + 1); 56 | a = self.array.readVar; 57 | Array.setIndex(a, self.getSize.readVar, elem); 58 | self.getSize := self.size + 1; 59 | self; 60 | } 61 | 62 | def prepend(self, elem) = { 63 | ensureSize(self, self.size + 1); 64 | a = self.array.readVar; 65 | Array.copy(a, 0, a, 1, self.size); 66 | Array.setIndex(a, 0, elem); 67 | self.getSize := self.size + 1; 68 | self 69 | } 70 | 71 | def appendArray(self: ArrayBuffer a, elems: Array a) = { 72 | sz = 
Array.length(elems); 73 | ensureSize(self, self.size + sz); 74 | a = self.array.readVar; 75 | var i = self.size; 76 | Array.foreach(elems, { elem -> 77 | Array.setIndex(a, i.readVar, elem); 78 | i := i.readVar + 1; 79 | }); 80 | self.getSize := self.size + sz; 81 | self; 82 | } 83 | 84 | def insertArray(self, n: Int, seq: Array a): Unit = { 85 | if n < 0 or n > self.size 86 | then die("Index out of bounds: ${n}") 87 | else { 88 | len = Array.length(seq); 89 | newSize = self.size + len; 90 | ensureSize(self, newSize); 91 | a = self.array.readVar; 92 | Array.copy(a, n, a, n + len, self.size - n); 93 | var i = n; 94 | Array.foreach(seq, { elem -> 95 | Array.setIndex(a, i.readVar, elem); 96 | i := i.readVar + 1; 97 | }); 98 | self.getSize := newSize; 99 | } 100 | } 101 | 102 | def prependArray(self: ArrayBuffer a, elems: Array a) = { 103 | insertArray(self, 0, elems); 104 | self; 105 | } 106 | 107 | def remove(self, n: Int, count: Int) = { 108 | if count < 0 then die("removing negative number of elements: ${count}") 109 | else if count == 0 then () 110 | else if n < 0 or n > self.size - count then die("at ${n} deleting ${count}") 111 | else { 112 | a = self.array.readVar; 113 | Array.copy(a, n + count, a, n, self.size - (n + count)); 114 | reduceToSize(self, self.size - count); 115 | } 116 | } 117 | 118 | def toArray(self) = { 119 | a = self.array.readVar; 120 | Array.init(self.size, {idx -> a[idx]}) 121 | } 122 | 123 | def main() = { 124 | ab = make(3); 125 | println("${ab}"); 126 | append(ab, 1); 127 | println("${ab}"); 128 | append(ab, 2); 129 | println("${ab}"); 130 | append(ab, 3); 131 | prepend(ab, 0); 132 | println("${ab}"); 133 | append(ab, 4); 134 | println("${ab}"); 135 | ensureSize(ab, 20); 136 | appendArray(ab, [5, 6, 7, 8]); 137 | println("${ab}"); 138 | remove(ab, 1, 2); 139 | println("${ab}"); 140 | insertArray(ab, 4, [7,7,7]); 141 | prependArray(ab, [-2, -1]); 142 | println("${ab}"); 143 | setIndex(ab, 1, 42); 144 | println("Test ${ab.size} ${ab.isEmpty} ${getIndex(ab, 1)}"); 145 | ab.clear; 146 | println("${ab}"); 147 | } -------------------------------------------------------------------------------- /libs/base/Bits.lasca: -------------------------------------------------------------------------------- 1 | extern def byteAnd(a: Byte, b: Byte): Byte = "byteAnd" 2 | extern def byteOr(a: Byte, b: Byte): Byte = "byteOr" 3 | extern def byteXor(a: Byte, b: Byte): Byte = "byteXor" 4 | extern def byteShiftL(a: Byte, b: Byte): Byte = "byteShiftL" 5 | -- arithmetical shift right 6 | extern def byteShiftR(a: Byte, b: Byte): Byte = "byteShiftR" 7 | extern def byteNot(a: Byte): Byte = "byteNot" 8 | 9 | extern def intAnd(a: Int, b: Int): Int = "intAnd" 10 | extern def intOr(a: Int, b: Int): Int = "intOr" 11 | extern def intXor(a: Int, b: Int): Int = "intXor" 12 | extern def intShiftL(a: Int, b: Int): Int = "intShiftL" 13 | -- arithmetical shift right 14 | extern def intShiftR(a: Int, b: Int): Int = "intShiftR" 15 | extern def intNot(a: Int): Int = "intNot" 16 | extern def intPopCount(a: Int): Int = "intPopCount" -------------------------------------------------------------------------------- /libs/base/ByteArray.lasca: -------------------------------------------------------------------------------- 1 | module ByteArray 2 | 3 | extern def create(size: Int): ByteArray = "createByteArray" 4 | --extern def make(size: Int, init: Byte): ByteArray = "makeByteArray" 5 | extern def copy(src: ByteArray, srcPos: Int, dest: ByteArray, destPos: Int, length: Int): Unit = "byteArrayCopy" 6 | extern def
getIndex(array: ByteArray, i: Int): Byte = "byteArrayGetIndex" 7 | extern def setIndex(array: ByteArray, i: Int, value: Byte): Unit = "byteArraySetIndex" 8 | extern def length(array: ByteArray): Int = "byteArrayLength" 9 | 10 | def testArray() = { 11 | a = create(10); 12 | b = create(10); 13 | setIndex(a, 2, intToByte(2)); 14 | setIndex(b, 3, intToByte(3)); 15 | setIndex(a, 3, getIndex(b, 3)); 16 | copy(a, 0, b, 1, 9); -- FIXME 17 | println(toString(a)); 18 | println(toString(b)); 19 | println(toString(b.length)); 20 | } 21 | 22 | def main() = { 23 | testArray(); 24 | println("Hello") 25 | } 26 | -------------------------------------------------------------------------------- /libs/base/List.lasca: -------------------------------------------------------------------------------- 1 | module List 2 | 3 | data List a = Nil | Cons(head: a, tail: List a) 4 | 5 | def isEmpty(l) = match l { 6 | Nil -> true 7 | Cons(_, _) -> false 8 | } 9 | 10 | def foldl(self: List a, z: b, f: a -> b -> b): b = match self { 11 | Nil -> z 12 | Cons(hd, tl) -> foldl(tl, f(hd, z), f) 13 | } 14 | 15 | def foldr(self: List a, z: b, f: a -> b -> b): b = match self { 16 | Nil -> z 17 | Cons(hd, tl) -> f(hd, foldr(tl, z, f)) 18 | } 19 | 20 | def length(l) = foldl(l, 0, { e, acc -> acc + 1}) 21 | 22 | def map(self, f) = foldl(self, Nil, { e, acc -> Cons(f(e), acc) }) 23 | 24 | def filter(self, p: a -> Bool) = match self { 25 | Nil -> Nil 26 | Cons(hd, tl) -> if p(hd) then Cons(hd, filter(tl, p)) else filter(tl, p) 27 | } 28 | 29 | 30 | def main() = { 31 | list = Cons("1", Cons("2", Nil)); 32 | len = list.length; 33 | ints = map(list, toInt); 34 | ints2 = map(ints, { i -> i + 10}); 35 | println(toString(ints2)); 36 | println("Hello world! ${list}. Is empty: ${list.isEmpty}, length = ${len.toString}"); 37 | } -------------------------------------------------------------------------------- /libs/base/Map.lasca: -------------------------------------------------------------------------------- 1 | module Map 2 | 3 | import Option 4 | 5 | data Map k a = Bin(binSize: Int, binKey: k, binValue: a, ltree: Map k a, rtree: Map k a) 6 | | Tip 7 | 8 | data View k a = View(viewKey: k, viewValue: a, viewMap: Map k a) 9 | 10 | def empty() = Tip 11 | 12 | def isEmpty(self: Map k a): Bool = match self { 13 | Tip -> true 14 | _ -> false 15 | } 16 | 17 | def size(self: Map k a): Int = match self { 18 | Tip -> 0 19 | Bin(s, _, _, _, _) -> s 20 | } 21 | 22 | def lookup(self: Map k a, key: k): a = match self { 23 | Tip -> None 24 | Bin(_, kx, x, l, r) -> match runtimeCompare(key, kx) { 25 | -1 -> lookup(l, key) 26 | 1 -> lookup(r, key) 27 | 0 -> Some(x) 28 | } 29 | } 30 | 31 | def member(self, key) = match lookup(self, key) { 32 | None -> false 33 | _ -> true 34 | } 35 | 36 | def single(key, value) = Bin(1, key, value, Tip, Tip) 37 | 38 | Delta = 3 39 | 40 | Ratio = 2 41 | 42 | def bin(k, x, l, r) = Bin(l.size + r.size + 1, k, x, l, r) 43 | 44 | def singleL(k1, x1, t1, t) = match t { 45 | Bin(_, k2, x2, t2, t3) -> bin(k2, x2, bin(k1, x1, t1, t2), t3) 46 | } 47 | 48 | def singleR(k1, x1, t, t3) = match t { 49 | Bin(_, k2, x2, t1, t2) -> bin(k2, x2, t1, bin(k1, x1, t2, t3)) 50 | } 51 | 52 | def doubleL(k1, x1, t1, t) = match t { 53 | Bin(_, k2, x2, Bin(_, k3, x3, t2, t3), t4) -> bin(k3, x3, bin(k1, x1, t1, t2), bin(k2, x2, t3, t4)) 54 | } 55 | 56 | def doubleR(k1, x1, t, t4) = match t { 57 | Bin(_, k2, x2, t1, Bin(_, k3, x3, t2, t3)) -> bin(k3, x3, bin(k2, x2, t1, t2), bin(k1, x1, t3, t4)) 58 | } 59 | 60 | 61 | def rotateL(k, x, l, r) = match r { 62 
| Bin(_, _, _, ly, ry) -> if ly.size < Ratio * ry.size then singleL(k, x, l, r) else doubleL(k, x, l, r) 63 | } 64 | 65 | def rotateR(k, x, l, r) = match l { 66 | Bin(_, _, _, ly, ry) -> if ry.size < Ratio * ly.size then singleR(k, x, l, r) else doubleR(k, x, l, r) 67 | } 68 | 69 | def balance(k, x, l, r) = { 70 | if l.size + r.size <= 1 then Bin(l.size + r.size + 1, k, x, l, r) 71 | else if r.size > Delta * l.size then rotateL(k, x, l, r) 72 | else if l.size > Delta * r.size then rotateR(k, x, l, r) 73 | else Bin(l.size + r.size + 1, k, x, l, r) 74 | } 75 | 76 | 77 | 78 | def insert(self, k, x) = { 79 | def insertGo(self, orig, kx, x) = match self { 80 | Tip -> single(orig, x) 81 | Bin(s, ky, y, l, r) -> match runtimeCompare(kx, ky) { 82 | -1 -> let l1 = insertGo(l, orig, kx, x) in balance(ky, y, l1, r) 83 | 1 -> let r1 = insertGo(r, orig, kx, x) in balance(ky, y, l, r1) 84 | 0 -> Bin(s, orig, x, l, r) 85 | } 86 | }; 87 | insertGo(self, k, k, x) 88 | } 89 | 90 | def minViewSure(k, x, l, r) = match l { 91 | Tip -> View(k, x, r) 92 | Bin(_, kx, xl, ll, lr) -> let view = minViewSure(kx, xl, ll, lr) 93 | in View(view.viewKey, view.viewValue, balance(k, x, view.viewMap, r)) 94 | } 95 | 96 | def maxViewSure(k, x, l, r) = match r { 97 | Tip -> View(k, x, l) 98 | Bin(_, kr, xr, rl, rr) -> let view = maxViewSure(kr, xr, rl, rr) 99 | in View(view.viewKey, view.viewValue, balance(k, x, l, view.viewMap)) 100 | } 101 | 102 | {- 103 | glues two trees together. 104 | Assumes that [l] and [r] are already balanced with respect to each other. 105 | -} 106 | def glue(l, r) = match l { 107 | Tip -> r 108 | Bin(sl, kl, xl, ll, lr) -> match r { 109 | Tip -> l 110 | Bin(sr, kr, xr, rl, rr) -> 111 | if sl > sr then match maxViewSure(kl, xl, ll, lr) { 112 | View(km, m, l1) -> balance(km, m, l1, r) 113 | } else match minViewSure(kr, xr, rl, rr) { 114 | View(km, m, r1) -> balance(km, m, l, r1) 115 | } 116 | } 117 | } 118 | 119 | def delete(self, k) = match self { 120 | Tip -> Tip 121 | Bin(_, kx, x, l, r) -> match runtimeCompare(k, kx) { 122 | -1 -> balance(kx, x, delete(l, k), r) 123 | 1 -> balance(kx, x, l, delete(r, k)) 124 | 0 -> glue(l, r) 125 | } 126 | } 127 | 128 | def mapWithKey(self, f) = match self { 129 | Tip -> Tip 130 | Bin(sx, kx, x, l, r) -> let x1 = f(kx, x) in Bin(sx, kx, x1, mapWithKey(l, f), mapWithKey(r, f)) 131 | } 132 | 133 | def foreachWithKey(self, f): Unit = match self { 134 | Tip -> Tip 135 | Bin(sx, kx, x, l, r) -> { 136 | f(kx, x); 137 | foreachWithKey(l, f); 138 | foreachWithKey(r, f); 139 | } 140 | } 141 | 142 | def test(i, m) = if i > 0 then test(i - 1, insert(m, i, i.toString)) else m 143 | 144 | def main() = { 145 | emp = Tip; 146 | one = single(1, "one"); 147 | two = insert(one, 2, "two"); 148 | three = insert(two, 3, "three"); 149 | four = insert(three, 4, "four"); 150 | println("runtimeCompare = ${runtimeCompare(1, 2)}"); 151 | println("Test isEmpty should be true: ${emp.isEmpty}"); 152 | println("Test size should be 0: ${emp.size}"); 153 | println("Test isEmpty should be false: ${one.isEmpty}"); 154 | println("Test isEmpty should be true: ${isEmpty(delete(one, 1))}"); 155 | 156 | println("Test size should be 1: ${one.size}"); 157 | println("Lookup should be one: ${lookup(one, 1)}"); 158 | println("Lookup should be none: ${lookup(one, 2)}"); 159 | 160 | println("Test size should be 2: ${two.size}"); 161 | println("Lookup should be two: ${lookup(two, 2)}"); 162 | println("Lookup should be none: ${lookup(two, 3)}"); 163 | 164 | println("Test size should be 3: ${three.size}"); 165 | 
println("Lookup should be three: ${lookup(three, 3)}"); 166 | println("Lookup should be none: ${lookup(three, 4)}"); 167 | 168 | println("Test size should be 4: ${four.size}"); 169 | println("Lookup should be four: ${lookup(four, 4)}"); 170 | println("4 is member of four: ${member(four, 4)}"); 171 | println("Lookup should be none: ${lookup(four, 5)}"); 172 | println(toString(four)); 173 | 174 | thou = test(1000, empty()); 175 | println(toString(thou.size)); 176 | println(toString(size(delete(thou, 1000)))); 177 | 178 | } -------------------------------------------------------------------------------- /libs/base/Option.lasca: -------------------------------------------------------------------------------- 1 | module Option 2 | 3 | data Option a = None | Some(v: a) 4 | 5 | def map(self, f: a -> b) = match self { 6 | None -> None 7 | Some(value) -> Some(f(value)) 8 | } 9 | 10 | 11 | def main() = { 12 | println("Even ${None} is ${Some(true)}") 13 | } 14 | -------------------------------------------------------------------------------- /libs/base/Prelude.lasca: -------------------------------------------------------------------------------- 1 | module Prelude 2 | 3 | extern def libcErrno(): Int = "libcErrno" 4 | extern def libcError(error: Int): String = "libcError" 5 | extern def libcCurError(): String = "libcCurError" 6 | 7 | extern def print(s: String): Unit = "print" 8 | extern def println(s: String): Unit = "println" 9 | extern def toString(a: a): String = "toString" 10 | extern def sqrt(a: Float): Float = "sqrt" 11 | extern def getArgs(): Array String = "getArgs" 12 | extern def toInt(s: String): Int = "toInt" 13 | extern def concat(strings: Array String): String = "concat" 14 | extern def exit(code: Int): a = "exit" 15 | 16 | extern def runtimeIsConstr(constr: a, name: String): Bool = "runtimeIsConstr" 17 | extern def runtimeCheckTag(value: a, tag: Int): Bool = "runtimeCheckTag" 18 | extern def runtimeCompare(lhs: a, rhs: a): Int = "runtimeCompare" 19 | 20 | extern def intToByte(i: Int): Byte = "intToByte" 21 | extern def byteToInt(i: Byte): Int = "byteToInt" 22 | extern def intToInt16(i: Int): Int16 = "intToInt16" 23 | extern def int16ToInt(i: Int16): Int = "int16ToInt" 24 | extern def intToInt32(i: Int): Int32 = "intToInt32" 25 | extern def int32ToInt(i: Int32): Int = "int32ToInt" 26 | extern def intToFloat(i: Int): Float = "intToFloat64" 27 | extern def floatToInt(i: Float): Int = "float64ToInt" 28 | 29 | extern def intRem(a: Int, b: Int): Int = "intRem" 30 | 31 | extern def hashCode(value: a): Int = "lascaHashCode" 32 | 33 | data Var a = Var(readVar: a) 34 | 35 | data FileHandle 36 | 37 | extern def openFile(filename: String, mode: String): FileHandle = "lascaOpenFile" 38 | extern def readFile(filename: String): String = "lascaReadFile" 39 | extern def writeFile(filename: String, string: String): Unit = "lascaWriteFile" 40 | 41 | extern def writeVar(ref: Var a, value: a): Var a = "writeVar" 42 | 43 | extern def getCwd(): String = "lascaGetCwd" 44 | extern def chdir(path: String): Option String = "lascaChdir" 45 | extern def getEnv(name: String): Option String = "getEnv" 46 | extern def lascaSetEnv(name: String, value: String, replace: Bool): Int = "setEnv" 47 | extern def lascaUnsetEnv(name: String): Int = "unsetEnv" 48 | 49 | def unarynot(e: Bool): Bool = if e then false else true 50 | 51 | def die(msg: String): a = { 52 | println(msg); 53 | exit(1); 54 | } 55 | 56 | def undefined() = die("undefined") 57 | 58 | def require(req: Bool) = if req then () else die("Requirement failed") 
59 | 60 | def setEnv(name: String, value: String, replace: Bool): Unit = { 61 | if lascaSetEnv(name, value, replace) == -1 then die("setEnv: ${libcCurError()}") else () 62 | } 63 | 64 | def unsetEnv(name: String): Unit = { 65 | if lascaUnsetEnv(name) == -1 then die("unsetEnv: ${libcCurError()}") else () 66 | } 67 | 68 | def apply(f, arg) = f(arg) 69 | def apply2(f, arg1, arg2) = f(arg1, arg2) 70 | 71 | def for(start: Int, end: Int, f: Int -> a) = { 72 | if start < end then { 73 | apply(f, start); -- no idea why, but without apply LLVM doesn't do tail call optimization 74 | for(start + 1, end, f); 75 | } else (); 76 | } 77 | -------------------------------------------------------------------------------- /libs/base/String.lasca: -------------------------------------------------------------------------------- 1 | import Array 2 | 3 | {- 4 | Strings are UTF-8 encoded. 5 | `Char` and `Code Point` mean valid Unicode scalar values, i.e. 0-0xd7ff, 0xe000-0x10ffff 6 | http://unicode.org/glossary/#unicode_scalar_value 7 | -} 8 | 9 | -- length of UTF-8 encoded byte string 10 | extern def bytesCount(s: String): Int = "bytesLength" 11 | extern def chr(codePoint: Int32): String = "codePointToString" 12 | extern def fromCharArray(chars: Array Int32): String = "codePointsToString" 13 | extern def charToLower(codePoint: Int32): Int32 = "utf8proc_tolower" 14 | extern def charToUpper(codePoint: Int32): Int32 = "utf8proc_toupper" 15 | extern def charToTitle(codePoint: Int32): Int32 = "utf8proc_totitle" 16 | extern def isValidUnicodeScalar(codePoint: Int32): Bool = "utf8proc_codepoint_valid" 17 | extern def iterate(s: String, f: Int32 -> Bool): Unit = "codePointsIterate" 18 | extern def graphemeIterate(s: String, f: String -> Bool): Unit = "graphemesIterate" 19 | extern def utf8procCategory(c: Int32): Int = "utf8proc_category" 20 | 21 | data GeneralCategory 22 | = UppercaseLetter -- ^ Lu: Letter, Uppercase 23 | | LowercaseLetter -- ^ Ll: Letter, Lowercase 24 | | TitlecaseLetter -- ^ Lt: Letter, Titlecase 25 | | ModifierLetter -- ^ Lm: Letter, Modifier 26 | | OtherLetter -- ^ Lo: Letter, Other 27 | | NonSpacingMark -- ^ Mn: Mark, Non-Spacing 28 | | SpacingCombiningMark -- ^ Mc: Mark, Spacing Combining 29 | | EnclosingMark -- ^ Me: Mark, Enclosing 30 | | DecimalNumber -- ^ Nd: Number, Decimal 31 | | LetterNumber -- ^ Nl: Number, Letter 32 | | OtherNumber -- ^ No: Number, Other 33 | | ConnectorPunctuation -- ^ Pc: Punctuation, Connector 34 | | DashPunctuation -- ^ Pd: Punctuation, Dash 35 | | OpenPunctuation -- ^ Ps: Punctuation, Open 36 | | ClosePunctuation -- ^ Pe: Punctuation, Close 37 | | InitialQuote -- ^ Pi: Punctuation, Initial quote 38 | | FinalQuote -- ^ Pf: Punctuation, Final quote 39 | | OtherPunctuation -- ^ Po: Punctuation, Other 40 | | MathSymbol -- ^ Sm: Symbol, Math 41 | | CurrencySymbol -- ^ Sc: Symbol, Currency 42 | | ModifierSymbol -- ^ Sk: Symbol, Modifier 43 | | OtherSymbol -- ^ So: Symbol, Other 44 | | Space -- ^ Zs: Separator, Space 45 | | LineSeparator -- ^ Zl: Separator, Line 46 | | ParagraphSeparator -- ^ Zp: Separator, Paragraph 47 | | Control -- ^ Cc: Other, Control 48 | | Format -- ^ Cf: Other, Format 49 | | Surrogate -- ^ Cs: Other, Surrogate 50 | | PrivateUse -- ^ Co: Other, Private Use 51 | | NotAssigned -- ^ Cn: Other, Not Assigned 52 | 53 | def generalCategory(char: Int32): GeneralCategory = match utf8procCategory(char) { 54 | 0 -> NotAssigned 55 | 1 -> UppercaseLetter 56 | 2 -> LowercaseLetter 57 | 3 -> TitlecaseLetter 58 | 4 -> ModifierLetter 59 | 5 -> OtherLetter 60 | 6 -> 
NonSpacingMark 61 | 7 -> SpacingCombiningMark 62 | 8 -> EnclosingMark 63 | 9 -> DecimalNumber 64 | 10 -> LetterNumber 65 | 11 -> OtherNumber 66 | 12 -> ConnectorPunctuation 67 | 13 -> DashPunctuation 68 | 14 -> OpenPunctuation 69 | 15 -> ClosePunctuation 70 | 16 -> InitialQuote 71 | 17 -> FinalQuote 72 | 18 -> OtherPunctuation 73 | 19 -> MathSymbol 74 | 20 -> CurrencySymbol 75 | 21 -> ModifierSymbol 76 | 22 -> OtherSymbol 77 | 23 -> Space 78 | 24 -> LineSeparator 79 | 25 -> ParagraphSeparator 80 | 26 -> Control 81 | 27 -> Format 82 | 28 -> Surrogate 83 | 29 -> PrivateUse 84 | } 85 | 86 | {- 87 | Regular Expression stuff. Lasca uses PCRE2 library internally. 88 | -} 89 | data Pattern 90 | 91 | extern def compilePattern(pattern: String): Pattern = "lascaCompileRegex" 92 | extern def matchRegex(pattern: Pattern, str: String): Bool = "lascaMatchRegex" 93 | extern def regexReplace(pattern: Pattern, str: String, replacement: String): String = "lascaRegexReplace" 94 | 95 | def replace(heystack: String, needle: String, replacement: String) = { 96 | p = compilePattern(needle); 97 | regexReplace(p, heystack, replacement) 98 | } 99 | 100 | def startsWith(s: String, prefix: String): Bool = { 101 | -- FIXME: write decent implementation 102 | p = compilePattern("^${prefix}.*"); 103 | matchRegex(p, s); 104 | } 105 | 106 | def endsWith(s: String, suffix: String): Bool = { 107 | -- FIXME: write decent implementation 108 | p = compilePattern(".*${suffix}\$"); 109 | matchRegex(p, s); 110 | } 111 | 112 | def foreach(s: String, f: Int32 -> a): Unit = iterate(s, { char -> f(char); true }) 113 | 114 | def codePointAt(s: String, index: Int): Int32 = { 115 | var i = 0; 116 | var result = -1.intToInt32; 117 | iterate(s, { char -> 118 | if i.readVar < index then { 119 | i := i.readVar + 1; 120 | true; 121 | } 122 | else { 123 | result := char; 124 | false; 125 | } 126 | }); 127 | if result.readVar == -1.intToInt32 then die("Index is out of range: ${index}") else result.readVar; 128 | } 129 | 130 | def ord(s: String) = codePointAt(s, 0) 131 | 132 | def foldl(s: String, zero: a, f: a -> Int32 -> a): a = { 133 | var acc = zero; 134 | iterate(s, { char -> acc := f(acc.readVar, char); true }); 135 | acc.readVar 136 | } 137 | 138 | def codePointCount(s: String): Int = foldl(s, 0, { len, c -> len + 1}) 139 | 140 | def graphemeCount(s: String): Int = { 141 | var count = 0; 142 | graphemeIterate(s, { g -> 143 | count := count.readVar + 1; 144 | true 145 | }); 146 | count.readVar 147 | } 148 | 149 | def compareLength(s: String, length: Int): Int = { 150 | var i = 0; 151 | var result = 0; 152 | iterate(s, { char -> 153 | i := i.readVar + 1; 154 | i.readVar <= length 155 | }); 156 | runtimeCompare(i.readVar, length); 157 | } 158 | 159 | def length(s) = codePointCount(s) 160 | 161 | def codePoints(s: String): Array Int32 = { 162 | array = makeArray(s.codePointCount, 0.intToInt32); 163 | foldl(s, 0, { idx, char -> setIndex(array, idx, char); idx + 1 }); 164 | array; 165 | } 166 | 167 | def map(s: String, f: Int32 -> Int32): String = { 168 | array = s.codePoints; 169 | transform(array, { i, cp -> f(cp) }); 170 | fromCharArray(array); 171 | } 172 | 173 | def toLower(s: String): String = if compareLength(s, 1) == 0 then chr(charToLower(ord(s))) else map(s, charToLower) 174 | 175 | def toUpper(s: String): String = if compareLength(s, 1) == 0 then chr(charToUpper(ord(s))) else map(s, charToUpper) 176 | 177 | def toTitle(s: String): String = if compareLength(s, 1) == 0 then chr(charToTitle(ord(s))) else map(s, charToTitle) 178 | 179 | 
def capitalize(s: String): String = match compareLength(s, 1) { 180 | -1 -> s -- empty string 181 | 0 -> chr(charToUpper(ord(s))) -- toUpper a single code point 182 | _ -> { 183 | array = s.codePoints; 184 | setIndex(array, 0, charToUpper(array[0])); 185 | fromCharArray(array); 186 | } 187 | } 188 | 189 | def joinGo(i: Int, arr: [String], len: Int, separator: String, strings: [String]) = { 190 | if i < len then { 191 | j = 2 * i; 192 | setIndex(arr, j - 1, separator); 193 | setIndex(arr, j, strings[i]); 194 | joinGo(i + 1, arr, len, separator, strings); 195 | } else (); 196 | } 197 | 198 | def join(separator: String, strings: [String]): String = match Array.length(strings) { 199 | 0 -> "" 200 | 1 -> strings[0] 201 | len -> { 202 | arr = Array.makeArray(2 * len - 1, ""); 203 | setIndex(arr, 0, strings[0]); 204 | joinGo(1, arr, len, separator, strings); 205 | concat(arr); 206 | } 207 | } 208 | 209 | def isDigit(char: Int32) = (char - 48.intToInt32) <= 9.intToInt32 210 | def isLetter(char: Int32) = let cat = utf8procCategory(char) in 1 <= cat and cat <= 5 -- Letters 211 | def isNumeric(char: Int32) = let cat = utf8procCategory(char) in 9 <= cat and cat <= 11 -- Numbers 212 | def isSpace(char: Int32) = char == 32.intToInt32 213 | or 9.intToInt32 <= char and char <= 13.intToInt32 214 | or char == 133.intToInt32 215 | or 160.intToInt32 <= char and runtimeCompare(generalCategory(char), Space) == 0 216 | 217 | def main() = { 218 | empty = ""; 219 | test = "Teástuͤ"; 220 | symbol = "uͤ"; 221 | upperT = "T"; 222 | lowerA = "å"; 223 | asdf = "aßdƒ"; 224 | println(toString(codePointAt(test, 0))); 225 | println(toString(codePointAt(test, 6))); 226 | String.foreach(test, { cp -> println(cp.toString) }); 227 | println(toString(codePoints(test))); 228 | println(toString(symbol.ord)); 229 | println("length in codepoints = ${codePointCount(test)}, length in bytes = ${bytesCount(test)}, length in graphemes = ${graphemeCount(test)}"); 230 | res = foldl("12345", 0, { acc, c -> acc + toInt(chr(c)); }); 231 | println(res.toString); 232 | println("toLower T = ${chr(charToLower(upperT.ord))} ${toLower(test)}"); 233 | println("toUpper å = ${chr(charToUpper(lowerA.ord))} ${toUpper(test)}"); 234 | println("toTitle å = ${chr(charToTitle(lowerA.ord))} ${toTitle(test)}"); 235 | println("capitalize ${capitalize(empty)} ${capitalize(lowerA)} ${capitalize(test)} ${capitalize(asdf)}"); 236 | println("compare ${compareLength(empty, 1)} ${compareLength(empty, 0)} ${compareLength(empty, -1)} ${compareLength(test, 10)} ${compareLength(test, 7)} ${compareLength(test, 0)}"); 237 | println("replace ${replace(test, symbol, upperT)}"); 238 | println("${test} startsWith ${upperT}: ${startsWith(test, upperT)}, endsWith ${symbol}: ${endsWith(test, symbol)}"); 239 | println("${test} startsWith ${symbol}: ${startsWith(test, symbol)}, endsWith ${upperT}: ${endsWith(test, upperT)}"); 240 | println("Code point 123 is valid Unicode Scalar: ${isValidUnicodeScalar(123.intToInt32)}"); 241 | println("Surrogate code point 55296 is valid Unicode Scalar: ${isValidUnicodeScalar(55296.intToInt32)}"); 242 | println("Code point 1114112 is valid Unicode Scalar: ${isValidUnicodeScalar(1114112.intToInt32)}"); 243 | println("0 is digit: ${isDigit("0".ord)}, 9 is digit: ${isDigit("9".ord)}, 'a' is digit: ${isDigit("a".ord)}"); 244 | gc = { s -> generalCategory(ord(s)) }; 245 | println("${gc("1")} ${gc("a")} ${gc("A")} ${gc(" ")} ${gc("≈")} ${gc("€")}"); 246 | println("å is letter ${isLetter("å".ord)}, 1 is letter ${isLetter("1".ord)}"); 247 | println("å is 
numeric ${isNumeric("å".ord)}, 1 is numeric ${isNumeric("1".ord)}, ¾ is numeric ${isNumeric("¾".ord)}"); 248 | sp = { s -> isSpace(s.ord) }; 249 | println("' ' is space ${sp(" ")}, '\\t' is space ${sp("\t")}, '\\r' is space ${sp("\r")}, '\\n' is space ${sp("\n")}, 'U+0085' is space ${isSpace(133.intToInt32)}"); 250 | println(String.join(", ", ["1", "2"])); 251 | } 252 | -------------------------------------------------------------------------------- /make-release.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -eux 4 | 5 | makeRelease() { 6 | local version=$1 7 | echo "Making release of version $version" 8 | 9 | rm -rf dist 10 | mkdir -p dist/{bin,src,lib,bash_completion} 11 | cp "$(stack path --dist-dir)/build/lasca/lasca" dist/bin 12 | 13 | LLVM_PATH="/usr/local/opt/llvm-6.0/lib/llvm-6.0/lib" 14 | echo "${LLVM_PATH}" 15 | cp "${LLVM_PATH}/libLLVM.dylib" dist/lib 16 | cp "${LLVM_PATH}/libc++.1.0.dylib" dist/lib 17 | install_name_tool -add_rpath @executable_path/../lib dist/bin/lasca 18 | install_name_tool -change "${LLVM_PATH}/libLLVM.dylib" @rpath/libLLVM.dylib dist/bin/lasca 19 | install_name_tool -change "${LLVM_PATH}/libc++.1.0.dylib" @rpath/libc++.1.0.dylib dist/bin/lasca 20 | 21 | chmod 0644 dist/lib/* 22 | install_name_tool -id @rpath/libLLVM.dylib dist/lib/libLLVM.dylib 23 | install_name_tool -id @rpath/libc++.1.0.dylib dist/lib/libc++.1.0.dylib 24 | cp build/rts/liblascartStatic.a dist/src 25 | cp libs/base/*.lasca dist/src 26 | lasca --bash-completion-script lasca > dist/bash_completion/lasca 27 | (cd dist; tar -czf "../lasca-${version}.tar.gz" .) 28 | shasum -a 256 "lasca-${version}.tar.gz" 29 | # sed -E -e 's/sha256 "[a-zA-Z0-9]+"/sha256 $(SUM)/' ../homebrew-lasca/lasca-compiler.rb 30 | } 31 | 32 | makeRelease $1 -------------------------------------------------------------------------------- /release.nix: -------------------------------------------------------------------------------- 1 | let 2 | pkgs = import (builtins.fetchGit { 3 | name = "nixpkgs"; 4 | url = "https://github.com/nixos/nixpkgs.git"; 5 | rev = "6ec64973bc3a48b0c54d11c782e8b88b550a8eab"; 6 | ref = "release-18.09";}) {}; 7 | lascart = pkgs.callPackage ./lascart.nix {}; 8 | in 9 | pkgs.haskellPackages.callPackage ./lasca.nix { inherit lascart; } -------------------------------------------------------------------------------- /rts/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(objlib OBJECT runtime.c builtin.c lasca.h utf8proc/utf8proc.c utf8proc/utf8proc.h xxhash.h) 2 | add_library (lascart SHARED $<TARGET_OBJECTS:objlib>) 3 | add_library (lascartStatic $<TARGET_OBJECTS:objlib>) 4 | # set_target_properties(lascartStatic PROPERTIES OUTPUT_NAME lascart) 5 | set(CMAKE_SHARED_LIBRARY_SUFFIX ".so") 6 | 7 | find_path(FFI_INCLUDE_PATH ffi.h PATHS ${FFI_INCLUDE_DIR}) 8 | 9 | if( EXISTS "${FFI_INCLUDE_PATH}/ffi.h" ) 10 | message("Found lib ffi in ${FFI_INCLUDE_PATH}") 11 | else() 12 | find_path(FFI_INCLUDE_PATH ffi/ffi.h PATHS ${FFI_INCLUDE_DIR}) 13 | if( EXISTS "${FFI_INCLUDE_PATH}/ffi/ffi.h" ) 14 | message("Searching ffi.h ${FFI_INCLUDE_PATH}") 15 | set(FFI_INCLUDE_PATH "${FFI_INCLUDE_PATH}/ffi" CACHE INTERNAL "") 16 | endif() 17 | endif() 18 | 19 | message("Found ffi.h in ${FFI_INCLUDE_PATH}") 20 | 21 | find_library(FFI_LIBRARY ffi PATHS ${FFI_LIBRARY_DIR}) 22 | if( NOT FFI_LIBRARY ) 23 | message(FATAL_ERROR "libffi is not found.") 24 | endif() 25 | 26 | 27 | find_path(GC_INCLUDE_PATH gc.h PATHS ${GC_INCLUDE_DIR}) 28 |
find_library(GC_LIBRARY gc PATHS ${GC_LIBRARY_DIR}) 29 | message("Found gc.h in ${GC_INCLUDE_PATH}") 30 | 31 | find_library(PCRE2_LIBRARY pcre2-8 PATHS ${PCRE2_LIBRARY_DIR}) 32 | message("Found pcre in ${PCRE2_LIBRARY}") 33 | 34 | target_include_directories(objlib PRIVATE ${GC_INCLUDE_PATH} ${FFI_INCLUDE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/utf8proc) 35 | 36 | target_link_libraries(lascart m ${GC_LIBRARY} ${FFI_LIBRARY} ${PCRE2_LIBRARY}) 37 | target_link_libraries(lascartStatic m ${GC_LIBRARY} ${FFI_LIBRARY} ${PCRE2_LIBRARY}) 38 | 39 | install(TARGETS lascart LIBRARY DESTINATION lib) 40 | install(TARGETS lascartStatic LIBRARY ARCHIVE DESTINATION lib) -------------------------------------------------------------------------------- /rts/lasca.h: -------------------------------------------------------------------------------- 1 | #ifndef LASCA_H 2 | #define LASCA_H 3 | #define PCRE2_CODE_UNIT_WIDTH 8 4 | #include <pcre2.h> 5 | #define XXH_ACCEPT_NULL_INPUT_POINTER 1 6 | #define XXH_INLINE_ALL 7 | #include "xxhash.h" 8 | 9 | 10 | // Operators 11 | static const int64_t ADD = 10; 12 | static const int64_t SUB = 11; // x - y 13 | static const int64_t MUL = 12; 14 | static const int64_t DIV = 13; // x / y 15 | static const int64_t MOD = 14; // x % y 16 | 17 | static const int64_t EQ = 42; // x == y 18 | static const int64_t NE = 43; // x != y 19 | static const int64_t LT = 44; // x < y 20 | static const int64_t LE = 45; // x <= y 21 | static const int64_t GE = 46; // x >= y 22 | static const int64_t GT = 47; // x > y 23 | // Boolean unary operations 24 | static const int64_t ZNOT = 50; // !x 25 | 26 | // Boolean binary operations 27 | static const int64_t ZOR = 60; // x || y 28 | static const int64_t ZAND = 61; // x && y 29 | 30 | typedef struct { 31 | const char* name; 32 | } LaType; 33 | 34 | typedef struct { 35 | const LaType* type; 36 | void* fields[]; 37 | } Box; 38 | 39 | typedef Box Unit; 40 | 41 | typedef struct { 42 | const LaType* type; 43 | int8_t num; 44 | } Byte; 45 | 46 | typedef struct { 47 | const LaType* type; 48 | int64_t num; 49 | } Int; 50 | 51 | typedef struct { 52 | const LaType* type; 53 | int16_t num; 54 | } Int16; 55 | 56 | typedef struct { 57 | const LaType* type; 58 | int32_t num; 59 | } Int32; 60 | 61 | typedef Byte Bool; 62 | 63 | typedef struct { 64 | const LaType* type; 65 | double num; 66 | } Float64; 67 | 68 | typedef struct { 69 | const LaType* type; 70 | int64_t length; 71 | char bytes[]; 72 | } String; 73 | 74 | typedef struct { 75 | const LaType* type; 76 | int64_t funcIdx; 77 | int64_t argc; 78 | Box** argv; 79 | } Closure; 80 | 81 | typedef struct { 82 | const LaType* type; 83 | int64_t length; 84 | Box* data[]; 85 | } Array; 86 | 87 | typedef struct { 88 | const LaType* type; 89 | int64_t tag; 90 | Box* values[]; 91 | } DataValue; 92 | 93 | typedef DataValue Option; 94 | 95 | typedef struct { 96 | const LaType* type; 97 | String* error; 98 | } Unknown; 99 | 100 | typedef struct { 101 | const LaType* type; 102 | pcre2_code *re; 103 | } Pattern; 104 | 105 | typedef struct { 106 | String* name; 107 | void * funcPtr; 108 | int64_t arity; 109 | } Function; 110 | 111 | typedef struct { 112 | int64_t size; 113 | Function functions[]; 114 | } Functions; 115 | 116 | typedef struct { 117 | LaType* type; 118 | // int64_t tag; // it's not set now.
Not sure we need this 119 | String* name; 120 | int64_t numFields; 121 | String* fields[]; 122 | } Struct; 123 | 124 | typedef struct { 125 | LaType* type; 126 | String* name; 127 | int64_t numValues; 128 | Struct* constructors[]; 129 | } Data; 130 | 131 | typedef struct { 132 | int64_t size; 133 | Data* data[]; 134 | } Types; 135 | 136 | typedef struct { 137 | int64_t argc; 138 | Box* argv; 139 | } Environment; 140 | 141 | typedef struct { 142 | Functions* functions; 143 | Types* types; 144 | int8_t verbose; 145 | } Runtime; 146 | 147 | typedef struct { 148 | int64_t line; 149 | int64_t column; 150 | } Position; 151 | 152 | #define asBool(ptr) ((Bool*)ptr) 153 | #define asByte(ptr) ((Byte*)ptr) 154 | #define asInt(ptr) ((Int*)ptr) 155 | #define asInt16(ptr) ((Int16*)ptr) 156 | #define asInt32(ptr) ((Int32*)ptr) 157 | #define asFloat(ptr) ((Float64*)ptr) 158 | #define asString(ptr) ((String*)ptr) 159 | #define asDataValue(ptr) ((DataValue*)ptr) 160 | #define asClosure(ptr) ((Closure*)ptr) 161 | #define asArray(ptr) ((Array*)ptr) 162 | #define asByteArray(ptr) ((String*)ptr) 163 | 164 | extern Unit UNIT_SINGLETON; 165 | extern Bool TRUE_SINGLETON; 166 | extern Bool FALSE_SINGLETON; 167 | extern DataValue NONE; 168 | // Primitive Types 169 | extern const LaType* LAUNIT ; 170 | extern const LaType* LABOOL ; 171 | extern const LaType* LABYTE ; 172 | extern const LaType* LAINT16 ; 173 | extern const LaType* LAINT32 ; 174 | extern const LaType* LAINT ; 175 | extern const LaType* LAFLOAT64; 176 | extern const LaType* LASTRING ; 177 | extern const LaType* LACLOSURE; 178 | extern const LaType* LAARRAY ; 179 | extern const LaType* LABYTEARRAY; 180 | extern const LaType* LAFILE_HANDLE; 181 | extern const LaType* LAPATTERN; 182 | extern const LaType* LAOPTION; 183 | extern unsigned long long xxHashSeed; 184 | 185 | bool eqTypes(const LaType* lhs, const LaType* rhs); 186 | void *gcMalloc(size_t s); 187 | String* __attribute__ ((pure)) makeString(const char * str); 188 | Box *box(const LaType* type_id, void *value); 189 | Int* boxInt(int64_t i); 190 | Int16* boxInt16(int16_t i); 191 | Int32* boxInt32(int32_t i); 192 | void * unbox(const LaType* expected, const Box* ti); 193 | int64_t runtimeCompare(Box* lhs, Box* rhs); 194 | Box* runtimeApply(Box* val, int64_t argc, Box* argv[], Position pos); 195 | String* toString(const Box* value); 196 | Box* println(const Box* val); 197 | Box* boxArray(size_t size, ...); 198 | Array* createArray(size_t size); 199 | const char * __attribute__ ((const)) typeIdToName(const LaType* typeId); 200 | DataValue* some(Box* value); 201 | 202 | #endif -------------------------------------------------------------------------------- /rts/utf8proc/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 2.8) 2 | 3 | include (utils.cmake) 4 | 5 | disallow_intree_builds() 6 | 7 | project (utf8proc C) 8 | 9 | # This is the ABI version number, which may differ from the 10 | # API version number (defined in utf8proc.h). 11 | # Be sure to also update these in Makefile and MANIFEST! 
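# Concretely (cf. the MANIFEST file alongside this CMakeLists): with SO_MAJOR=2, SO_MINOR=1,
# SO_PATCH=0 the build produces libutf8proc.so.2.1.0 whose soname is libutf8proc.so.2
# (the Makefile builds libutf8proc.2.dylib on macOS); per the Makefile's note, SO_MAJOR is
# bumped whenever binary compatibility is broken, independently of the API version in utf8proc.h.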
12 | set(SO_MAJOR 2) 13 | set(SO_MINOR 1) 14 | set(SO_PATCH 0) 15 | 16 | add_definitions ( 17 | -DUTF8PROC_EXPORTS 18 | ) 19 | 20 | if (NOT MSVC) 21 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2 -std=c99 -pedantic -Wall") 22 | endif () 23 | 24 | add_library (utf8proc 25 | utf8proc.c 26 | utf8proc.h 27 | ) 28 | 29 | set_target_properties (utf8proc PROPERTIES 30 | POSITION_INDEPENDENT_CODE ON 31 | VERSION "${SO_MAJOR}.${SO_MINOR}.${SO_PATCH}" 32 | SOVERSION ${SO_MAJOR} 33 | ) 34 | -------------------------------------------------------------------------------- /rts/utf8proc/LICENSE.md: -------------------------------------------------------------------------------- 1 | ## utf8proc license ## 2 | 3 | **utf8proc** is a software package originally developed 4 | by Jan Behrens and the rest of the Public Software Group, who 5 | deserve nearly all of the credit for this library, that is now maintained by the Julia-language developers. Like the original utf8proc, 6 | whose copyright and license statements are reproduced below, all new 7 | work on the utf8proc library is licensed under the [MIT "expat" 8 | license](http://opensource.org/licenses/MIT): 9 | 10 | *Copyright © 2014-2015 by Steven G. Johnson, Jiahao Chen, Tony Kelman, Jonas Fonseca, and other contributors listed in the git history.* 11 | 12 | Permission is hereby granted, free of charge, to any person obtaining a 13 | copy of this software and associated documentation files (the "Software"), 14 | to deal in the Software without restriction, including without limitation 15 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 16 | and/or sell copies of the Software, and to permit persons to whom the 17 | Software is furnished to do so, subject to the following conditions: 18 | 19 | The above copyright notice and this permission notice shall be included in 20 | all copies or substantial portions of the Software. 21 | 22 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 24 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 25 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 26 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 27 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 28 | DEALINGS IN THE SOFTWARE. 29 | 30 | ## Original utf8proc license ## 31 | 32 | *Copyright (c) 2009, 2013 Public Software Group e. V., Berlin, Germany* 33 | 34 | Permission is hereby granted, free of charge, to any person obtaining a 35 | copy of this software and associated documentation files (the "Software"), 36 | to deal in the Software without restriction, including without limitation 37 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 38 | and/or sell copies of the Software, and to permit persons to whom the 39 | Software is furnished to do so, subject to the following conditions: 40 | 41 | The above copyright notice and this permission notice shall be included in 42 | all copies or substantial portions of the Software. 43 | 44 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 45 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 46 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 47 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 48 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 49 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 50 | DEALINGS IN THE SOFTWARE. 51 | 52 | ## Unicode data license ## 53 | 54 | This software distribution contains derived data from a modified version of 55 | the Unicode data files. The following license applies to that data: 56 | 57 | **COPYRIGHT AND PERMISSION NOTICE** 58 | 59 | *Copyright (c) 1991-2007 Unicode, Inc. All rights reserved. Distributed 60 | under the Terms of Use in http://www.unicode.org/copyright.html.* 61 | 62 | Permission is hereby granted, free of charge, to any person obtaining a 63 | copy of the Unicode data files and any associated documentation (the "Data 64 | Files") or Unicode software and any associated documentation (the 65 | "Software") to deal in the Data Files or Software without restriction, 66 | including without limitation the rights to use, copy, modify, merge, 67 | publish, distribute, and/or sell copies of the Data Files or Software, and 68 | to permit persons to whom the Data Files or Software are furnished to do 69 | so, provided that (a) the above copyright notice(s) and this permission 70 | notice appear with all copies of the Data Files or Software, (b) both the 71 | above copyright notice(s) and this permission notice appear in associated 72 | documentation, and (c) there is clear notice in each modified Data File or 73 | in the Software as well as in the documentation associated with the Data 74 | File(s) or Software that the data or software has been modified. 75 | 76 | THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY 77 | KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 78 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF 79 | THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS 80 | INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR 81 | CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF 82 | USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER 83 | TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 84 | PERFORMANCE OF THE DATA FILES OR SOFTWARE. 85 | 86 | Except as contained in this notice, the name of a copyright holder shall 87 | not be used in advertising or otherwise to promote the sale, use or other 88 | dealings in these Data Files or Software without prior written 89 | authorization of the copyright holder. 90 | 91 | Unicode and the Unicode logo are trademarks of Unicode, Inc., and may be 92 | registered in some jurisdictions. All other trademarks and registered 93 | trademarks mentioned herein are the property of their respective owners. 
94 | -------------------------------------------------------------------------------- /rts/utf8proc/MANIFEST: -------------------------------------------------------------------------------- 1 | include/ 2 | include/utf8proc.h 3 | lib/ 4 | lib/libutf8proc.a 5 | lib/libutf8proc.so -> libutf8proc.so.2.1.0 6 | lib/libutf8proc.so.2 -> libutf8proc.so.2.1.0 7 | lib/libutf8proc.so.2.1.0 8 | -------------------------------------------------------------------------------- /rts/utf8proc/Makefile: -------------------------------------------------------------------------------- 1 | # libutf8proc Makefile 2 | 3 | # programs 4 | AR?=ar 5 | CC?=gcc 6 | INSTALL=install 7 | FIND=find 8 | 9 | # compiler settings 10 | CFLAGS ?= -O2 11 | PICFLAG = -fPIC 12 | C99FLAG = -std=c99 13 | WCFLAGS = -Wall -pedantic 14 | UCFLAGS = $(CFLAGS) $(PICFLAG) $(C99FLAG) $(WCFLAGS) -DUTF8PROC_EXPORTS 15 | 16 | # shared-library version MAJOR.MINOR.PATCH ... this may be *different* 17 | # from the utf8proc version number because it indicates ABI compatibility, 18 | # not API compatibility: MAJOR should be incremented whenever *binary* 19 | # compatibility is broken, even if the API is backward-compatible. 20 | # The API version number is defined in utf8proc.h. 21 | # Be sure to also update these ABI versions in MANIFEST and CMakeLists.txt! 22 | MAJOR=2 23 | MINOR=1 24 | PATCH=0 25 | 26 | OS := $(shell uname) 27 | ifeq ($(OS),Darwin) # MacOS X 28 | SHLIB_EXT = dylib 29 | SHLIB_VERS_EXT = $(MAJOR).dylib 30 | else # GNU/Linux, at least (Windows should probably use cmake) 31 | SHLIB_EXT = so 32 | SHLIB_VERS_EXT = so.$(MAJOR).$(MINOR).$(PATCH) 33 | endif 34 | 35 | # installation directories (for 'make install') 36 | prefix=/usr/local 37 | libdir=$(prefix)/lib 38 | includedir=$(prefix)/include 39 | 40 | # meta targets 41 | 42 | .PHONY: all clean data update manifest install 43 | 44 | all: libutf8proc.a libutf8proc.$(SHLIB_EXT) 45 | 46 | clean: 47 | rm -f utf8proc.o libutf8proc.a libutf8proc.$(SHLIB_VERS_EXT) libutf8proc.$(SHLIB_EXT) 48 | ifneq ($(OS),Darwin) 49 | rm -f libutf8proc.so.$(MAJOR) 50 | endif 51 | rm -f test/tests.o test/normtest test/graphemetest test/printproperty test/charwidth test/valid test/iterate test/case test/custom 52 | rm -rf MANIFEST.new tmp 53 | $(MAKE) -C bench clean 54 | $(MAKE) -C data clean 55 | 56 | data: data/utf8proc_data.c.new 57 | 58 | update: data/utf8proc_data.c.new 59 | cp -f data/utf8proc_data.c.new utf8proc_data.c 60 | 61 | manifest: MANIFEST.new 62 | 63 | # real targets 64 | 65 | data/utf8proc_data.c.new: libutf8proc.$(SHLIB_EXT) data/data_generator.rb data/charwidths.jl 66 | $(MAKE) -C data utf8proc_data.c.new 67 | 68 | utf8proc.o: utf8proc.h utf8proc.c utf8proc_data.c 69 | $(CC) $(UCFLAGS) -c -o utf8proc.o utf8proc.c 70 | 71 | libutf8proc.a: utf8proc.o 72 | rm -f libutf8proc.a 73 | $(AR) rs libutf8proc.a utf8proc.o 74 | 75 | libutf8proc.so.$(MAJOR).$(MINOR).$(PATCH): utf8proc.o 76 | $(CC) $(LDFLAGS) -shared -o $@ -Wl,-soname -Wl,libutf8proc.so.$(MAJOR) utf8proc.o 77 | chmod a-x $@ 78 | 79 | libutf8proc.so: libutf8proc.so.$(MAJOR).$(MINOR).$(PATCH) 80 | ln -f -s libutf8proc.so.$(MAJOR).$(MINOR).$(PATCH) $@ 81 | ln -f -s libutf8proc.so.$(MAJOR).$(MINOR).$(PATCH) $@.$(MAJOR) 82 | 83 | libutf8proc.$(MAJOR).dylib: utf8proc.o 84 | $(CC) -dynamiclib -o $@ $^ -install_name $(libdir)/$@ -Wl,-compatibility_version -Wl,$(MAJOR) -Wl,-current_version -Wl,$(MAJOR).$(MINOR).$(PATCH) 85 | 86 | libutf8proc.dylib: libutf8proc.$(MAJOR).dylib 87 | ln -f -s libutf8proc.$(MAJOR).dylib $@ 88 | 89 | install: 
libutf8proc.a libutf8proc.$(SHLIB_EXT) libutf8proc.$(SHLIB_VERS_EXT) 90 | mkdir -m 755 -p $(DESTDIR)$(includedir) 91 | $(INSTALL) -m 644 utf8proc.h $(DESTDIR)$(includedir) 92 | mkdir -m 755 -p $(DESTDIR)$(libdir) 93 | $(INSTALL) -m 644 libutf8proc.a $(DESTDIR)$(libdir) 94 | $(INSTALL) -m 755 libutf8proc.$(SHLIB_VERS_EXT) $(DESTDIR)$(libdir) 95 | ln -f -s libutf8proc.$(SHLIB_VERS_EXT) $(DESTDIR)$(libdir)/libutf8proc.$(SHLIB_EXT) 96 | ifneq ($(OS),Darwin) 97 | ln -f -s libutf8proc.$(SHLIB_VERS_EXT) $(DESTDIR)$(libdir)/libutf8proc.so.$(MAJOR) 98 | endif 99 | 100 | MANIFEST.new: 101 | rm -rf tmp 102 | $(MAKE) install prefix=/usr DESTDIR=$(PWD)/tmp 103 | $(FIND) tmp/usr -mindepth 1 -type l -printf "%P -> %l\n" -or -type f -printf "%P\n" -or -type d -printf "%P/\n" | LC_ALL=C sort > $@ 104 | rm -rf tmp 105 | 106 | # Test programs 107 | 108 | data/NormalizationTest.txt: 109 | $(MAKE) -C data NormalizationTest.txt 110 | 111 | data/GraphemeBreakTest.txt: 112 | $(MAKE) -C data GraphemeBreakTest.txt 113 | 114 | test/tests.o: test/tests.c test/tests.h utf8proc.h 115 | $(CC) $(UCFLAGS) -c -o test/tests.o test/tests.c 116 | 117 | test/normtest: test/normtest.c test/tests.o utf8proc.o utf8proc.h test/tests.h 118 | $(CC) $(UCFLAGS) test/normtest.c test/tests.o utf8proc.o -o $@ 119 | 120 | test/graphemetest: test/graphemetest.c test/tests.o utf8proc.o utf8proc.h test/tests.h 121 | $(CC) $(UCFLAGS) test/graphemetest.c test/tests.o utf8proc.o -o $@ 122 | 123 | test/printproperty: test/printproperty.c test/tests.o utf8proc.o utf8proc.h test/tests.h 124 | $(CC) $(UCFLAGS) test/printproperty.c test/tests.o utf8proc.o -o $@ 125 | 126 | test/charwidth: test/charwidth.c test/tests.o utf8proc.o utf8proc.h test/tests.h 127 | $(CC) $(UCFLAGS) test/charwidth.c test/tests.o utf8proc.o -o $@ 128 | 129 | test/valid: test/valid.c test/tests.o utf8proc.o utf8proc.h test/tests.h 130 | $(CC) $(UCFLAGS) test/valid.c test/tests.o utf8proc.o -o $@ 131 | 132 | test/iterate: test/iterate.c test/tests.o utf8proc.o utf8proc.h test/tests.h 133 | $(CC) $(UCFLAGS) test/iterate.c test/tests.o utf8proc.o -o $@ 134 | 135 | test/case: test/case.c test/tests.o utf8proc.o utf8proc.h test/tests.h 136 | $(CC) $(UCFLAGS) test/case.c test/tests.o utf8proc.o -o $@ 137 | 138 | test/custom: test/custom.c test/tests.o utf8proc.o utf8proc.h test/tests.h 139 | $(CC) $(UCFLAGS) test/custom.c test/tests.o utf8proc.o -o $@ 140 | 141 | check: test/normtest data/NormalizationTest.txt test/graphemetest data/GraphemeBreakTest.txt test/printproperty test/case test/custom test/charwidth test/valid test/iterate bench/bench.c bench/util.c bench/util.h utf8proc.o 142 | $(MAKE) -C bench 143 | test/normtest data/NormalizationTest.txt 144 | test/graphemetest data/GraphemeBreakTest.txt 145 | test/charwidth 146 | test/valid 147 | test/iterate 148 | test/case 149 | test/custom 150 | -------------------------------------------------------------------------------- /rts/utf8proc/NEWS.md: -------------------------------------------------------------------------------- 1 | # utf8proc release history # 2 | 3 | ## Version 2.1 ## 4 | 5 | 2016-12-26: 6 | 7 | - New functions `utf8proc_map_custom` and `utf8proc_decompose_custom` 8 | to allow user-supplied transformations of codepoints, in conjunction 9 | with other transformations ([#89]). 10 | 11 | - New function `utf8proc_normalize_utf32` to apply normalizations 12 | directly to UTF-32 data (not just UTF-8) ([#88]). 
13 | 14 | - Fixed stack overflow that could occur due to incorrect definition 15 | of `UINT16_MAX` with some compilers ([#84]). 16 | 17 | - Fixed conflict with `stdbool.h` in Visual Studio ([#90]). 18 | 19 | - Updated font metrics to use Unifont 9.0.04. 20 | 21 | ## Version 2.0.2 ## 22 | 23 | 2016-07-27: 24 | 25 | - Move `-Wmissing-prototypes` warning flag from `Makefile` to `.travis.yml` 26 | since MSVC does not understand this flag and it is occasionally useful to 27 | build using MSVC through the `Makefile` ([#79]). 28 | 29 | - Use a different variable name for a nested loop in `bench/bench.c`, and 30 | declare it in a C89 way rather than inside the `for` to avoid "error: 31 | 'for' loop initial declarations are only allowed in C99 mode" ([#80]). 32 | 33 | ## Version 2.0.1 ## 34 | 35 | 2016-07-13: 36 | 37 | - Bug fix in `utf8proc_grapheme_break_stateful` ([#77]). 38 | 39 | - Tests now use versioned Unicode files, so they will no longer 40 | break when a new version of Unicode is released ([#78]). 41 | 42 | ## Version 2.0 ## 43 | 44 | 2016-07-13: 45 | 46 | - Updated for Unicode 9.0 ([#70]). 47 | 48 | - New `utf8proc_grapheme_break_stateful` to handle the complicated 49 | grapheme-breaking rules in Unicode 9. The old `utf8proc_grapheme_break` 50 | is still provided, but may incorrectly identify grapheme breaks 51 | in some Unicode-9 sequences. 52 | 53 | - Smaller Unicode tables ([#62], [#68]). This required changes 54 | in the `utf8proc_property_t` structure, which breaks backward 55 | compatibility if you access this `struct` directly. The 56 | functions in the API remain backward-compatible, however. 57 | 58 | - Buffer overrun fix ([#66]). 59 | 60 | ## Version 1.3.1 ## 61 | 62 | 2015-11-02: 63 | 64 | - Do not export symbol for internal function `unsafe_encode_char()` ([#55]). 65 | 66 | - Install relative symbolic links for shared libraries ([#58]). 67 | 68 | - Enable and fix compiler warnings ([#55], [#58]). 69 | 70 | - Add missing files to `make clean` ([#58]). 71 | 72 | ## Version 1.3 ## 73 | 74 | 2015-07-06: 75 | 76 | - Updated for Unicode 8.0 ([#45]). 77 | 78 | - New `utf8proc_tolower` and `utf8proc_toupper` functions, portable 79 | replacements for `towlower` and `towupper` in the C library ([#40]). 80 | 81 | - Don't treat Unicode "non-characters" as invalid, and improved 82 | validity checking in general ([#35]). 83 | 84 | - Prefix all typedefs with `utf8proc_`, e.g. `utf8proc_int32_t`, 85 | to avoid collisions with other libraries ([#32]). 86 | 87 | - Rename `DLLEXPORT` to `UTF8PROC_DLLEXPORT` to prevent collisions. 88 | 89 | - Fix build breakage in the benchmark routines. 90 | 91 | - More fine-grained Makefile variables (`PICFLAG` etcetera), so that 92 | compilation flags can be selectively overridden, and in particular 93 | so that `CFLAGS` can be changed without accidentally eliminating 94 | necessary flags like `-fPIC` and `-std=c99` ([#43]). 95 | 96 | - Updated character-width tables based on Unifont 8.0.01 ([#51]) and 97 | the Unicode 8 character categories ([#47]). 98 | 99 | ## Version 1.2 ## 100 | 101 | 2015-03-28: 102 | 103 | - Updated for Unicode 7.0 ([#6]). 104 | 105 | - New function `utf8proc_grapheme_break(c1,c2)` that returns whether 106 | there is a grapheme break between `c1` and `c2` ([#20]). 107 | 108 | - New function `utf8proc_charwidth(c)` that returns the number of 109 | column-positions that should be required for `c`; essentially a 110 | portable replacment for `wcwidth(c)` ([#27]). 
111 | 112 | - New function `utf8proc_category(c)` that returns the Unicode 113 | category of `c` (as one of the constants `UTF8PROC_CATEGORY_xx`). 114 | Also, a function `utf8proc_category_string(c)` that returns the Unicode 115 | category of `c` as a two-character string. 116 | 117 | - `cmake` script `CMakeLists.txt`, in addition to `Makefile`, for 118 | easier compilation on Windows ([#28]). 119 | 120 | - Various `Makefile` improvements: a `make check` target to perform 121 | tests ([#13]), `make install`, a rule to automate updating the Unicode 122 | tables, etcetera. 123 | 124 | - The shared library is now versioned (e.g. has a soname on GNU/Linux) ([#24]). 125 | 126 | - C++/MSVC compatibility ([#17]). 127 | 128 | - Most `#defined` constants are now `enums` ([#29]). 129 | 130 | - New preprocessor constants `UTF8PROC_VERSION_MAJOR`, 131 | `UTF8PROC_VERSION_MINOR`, and `UTF8PROC_VERSION_PATCH` for compile-time 132 | detection of the API version. 133 | 134 | - Doxygen-formatted documentation ([#29]). 135 | 136 | - The Ruby and PostgreSQL plugins have been removed due to lack of testing ([#22]). 137 | 138 | ## Version 1.1.6 ## 139 | 140 | 2013-11-27: 141 | 142 | - PostgreSQL 9.2 and 9.3 compatibility (lowercase `c` language name) 143 | 144 | ## Version 1.1.5 ## 145 | 146 | 2009-08-20: 147 | 148 | - Use `RSTRING_PTR()` and `RSTRING_LEN()` instead of `RSTRING()->ptr` and 149 | `RSTRING()->len` for ruby1.9 compatibility (and `#define` them, if not 150 | existent) 151 | 152 | 2009-10-02: 153 | 154 | - Patches for compatibility with Microsoft Visual Studio 155 | 156 | 2009-10-08: 157 | 158 | - Fixes to make utf8proc usable in C++ programs 159 | 160 | 2009-10-16: 161 | 162 | ## Version 1.1.4 ## 163 | 164 | 2009-06-14: 165 | 166 | - replaced C++ style comments for compatibility reasons 167 | - added typecasts to suppress compiler warnings 168 | - removed redundant source files for ruby-gemfile generation 169 | 170 | 2009-08-19: 171 | 172 | - Changed copyright notice for Public Software Group e. V. 173 | - Minor changes in the `README` file 174 | 175 | ## Version 1.1.3 ## 176 | 177 | 2008-10-04: 178 | 179 | - Added a function `utf8proc_version` returning a string containing the version 180 | number of the library. 181 | - Included a target `libutf8proc.dylib` for MacOSX. 182 | 183 | 2009-05-01: 184 | - PostgreSQL 8.3 compatibility (use of `SET_VARSIZE` macro) 185 | 186 | ## Version 1.1.2 ## 187 | 188 | 2007-07-25: 189 | 190 | - Fixed a serious bug in the data file generator, which caused characters 191 | being treated incorrectly, when stripping default ignorable characters or 192 | calculating grapheme cluster boundaries. 193 | 194 | ## Version 1.1.1 ## 195 | 196 | 2007-06-25: 197 | 198 | - Added a new PostgreSQL function `unistrip`, which behaves like `unifold`, 199 | but also removes all character marks (e.g. accents). 200 | 201 | 2007-07-22: 202 | 203 | - Changed license from BSD to MIT style. 204 | - Added a new function `utf8proc_codepoint_valid` to the C library. 205 | - Changed compiler flags in `Makefile` from `-g -O0` to `-O2` 206 | - The ruby script, which was used to build the `utf8proc_data.c` file, is now 207 | included in the distribution. 208 | 209 | ## Version 1.0.3 ## 210 | 211 | 2007-03-16: 212 | 213 | - Fixed a bug in the ruby library, which caused an error, when splitting an 214 | empty string at grapheme cluster boundaries (method `String#utf8chars`). 
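
For the category functions described in the Version 1.2 entry above, a small hypothetical usage sketch (the call shape matches `test/printproperty.c`; the sample codepoints are arbitrary):

```c
#include <stdio.h>
#include "utf8proc.h"

int main(void)
{
    /* e.g. 'A' -> Lu (uppercase letter), U+2028 -> Zl (line separator) */
    utf8proc_int32_t samples[] = { 0x41, 0x2028 };
    for (int i = 0; i < 2; ++i) {
        printf("U+%04X: category %d (%s)\n",
               (unsigned) samples[i],
               (int) utf8proc_category(samples[i]),
               utf8proc_category_string(samples[i]));
    }
    return 0;
}
```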
215 | 216 | ## Version 1.0.2 ## 217 | 218 | 2006-09-21: 219 | 220 | - included a check in `Integer#utf8`, which raises an exception, if the given 221 | code-point is invalid because of being too high (this was missing yet) 222 | 223 | 2006-12-26: 224 | 225 | - added support for PostgreSQL version 8.2 226 | 227 | ## Version 1.0.1 ## 228 | 229 | 2006-09-20: 230 | 231 | - included a gem file for the ruby version of the library 232 | 233 | Release of version 1.0.1 234 | 235 | ## Version 1.0 ## 236 | 237 | 2006-09-17: 238 | 239 | - added the `LUMP` option, which lumps certain characters together (see `lump.md`) (also used for the PostgreSQL `unifold` function) 240 | - added the `STRIPMARK` option, which strips marking characters (or marks of composed characters) 241 | - deprecated ruby method `String#char_ary` in favour of `String#utf8chars` 242 | 243 | ## Version 0.3 ## 244 | 245 | 2006-07-18: 246 | 247 | - changed normalization from NFC to NFKC for postgresql unifold function 248 | 249 | 2006-08-04: 250 | 251 | - added support to mark the beginning of a grapheme cluster with 0xFF (option: `CHARBOUND`) 252 | - added the ruby method `String#chars`, which is returning an array of UTF-8 encoded grapheme clusters 253 | - added `NLF2LF` transformation in postgresql `unifold` function 254 | - added the `DECOMPOSE` option, if you neither use `COMPOSE` or `DECOMPOSE`, no normalization will be performed (different from previous versions) 255 | - using integer constants rather than C-strings for character properties 256 | - fixed (hopefully) a problem with the ruby library on Mac OS X, which occurred when compiler optimization was switched on 257 | 258 | ## Version 0.2 ## 259 | 260 | 2006-06-05: 261 | 262 | - changed behaviour of PostgreSQL function to return NULL in case of invalid input, rather than raising an exceptional condition 263 | - improved efficiency of PostgreSQL function (no transformation to C string is done) 264 | 265 | 2006-06-20: 266 | 267 | - added -fpic compiler flag in Makefile 268 | - fixed bug in the C code for the ruby library (usage of non-existent function) 269 | 270 | ## Version 0.1 ## 271 | 272 | 2006-06-02: initial release of version 0.1 273 | 274 | [#6]: https://github.com/JuliaLang/utf8proc/issues/6 275 | [#13]: https://github.com/JuliaLang/utf8proc/issues/13 276 | [#17]: https://github.com/JuliaLang/utf8proc/issues/17 277 | [#20]: https://github.com/JuliaLang/utf8proc/issues/20 278 | [#22]: https://github.com/JuliaLang/utf8proc/issues/22 279 | [#24]: https://github.com/JuliaLang/utf8proc/issues/24 280 | [#27]: https://github.com/JuliaLang/utf8proc/issues/27 281 | [#28]: https://github.com/JuliaLang/utf8proc/issues/28 282 | [#29]: https://github.com/JuliaLang/utf8proc/issues/29 283 | [#32]: https://github.com/JuliaLang/utf8proc/issues/32 284 | [#35]: https://github.com/JuliaLang/utf8proc/issues/35 285 | [#40]: https://github.com/JuliaLang/utf8proc/issues/40 286 | [#43]: https://github.com/JuliaLang/utf8proc/issues/43 287 | [#45]: https://github.com/JuliaLang/utf8proc/issues/45 288 | [#47]: https://github.com/JuliaLang/utf8proc/issues/47 289 | [#51]: https://github.com/JuliaLang/utf8proc/issues/51 290 | [#55]: https://github.com/JuliaLang/utf8proc/issues/55 291 | [#58]: https://github.com/JuliaLang/utf8proc/issues/58 292 | [#62]: https://github.com/JuliaLang/utf8proc/issues/62 293 | [#66]: https://github.com/JuliaLang/utf8proc/issues/66 294 | [#68]: https://github.com/JuliaLang/utf8proc/issues/68 295 | [#70]: https://github.com/JuliaLang/utf8proc/issues/70 296 | [#77]: 
https://github.com/JuliaLang/utf8proc/issues/77 297 | [#78]: https://github.com/JuliaLang/utf8proc/issues/78 298 | [#79]: https://github.com/JuliaLang/utf8proc/issues/79 299 | [#80]: https://github.com/JuliaLang/utf8proc/issues/80 300 | [#84]: https://github.com/JuliaLang/utf8proc/pull/84 301 | [#88]: https://github.com/JuliaLang/utf8proc/pull/88 302 | [#89]: https://github.com/JuliaLang/utf8proc/pull/89 303 | [#90]: https://github.com/JuliaLang/utf8proc/issues/90 304 | -------------------------------------------------------------------------------- /rts/utf8proc/README.md: -------------------------------------------------------------------------------- 1 | # utf8proc 2 | [![Travis CI Status](https://travis-ci.org/JuliaLang/utf8proc.png)](https://travis-ci.org/JuliaLang/utf8proc) 3 | [![AppVeyor Status](https://ci.appveyor.com/api/projects/status/aou20lfkyhj8xbwq/branch/master?svg=true)](https://ci.appveyor.com/project/tkelman/utf8proc/branch/master) 4 | 5 | 6 | [utf8proc](http://julialang.org/utf8proc/) is a small, clean C 7 | library that provides Unicode normalization, case-folding, and other 8 | operations for data in the [UTF-8 9 | encoding](http://en.wikipedia.org/wiki/UTF-8). It was [initially 10 | developed](http://www.public-software-group.org/utf8proc) by Jan 11 | Behrens and the rest of the [Public Software 12 | Group](http://www.public-software-group.org/), who deserve *nearly all 13 | of the credit* for this package. With the blessing of the Public 14 | Software Group, the [Julia developers](http://julialang.org/) have 15 | taken over development of utf8proc, since the original developers have 16 | moved to other projects. 17 | 18 | (utf8proc is used for basic Unicode 19 | support in the [Julia language](http://julialang.org/), and the Julia 20 | developers became involved because they wanted to add Unicode 7 support and other features.) 21 | 22 | (The original utf8proc package also includes Ruby and PostgreSQL plug-ins. 23 | We removed those from utf8proc in order to focus exclusively on the C 24 | library for the time being, but plan to add them back in or release them as separate packages.) 25 | 26 | The utf8proc package is licensed under the 27 | free/open-source [MIT "expat" 28 | license](http://opensource.org/licenses/MIT) (plus certain Unicode 29 | data governed by the similarly permissive [Unicode data 30 | license](http://www.unicode.org/copyright.html#Exhibit1)); please see 31 | the included `LICENSE.md` file for more detailed information. 32 | 33 | ## Quick Start 34 | 35 | For compilation of the C library run `make`. 36 | 37 | ## General Information 38 | 39 | The C library is found in this directory after successful compilation 40 | and is named `libutf8proc.a` (for the static library) and 41 | `libutf8proc.so` (for the dynamic library). 42 | 43 | The Unicode version supported is 9.0.0. 44 | 45 | For Unicode normalizations, the following options are used: 46 | 47 | * Normalization Form C: `STABLE`, `COMPOSE` 48 | * Normalization Form D: `STABLE`, `DECOMPOSE` 49 | * Normalization Form KC: `STABLE`, `COMPOSE`, `COMPAT` 50 | * Normalization Form KD: `STABLE`, `DECOMPOSE`, `COMPAT` 51 | 52 | ## C Library 53 | 54 | The documentation for the C library is found in the `utf8proc.h` header file. 55 | `utf8proc_map` is function you will most likely be using for mapping UTF-8 56 | strings, unless you want to allocate memory yourself. 57 | 58 | ## To Do 59 | 60 | See the Github [issues list](https://github.com/JuliaLang/utf8proc/issues). 
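
A minimal sketch of the common case described in the C Library section above: NFC-normalizing a NUL-terminated string with `utf8proc_map`, letting the library allocate the result. The option flags follow the Normalization Form C options listed above, and the call shape matches `bench/bench.c`; the input string is illustrative.

```c
#include <stdio.h>
#include <stdlib.h>
#include "utf8proc.h"

int main(void)
{
    const utf8proc_uint8_t *input = (const utf8proc_uint8_t *) "Café";
    utf8proc_uint8_t *nfc = NULL;

    /* NFC = STABLE + COMPOSE; NULLTERM means the length argument (0) is ignored */
    utf8proc_ssize_t n = utf8proc_map(
        input, 0, &nfc,
        UTF8PROC_NULLTERM | UTF8PROC_STABLE | UTF8PROC_COMPOSE);
    if (n < 0) {
        fprintf(stderr, "normalization failed: %s\n", utf8proc_errmsg(n));
        return 1;
    }
    printf("%s (%ld bytes)\n", (char *) nfc, (long) n);
    free(nfc); /* utf8proc_map allocates the output; the caller frees it */
    return 0;
}
```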
61 | 62 | ## Contact 63 | 64 | Bug reports, feature requests, and other queries can be filed at 65 | the [utf8proc issues page on Github](https://github.com/JuliaLang/utf8proc/issues). 66 | 67 | ## See also 68 | 69 | An independent Lua translation of this library, [lua-mojibake](https://github.com/differentprogramming/lua-mojibake), is also available. 70 | -------------------------------------------------------------------------------- /rts/utf8proc/bench/Makefile: -------------------------------------------------------------------------------- 1 | CURL=curl 2 | 3 | CC = cc 4 | CFLAGS = -O2 -std=c99 -pedantic -Wall 5 | 6 | all: bench 7 | 8 | LIBUTF8PROC = ../utf8proc.o 9 | 10 | bench: bench.o util.o $(LIBUTF8PROC) 11 | $(CC) $(CFLAGS) $(LDFLAGS) -o $@ bench.o util.o $(LIBUTF8PROC) 12 | 13 | DATAURL = https://raw.githubusercontent.com/duerst/eprun/master/benchmark 14 | DATAFILES = Deutsch_.txt Japanese_.txt Korean_.txt Vietnamese_.txt 15 | 16 | $(DATAFILES): 17 | $(CURL) -O $(DATAURL)/$@ 18 | 19 | bench.out: $(DATAFILES) bench 20 | ./bench -nfkc $(DATAFILES) > $@ 21 | 22 | # you may need make CPPFLAGS=... LDFLAGS=... to help it find ICU 23 | icu: icu.o util.o 24 | $(CC) $(CFLAGS) $(LDFLAGS) -o $@ icu.o util.o -licuuc 25 | 26 | icu.out: $(DATAFILES) icu 27 | ./icu $(DATAFILES) > $@ 28 | 29 | unistring: unistring.o util.o 30 | $(CC) $(CFLAGS) $(LDFLAGS) -o $@ unistring.o util.o -lunistring 31 | 32 | unistring.out: $(DATAFILES) unistring 33 | ./unistring $(DATAFILES) > $@ 34 | 35 | .c.o: 36 | $(CC) $(CPPFLAGS) -I.. $(CFLAGS) -c -o $@ $< 37 | 38 | clean: 39 | rm -rf *.o *.txt bench *.out icu unistring 40 | -------------------------------------------------------------------------------- /rts/utf8proc/bench/bench.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "utf8proc.h" 6 | #include "util.h" 7 | 8 | int main(int argc, char **argv) 9 | { 10 | int i, j; 11 | int options = 0; 12 | 13 | for (i = 1; i < argc; ++i) { 14 | if (!strcmp(argv[i], "-nfkc")) { 15 | options |= UTF8PROC_STABLE|UTF8PROC_COMPOSE|UTF8PROC_COMPAT; 16 | continue; 17 | } 18 | if (!strcmp(argv[i], "-nfkd")) { 19 | options |= UTF8PROC_STABLE|UTF8PROC_DECOMPOSE|UTF8PROC_COMPAT; 20 | continue; 21 | } 22 | if (!strcmp(argv[i], "-nfc")) { 23 | options |= UTF8PROC_STABLE|UTF8PROC_COMPOSE; 24 | continue; 25 | } 26 | if (!strcmp(argv[i], "-nfd")) { 27 | options |= UTF8PROC_STABLE|UTF8PROC_DECOMPOSE; 28 | continue; 29 | } 30 | if (!strcmp(argv[i], "-casefold")) { 31 | options |= UTF8PROC_CASEFOLD; 32 | continue; 33 | } 34 | if (argv[i][0] == '-') { 35 | fprintf(stderr, "unrecognized option: %s\n", argv[i]); 36 | return EXIT_FAILURE; 37 | } 38 | 39 | size_t len; 40 | uint8_t *src = readfile(argv[i], &len); 41 | if (!src) { 42 | fprintf(stderr, "error reading %s\n", argv[i]); 43 | return EXIT_FAILURE; 44 | } 45 | uint8_t *dest; 46 | mytime start = gettime(); 47 | for (j = 0; j < 100; ++j) { 48 | utf8proc_map(src, len, &dest, options); 49 | free(dest); 50 | } 51 | printf("%s: %g\n", argv[i], elapsed(gettime(), start) / 100); 52 | free(src); 53 | } 54 | 55 | return EXIT_SUCCESS; 56 | } 57 | -------------------------------------------------------------------------------- /rts/utf8proc/bench/icu.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | /* ICU4C */ 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "util.h" 11 | 12 | int main(int argc, char **argv) 13 | { 14 | 
int i; 15 | 16 | UErrorCode err; 17 | UConverter *uc = ucnv_open("UTF8", &err); 18 | if (U_FAILURE(err)) return EXIT_FAILURE; 19 | 20 | const UNormalizer2 *NFKC = unorm2_getNFKCInstance(&err); 21 | if (U_FAILURE(err)) return EXIT_FAILURE; 22 | 23 | for (i = 1; i < argc; ++i) { 24 | if (argv[i][0] == '-') { 25 | fprintf(stderr, "unrecognized option: %s\n", argv[i]); 26 | return EXIT_FAILURE; 27 | } 28 | 29 | size_t len; 30 | uint8_t *src = readfile(argv[i], &len); 31 | if (!src) { 32 | fprintf(stderr, "error reading %s\n", argv[i]); 33 | return EXIT_FAILURE; 34 | } 35 | 36 | /* convert UTF8 data to ICU's UTF16 */ 37 | UChar *usrc = (UChar*) malloc(2*len * sizeof(UChar)); 38 | ucnv_toUChars(uc, usrc, 2*len, (char*) src, len, &err); 39 | if (U_FAILURE(err)) return EXIT_FAILURE; 40 | size_t ulen = u_strlen(usrc); 41 | 42 | /* ICU's insane normalization API requires you to 43 | know the size of the destination buffer in advance, 44 | or alternatively to repeatly try normalizing and 45 | double the buffer size until it succeeds. Here, I just 46 | allocate a huge destination buffer to avoid the issue. */ 47 | UChar *udest = (UChar*) malloc(10*ulen * sizeof(UChar)); 48 | 49 | mytime start = gettime(); 50 | for (int i = 0; i < 100; ++i) { 51 | unorm2_normalize(NFKC, usrc, ulen, udest, 10*ulen, &err); 52 | if (U_FAILURE(err)) return EXIT_FAILURE; 53 | } 54 | printf("%s: %g\n", argv[i], elapsed(gettime(), start) / 100); 55 | free(udest); 56 | free(usrc); 57 | free(src); 58 | } 59 | 60 | return EXIT_SUCCESS; 61 | } 62 | -------------------------------------------------------------------------------- /rts/utf8proc/bench/unistring.c: -------------------------------------------------------------------------------- 1 | /* comparitive benchmark of GNU libunistring */ 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | /* libunistring */ 8 | #include 9 | #include 10 | 11 | #include "util.h" 12 | 13 | int main(int argc, char **argv) 14 | { 15 | int i; 16 | uninorm_t nf = UNINORM_NFKC; 17 | 18 | for (i = 1; i < argc; ++i) { 19 | if (!strcmp(argv[i], "-nfkc")) { 20 | nf = UNINORM_NFKC; 21 | continue; 22 | } 23 | if (!strcmp(argv[i], "-nfkd")) { 24 | nf = UNINORM_NFKD; 25 | continue; 26 | } 27 | if (!strcmp(argv[i], "-nfc")) { 28 | nf = UNINORM_NFC; 29 | continue; 30 | } 31 | if (!strcmp(argv[i], "-nfd")) { 32 | nf = UNINORM_NFD; 33 | continue; 34 | } 35 | if (argv[i][0] == '-') { 36 | fprintf(stderr, "unrecognized option: %s\n", argv[i]); 37 | return EXIT_FAILURE; 38 | } 39 | 40 | size_t len; 41 | uint8_t *src = readfile(argv[i], &len); 42 | if (!src) { 43 | fprintf(stderr, "error reading %s\n", argv[i]); 44 | return EXIT_FAILURE; 45 | } 46 | 47 | size_t destlen; 48 | uint8_t *dest; 49 | mytime start = gettime(); 50 | for (int i = 0; i < 100; ++i) { 51 | dest = u8_normalize(nf, src, len, NULL, &destlen); 52 | if (!dest) return EXIT_FAILURE; 53 | free(dest); 54 | } 55 | printf("%s: %g\n", argv[i], elapsed(gettime(), start) / 100); 56 | free(src); 57 | } 58 | 59 | return EXIT_SUCCESS; 60 | } 61 | -------------------------------------------------------------------------------- /rts/utf8proc/bench/util.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "util.h" 6 | 7 | /* read file named FILENAME into an array of *len bytes, 8 | returning NULL on error */ 9 | uint8_t *readfile(const char *filename, size_t *len) 10 | { 11 | *len = 0; 12 | struct stat st; 13 | if (0 != stat(filename, &st)) return NULL; 14 | *len = st.st_size; 15 
| FILE *f = fopen(filename, "r"); 16 | if (!f) return NULL; 17 | uint8_t *s = (uint8_t *) malloc(sizeof(uint8_t) * *len); 18 | if (!s) return NULL; 19 | if (fread(s, 1, *len, f) != *len) { 20 | free(s); 21 | s = NULL; 22 | } 23 | fclose(f); 24 | return s; 25 | } 26 | 27 | mytime gettime(void) { 28 | mytime t; 29 | gettimeofday(&t, NULL); 30 | return t; 31 | } 32 | 33 | /* time difference in seconds */ 34 | double elapsed(mytime t1, mytime t0) 35 | { 36 | return (double)(t1.tv_sec - t0.tv_sec) + 37 | (double)(t1.tv_usec - t0.tv_usec) * 1.0E-6; 38 | } 39 | 40 | -------------------------------------------------------------------------------- /rts/utf8proc/bench/util.h: -------------------------------------------------------------------------------- 1 | #ifndef UTIL_H 2 | #define UTIL_H 1 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #ifdef __cplusplus 9 | extern "C" { 10 | #endif 11 | 12 | uint8_t *readfile(const char *filename, size_t *len); 13 | 14 | typedef struct timeval mytime; 15 | mytime gettime(void); 16 | double elapsed(mytime t1, mytime t0); 17 | 18 | #ifdef __cplusplus 19 | } 20 | #endif 21 | 22 | #endif /* UTIL_H */ 23 | -------------------------------------------------------------------------------- /rts/utf8proc/data/Makefile: -------------------------------------------------------------------------------- 1 | # Unicode data generation rules. Except for the test data files, most 2 | # users will not use these Makefile rules, which are primarily to re-generate 3 | # unicode_data.c when we get a new Unicode version or charwidth data; they 4 | # require ruby, fontforge, and julia to be installed. 5 | 6 | # programs 7 | CURL=curl 8 | RUBY=ruby 9 | PERL=perl 10 | MAKE=make 11 | JULIA=julia 12 | FONTFORGE=fontforge 13 | CURLFLAGS = --retry 5 --location 14 | 15 | .PHONY: clean 16 | 17 | .DELETE_ON_ERROR: 18 | 19 | utf8proc_data.c.new: data_generator.rb UnicodeData.txt GraphemeBreakProperty.txt DerivedCoreProperties.txt CompositionExclusions.txt CaseFolding.txt CharWidths.txt 20 | $(RUBY) data_generator.rb < UnicodeData.txt > $@ 21 | 22 | # GNU Unifont version for font metric calculations: 23 | UNIFONT_VERSION=9.0.04 24 | 25 | unifont.ttf: 26 | $(CURL) $(CURLFLAGS) -o $@ $(URLCACHE)https://mirrors.kernel.org/gnu/unifont/unifont-$(UNIFONT_VERSION)/unifont-$(UNIFONT_VERSION).ttf 27 | 28 | unifont_upper.ttf: 29 | $(CURL) $(CURLFLAGS) -o $@ $(URLCACHE)https://mirrors.kernel.org/gnu/unifont/unifont-$(UNIFONT_VERSION)/unifont_upper-$(UNIFONT_VERSION).ttf 30 | 31 | %.sfd: %.ttf 32 | $(FONTFORGE) -lang=ff -c "Open(\"$<\");Save(\"$@\");Quit(0);" 33 | 34 | CharWidths.txt: charwidths.jl unifont.sfd unifont_upper.sfd EastAsianWidth.txt 35 | $(JULIA) charwidths.jl > $@ 36 | 37 | # Unicode data version 38 | UNICODE_VERSION=9.0.0 39 | 40 | UnicodeData.txt: 41 | $(CURL) $(CURLFLAGS) -o $@ -O http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/UnicodeData.txt 42 | 43 | EastAsianWidth.txt: 44 | $(CURL) $(CURLFLAGS) -o $@ -O $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/EastAsianWidth.txt 45 | 46 | GraphemeBreakProperty.txt: 47 | $(CURL) $(CURLFLAGS) -o $@ -O $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/GraphemeBreakProperty.txt 48 | 49 | DerivedCoreProperties.txt: 50 | $(CURL) $(CURLFLAGS) -o $@ -O $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/DerivedCoreProperties.txt 51 | 52 | CompositionExclusions.txt: 53 | $(CURL) $(CURLFLAGS) -o $@ -O $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/CompositionExclusions.txt 54 | 
55 | CaseFolding.txt: 56 | $(CURL) $(CURLFLAGS) -o $@ -O $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/CaseFolding.txt 57 | 58 | NormalizationTest.txt: 59 | $(CURL) $(CURLFLAGS) -o $@ -O $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/NormalizationTest.txt 60 | 61 | GraphemeBreakTest.txt: 62 | $(CURL) $(CURLFLAGS) $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/GraphemeBreakTest.txt | $(PERL) -pe 's,÷,/,g;s,×,+,g' > $@ 63 | 64 | clean: 65 | rm -f UnicodeData.txt EastAsianWidth.txt GraphemeBreakProperty.txt DerivedCoreProperties.txt CompositionExclusions.txt CaseFolding.txt NormalizationTest.txt GraphemeBreakTest.txt CharWidths.txt unifont*.ttf unifont*.sfd 66 | rm -f utf8proc_data.c.new 67 | -------------------------------------------------------------------------------- /rts/utf8proc/data/charwidths.jl: -------------------------------------------------------------------------------- 1 | # Following work by @jiahao, we compute character widths using a combination of 2 | # * advance widths from GNU Unifont (advance width 512 = 1 en) 3 | # * UAX 11: East Asian Width 4 | # * a few exceptions as needed 5 | # Adapted from http://nbviewer.ipython.org/gist/jiahao/07e8b08bf6d8671e9734 6 | # 7 | # Requires Julia (obviously) and FontForge. 8 | 9 | ############################################################################# 10 | # Julia 0.3/0.4 compatibility (taken from Compat package) 11 | if VERSION < v"0.4.0-dev+1387" 12 | typealias AbstractString String 13 | end 14 | if VERSION < v"0.4.0-dev+1419" 15 | const UInt32 = Uint32 16 | end 17 | if VERSION < v"0.4.0-dev+3874" 18 | Base.parse{T<:Integer}(::Type{T}, s::AbstractString) = parseint(T, s) 19 | end 20 | 21 | CharWidths = Dict{Int,Int}() 22 | 23 | ############################################################################# 24 | # Use ../libutf8proc for category codes, rather than the one in Julia, 25 | # to minimize bootstrapping complexity when a new version of Unicode comes out. 26 | catcode(c) = ccall((:utf8proc_category,"../libutf8proc"), Cint, (Int32,), c) 27 | 28 | # use Base.UTF8proc module to get category codes constants, since 29 | # we won't change these in utf8proc. 30 | import Base.UTF8proc 31 | 32 | ############################################################################# 33 | # Use a default width of 1 for all character categories that are 34 | # letter/symbol/number-like. This can be overriden by Unifont or UAX 11 35 | # below, but provides a useful nonzero fallback for new codepoints when 36 | # a new Unicode version has been released but Unifont hasn't been updated yet. 
37 | 38 | zerowidth = Set{Int}() # categories that may contain zero-width chars 39 | push!(zerowidth, UTF8proc.UTF8PROC_CATEGORY_CN) 40 | push!(zerowidth, UTF8proc.UTF8PROC_CATEGORY_MN) 41 | push!(zerowidth, UTF8proc.UTF8PROC_CATEGORY_MC) 42 | push!(zerowidth, UTF8proc.UTF8PROC_CATEGORY_ME) 43 | push!(zerowidth, UTF8proc.UTF8PROC_CATEGORY_SK) 44 | push!(zerowidth, UTF8proc.UTF8PROC_CATEGORY_ZS) 45 | push!(zerowidth, UTF8proc.UTF8PROC_CATEGORY_ZL) 46 | push!(zerowidth, UTF8proc.UTF8PROC_CATEGORY_ZP) 47 | push!(zerowidth, UTF8proc.UTF8PROC_CATEGORY_CC) 48 | push!(zerowidth, UTF8proc.UTF8PROC_CATEGORY_CF) 49 | push!(zerowidth, UTF8proc.UTF8PROC_CATEGORY_CS) 50 | push!(zerowidth, UTF8proc.UTF8PROC_CATEGORY_CO) 51 | for c in 0x0000:0x110000 52 | if catcode(c) ∉ zerowidth 53 | CharWidths[c] = 1 54 | end 55 | end 56 | 57 | ############################################################################# 58 | # Widths from GNU Unifont 59 | 60 | #Read sfdfile for character widths 61 | function parsesfd(filename::AbstractString, CharWidths::Dict{Int,Int}=Dict{Int,Int}()) 62 | state=:seekchar 63 | lineno = 0 64 | codepoint = width = nothing 65 | for line in readlines(open(filename)) 66 | lineno += 1 67 | if state==:seekchar #StartChar: nonmarkingreturn 68 | if contains(line, "StartChar: ") 69 | codepoint = nothing 70 | width = nothing 71 | state = :readdata 72 | end 73 | elseif state==:readdata #Encoding: 65538 -1 2, Width: 1024 74 | contains(line, "Encoding:") && (codepoint = parse(Int, split(line)[3])) 75 | contains(line, "Width:") && (width = parse(Int, split(line)[2])) 76 | if codepoint!=nothing && width!=nothing && codepoint >= 0 77 | w=div(width, 512) # 512 units to the en 78 | if w > 0 79 | # only add nonzero widths, since (1) the default is zero 80 | # and (2) this circumvents some apparent bugs in Unifont 81 | # (https://savannah.gnu.org/bugs/index.php?45395) 82 | CharWidths[codepoint] = w 83 | end 84 | state = :seekchar 85 | end 86 | end 87 | end 88 | CharWidths 89 | end 90 | CharWidths=parsesfd("unifont.sfd", CharWidths) 91 | CharWidths=parsesfd("unifont_upper.sfd", CharWidths) 92 | 93 | ############################################################################# 94 | # Widths from UAX #11: East Asian Width 95 | # .. these take precedence over the Unifont width for all codepoints 96 | # listed explicitly as wide/full/narrow/half-width 97 | 98 | for line in readlines(open("EastAsianWidth.txt")) 99 | #Strip comments 100 | line[1] == '#' && continue 101 | precomment = split(line, '#')[1] 102 | #Parse code point range and width code 103 | tokens = split(precomment, ';') 104 | length(tokens) >= 2 || continue 105 | charrange = tokens[1] 106 | width = strip(tokens[2]) 107 | #Parse code point range into Julia UnitRange 108 | rangetokens = split(charrange, "..") 109 | charstart = parse(UInt32, "0x"*rangetokens[1]) 110 | charend = parse(UInt32, "0x"*rangetokens[length(rangetokens)>1 ? 2 : 1]) 111 | 112 | #Assign widths 113 | for c in charstart:charend 114 | if width=="W" || width=="F" # wide or full 115 | CharWidths[c]=2 116 | elseif width=="Na"|| width=="H" # narrow or half 117 | CharWidths[c]=1 118 | end 119 | end 120 | end 121 | 122 | ############################################################################# 123 | # A few exceptions to the above cases, found by manual comparison 124 | # to other wcwidth functions and similar checks. 
125 | 126 | for c in keys(CharWidths) 127 | cat = catcode(c) 128 | 129 | # make sure format control character (category Cf) have width 0, 130 | # except for the Arabic characters 0x06xx (see unicode std 6.2, sec. 8.2) 131 | if cat==UTF8proc.UTF8PROC_CATEGORY_CF && c ∉ [0x0601,0x0602,0x0603,0x06dd] 132 | CharWidths[c]=0 133 | end 134 | 135 | # Unifont has nonzero width for a number of non-spacing combining 136 | # characters, e.g. (in 7.0.06): f84,17b4,17b5,180b,180d,2d7f, and 137 | # the variation selectors 138 | if cat==UTF8proc.UTF8PROC_CATEGORY_MN 139 | CharWidths[c]=0 140 | end 141 | 142 | # We also assign width of zero to unassigned and private-use 143 | # codepoints (Unifont includes ConScript Unicode Registry PUA fonts, 144 | # but since these are nonstandard it seems questionable to recognize them). 145 | if cat==UTF8proc.UTF8PROC_CATEGORY_CO || cat==UTF8proc.UTF8PROC_CATEGORY_CN 146 | CharWidths[c]=0 147 | end 148 | 149 | # for some reason, Unifont has width-2 glyphs for ASCII control chars 150 | if cat==UTF8proc.UTF8PROC_CATEGORY_CC 151 | CharWidths[c]=0 152 | end 153 | end 154 | 155 | #By definition, should have zero width (on the same line) 156 | #0x002028 '
' category: Zl name: LINE SEPARATOR/ 157 | #0x002029 '
' category: Zp name: PARAGRAPH SEPARATOR/ 158 | CharWidths[0x2028]=0 159 | CharWidths[0x2029]=0 160 | 161 | #By definition, should be narrow = width of 1 en space 162 | #0x00202f ' ' category: Zs name: NARROW NO-BREAK SPACE/ 163 | CharWidths[0x202f]=1 164 | 165 | #By definition, should be wide = width of 1 em space 166 | #0x002001 ' ' category: Zs name: EM QUAD/ 167 | #0x002003 ' ' category: Zs name: EM SPACE/ 168 | CharWidths[0x2001]=2 169 | CharWidths[0x2003]=2 170 | 171 | ############################################################################# 172 | # Output (to a file or pipe) for processing by data_generator.rb 173 | # ... don't bother to output zero widths since that will be the default. 174 | 175 | firstc = 0x000000 176 | lastv = 0 177 | uhex(c) = uppercase(hex(c,4)) 178 | for c in 0x0000:0x110000 179 | v = get(CharWidths, c, 0) 180 | if v != lastv || c == 0x110000 181 | v < 4 || error("invalid charwidth $v for $c") 182 | if firstc+1 < c 183 | println(uhex(firstc), "..", uhex(c-1), "; ", lastv) 184 | else 185 | println(uhex(firstc), "; ", lastv) 186 | end 187 | firstc = c 188 | lastv = v 189 | end 190 | end 191 | -------------------------------------------------------------------------------- /rts/utf8proc/lump.md: -------------------------------------------------------------------------------- 1 | ``` 2 | U+0020 <-- all space characters (general category Zs) 3 | U+0027 ' <-- left/right single quotation mark U+2018..2019, 4 | modifier letter apostrophe U+02BC, 5 | modifier letter vertical line U+02C8 6 | U+002D - <-- all dash characters (general category Pd), 7 | minus U+2212 8 | U+002F / <-- fraction slash U+2044, 9 | division slash U+2215 10 | U+003A : <-- ratio U+2236 11 | U+003C < <-- single left-pointing angle quotation mark U+2039, 12 | left-pointing angle bracket U+2329, 13 | left angle bracket U+3008 14 | U+003E > <-- single right-pointing angle quotation mark U+203A, 15 | right-pointing angle bracket U+232A, 16 | right angle bracket U+3009 17 | U+005C \ <-- set minus U+2216 18 | U+005E ^ <-- modifier letter up arrowhead U+02C4, 19 | modifier letter circumflex accent U+02C6, 20 | caret U+2038, 21 | up arrowhead U+2303 22 | U+005F _ <-- all connector characters (general category Pc), 23 | modifier letter low macron U+02CD 24 | U+0060 ` <-- modifier letter grave accent U+02CB 25 | U+007C | <-- divides U+2223 26 | U+007E ~ <-- tilde operator U+223C 27 | ``` 28 | -------------------------------------------------------------------------------- /rts/utf8proc/test/case.c: -------------------------------------------------------------------------------- 1 | #include "tests.h" 2 | #include 3 | 4 | int main(int argc, char **argv) 5 | { 6 | int error = 0, better = 0; 7 | utf8proc_int32_t c; 8 | 9 | (void) argc; /* unused */ 10 | (void) argv; /* unused */ 11 | 12 | /* some simple sanity tests of the character widths */ 13 | for (c = 0; c <= 0x110000; ++c) { 14 | utf8proc_int32_t l = utf8proc_tolower(c); 15 | utf8proc_int32_t u = utf8proc_toupper(c); 16 | 17 | check(l == c || utf8proc_codepoint_valid(l), "invalid tolower"); 18 | check(u == c || utf8proc_codepoint_valid(u), "invalid toupper"); 19 | 20 | if (sizeof(wint_t) > 2 || c < (1<<16)) { 21 | wint_t l0 = towlower(c), u0 = towupper(c); 22 | 23 | /* OS unicode tables may be out of date. But if they 24 | do have a lower/uppercase mapping, hopefully it 25 | is correct? 
*/ 26 | if (l0 != c && l0 != l) { 27 | fprintf(stderr, "MISMATCH %x != towlower(%x) == %x\n", 28 | l, c, l0); 29 | ++error; 30 | } 31 | else if (l0 != l) { /* often true for out-of-date OS unicode */ 32 | ++better; 33 | /* printf("%x != towlower(%x) == %x\n", l, c, l0); */ 34 | } 35 | if (u0 != c && u0 != u) { 36 | fprintf(stderr, "MISMATCH %x != towupper(%x) == %x\n", 37 | u, c, u0); 38 | ++error; 39 | } 40 | else if (u0 != u) { /* often true for out-of-date OS unicode */ 41 | ++better; 42 | /* printf("%x != towupper(%x) == %x\n", u, c, u0); */ 43 | } 44 | } 45 | } 46 | check(!error, "utf8proc case conversion FAILED %d tests.", error); 47 | printf("More up-to-date than OS unicode tables for %d tests.\n", better); 48 | printf("utf8proc case conversion tests SUCCEEDED.\n"); 49 | return 0; 50 | } 51 | -------------------------------------------------------------------------------- /rts/utf8proc/test/charwidth.c: -------------------------------------------------------------------------------- 1 | #include "tests.h" 2 | #include 3 | #include 4 | 5 | static int my_isprint(int c) { 6 | int cat = utf8proc_get_property(c)->category; 7 | return (UTF8PROC_CATEGORY_LU <= cat && cat <= UTF8PROC_CATEGORY_ZS) || 8 | (c == 0x0601 || c == 0x0602 || c == 0x0603 || c == 0x06dd); 9 | } 10 | 11 | int main(int argc, char **argv) 12 | { 13 | int c, error = 0, updates = 0; 14 | 15 | (void) argc; /* unused */ 16 | (void) argv; /* unused */ 17 | 18 | /* some simple sanity tests of the character widths */ 19 | for (c = 0; c <= 0x110000; ++c) { 20 | int cat = utf8proc_get_property(c)->category; 21 | int w = utf8proc_charwidth(c); 22 | if ((cat == UTF8PROC_CATEGORY_MN || cat == UTF8PROC_CATEGORY_ME) && 23 | w > 0) { 24 | fprintf(stderr, "nonzero width %d for combining char %x\n", w, c); 25 | error = 1; 26 | } 27 | if (w == 0 && 28 | ((cat >= UTF8PROC_CATEGORY_LU && cat <= UTF8PROC_CATEGORY_LO) || 29 | (cat >= UTF8PROC_CATEGORY_ND && cat <= UTF8PROC_CATEGORY_SC) || 30 | (cat >= UTF8PROC_CATEGORY_SO && cat <= UTF8PROC_CATEGORY_ZS))) { 31 | fprintf(stderr, "zero width for symbol-like char %x\n", c); 32 | error = 1; 33 | } 34 | if (c <= 127 && ((!isprint(c) && w > 0) || 35 | (isprint(c) && wcwidth(c) != w))) { 36 | fprintf(stderr, "wcwidth %d mismatch %d for %s ASCII %x\n", 37 | wcwidth(c), w, 38 | isprint(c) ? "printable" : "non-printable", c); 39 | error = 1; 40 | } 41 | if (!my_isprint(c) && w > 0) { 42 | fprintf(stderr, "non-printing %x had width %d\n", c, w); 43 | error = 1; 44 | } 45 | } 46 | check(!error, "utf8proc_charwidth FAILED tests."); 47 | 48 | /* print some other information by compariing with system wcwidth */ 49 | printf("Mismatches with system wcwidth (not necessarily errors):\n"); 50 | for (c = 0; c <= 0x110000; ++c) { 51 | int w = utf8proc_charwidth(c); 52 | int wc = wcwidth(c); 53 | if (sizeof(wchar_t) == 2 && c >= (1<<16)) continue; 54 | /* lots of these errors for out-of-date system unicode tables */ 55 | if (wc == -1 && my_isprint(c) && w > 0) { 56 | updates += 1; 57 | #if 0 58 | printf(" wcwidth(%x) = -1 for printable char\n", c); 59 | #endif 60 | } 61 | if (wc == -1 && !my_isprint(c) && w > 0) 62 | printf(" wcwidth(%x) = -1 for non-printable width-%d char\n", c, w); 63 | if (wc >= 0 && wc != w) 64 | printf(" wcwidth(%x) = %d != charwidth %d\n", c, wc, w); 65 | } 66 | printf(" ... 
(positive widths for %d chars unknown to wcwidth) ...\n", 67 | updates); 68 | printf("Character-width tests SUCCEEDED.\n"); 69 | 70 | return 0; 71 | } 72 | -------------------------------------------------------------------------------- /rts/utf8proc/test/custom.c: -------------------------------------------------------------------------------- 1 | #include "tests.h" 2 | 3 | static int thunk_test = 1; 4 | 5 | static utf8proc_int32_t custom(utf8proc_int32_t codepoint, void *thunk) 6 | { 7 | check(((int *) thunk) == &thunk_test, "unexpected thunk passed"); 8 | if (codepoint == 'a') 9 | return 'b'; 10 | if (codepoint == 'S') 11 | return 0x00df; /* ß */ 12 | return codepoint; 13 | } 14 | 15 | int main(void) 16 | { 17 | utf8proc_uint8_t input[] = {0x41,0x61,0x53,0x62,0xef,0xbd,0x81,0x00}; /* "AaSb\uff41" */ 18 | utf8proc_uint8_t correct[] = {0x61,0x62,0x73,0x73,0x62,0x61,0x00}; /* "abssba" */ 19 | utf8proc_uint8_t *output; 20 | utf8proc_map_custom(input, 0, &output, UTF8PROC_CASEFOLD | UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_NULLTERM, 21 | custom, &thunk_test); 22 | printf("mapped \"%s\" -> \"%s\"\n", (char*)input, (char*)output); 23 | check(strlen((char*) output) == 6, "incorrect output length"); 24 | check(!memcmp(correct, output, 7), "incorrect output data"); 25 | free(output); 26 | return 0; 27 | } 28 | -------------------------------------------------------------------------------- /rts/utf8proc/test/graphemetest.c: -------------------------------------------------------------------------------- 1 | #include "tests.h" 2 | 3 | int main(int argc, char **argv) 4 | { 5 | char *buf = NULL; 6 | size_t bufsize = 0; 7 | FILE *f = argc > 1 ? fopen(argv[1], "r") : NULL; 8 | utf8proc_uint8_t src[1024]; 9 | int len; 10 | 11 | check(f != NULL, "error opening GraphemeBreakTest.txt"); 12 | while (getline(&buf, &bufsize, f) > 0) { 13 | size_t bi = 0, si = 0; 14 | lineno += 1; 15 | 16 | if (lineno % 100 == 0) 17 | printf("checking line %zd...\n", lineno); 18 | 19 | if (buf[0] == '#') continue; 20 | 21 | while (buf[bi]) { 22 | bi = skipspaces(buf, bi); 23 | if (buf[bi] == '/') { /* grapheme break */ 24 | src[si++] = '/'; 25 | bi++; 26 | } 27 | else if (buf[bi] == '+') { /* no break */ 28 | bi++; 29 | } 30 | else if (buf[bi] == '#') { /* start of comments */ 31 | break; 32 | } 33 | else { /* hex-encoded codepoint */ 34 | len = encode((char*) (src + si), buf + bi) - 1; 35 | while (src[si]) ++si; /* advance to NUL termination */ 36 | bi += len; 37 | } 38 | } 39 | if (si && src[si-1] == '/') 40 | --si; /* no break after final grapheme */ 41 | src[si] = 0; /* NUL-terminate */ 42 | 43 | if (si) { 44 | utf8proc_uint8_t utf8[1024]; /* copy src without 0xff grapheme separators */ 45 | size_t i = 0, j = 0; 46 | utf8proc_ssize_t glen; 47 | utf8proc_uint8_t *g; /* utf8proc_map grapheme results */ 48 | while (i < si) { 49 | if (src[i] != '/') 50 | utf8[j++] = src[i++]; 51 | else 52 | i++; 53 | } 54 | glen = utf8proc_map(utf8, j, &g, UTF8PROC_CHARBOUND); 55 | if (glen == UTF8PROC_ERROR_INVALIDUTF8) { 56 | /* the test file contains surrogate codepoints, which are only for UTF-16 */ 57 | printf("line %zd: ignoring invalid UTF-8 codepoints\n", lineno); 58 | } 59 | else { 60 | check(glen >= 0, "utf8proc_map error = %s", 61 | utf8proc_errmsg(glen)); 62 | for (i = 0; i <= glen; ++i) 63 | if (g[i] == 0xff) 64 | g[i] = '/'; /* easier-to-read output (/ is not in test strings) */ 65 | check(!strcmp((char*)g, (char*)src), 66 | "grapheme mismatch: \"%s\" instead of \"%s\"", (char*)g, (char*)src); 67 | } 68 | free(g); 69 | } 70 
| } 71 | fclose(f); 72 | printf("Passed tests after %zd lines!\n", lineno); 73 | return 0; 74 | } 75 | -------------------------------------------------------------------------------- /rts/utf8proc/test/iterate.c: -------------------------------------------------------------------------------- 1 | #include "tests.h" 2 | #include 3 | #include 4 | 5 | static int tests; 6 | static int error; 7 | 8 | #define CHECKVALID(pos, val, len) buf[pos] = val; testbytes(buf,len,len,__LINE__) 9 | #define CHECKINVALID(pos, val, len) buf[pos] = val; testbytes(buf,len,UTF8PROC_ERROR_INVALIDUTF8,__LINE__) 10 | 11 | static void testbytes(unsigned char *buf, int len, utf8proc_ssize_t retval, int line) 12 | { 13 | utf8proc_int32_t out[16]; 14 | utf8proc_ssize_t ret; 15 | 16 | /* Make a copy to ensure that memory is left uninitialized after "len" 17 | * bytes. This way, Valgrind can detect overreads. 18 | */ 19 | unsigned char tmp[16]; 20 | memcpy(tmp, buf, len); 21 | 22 | tests++; 23 | if ((ret = utf8proc_iterate(tmp, len, out)) != retval) { 24 | fprintf(stderr, "Failed (%d):", line); 25 | for (int i = 0; i < len ; i++) { 26 | fprintf(stderr, " 0x%02x", tmp[i]); 27 | } 28 | fprintf(stderr, " -> %zd\n", ret); 29 | error++; 30 | } 31 | } 32 | 33 | int main(int argc, char **argv) 34 | { 35 | uint32_t byt; 36 | unsigned char buf[16]; 37 | 38 | tests = error = 0; 39 | 40 | // Check valid sequences that were considered valid erroneously before 41 | buf[0] = 0xef; 42 | buf[1] = 0xb7; 43 | for (byt = 0x90; byt < 0xa0; byt++) { 44 | CHECKVALID(2, byt, 3); 45 | } 46 | // Check 0xfffe and 0xffff 47 | buf[1] = 0xbf; 48 | CHECKVALID(2, 0xbe, 3); 49 | CHECKVALID(2, 0xbf, 3); 50 | // Check 0x??fffe & 0x??ffff 51 | for (byt = 0x1fffe; byt < 0x110000; byt += 0x10000) { 52 | buf[0] = 0xf0 | (byt >> 18); 53 | buf[1] = 0x80 | ((byt >> 12) & 0x3f); 54 | CHECKVALID(3, 0xbe, 4); 55 | CHECKVALID(3, 0xbf, 4); 56 | } 57 | 58 | // Continuation byte not after lead 59 | for (byt = 0x80; byt < 0xc0; byt++) { 60 | CHECKINVALID(0, byt, 1); 61 | } 62 | 63 | // Continuation byte not after lead 64 | for (byt = 0x80; byt < 0xc0; byt++) { 65 | CHECKINVALID(0, byt, 1); 66 | } 67 | 68 | // Test lead bytes 69 | for (byt = 0xc0; byt <= 0xff; byt++) { 70 | // Single lead byte at end of string 71 | CHECKINVALID(0, byt, 1); 72 | // Lead followed by non-continuation character < 0x80 73 | CHECKINVALID(1, 65, 2); 74 | // Lead followed by non-continuation character > 0xbf 75 | CHECKINVALID(1, 0xc0, 2); 76 | } 77 | 78 | // Test overlong 2-byte 79 | buf[0] = 0xc0; 80 | for (byt = 0x81; byt <= 0xbf; byt++) { 81 | CHECKINVALID(1, byt, 2); 82 | } 83 | buf[0] = 0xc1; 84 | for (byt = 0x80; byt <= 0xbf; byt++) { 85 | CHECKINVALID(1, byt, 2); 86 | } 87 | 88 | // Test overlong 3-byte 89 | buf[0] = 0xe0; 90 | buf[2] = 0x80; 91 | for (byt = 0x80; byt <= 0x9f; byt++) { 92 | CHECKINVALID(1, byt, 3); 93 | } 94 | 95 | // Test overlong 4-byte 96 | buf[0] = 0xf0; 97 | buf[2] = 0x80; 98 | buf[3] = 0x80; 99 | for (byt = 0x80; byt <= 0x8f; byt++) { 100 | CHECKINVALID(1, byt, 4); 101 | } 102 | 103 | // Test 4-byte > 0x10ffff 104 | buf[0] = 0xf4; 105 | buf[2] = 0x80; 106 | buf[3] = 0x80; 107 | for (byt = 0x90; byt <= 0xbf; byt++) { 108 | CHECKINVALID(1, byt, 4); 109 | } 110 | buf[1] = 0x80; 111 | for (byt = 0xf5; byt <= 0xf7; byt++) { 112 | CHECKINVALID(0, byt, 4); 113 | } 114 | 115 | // Test 5-byte 116 | buf[4] = 0x80; 117 | for (byt = 0xf8; byt <= 0xfb; byt++) { 118 | CHECKINVALID(0, byt, 5); 119 | } 120 | 121 | // Test 6-byte 122 | buf[5] = 0x80; 123 | for (byt = 0xfc; byt <= 
0xfd; byt++) { 124 | CHECKINVALID(0, byt, 6); 125 | } 126 | 127 | // Test 7-byte 128 | buf[6] = 0x80; 129 | CHECKINVALID(0, 0xfe, 7); 130 | 131 | // Three and above byte sequences 132 | for (byt = 0xe0; byt < 0xf0; byt++) { 133 | // Lead followed by only 1 continuation byte 134 | CHECKINVALID(0, byt, 2); 135 | // Lead ended by non-continuation character < 0x80 136 | CHECKINVALID(2, 65, 3); 137 | // Lead ended by non-continuation character > 0xbf 138 | CHECKINVALID(2, 0xc0, 3); 139 | } 140 | 141 | // 3-byte encoded surrogate character(s) 142 | buf[0] = 0xed; buf[2] = 0x80; 143 | // Single surrogate 144 | CHECKINVALID(1, 0xa0, 3); 145 | // Trailing surrogate first 146 | CHECKINVALID(1, 0xb0, 3); 147 | 148 | // Four byte sequences 149 | buf[1] = 0x80; 150 | for (byt = 0xf0; byt < 0xf5; byt++) { 151 | // Lead followed by only 1 continuation bytes 152 | CHECKINVALID(0, byt, 2); 153 | // Lead followed by only 2 continuation bytes 154 | CHECKINVALID(0, byt, 3); 155 | // Lead followed by non-continuation character < 0x80 156 | CHECKINVALID(3, 65, 4); 157 | // Lead followed by non-continuation character > 0xbf 158 | CHECKINVALID(3, 0xc0, 4); 159 | 160 | } 161 | 162 | check(!error, "utf8proc_iterate FAILED %d tests out of %d", error, tests); 163 | printf("utf8proc_iterate tests SUCCEEDED, (%d) tests passed.\n", tests); 164 | 165 | return 0; 166 | } 167 | -------------------------------------------------------------------------------- /rts/utf8proc/test/normtest.c: -------------------------------------------------------------------------------- 1 | #include "tests.h" 2 | 3 | #define CHECK_NORM(NRM, norm, src) { \ 4 | char *src_norm = (char*) utf8proc_ ## NRM((utf8proc_uint8_t*) src); \ 5 | check(!strcmp(norm, src_norm), \ 6 | "normalization failed for %s -> %s", src, norm); \ 7 | free(src_norm); \ 8 | } 9 | 10 | int main(int argc, char **argv) 11 | { 12 | char *buf = NULL; 13 | size_t bufsize = 0; 14 | FILE *f = argc > 1 ? 
fopen(argv[1], "r") : NULL; 15 | char source[1024], NFC[1024], NFD[1024], NFKC[1024], NFKD[1024]; 16 | 17 | check(f != NULL, "error opening NormalizationTest.txt"); 18 | while (getline(&buf, &bufsize, f) > 0) { 19 | size_t offset; 20 | lineno += 1; 21 | 22 | if (buf[0] == '@') { 23 | printf("line %zd: %s", lineno, buf + 1); 24 | continue; 25 | } 26 | else if (lineno % 1000 == 0) 27 | printf("checking line %zd...\n", lineno); 28 | 29 | if (buf[0] == '#') continue; 30 | 31 | offset = encode(source, buf); 32 | offset += encode(NFC, buf + offset); 33 | offset += encode(NFD, buf + offset); 34 | offset += encode(NFKC, buf + offset); 35 | offset += encode(NFKD, buf + offset); 36 | 37 | CHECK_NORM(NFC, NFC, source); 38 | CHECK_NORM(NFC, NFC, NFC); 39 | CHECK_NORM(NFC, NFC, NFD); 40 | CHECK_NORM(NFC, NFKC, NFKC); 41 | CHECK_NORM(NFC, NFKC, NFKD); 42 | 43 | CHECK_NORM(NFD, NFD, source); 44 | CHECK_NORM(NFD, NFD, NFC); 45 | CHECK_NORM(NFD, NFD, NFD); 46 | CHECK_NORM(NFD, NFKD, NFKC); 47 | CHECK_NORM(NFD, NFKD, NFKD); 48 | 49 | CHECK_NORM(NFKC, NFKC, source); 50 | CHECK_NORM(NFKC, NFKC, NFC); 51 | CHECK_NORM(NFKC, NFKC, NFD); 52 | CHECK_NORM(NFKC, NFKC, NFKC); 53 | CHECK_NORM(NFKC, NFKC, NFKD); 54 | 55 | CHECK_NORM(NFKD, NFKD, source); 56 | CHECK_NORM(NFKD, NFKD, NFC); 57 | CHECK_NORM(NFKD, NFKD, NFD); 58 | CHECK_NORM(NFKD, NFKD, NFKC); 59 | CHECK_NORM(NFKD, NFKD, NFKD); 60 | } 61 | fclose(f); 62 | printf("Passed tests after %zd lines!\n", lineno); 63 | return 0; 64 | } 65 | -------------------------------------------------------------------------------- /rts/utf8proc/test/printproperty.c: -------------------------------------------------------------------------------- 1 | /* simple test program to print out the utf8proc properties for a codepoint */ 2 | 3 | #include "tests.h" 4 | 5 | int main(int argc, char **argv) 6 | { 7 | int i; 8 | 9 | for (i = 1; i < argc; ++i) { 10 | unsigned int c; 11 | if (!strcmp(argv[i], "-V")) { 12 | printf("utf8proc version %s\n", utf8proc_version()); 13 | continue; 14 | } 15 | check(sscanf(argv[i],"%x",&c) == 1, "invalid hex input %s", argv[i]); 16 | const utf8proc_property_t *p = utf8proc_get_property(c); 17 | printf("U+%s:\n" 18 | " category = %s\n" 19 | " combining_class = %d\n" 20 | " bidi_class = %d\n" 21 | " decomp_type = %d\n" 22 | " uppercase_mapping = %x\n" 23 | " lowercase_mapping = %x\n" 24 | " titlecase_mapping = %x\n" 25 | " comb_index = %d\n" 26 | " bidi_mirrored = %d\n" 27 | " comp_exclusion = %d\n" 28 | " ignorable = %d\n" 29 | " control_boundary = %d\n" 30 | " boundclass = %d\n" 31 | " charwidth = %d\n", 32 | argv[i], 33 | utf8proc_category_string(c), 34 | p->combining_class, 35 | p->bidi_class, 36 | p->decomp_type, 37 | utf8proc_toupper(c), 38 | utf8proc_tolower(c), 39 | utf8proc_totitle(c), 40 | p->comb_index, 41 | p->bidi_mirrored, 42 | p->comp_exclusion, 43 | p->ignorable, 44 | p->control_boundary, 45 | p->boundclass, 46 | utf8proc_charwidth(c)); 47 | } 48 | return 0; 49 | } 50 | -------------------------------------------------------------------------------- /rts/utf8proc/test/tests.c: -------------------------------------------------------------------------------- 1 | /* Common functions for our test programs. */ 2 | 3 | #include "tests.h" 4 | 5 | size_t lineno = 0; 6 | 7 | void check(int cond, const char *format, ...) 
8 | { 9 | if (!cond) { 10 | va_list args; 11 | fprintf(stderr, "line %zd: ", lineno); 12 | va_start(args, format); 13 | vfprintf(stderr, format, args); 14 | va_end(args); 15 | fprintf(stderr, "\n"); 16 | exit(1); 17 | } 18 | } 19 | 20 | size_t skipspaces(const char *buf, size_t i) 21 | { 22 | while (isspace(buf[i])) ++i; 23 | return i; 24 | } 25 | 26 | /* if buf points to a sequence of codepoints encoded as hexadecimal strings, 27 | separated by whitespace, and terminated by any character not in 28 | [0-9a-fA-F] or whitespace, then stores the corresponding utf8 string 29 | in dest, returning the number of bytes read from buf */ 30 | size_t encode(char *dest, const char *buf) 31 | { 32 | size_t i = 0, j, d = 0; 33 | for (;;) { 34 | int c; 35 | i = skipspaces(buf, i); 36 | for (j=i; buf[j] && strchr("0123456789abcdef", tolower(buf[j])); ++j) 37 | ; /* find end of hex input */ 38 | if (j == i) { /* no codepoint found */ 39 | dest[d] = 0; /* NUL-terminate destination string */ 40 | return i + 1; 41 | } 42 | check(sscanf(buf + i, "%x", (unsigned int *)&c) == 1, "invalid hex input %s", buf+i); 43 | i = j; /* skip to char after hex input */ 44 | d += utf8proc_encode_char(c, (utf8proc_uint8_t *) (dest + d)); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /rts/utf8proc/test/tests.h: -------------------------------------------------------------------------------- 1 | /* Common functions and includes for our test programs. */ 2 | 3 | /* 4 | * Set feature macro to enable getline() and wcwidth(). 5 | * 6 | * Please refer to section 2.2.1 of POSIX.1-2008: 7 | * http://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_02_01_02 8 | */ 9 | #define _XOPEN_SOURCE 700 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include "../utf8proc.h" 18 | 19 | extern size_t lineno; 20 | 21 | void check(int cond, const char *format, ...); 22 | size_t skipspaces(const char *buf, size_t i); 23 | size_t encode(char *dest, const char *buf); 24 | -------------------------------------------------------------------------------- /rts/utf8proc/test/valid.c: -------------------------------------------------------------------------------- 1 | #include "tests.h" 2 | #include 3 | #include 4 | 5 | int main(int argc, char **argv) 6 | { 7 | int c, error = 0; 8 | 9 | (void) argc; /* unused */ 10 | (void) argv; /* unused */ 11 | 12 | /* some simple sanity tests of */ 13 | for (c = 0; c < 0xd800; c++) { 14 | if (!utf8proc_codepoint_valid(c)) { 15 | fprintf(stderr, "Failed: codepoint_valid(%04x) -> false\n", c); 16 | error++; 17 | } 18 | } 19 | for (;c < 0xe000; c++) { 20 | if (utf8proc_codepoint_valid(c)) { 21 | fprintf(stderr, "Failed: codepoint_valid(%04x) -> true\n", c); 22 | error++; 23 | } 24 | } 25 | for (;c < 0x110000; c++) { 26 | if (!utf8proc_codepoint_valid(c)) { 27 | fprintf(stderr, "Failed: codepoint_valid(%06x) -> false\n", c); 28 | error++; 29 | } 30 | } 31 | for (;c < 0x110010; c++) { 32 | if (utf8proc_codepoint_valid(c)) { 33 | fprintf(stderr, "Failed: codepoint_valid(%06x) -> true\n", c); 34 | error++; 35 | } 36 | } 37 | check(!error, "utf8proc_codepoint_valid FAILED %d tests.", error); 38 | printf("Validity tests SUCCEEDED.\n"); 39 | 40 | return 0; 41 | } 42 | -------------------------------------------------------------------------------- /rts/utf8proc/utils.cmake: -------------------------------------------------------------------------------- 1 | 2 | function (disallow_intree_builds) 3 | # Adapted from 
LLVM's toplevel CMakeLists.txt file 4 | if( CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR AND NOT MSVC_IDE ) 5 | message(FATAL_ERROR " 6 | In-source builds are not allowed. CMake would overwrite the 7 | makefiles distributed with utf8proc. Please create a directory 8 | and run cmake from there. Building in a subdirectory is 9 | fine, e.g.: 10 | 11 | mkdir build 12 | cd build 13 | cmake .. 14 | 15 | This process created the file `CMakeCache.txt' and the 16 | directory `CMakeFiles'. Please delete them. 17 | 18 | ") 19 | endif() 20 | endfunction() 21 | -------------------------------------------------------------------------------- /shell.nix: -------------------------------------------------------------------------------- 1 | (import ./release.nix).env -------------------------------------------------------------------------------- /src/lasca/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Strict #-} 2 | module Main where 3 | 4 | import qualified Lasca.Compiler 5 | 6 | main :: IO () 7 | main = Lasca.Compiler.main 8 | -------------------------------------------------------------------------------- /src/lib/Lasca/Compiler.hs: -------------------------------------------------------------------------------- 1 | module Lasca.Compiler where 2 | 3 | import Lasca.Namer 4 | import Lasca.Desugar 5 | import Lasca.Codegen 6 | import Lasca.EmitCommon 7 | import Lasca.Emit 8 | import qualified Lasca.EmitStatic as EmitStatic 9 | import qualified Lasca.EmitDynamic as EmitDynamic 10 | import Lasca.JIT 11 | import Lasca.Infer 12 | import Lasca.Type 13 | import Lasca.Syntax 14 | import Lasca.Options 15 | import Lasca.Modules 16 | 17 | 18 | import Control.Monad 19 | import Data.Maybe 20 | import Text.Printf 21 | import qualified Data.Text as T 22 | import qualified Data.ByteString.Char8 as Char8 23 | 24 | import System.Info 25 | import System.Environment 26 | import System.Exit 27 | import System.Process 28 | import System.Directory 29 | import System.FilePath 30 | import System.IO ( hGetContents ) 31 | import System.IO.Error 32 | import Data.List 33 | import qualified Data.Map.Strict as Map 34 | import Debug.Trace as Debug 35 | import Control.Applicative 36 | 37 | import qualified LLVM.Module as LLVM 38 | import qualified LLVM.Target as LLVM 39 | import qualified LLVM.Relocation as Reloc 40 | import qualified LLVM.Target.Options as TO 41 | import qualified LLVM.CodeModel as CodeModel 42 | import qualified LLVM.CodeGenOpt as CodeGenOpt 43 | 44 | 45 | parsePhase opts filename = do 46 | exists <- doesFileExist filename 47 | if exists then do 48 | absoluteFilePath <- canonicalizePath filename 49 | searchPaths <- moduleSearchPaths 50 | (imported, mainModule) <- loadModule searchPaths Map.empty [] absoluteFilePath (Name $ T.pack filename) 51 | let linearized = linearizeIncludes mainModule 52 | let ex = foldr (\m exprs -> moduleExprs m ++ exprs) [] linearized 53 | -- Debug.traceM $ printf "AAA %s\n%s" (show exprs1) (show ex) 54 | when (verboseMode opts) $ putStrLn $ printf "Parsed OK, imported %s, linearized: %s" (show imported) (show linearized) 55 | when (printAst opts) $ mapM_ print ex 56 | when (verboseMode opts) $ putStrLn ("Compiler mode is " ++ show (mode opts)) 57 | return ex 58 | else error $ printf "Couldn't open file %s" (show filename) 59 | 60 | runPhases opts filename = do 61 | exprs <- parsePhase opts filename 62 | let (named, state) = namerPhase opts exprs 63 | let ctx = _context state 64 | let mainModule = _currentModule state 65 | let 
mainFunctionName = NS mainModule "main" 66 | let desugared = desugarPhase ctx named 67 | typed <- if mode opts == Static 68 | then typerPhase opts ctx filename desugared 69 | else return desugared 70 | let desugared2 = patmatPhase ctx typed 71 | let desugared3 = lambdaLiftPhase ctx desugared2 -- must be after typechecking 72 | let !desugared4 = delambdafyPhase ctx desugared3 -- must be after typechecking 73 | when (printAst opts) $ putStrLn $ intercalate "\n" (map printExprWithType desugared4) 74 | let mod = codegenPhase ctx filename desugared4 mainFunctionName 75 | if exec opts then do 76 | when (verboseMode opts) $ putStrLn "Running JIT" 77 | runJIT opts mod 78 | else compileExecutable opts filename mod 79 | 80 | typerPhase opts ctx filename exprs = do 81 | result <- typeCheck ctx exprs 82 | case result of 83 | Right (env, typedExprs) -> do 84 | when (verboseMode opts) $ putStrLn "typechecked OK" 85 | when (printTypes opts) $ putStrLn (showPretty env) 86 | return typedExprs 87 | Left e -> do 88 | dir <- getCurrentDirectory 89 | let source = dir filename 90 | die (source ++ ":" ++ showTypeError e) 91 | 92 | codegenPhase context filename exprs mainFunctionName = do 93 | let opts = _lascaOpts context 94 | let modo = emptyModule filename 95 | let cgen = if mode opts == Static then EmitStatic.cgen else EmitDynamic.cgen 96 | let ctx = collectGlobals context exprs 97 | runLLVM modo $ do 98 | declareStdFuncs 99 | fmt <- genFunctionMap exprs 100 | let defs = reverse (_dataDefs ctx) 101 | tst <- genTypesStruct ctx defs 102 | genRuntime opts fmt tst 103 | forM_ exprs $ \expr -> do 104 | defineStringConstants expr 105 | codegenTop ctx cgen expr 106 | codegenStartFunc ctx cgen mainFunctionName 107 | 108 | processMainFile :: LascaOpts -> String -> IO () 109 | processMainFile opts filename = runPhases opts filename 110 | 111 | findCCompiler = do 112 | ccEnv <- lookupEnv "CC" 113 | cl5 <- findExecutable "clang-5" 114 | clang <- findExecutable "clang" 115 | gcc <- findExecutable "gcc" 116 | return $ ccEnv <|> cl5 <|> clang <|> gcc 117 | 118 | withHostTargetMachine :: (LLVM.TargetMachine -> IO a) -> IO a 119 | withHostTargetMachine f = do 120 | LLVM.initializeAllTargets 121 | triple <- LLVM.getProcessTargetTriple 122 | cpu <- LLVM.getHostCPUName 123 | features <- LLVM.getHostCPUFeatures 124 | (target, _) <- LLVM.lookupTarget Nothing triple 125 | LLVM.withTargetOptions $ \options -> 126 | LLVM.withTargetMachine target triple cpu features options Reloc.PIC CodeModel.Default CodeGenOpt.Default f 127 | 128 | 129 | compileExecutable opts fname mod = do 130 | withOptimizedModule opts mod $ \context m -> do 131 | ll <- LLVM.moduleLLVMAssembly m 132 | let asm = Char8.unpack ll 133 | writeFile (fname ++ ".ll") asm 134 | withHostTargetMachine $ \tm -> LLVM.writeObjectToFile tm (LLVM.File (fname ++ ".o")) m 135 | let outputPath = case outputFile opts of 136 | [] -> dropExtension fname 137 | path -> path 138 | let optLevel = optimization opts 139 | let optimizationOpts = ["-O" ++ show optLevel | optLevel > 0] 140 | result <- findCCompiler 141 | lascaPathEnv <- lookupEnv "LASCAPATH" 142 | let lascaPath = fromMaybe "." lascaPathEnv 143 | absLascaPathEnv <- mapM canonicalizePath (splitSearchPath lascaPath) 144 | let cc = fromMaybe (error "Did find C compiler. Install Clang or GCC, or define CC environment variable") result 145 | lascartStaticLink = ["-llascartStatic"] 146 | lascartDynamicLink = ["-llascart"] 147 | libLascaLink = ["-rdynamic"] 148 | -- passes --export-dynamic to the linker. 
149 | -- Needed for OrcJit to to able to dynamicly load generated `main` function 150 | ++ lascartStaticLink 151 | -- ++ lascartDynamicLink 152 | libDirs = fmap (\p -> "-L" ++ p) absLascaPathEnv 153 | links = ["-lgc", "-lffi", "-lm", "-lpcre2-8"] 154 | -- object files must be specified before libraries for successful static linking 155 | let args = optimizationOpts ++ [ "-o", outputPath, fname ++ ".o"] ++ libDirs ++ ["-fPIC", "-g"] ++ libLascaLink ++ links 156 | let command = unwords $ cc : args 157 | when (verboseMode opts) $ putStrLn command 158 | (output, errCode) <- getProcessOutput command 159 | -- putStrLn output 160 | when (errCode /= ExitSuccess) $ die output 161 | -- return () 162 | 163 | getProcessOutput :: String -> IO (String, ExitCode) 164 | getProcessOutput command = 165 | -- Create the process 166 | do (_pIn, pOut, pErr, handle) <- runInteractiveCommand command 167 | -- Wait for the process to finish and store its exit code 168 | exitCode <- waitForProcess handle 169 | -- Get the standard output. 170 | output <- hGetContents pOut 171 | stderr <- hGetContents pErr 172 | -- return both the output and the exit code. 173 | return (output ++ stderr, exitCode) 174 | 175 | runLasca :: LascaOpts -> IO () 176 | runLasca opts = do 177 | if null (lascaFiles opts) 178 | then die ("need file") -- TODO show help 179 | else do 180 | let file = head (lascaFiles opts) 181 | processMainFile opts file 182 | 183 | main :: IO () 184 | main = do 185 | opts <- parseOptions 186 | runLasca opts 187 | -------------------------------------------------------------------------------- /src/lib/Lasca/Emit.hs: -------------------------------------------------------------------------------- 1 | module Lasca.Emit (codegenTop, collectGlobals) where 2 | 3 | import Text.Printf 4 | import Data.String 5 | import Data.Text (Text) 6 | import qualified Data.Text as T 7 | import qualified Data.Text.Encoding as Encoding 8 | import Data.Map.Strict (Map) 9 | import qualified Data.Map.Strict as Map 10 | import Data.Set (Set) 11 | import qualified Data.Set as Set 12 | import qualified Debug.Trace as Debug 13 | 14 | import Control.Monad.State 15 | import Control.Lens.Operators 16 | 17 | import Lasca.Codegen 18 | import Lasca.Type 19 | import Lasca.EmitCommon 20 | import qualified Lasca.EmitStatic as EmitStatic 21 | import Lasca.Syntax 22 | 23 | genExternalFuncWrapper ctx f@(Let True meta name returnType lam _) = do 24 | modState <- get 25 | let codeGenResult = codeGen modState 26 | blocks = createBlocks codeGenResult 27 | retType = typeMapping returnType 28 | 29 | define retType (nameToSBS name) (toSig externArgs) blocks 30 | where 31 | (externArgs, Literal _ (StringLit externName)) = uncurryLambda lam 32 | codeGen modState = execCodegen [] modState $ do 33 | entry <- addBlock entryBlockName 34 | setBlock entry 35 | let argTypes = map (\(Arg n t) -> t) externArgs 36 | largs <- forM externArgs $ \(Arg n tpe) -> do 37 | let argName = nameToSBS n 38 | let ref = typeToLaTypeRef tpe 39 | when (isDynamicMode ctx && tpe `Set.member` autoBoxedTypes) $ do 40 | r <- callBuiltin "unbox" [constOp ref, localPtr argName] -- check primitive types 41 | return () 42 | EmitStatic.resolveBoxing EmitStatic.anyTypeVar tpe (localPtr argName) 43 | let retType = externalTypeMapping returnType 44 | -- Debug.traceM $ printf "%s genExternalFuncWrapper %s, retType %s" (show name) (show $ externFuncLLvmType f) (show retType) 45 | res <- call (externFuncLLvmType f) (textToSBS externName) largs 46 | wrapped <- EmitStatic.resolveBoxing returnType 
EmitStatic.anyTypeVar res 47 | ret wrapped 48 | genExternalFuncWrapper ctx other = error $ "genExternalFuncWrapper got " ++ (show other) 49 | 50 | 51 | collectGlobals ctx exprs = do 52 | execState (mapM toplevel exprs) ctx 53 | where 54 | toplevel expr = case expr of 55 | Let False meta name _ expr EmptyExpr -> globalVals %= Map.insert name expr 56 | Let True meta name _ lam EmptyExpr -> globalFunctions %= Map.insert name expr 57 | _ -> return () 58 | 59 | codegenTop ctx cgen topExpr = case topExpr of 60 | this@(Let False meta name _ expr _) -> do 61 | modify (\s -> s { _globalValsInit = _globalValsInit s ++ [(name, expr)] }) 62 | let valType = llvmTypeOf this 63 | -- Debug.traceM $ printf "Cons %s: %s" (show name) (show valType) 64 | defineGlobal (nameToSBS name) valType (Just $ defaultValueForType valType) 65 | 66 | f@(Let True meta name tpe lam _) -> do 67 | if meta ^. isExternal then do 68 | let (Literal _ (StringLit externName)) = body 69 | external (externalTypeMapping tpe) (textToSBS externName) (externArgsToSig args) False [] 70 | genExternalFuncWrapper ctx f 71 | else do 72 | modState <- get 73 | let codeGenResult = codeGen modState 74 | let blocks = createBlocks codeGenResult 75 | mapM_ defineStringLit (generatedStrings codeGenResult) 76 | let retType = mappedReturnType args funcType 77 | define retType (nameToSBS name) largs blocks 78 | where 79 | (args, body) = uncurryLambda lam 80 | 81 | funcType = typeOf lam 82 | largs = map (\(n, t) -> (nameToSBS n, t)) argsWithTypes 83 | 84 | funcTypeToLlvm (Arg name _) (TypeFunc a b, acc) = (b, (name, typeMapping a) : acc) 85 | funcTypeToLlvm arg t = error $ "AAA3" ++ show arg ++ show t 86 | 87 | argsWithTypes = do 88 | -- Debug.traceM $ printf "codegenTop %s(%s): %s" (show name) (show args) (show funcType) 89 | reverse $ snd $ foldr funcTypeToLlvm (funcType, []) (reverse args) 90 | 91 | codeGen modState = execCodegen [] modState $ do 92 | -- Debug.traceM $ printf "argsWithTypes %s" (show argsWithTypes) 93 | entry <- addBlock entryBlockName 94 | setBlock entry 95 | forM_ argsWithTypes $ \(n, t) -> do 96 | var <- alloca t 97 | store var (local t (nameToSBS n)) 98 | -- Debug.traceM $ printf "assign %s: %s = %s" n (show t) (show var) 99 | assign n var 100 | cgen ctx body >>= ret 101 | 102 | (Data _ name tvars constructors) -> return () 103 | Module{} -> return () 104 | Import{} -> return () 105 | _ -> error $ printf "Expression of this kind should not get to codegenTop. It's a bug. 
%s at %s" 106 | (show topExpr) (show $ exprPosition topExpr) -------------------------------------------------------------------------------- /src/lib/Lasca/EmitDynamic.hs: -------------------------------------------------------------------------------- 1 | module Lasca.EmitDynamic where 2 | 3 | import LLVM.Module 4 | import LLVM.Context 5 | import LLVM.Analysis 6 | import LLVM.PassManager 7 | 8 | import qualified LLVM.AST as AST 9 | import qualified LLVM.AST.Global 10 | import qualified LLVM.AST.Type as T 11 | import qualified LLVM.AST.Instruction as I 12 | import qualified LLVM.AST.Constant as C 13 | import qualified LLVM.AST.Float as F 14 | import qualified LLVM.AST.IntegerPredicate as IP 15 | import qualified LLVM.AST.FloatingPointPredicate as FP 16 | import qualified LLVM.AST.FunctionAttribute as FA 17 | import qualified LLVM.AST.IntegerPredicate as IPred 18 | 19 | -- import qualified Data.Text as Text 20 | import qualified Data.ByteString as ByteString 21 | import qualified Data.Text.Encoding as Encoding 22 | import Text.Printf 23 | import qualified Data.ByteString.UTF8 as UTF8 24 | import Data.String 25 | import qualified Data.ByteString.Char8 as Char8 26 | import qualified Data.ByteString as BS 27 | import qualified Data.ByteString.Short as SBS 28 | 29 | import LLVM.ExecutionEngine ( withMCJIT, withModuleInEngine, getFunction ) 30 | 31 | import qualified Data.Text 32 | import qualified Data.ByteString 33 | import qualified Data.Text.Encoding 34 | import Data.Digest.Murmur32 35 | import Data.Maybe 36 | import qualified Data.List as List 37 | import Data.Word 38 | import Data.Int 39 | import Control.Monad.State 40 | import Control.Monad.Except 41 | import Control.Applicative 42 | import qualified Control.Lens as Lens 43 | import Control.Lens.Operators 44 | import Data.Map.Strict (Map) 45 | import qualified Data.Map.Strict as Map 46 | import Data.Set (Set) 47 | import qualified Data.Set as Set 48 | import qualified Data.Sequence as Seq 49 | import qualified Debug.Trace as Debug 50 | import System.Exit 51 | import System.Directory 52 | import System.FilePath 53 | 54 | import Lasca.Codegen 55 | import Lasca.Type 56 | import Lasca.EmitCommon 57 | import Lasca.Infer 58 | import qualified Lasca.Syntax as S 59 | import Lasca.Syntax (Ctx) 60 | import qualified Lasca.Options as Opts 61 | 62 | cgen :: Ctx -> S.Expr -> Codegen AST.Operand 63 | cgen ctx (S.Let False meta a _ b c) = do 64 | i <- alloca $ llvmTypeOf b 65 | val <- cgen ctx b 66 | store i val 67 | assign a i 68 | cgen ctx c 69 | cgen ctx (S.Ident meta name) = do 70 | syms <- gets symtab 71 | modState <- gets moduleState 72 | let mapping = functions modState 73 | case lookup name syms of 74 | Just x -> 75 | -- Debug.trace ("Local " ++ show name) 76 | load x 77 | Nothing | name `Map.member` S._globalFunctions ctx -> boxClosure name mapping [] 78 | | name `Map.member` S._globalVals ctx -> load (globalOp ptrType (nameToSBS name)) 79 | | otherwise -> boxError (nameToText name) 80 | cgen ctx (S.Literal meta l) = do 81 | -- Debug.traceM $ "Generating literal " ++ show l ++ " on " ++ show (S.pos meta) 82 | boxLit l meta 83 | cgen ctx this@(S.Array meta exprs) = do 84 | vs <- sequence [cgen ctx e | e <- exprs] 85 | boxArray vs 86 | cgen ctx this@(S.Select meta tree expr) = cgenSelect ctx this 87 | 88 | cgen ctx this@(S.Apply meta (S.Ident _ "unary-") [expr]) = cgenApplyUnOp ctx this 89 | cgen ctx this@(S.Apply meta (S.Ident _ fn) [lhs, rhs]) | fn `Map.member` binops = cgenApplyBinOp ctx this 90 | cgen ctx (S.Apply meta expr args) = cgenApply 
ctx meta expr args 91 | cgen ctx (S.Closure _ funcName enclosedVars) = do 92 | modState <- gets moduleState 93 | let mapping = functions modState 94 | boxClosure funcName mapping enclosedVars 95 | cgen ctx m@S.Match{} = 96 | error $ printf "Match expressions should be already desugared! %s at: %s" (show m) (show $ S.exprPosition m) 97 | cgen ctx (S.If meta cond tr fl) = cgenIfDynamic ctx meta cond tr fl 98 | cgen ctx e = error ("cgen shit " ++ show e) 99 | 100 | cgenIfDynamic ctx meta cond tr fl = do 101 | let resultType = llvmTypeOf tr 102 | let test = do 103 | cond <- cgen ctx cond 104 | -- unbox Bool 105 | bool <- unboxBoolDynamically cond 106 | instr (I.ICmp IP.EQ bool constTrue []) 107 | cgenIf resultType test (cgen ctx tr) (cgen ctx fl) 108 | 109 | cgenSelect ctx this@(S.Select meta tree expr) = do 110 | tree <- cgen ctx tree 111 | e <- cgen ctx expr 112 | let pos = createPosition $ S.pos meta 113 | callBuiltin "runtimeSelect" [tree, e, constOp pos] 114 | cgenSelect ctx e = error ("cgenSelect should only be called on Select, but called on" ++ show e) 115 | 116 | cgenApplyUnOp ctx this@(S.Apply meta op@(S.Ident _ "unary-") [expr]) = do 117 | lexpr <- cgen ctx expr 118 | callBuiltin "runtimeUnaryOp" [constIntOp 1, lexpr] 119 | cgenApplyUnOp ctx e = error ("cgenApplyUnOp should only be called on Apply, but called on" ++ show e) 120 | 121 | cgenApplyBinOp ctx (S.Apply meta (S.Ident _ fn) [lhs, rhs]) = do 122 | llhs <- cgen ctx lhs 123 | lrhs <- cgen ctx rhs 124 | let code = fromMaybe (error ("Couldn't find binop " ++ show fn)) (Map.lookup fn binops) 125 | let codeOp = constIntOp code 126 | callBuiltin "runtimeBinOp" [codeOp, llhs, lrhs] 127 | cgenApplyBinOp ctx e = error ("cgenApplyBinOp should only be called on Apply, but called on" ++ show e) 128 | 129 | cgenApply ctx meta expr args = do 130 | syms <- gets symtab 131 | let symMap = Map.fromList syms 132 | let isGlobal fn = (fn `Map.member` S._globalFunctions ctx) && not (fn `Map.member` symMap) 133 | case expr of 134 | -- TODO Here are BUGZZZZ!!!! :) 135 | -- TODO check arguments! 136 | -- this is done to speed-up calls if you `a global function 137 | S.Ident _ fn | isGlobal fn -> do 138 | let f = S._globalFunctions ctx Map.! fn 139 | -- Debug.traceM $ printf "Calling %s" fn 140 | largs <- forM args $ \arg -> cgen ctx arg 141 | call (funcLLvmType f) (nameToSBS fn) largs 142 | 143 | expr -> do 144 | modState <- gets moduleState 145 | e <- cgen ctx expr 146 | largs <- mapM (cgen ctx) args 147 | let argc = constIntOp (length largs) 148 | sargsPtr <- allocaSize ptrType argc 149 | let asdf (idx, arg) = do 150 | p <- getelementptr sargsPtr [idx] 151 | store p arg 152 | sargs <- bitcast sargsPtr ptrType -- runtimeApply accepts i8*, so need to bitcast. Remove when possible 153 | -- cdecl calling convension, arguments passed right to left 154 | sequence_ [asdf (constIntOp i, a) | (i, a) <- zip [0..] 
largs] 155 | let pos = createPosition $ S.pos meta 156 | callBuiltin "runtimeApply" [e, argc, sargs, constOp pos] 157 | -------------------------------------------------------------------------------- /src/lib/Lasca/JIT.hs: -------------------------------------------------------------------------------- 1 | module Lasca.JIT ( 2 | runJIT, 3 | withOptimizedModule 4 | ) where 5 | 6 | import Data.Int 7 | import Data.Word 8 | import qualified Data.Text.IO as T 9 | import qualified Data.Text.Lazy as LT 10 | import qualified Data.Text.IO as TIO 11 | import System.IO 12 | import Foreign.Ptr 13 | import Foreign.C.String 14 | import Foreign.C.Types 15 | import Foreign.Marshal.Array 16 | import Lasca.Syntax 17 | import Lasca.Options 18 | 19 | import Control.Monad.Except 20 | 21 | import qualified LLVM.AST as AST 22 | import LLVM.CodeModel 23 | import LLVM.Context 24 | import LLVM.Module as Mod 25 | import LLVM.Target hiding (withHostTargetMachine) 26 | 27 | import LLVM.Analysis 28 | import LLVM.PassManager 29 | import LLVM.Transforms 30 | import LLVM.OrcJIT 31 | import LLVM.OrcJIT.CompileLayer 32 | import LLVM.Linking (loadLibraryPermanently, getSymbolAddressInProcess) 33 | import qualified LLVM.CodeGenOpt as CodeGenOpt 34 | import qualified LLVM.CodeModel as CodeModel 35 | import qualified LLVM.Relocation as Reloc 36 | --import LLVM.Pretty (ppllvm) 37 | 38 | import qualified Data.ByteString as BS 39 | import qualified Data.ByteString.Char8 as Char8 40 | 41 | foreign import ccall "dynamic" mainFun :: FunPtr (Int -> Ptr CString -> IO ()) -> Int -> Ptr CString -> IO () 42 | 43 | passes :: Int -> PassSetSpec 44 | passes level = defaultCuratedPassSetSpec { optLevel = Just (fromIntegral level) } 45 | 46 | withHostTargetMachine :: (TargetMachine -> IO a) -> IO a 47 | withHostTargetMachine f = do 48 | initializeAllTargets 49 | triple <- getProcessTargetTriple 50 | cpu <- getHostCPUName 51 | features <- getHostCPUFeatures 52 | (target, _) <- lookupTarget Nothing triple 53 | withTargetOptions $ \options -> 54 | -- Make it PIC, otherwise it won't work with shared libraries 55 | withTargetMachine target triple cpu features options Reloc.PIC CodeModel.Default CodeGenOpt.Default f 56 | 57 | 58 | resolver :: IRCompileLayer l -> SymbolResolver 59 | resolver compileLayer = 60 | SymbolResolver 61 | (\s -> findSymbol compileLayer s True) 62 | (\s -> 63 | fmap (\a -> Right $ JITSymbol a (JITSymbolFlags False True False True)) (getSymbolAddressInProcess s) 64 | ) 65 | 66 | {- 67 | Read https://purelyfunctional.org/posts/2018-04-02-llvm-hs-jit-external-function.html 68 | for explanation. 
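Roughly: the SymbolResolver defined below resolves symbols that live in the JIT-compiled module itself via findSymbol on the compile layer, while symbols it does not define (the lascart runtime, libgc, libc, ...) are looked up with getSymbolAddressInProcess against the running lasca process. That in-process lookup only works because runJIT first calls loadLibraryPermanently Nothing, which makes the executable's own (dynamically exported) symbols visible to the JIT -- hence the -rdynamic link flag used in Lasca.Compiler.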
69 | -} 70 | runJIT :: LascaOpts -> AST.Module -> IO () 71 | runJIT opts mod = do 72 | -- putStrLn $ LT.unpack $ ppllvm mod 73 | b <- loadLibraryPermanently Nothing 74 | unless (not b) (error "Couldn’t load library") 75 | withOptimizedModule opts mod $ \context m -> 76 | withHostTargetMachine $ \tm -> 77 | withObjectLinkingLayer $ \linkingLayer -> 78 | withIRCompileLayer linkingLayer tm $ \compileLayer -> 79 | withModule compileLayer m 80 | (resolver compileLayer) $ \moduleHandle -> do 81 | mainSymbol <- mangleSymbol compileLayer "main" 82 | (Right (JITSymbol mainFn _)) <- findSymbol compileLayer mainSymbol True 83 | let args = lascaFiles opts 84 | let len = length args 85 | cargs <- mapM newCString args 86 | array <- mallocArray len 87 | pokeArray array cargs 88 | result <- mainFun (castPtrToFunPtr (wordPtrToPtr mainFn)) len array 89 | return () 90 | 91 | withOptimizedModule opts mod f = withContext $ \context -> 92 | withModuleFromAST context mod $ \m -> 93 | withPassManager (passes (optimization opts)) $ \pm -> do 94 | -- Optimization Pass 95 | -- linkModules m stdModule 96 | runPassManager pm m 97 | optmod <- moduleAST m 98 | when (printLLVMAsm opts) $ do 99 | s <- moduleLLVMAssembly m 100 | Char8.putStrLn s 101 | f context m 102 | -------------------------------------------------------------------------------- /src/lib/Lasca/Lexer.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TypeFamilies #-} 2 | {-# LANGUAGE FlexibleContexts #-} 3 | module Lasca.Lexer where 4 | 5 | import Data.Void 6 | import Data.Text (Text) 7 | import qualified Data.Text as T 8 | import Data.Scientific 9 | import Data.Char 10 | import qualified Data.List.NonEmpty as NonEmpty 11 | import Text.Megaparsec 12 | import Control.Monad (void, when) 13 | import Text.Megaparsec.Char 14 | import qualified Text.Megaparsec.Char.Lexer as L 15 | 16 | type Parser = Parsec Void Text 17 | 18 | ops = ["+","*","-","/",";", "==", ":=", "=",",",".","<",">","|",":"] 19 | keywords = ["module", "import", "data", "def", "extern", 20 | "if", "then", "else", "in", "let", "true", "false", "match", "do", "lazy", "var", "and", "not", "or" 21 | ] 22 | 23 | sc :: Parser () -- ‘sc’ stands for “space consumer” 24 | sc = L.space (void space1) lineComment blockComment 25 | where lineComment = (string "--" <|> string "#") *> void (takeWhileP (Just "character") (/= '\n')) 26 | blockComment = L.skipBlockComment "{-" "-}" 27 | 28 | identChar = alphaNumChar 29 | 30 | lexeme = L.lexeme sc 31 | 32 | symbol = L.symbol sc 33 | 34 | integer = lexeme (try (char '0' *> char' 'x' *> L.hexadecimal) 35 | <|> try (char '0' *> char' 'o' *> L.octal) 36 | <|> try L.decimal) 37 | 38 | stringLiteral :: Parser Text 39 | stringLiteral = do 40 | char '"' 41 | l <- manyTill L.charLiteral (char '"') 42 | return $ T.pack l 43 | 44 | float = lexeme L.float 45 | signedInteger = L.signed sc integer 46 | signedFloat = L.signed sc float 47 | parens = between (symbol "(") (symbol ")") 48 | brackets = between (symbol "[") (symbol "]") 49 | braces = between (symbol "{") (symbol "}") 50 | comma = symbol "," 51 | semi = symbol ";" 52 | commaSep p = p `sepBy` comma 53 | trailCommaSep p = p `sepEndBy` comma 54 | semiSep p = p `sepBy` semi 55 | 56 | reserved :: Text -> Parser () 57 | reserved w = string w *> notFollowedBy identChar *> sc 58 | 59 | reservedOp :: Text -> Parser () 60 | reservedOp w = string w *> notFollowedBy opChar *> sc 61 | 62 | identOp = lexeme $ some opChar 63 | 64 | upperIdentifier = lexeme $ try (do 65 | c <- 
upperChar 66 | T.cons c <$> idrest 67 | "uppercase identifier") 68 | 69 | identifier :: Parser Text 70 | identifier = lexeme $ try $ do 71 | ident <- identifierOrReserved 72 | when (ident `elem` keywords) $ unexpected . Label . NonEmpty.fromList $ "reserved " ++ (T.unpack ident) 73 | when (ident == "_") $ unexpected . Label . NonEmpty.fromList $ "wildcard" 74 | return ident 75 | 76 | idrest = takeWhileP Nothing (\ch -> isAlphaNum ch || ch == '_' || ch == '$') 77 | 78 | identifierOrReserved = lexeme $ try $ do 79 | c <- satisfy (\ch -> isAlpha ch || ch == '_' || ch == '$') 80 | T.cons c <$> idrest 81 | 82 | 83 | opChar :: Parser Char 84 | opChar = oneOf ("!$%&*+./<=>?@\\^|-~" :: String) 85 | 86 | operator :: Parser Text 87 | operator = do 88 | op <- some opChar 89 | lexeme $ return $ T.pack op 90 | -------------------------------------------------------------------------------- /src/lib/Lasca/Modules.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE TemplateHaskell #-} 3 | module Lasca.Modules where 4 | 5 | import Data.Maybe 6 | import qualified Data.Text as T 7 | import qualified Data.Text.IO as TIO 8 | import Text.Printf 9 | 10 | import Control.Monad.State 11 | import Control.Lens hiding ((<.>)) 12 | 13 | import Data.List 14 | import Data.IntMap.Strict ( IntMap ) 15 | import qualified Data.IntMap.Strict as IntMap 16 | import Data.Map.Strict ( Map ) 17 | import qualified Data.Map.Strict as Map 18 | 19 | import System.Environment 20 | import System.Exit 21 | import System.Directory 22 | import System.FilePath 23 | 24 | import Debug.Trace as Debug 25 | import qualified Text.Megaparsec as Megaparsec 26 | 27 | import Lasca.Syntax 28 | import Lasca.Parser 29 | import Lasca.Type 30 | 31 | data LascaModule = LascaModule { 32 | imports :: [LascaModule], 33 | moduleExprs :: [Expr], 34 | modName :: Name 35 | } 36 | 37 | instance Show LascaModule where 38 | show m = show (modName m) 39 | 40 | instance Eq LascaModule where 41 | lhs == rhs = modName lhs == modName rhs 42 | 43 | instance Ord LascaModule where 44 | compare lhs rhs = compare (modName lhs) (modName rhs) 45 | 46 | data Dependencies = Dependencies { 47 | _modsByLevel :: IntMap [LascaModule], 48 | _modLevel :: Map LascaModule Int 49 | } deriving (Show) 50 | makeLenses 'Dependencies 51 | 52 | calcModulesDependencies :: LascaModule -> Dependencies 53 | calcModulesDependencies lascaModule = execState 54 | (getMaxLevel lascaModule) 55 | (Dependencies {_modsByLevel = IntMap.empty, _modLevel = Map.empty}) 56 | where 57 | getMaxLevel :: LascaModule -> State Dependencies Int 58 | getMaxLevel m = do 59 | s <- get 60 | case Map.lookup m (s ^. modLevel) of 61 | Just l -> return l 62 | Nothing -> do 63 | let mods = imports m 64 | levels <- forM mods getMaxLevel 65 | let level = case levels of 66 | [] -> 0 67 | levels -> 1 + maximum levels 68 | modLevel %= Map.insert m level 69 | modsByLevel %= IntMap.alter (joinModules m) level 70 | return level 71 | where 72 | joinModules new Nothing = Just [new] 73 | joinModules new (Just mods) = Just $ new : mods 74 | 75 | linearizeIncludes :: LascaModule -> [LascaModule] 76 | linearizeIncludes lascaModule = do 77 | let all = calcModulesDependencies lascaModule 78 | let pathes = snd <$> IntMap.toList (all ^. 
modsByLevel) 79 | foldr (\mods path -> sort mods ++ path) [] pathes 80 | 81 | fixModuleAndImportPrelude :: FilePath -> [Expr] -> IO [Expr] 82 | fixModuleAndImportPrelude filename exprs = case exprs of 83 | (mod@(Module _ name): exprs) -> do 84 | when (takeBaseName filename /= T.unpack (last $ nameToList name)) $ 85 | die $ printf "Wrong module name in file %s. Module name should match file name, but was %s)" filename (show name) 86 | return $ mod : insertImportPrelude name exprs 87 | _ -> do 88 | let name = Name $ T.pack $ takeBaseName filename 89 | let mod = Module emptyMeta name 90 | return $ mod : insertImportPrelude name exprs 91 | 92 | insertImportPrelude :: Name -> [Expr] -> [Expr] 93 | insertImportPrelude name exprs = if name == Name "Prelude" then exprs else Import emptyMeta "Prelude" : exprs 94 | 95 | moduleSearchPaths :: IO [FilePath] 96 | moduleSearchPaths = do 97 | dir <- getCurrentDirectory 98 | lascaPathEnv <- lookupEnv "LASCAPATH" 99 | let lascaPaths = splitSearchPath $ fromMaybe "" lascaPathEnv 100 | absPaths <- mapM canonicalizePath lascaPaths 101 | existingPaths <- filterM doesDirectoryExist absPaths 102 | -- TODO add XDB paths 103 | return $ nub $ dir : existingPaths 104 | 105 | findModulePath :: [FilePath] -> Name -> IO FilePath 106 | findModulePath searchPaths name = do 107 | let relPath = path name <.> "lasca" 108 | result <- findFile searchPaths relPath 109 | case result of 110 | Just file -> return file 111 | Nothing -> error $ printf "Couldn't find module %s. Search path: %s" (show name) (show $ intercalate "," searchPaths) 112 | where 113 | path (Name n) = T.unpack n 114 | path (NS prefix n) = path prefix path n 115 | 116 | type Mapping = Map Name LascaModule 117 | 118 | loadModule :: [FilePath] -> Mapping -> [Name] -> FilePath -> Name -> IO (Mapping, LascaModule) 119 | loadModule searchPaths imported importPath absoluteFilePath name = do 120 | file <- TIO.readFile absoluteFilePath 121 | case parseToplevelFilename absoluteFilePath file of 122 | Left err -> die $ Megaparsec.parseErrorPretty err 123 | Right exprs -> do 124 | canonizedExprs <- fixModuleAndImportPrelude absoluteFilePath exprs 125 | let imports = getImports canonizedExprs 126 | (newImported, modules) <- loadImports searchPaths imported (name : importPath) imports 127 | let thisModule = LascaModule { modName = name, imports = modules, moduleExprs = canonizedExprs } 128 | return (Map.insert name thisModule newImported, thisModule) 129 | 130 | loadImports :: [FilePath] -> Mapping -> [Name] -> [Name] -> IO (Mapping, [LascaModule]) 131 | loadImports searchPaths imported importPath imports = do 132 | -- Debug.traceM $ printf "loadImports %s %s %s" (show imported) (show importPath) (show $ imports) 133 | foldM (\(imported, modules) name -> do 134 | (newImported, lascaModule) <- loadImport searchPaths imported importPath name 135 | return (Map.union imported newImported, lascaModule : modules) 136 | ) (imported, []) imports 137 | 138 | loadImport :: [FilePath] -> Mapping -> [Name] -> Name -> IO (Mapping, LascaModule) 139 | loadImport searchPaths imported importPath name = do 140 | -- Debug.traceM $ printf "loadImport %s %s %s" (show imported) (show importPath) (show name) 141 | when (name `elem` importPath) $ die (printf "Circular dependency in %s -> %s" (show importPath) (show name)) 142 | case name `Map.lookup` imported of 143 | Just lascaModule -> return (imported, lascaModule) 144 | Nothing -> do 145 | absoluteFilePath <- findModulePath searchPaths name 146 | loadModule searchPaths imported importPath 
absoluteFilePath name 147 | 148 | getImports :: [Expr] -> [Name] 149 | getImports exprs = foldl' folder [] exprs 150 | where 151 | folder imports (Import _ name) = name : imports 152 | folder imports _ = imports 153 | -------------------------------------------------------------------------------- /src/lib/Lasca/Options.hs: -------------------------------------------------------------------------------- 1 | module Lasca.Options ( 2 | LascaOpts(..), 3 | TypingMode(..), 4 | parseOptions, 5 | emptyLascaOpts 6 | ) where 7 | 8 | import Options.Applicative 9 | import Data.Semigroup ((<>)) 10 | import Data.Version 11 | import Paths_lasca (version) 12 | 13 | data TypingMode = Static | Dynamic deriving (Eq) 14 | instance Show TypingMode where 15 | show Static = "static" 16 | show Dynamic = "dynamic" 17 | 18 | instance Read TypingMode where 19 | readsPrec _ "static" = [(Static, "")] 20 | readsPrec _ "dynamic" = [(Dynamic, "")] 21 | readsPrec _ _ = [] 22 | 23 | data LascaOpts = LascaOpts 24 | { lascaFiles :: [String] 25 | , mode :: TypingMode 26 | , outputFile :: String 27 | , exec :: Bool 28 | , verboseMode :: Bool 29 | , printLLVMAsm :: Bool 30 | , printAst :: Bool 31 | , printTypes :: Bool 32 | , optimization :: Int 33 | } deriving (Show, Eq) 34 | 35 | emptyLascaOpts = LascaOpts { 36 | lascaFiles = [], 37 | mode = Static, 38 | outputFile = "", 39 | exec = False, 40 | verboseMode = False, 41 | printLLVMAsm = False, 42 | printAst = False, 43 | printTypes = False, 44 | optimization = 1 -- default to O1 to enable tail call optimization 45 | } 46 | 47 | optimizeOpt :: Parser Int 48 | optimizeOpt = option auto 49 | ( long "optimization-level" 50 | <> short 'O' 51 | <> value 0 52 | <> help "Optimization level for LLVM" ) 53 | 54 | lascaOptsParser :: Parser LascaOpts 55 | lascaOptsParser = LascaOpts 56 | <$> some (argument str (metavar "FILES...")) 57 | <*> option auto 58 | ( long "mode" 59 | <> short 'm' 60 | <> value Static 61 | <> help "Compiler mode. Options are [dynamic | static]. Static by default." 62 | ) 63 | <*> strOption 64 | ( short 'o' 65 | <> value "" 66 | <> help "Write output to FILE" 67 | ) 68 | <*> switch 69 | ( long "exec" 70 | <> short 'e' 71 | <> help "Execute immediately" ) 72 | <*> switch 73 | ( long "verbose" 74 | <> help "Verbose mode" ) 75 | <*> switch 76 | ( long "print-llvm" 77 | <> help "Print LLVM IR" ) 78 | <*> switch 79 | ( long "print-ast" 80 | <> help "Print AST" ) 81 | <*> switch 82 | ( long "print-types" 83 | <> help "Print inferred types" ) 84 | <*> optimizeOpt 85 | 86 | 87 | parseOptions = execParser opts 88 | where opts = info (helper <*> lascaOptsParser) 89 | ( fullDesc 90 | <> progDesc ("Lasca Compiler version " ++ v) 91 | <> header ("Lasca Compiler v" ++ v)) 92 | v = showVersion version -------------------------------------------------------------------------------- /src/lib/Lasca/Type.hs: -------------------------------------------------------------------------------- 1 | module Lasca.Type where 2 | 3 | import Data.List 4 | import Data.String 5 | import Data.Text (Text) 6 | import qualified Data.Text as T 7 | import qualified Data.Text.Encoding as Encoding 8 | import qualified Data.ByteString as BS 9 | import qualified Data.ByteString.Short as SBS 10 | import Data.Text.Prettyprint.Doc 11 | 12 | data Name = Name Text | NS Name Name deriving (Eq, Ord) 13 | 14 | instance IsString Name where 15 | fromString = Name . 
T.pack 16 | 17 | instance Show Name where 18 | show n = case n of 19 | Name s -> T.unpack s 20 | NS prefix n -> show prefix ++ "_" ++ show n 21 | 22 | nameToText n = case n of 23 | Name n -> n 24 | NS prefix n -> T.append (nameToText prefix) (T.cons '_' (nameToText n)) 25 | 26 | qualify mod name = if mod == defaultModuleQName then name else NS mod name 27 | 28 | qnameToString n = show n 29 | 30 | qname = Name 31 | 32 | textToSBS :: Text -> SBS.ShortByteString 33 | textToSBS = SBS.toShort . Encoding.encodeUtf8 34 | 35 | nameToSBS :: Name -> SBS.ShortByteString 36 | nameToSBS = textToSBS . nameToText 37 | 38 | nameToBS :: Name -> BS.ByteString 39 | nameToBS = Encoding.encodeUtf8 . nameToText 40 | 41 | nameToList (Name n) = [n] 42 | nameToList (NS prefix n) = nameToList prefix ++ nameToList n 43 | 44 | defaultModuleName = "Main" 45 | defaultModuleQName = Name defaultModuleName 46 | 47 | newtype TVar = TV Text 48 | deriving (Eq, Ord) 49 | 50 | instance Show TVar where 51 | show (TV s) = T.unpack s 52 | 53 | data Type 54 | = TVar TVar 55 | | TypeIdent Name 56 | | TypeFunc Type Type 57 | | TypeApply Type [Type] 58 | | Forall [TVar] Type 59 | deriving (Eq, Ord) 60 | 61 | instance Show Type where 62 | show (TVar (TV n)) = T.unpack n 63 | show (TypeIdent s) = show s 64 | show (TypeFunc l r) = "(" ++ show l ++ " -> " ++ show r ++ ")" 65 | show (TypeApply t args) = "(" ++ show t ++ foldl (\acc a -> acc ++ " " ++ show a) "" args ++ ")" 66 | show (Forall targs t) = "∀(" ++ intercalate "," (map show targs) ++ ") => " ++ show t 67 | 68 | instance Pretty Name where 69 | pretty n = case n of 70 | Name s -> pretty s 71 | NS prefix n -> pretty prefix <+> "_" <+> pretty n 72 | 73 | instance Pretty TVar where 74 | pretty (TV s) = pretty s 75 | 76 | instance Pretty Type where 77 | pretty t = case t of 78 | (TVar (TV n)) -> pretty n 79 | (TypeIdent s) -> pretty s 80 | (TypeFunc l r) -> parens $ pretty l <+> "->" <+> pretty r 81 | (TypeApply t args) -> parens $pretty t <+> foldl (\acc a -> acc <+> pretty a) "" args 82 | (Forall targs t) -> "∀" <> parens (hsep (punctuate comma (map pretty targs))) <+> "=>" <+> pretty t 83 | 84 | 85 | typeName tpe = case tpe of 86 | TypeIdent n -> n 87 | TypeApply t _ -> typeName t 88 | Forall _ t -> typeName t 89 | _ -> error $ "Should not happen. 
Type name can't be " ++ show tpe 90 | 91 | infixr `TypeFunc` 92 | 93 | pattern TypeByte = TypeIdent "Byte" 94 | pattern TypeInt = TypeIdent "Int" 95 | pattern TypeInt16 = TypeIdent "Int16" 96 | pattern TypeInt32 = TypeIdent "Int32" 97 | pattern TypeFloat = TypeIdent "Float" 98 | pattern TypeBool = TypeIdent "Bool" 99 | pattern TypeAny = TypeIdent "Any" 100 | pattern TypeString = TypeIdent "String" 101 | pattern TypeUnit = TypeIdent "Unit" 102 | pattern TypeArray t = TypeApply (TypeIdent "Array") [t] 103 | pattern TypeByteArray t = TypeApply (TypeIdent "ByteArray") [t] 104 | pattern TypeArrayInt = TypeArray TypeInt 105 | pattern TypeRef a = TypeApply (TypeIdent "Var") [a] 106 | 107 | isIntegralType (TypeIdent t) | t `elem` ["Byte", "Int", "Int16", "Int32"] = True 108 | isIntegralType _ = False 109 | 110 | 111 | isAny (TypeIdent "Any") = True 112 | isAny _ = False 113 | 114 | typeToList tpe = reverse $ go tpe [] 115 | where go (TypeFunc a b) acc = go b (a : acc) 116 | go (Forall tvars tpe) acc = go tpe acc 117 | go a acc = a : acc 118 | 119 | funcTypeArity this@(TypeFunc a b) = (length $ typeToList this) - 1 120 | funcTypeArity _ = 0 -------------------------------------------------------------------------------- /src/test/TestMain.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE ExtendedDefaultRules #-} 3 | {-# LANGUAGE TemplateHaskell #-} 4 | {-# OPTIONS_GHC -fno-warn-type-defaults #-} 5 | import Test.Tasty 6 | import Test.Tasty.SmallCheck as SC 7 | import Test.Tasty.QuickCheck as QC 8 | import Test.Tasty.HUnit 9 | import Test.Tasty.Golden as G 10 | import Test.Tasty.Program 11 | import System.FilePath 12 | import System.FilePath.Glob 13 | import System.Directory 14 | import Control.Exception as X 15 | import qualified Text.Megaparsec as Megaparsec 16 | import Shelly (shelly, run) 17 | 18 | import Data.Text (Text) 19 | import qualified Data.Text as T 20 | import qualified Data.Text.IO as TIO 21 | import Data.Text.Encoding as E 22 | import qualified Data.ByteString as BS 23 | import qualified Data.ByteString.Lazy as LBS 24 | 25 | import Lasca.Parser 26 | import Lasca.Syntax 27 | import Lasca.Infer 28 | import Lasca.Type 29 | import Lasca.Options 30 | import Lasca.Modules 31 | 32 | import Data.List 33 | import Data.Ord 34 | import Data.Foldable 35 | 36 | default (T.Text) 37 | 38 | main :: IO () 39 | main = do 40 | goldens <- foldMap mkGoldenTests examples 41 | defaultMain (testGroup "Tests" ([parserTests, parserTests2, modulesTests, typerTests] ++ goldens ++ compileTests)) 42 | 43 | modul n is = LascaModule { imports = is, moduleExprs = [], modName = n } 44 | 45 | prelude = modul "Prelude" [] 46 | arr = modul "Array" [prelude] 47 | opt = modul "Option" [prelude] 48 | lst = modul "List" [opt, prelude] 49 | test1 = modul "Test1" [test2] 50 | test2 = modul "Test2" [] 51 | queen = modul "Queen" [arr, lst, test1, test2] 52 | 53 | fromRight (Right a) = a 54 | 55 | modulesTests = testGroup "Module dependency tests" 56 | [ testCase "Linearize includes" $ linearizeIncludes queen @?= [prelude, test2, arr, opt, test1, lst, queen] ] 57 | 58 | parseOK s expected = testCase s $ case parseToplevel (T.pack s) of 59 | Right e -> assertEqual "" expected e 60 | Left e -> assertFailure $ (show expected) ++ " but got " ++ Megaparsec.parseErrorPretty e 61 | 62 | parseError s expected = testCase ("Error on " ++ s) $ case parseToplevel (T.pack s) of 63 | Right e -> assertFailure $ expected ++ " but got " ++ show e 64 | Left 
e -> assertEqual "" expected $ Megaparsec.parseErrorTextPretty e 65 | 66 | parseOkMatch s f = testCase s $ case parseToplevel (T.pack s) of 67 | Right p -> X.catch (f p) $ printErr (show p) 68 | Left e -> assertFailure $ Megaparsec.parseErrorPretty e 69 | 70 | printErr :: String -> SomeException -> IO () 71 | printErr got e = case fromException e of 72 | Just (PatternMatchFail s) -> assertFailure (s ++ "got " ++ got) 73 | nothing -> return () 74 | 75 | defaultModule = Module (withMetaPos 1 1) "" 76 | 77 | parseTypeOK s t = testCase s (parseType (T.pack s) @?= Right t) 78 | 79 | parserTests2 = testGroup "Parser tests" [ 80 | testCase "empty" $ parseToplevel "" @?= Right [] 81 | , parseOkMatch "module Test_1 . Test2 ; " 82 | (\ [Module _ (NS (Name "Test_1") (Name "Test2")) ] -> return ()) 83 | , parseOkMatch "import Test_1.Test2 ; import Test3" 84 | (\ [Import _ (NS (Name "Test_1") (Name "Test2")), Import _ "Test3" ] -> return ()) 85 | , parseError "module Test module Another" 86 | "unexpected 'm'\nexpecting end of input or import statement\n" 87 | , parseError "import Asdf. " 88 | "unexpected '.'\nexpecting end of input, import statement, or top-level declaration\n" 89 | , parseError "import ._$#@ " 90 | "unexpected '.'\nexpecting qualified identifier (like My.Qualified.Name or SomeName)\n" 91 | , parseOkMatch "data Void" (\ [Data _ "Void" _ _] -> return ()) 92 | , parseOkMatch "data Bool= |True|False" (\ [Data _ "Bool" [] [DataConst "True" [], DataConst "False" []]] -> return ()) 93 | , parseOkMatch "data User = U(n: String, f)" (\ [Data _ "User" [] [DataConst "U" _]] -> return ()) 94 | , parseError "data lower" "unexpected 'l'\nexpecting uppercase identifier\n" 95 | , parseError "data UppER=lower" "unexpected 'l'\nexpecting '|' or uppercase identifier\n" 96 | , parseTypeOK "String" (TypeIdent "String") 97 | , parseTypeOK "[Int]" (TypeApply (TypeIdent "Array") [TypeIdent "Int"]) 98 | , parseTypeOK "(a -> B) -> C" (TypeFunc (TypeFunc (TVar $ TV "a") (TypeIdent "B")) (TypeIdent "C")) 99 | ] 100 | 101 | parserTests = testGroup "Parser tests" 102 | [ testCase "Parse true" $ 103 | parseExpr "true" @?= Right (Literal emptyMeta (BoolLit True)) 104 | , testCase "Empty String" $ 105 | parseExpr "\"\"" @?= Right (Literal emptyMeta (StringLit "")) 106 | , testCase "Character Escaping" $ 107 | parseExpr "\"String\n\"" @?= Right (Literal emptyMeta (StringLit "String\n")) 108 | , testCase "String Interpolation" $ 109 | parseExpr "\"Hello\\t \\\\\\$${ test123 + 1 }\"" @?= Right (Apply emptyMeta (Ident emptyMeta (NS "Prelude" "concat")) [ 110 | Array emptyMeta [Literal emptyMeta $ StringLit "Hello\t \\$", 111 | Apply emptyMeta (Ident emptyMeta "toString") [Apply (withMetaPos 1 25) (Ident emptyMeta "+") [Ident emptyMeta "test123", Literal (withMetaPos 1 27) (IntLit 1)]]] 112 | ]) 113 | , testCase "Pattern matching" $ 114 | parseExpr "match true { true -> 1 }" @?= Right (Match emptyMeta (Literal emptyMeta (BoolLit True)) [ 115 | Case (LitPattern (BoolLit True)) (Literal (withMetaPos 1 24) (IntLit 1))]) 116 | , testCase "Pattern matching" $ 117 | parseExpr "match foo { Person(0, name, \"God\", None, _) -> 1 _ -> match false { true -> 4 } }" @?= Right ( 118 | Match emptyMeta (Ident emptyMeta "foo") [ 119 | Case (ConstrPattern "Person" [LitPattern (IntLit 0),VarPattern "name",LitPattern (StringLit "God"), 120 | ConstrPattern "None" [],WildcardPattern]) (Literal (withMetaPos 1 50) (IntLit 1)), 121 | Case WildcardPattern ( 122 | Match emptyMeta (Literal emptyMeta (BoolLit False)) [ 123 | Case (LitPattern 
(BoolLit True)) (Literal (withMetaPos 1 83) (IntLit 4))]) 124 | ]) 125 | ] 126 | 127 | typerTests = testGroup "Typer tests" 128 | [ 129 | testCase "Pattern matching" $ 130 | parseAndInferExpr "match true { true -> 1 false -> 2 }" @?= TypeInt 131 | ] 132 | 133 | data Mode = Dyn | Stat | Both 134 | data Config = Script { name :: String, compMode :: Mode, arguments :: [T.Text] } 135 | 136 | examples = [ 137 | Script "builtin.lasca" Both [], 138 | Script "Array.lasca" Both [], 139 | Script "ArrayBuffer.lasca" Both [], 140 | Script "String.lasca" Both [], 141 | Script "List.lasca" Both [], 142 | Script "binarytrees.lasca" Both ["10"], 143 | Script "Data.lasca" Both [], 144 | Script "dynamic.lasca" Dyn [], 145 | Script "Either.lasca" Both [], 146 | Script "factorial.lasca" Both ["15"], 147 | Script "hello.lasca" Both [], 148 | Script "lambda.lasca" Both [], 149 | Script "Map.lasca" Both [], 150 | Script "Option.lasca" Both [], 151 | Script "regex.lasca" Both [], 152 | Script "queen.lasca" Both [], 153 | Script "ski.lasca" Both [], 154 | Script "nbody.lasca" Both ["50000"], 155 | Script "nbody2.lasca" Both ["50000"], 156 | Script "nbody3.lasca" Both ["50000"] 157 | ] 158 | 159 | prependPath path script = script { name = path (name script) } 160 | withMode s m = s { compMode = m } 161 | 162 | mkGoldenTests s@(Script path mode args) = do 163 | let testName = takeBaseName path 164 | let goldenPath = "src" "test" "golden" replaceExtension path ".golden" 165 | let example = prependPath "examples" s 166 | let base = prependPath "libs/base" s 167 | e <- doesFileExist ("examples" path) 168 | let script = if e then example else base 169 | let tests = case mode of 170 | Both -> [ goldenVsString testName goldenPath (action (script `withMode` Stat)), 171 | goldenVsString testName goldenPath (action (script `withMode` Dyn))] 172 | _ -> [goldenVsString testName goldenPath (action script)] 173 | return tests 174 | where 175 | action (Script path mode args) = do 176 | let txtPath = T.pack path 177 | actual <- runLasca txtPath mode args 178 | let bs = E.encodeUtf8 actual 179 | return (LBS.fromStrict bs) 180 | 181 | runLasca path mode args = shelly $ do 182 | let extraArgs = case args of 183 | [] -> [] 184 | ars -> "--" : args 185 | case mode of 186 | Stat -> run "lasca" (["-e", "-O2", "--mode", "static", path] ++ extraArgs) 187 | Dyn -> run "lasca" (["-e", "-O2", "--mode", "dynamic", path] ++ extraArgs) 188 | Both -> do 189 | run "lasca" (["-e", "-O2", "--mode", "static", "--verbose", path] ++ extraArgs) 190 | run "lasca" (["-e", "-O2", "--mode", "dynamic", path] ++ extraArgs) 191 | 192 | compileTests = [ 193 | testProgram "Compile hello.lasca" "lasca" ["-O2", "-o", "hello", "examples/hello.lasca"] Nothing 194 | ] 195 | 196 | benchTests = testGroup "Bench" [ 197 | testCase "2 KLOC" $ parseAndInferFile "examples/gen.lasca", 198 | testCase "10 KLOC" $ parseAndInferFile "examples/gen10k.lasca" 199 | ] 200 | 201 | parseAndInferExpr str = let 202 | expr = fromRight $ parseExpr str 203 | Right (infered, _) = inferExpr (emptyCtx emptyLascaOpts) defaultTyenv expr 204 | in infered 205 | 206 | parseAndInferFile fname = do 207 | p <- TIO.readFile "libs/base/Prelude.lasca" 208 | let preludeExprs = fromRight $ parseToplevel p 209 | file <- TIO.readFile fname 210 | case parseToplevel file of 211 | Left err -> error $ Megaparsec.parseErrorPretty err 212 | Right ex -> do 213 | let exprs = preludeExprs ++ ex 214 | typeEnv <- typeCheck (emptyCtx emptyLascaOpts) exprs 215 | print typeEnv 216 | True @?= True 217 | 
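For reference, adding a script to the golden suite above only requires a new entry in the examples list plus a matching golden file. A minimal sketch follows; the script name and argument are hypothetical and not part of this repository:

-- Hypothetical entry for the `examples` list. mkGoldenTests would look for
-- examples/fizzbuzz.lasca (falling back to libs/base/fizzbuzz.lasca) and compare
-- the output of `lasca -e -O2` in both static and dynamic mode against
-- src/test/golden/fizzbuzz.golden.
fizzbuzzExample :: Config
fizzbuzzExample = Script "fizzbuzz.lasca" Both ["100"]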
-------------------------------------------------------------------------------- /src/test/golden/ArrayBuffer.golden: -------------------------------------------------------------------------------- 1 | ArrayBuffer_ArrayBuffer([, , ], 0) 2 | ArrayBuffer_ArrayBuffer([1, , ], 1) 3 | ArrayBuffer_ArrayBuffer([1, 2, ], 2) 4 | 4 6 5 | ArrayBuffer_ArrayBuffer([0, 1, 2, 3, , ], 4) 6 | ArrayBuffer_ArrayBuffer([0, 1, 2, 3, 4, ], 5) 7 | 20 12 8 | 20 24 9 | ArrayBuffer_ArrayBuffer([0, 1, 2, 3, 4, 5, 6, 7, 8, , , , , , , , , , , , , , , ], 9) 10 | ArrayBuffer_ArrayBuffer([0, 3, 4, 5, 6, 7, 8, 7, 8, , , , , , , , , , , , , , , ], 7) 11 | ArrayBuffer_ArrayBuffer([-2, -1, 0, 3, 4, 5, 7, 7, 7, 6, 7, 8, , , , , , , , , , , , ], 12) 12 | Test 12 false 42 13 | ArrayBuffer_ArrayBuffer([-2, 42, 0, 3, 4, 5, 7, 7, 7, 6, 7, 8, , , , , , , , , , , , ], 0) 14 | -------------------------------------------------------------------------------- /src/test/golden/Either.golden: -------------------------------------------------------------------------------- 1 | Left is true, right is left false, right is right true 2 | r1 should be 125 and is: Either_Right(125) 3 | -------------------------------------------------------------------------------- /src/test/golden/List.golden: -------------------------------------------------------------------------------- 1 | List_Cons(11, List_Cons(12, List_Nil)) 2 | Hello world! List_Cons(1, List_Cons(2, List_Nil)). Is empty: false, length = 2 3 | -------------------------------------------------------------------------------- /src/test/golden/Map.golden: -------------------------------------------------------------------------------- 1 | runtimeCompare = -1 2 | Test isEmpty should be true: true 3 | Test size should be 0: 0 4 | Test isEmpty should be false: false 5 | Test isEmpty should be true: true 6 | Test size should be 1: 1 7 | Lookup should be one: Option_Some(one) 8 | Lookup should be none: Option_None 9 | Test size should be 2: 2 10 | Lookup should be two: Option_Some(two) 11 | Lookup should be none: Option_None 12 | Test size should be 3: 3 13 | Lookup should be three: Option_Some(three) 14 | Lookup should be none: Option_None 15 | Test size should be 4: 4 16 | Lookup should be four: Option_Some(four) 17 | 4 is member of four: true 18 | Lookup should be none: Option_None 19 | Map_Bin(4, 2, two, Map_Bin(1, 1, one, Map_Tip, Map_Tip), Map_Bin(2, 3, three, Map_Tip, Map_Bin(1, 4, four, Map_Tip, Map_Tip))) 20 | 1000 21 | 999 22 | -------------------------------------------------------------------------------- /src/test/golden/Option.golden: -------------------------------------------------------------------------------- 1 | Even Option_None is Option_Some(true) 2 | -------------------------------------------------------------------------------- /src/test/golden/String.golden: -------------------------------------------------------------------------------- 1 | 84 2 | 868 3 | 84 4 | 101 5 | 225 6 | 115 7 | 116 8 | 117 9 | 868 10 | [84, 101, 225, 115, 116, 117, 868] 11 | 117 12 | length in codepoints = 7, length in bytes = 9, length in graphemes = 6 13 | 15 14 | toLower T = t teástuͤ 15 | toUpper å = Å TEÁSTUͤ 16 | toTitle å = Å TEÁSTUͤ 17 | capitalize Å Teástuͤ Aßdƒ 18 | compare -1 0 1 -1 0 1 19 | replace TeástT 20 | Teástuͤ startsWith T: true, endsWith uͤ: true 21 | Teástuͤ startsWith uͤ: false, endsWith T: false 22 | Code point 123 is valid Unicode Scalar: true 23 | Surrogate code point 55296 is valid Unicode Scalar: false 24 | Code point 1114112 is valid Unicode Scalar: false 25 | 0 
is digit: true, 9 is digit: true, 'a' is digit: false 26 | String_DecimalNumber String_LowercaseLetter String_UppercaseLetter String_Space String_MathSymbol String_CurrencySymbol 27 | å is letter true, 1 is letter false 28 | å is numeric false, 1 is numeric true, ¾ is numeric true 29 | ' ' is space true, '\t' is space true, '\r' is space true, '\n' is space true, 'U+0085' is space true 30 | 1, 2 31 | -------------------------------------------------------------------------------- /src/test/golden/array.golden: -------------------------------------------------------------------------------- 1 | [a, a, b, a, a, a, a, a, a, a] 2 | [b, b, b, b, b, b, b, b, b, b] 3 | [0, 1, 2, 3, 4] 4 | [2, 5, 8] 5 | [a, a, b, a, a, a, a, a, a, a, b, b, b, b, b, b, b, b, b, b] 6 | [a, a, b, a, b, b, b, b, b, a] 7 | Hello 8 | -------------------------------------------------------------------------------- /src/test/golden/binarytrees.golden: -------------------------------------------------------------------------------- 1 | stretch tree of depth 11 check: 4095 2 | 1024 trees of depth 4 check: 31744 3 | 256 trees of depth 4 check: 32512 4 | 64 trees of depth 4 check: 32704 5 | 16 trees of depth 4 check: 32752 6 | long lived tree of depth 10 check: 2047 7 | -------------------------------------------------------------------------------- /src/test/golden/builtin.golden: -------------------------------------------------------------------------------- 1 | 1234567890 -1234567890 true false $123.456000000 -0.001234500 127 -128 String () [1, 2] 3735928559 -493 2 | 4 3 | 0 4 | 5 5 | 0 6 | 24 7 | -9223372036854775808 8 | 1 9 | -1 10 | 63 11 | 64 12 | 4 13 | 0 14 | 5 15 | 0 16 | -128 17 | -1 18 | false 19 | true 20 | true 21 | true 22 | true 23 | 1 24 | 1 25 | Correct String 26 | 777 27 | 5 28 | 16 29 | 3 30 | 2 31 | 1 32 | -------------------------------------------------------------------------------- /src/test/golden/data.golden: -------------------------------------------------------------------------------- 1 | Data_Cons(1, Data_Cons(2, Data_Nil)) 2 | Data_Ident(test) 3 | test 4 | Data_No 5 | true 6 | Hello 7 | -------------------------------------------------------------------------------- /src/test/golden/dynamic.golden: -------------------------------------------------------------------------------- 1 | 1 2 | -------------------------------------------------------------------------------- /src/test/golden/factorial.golden: -------------------------------------------------------------------------------- 1 | Factorial of 15 is: 1307674368000 2 | -------------------------------------------------------------------------------- /src/test/golden/hello.golden: -------------------------------------------------------------------------------- 1 | Вітаю, Світе! Будь Lasca. 
2 | -------------------------------------------------------------------------------- /src/test/golden/lambda.golden: -------------------------------------------------------------------------------- 1 | 6 2 | Hello 3 | -------------------------------------------------------------------------------- /src/test/golden/nbody.golden: -------------------------------------------------------------------------------- 1 | -0.169075164 2 | -0.169078071 3 | -------------------------------------------------------------------------------- /src/test/golden/nbody2.golden: -------------------------------------------------------------------------------- 1 | -0.169075164 2 | -0.169078071 3 | -------------------------------------------------------------------------------- /src/test/golden/nbody3.golden: -------------------------------------------------------------------------------- 1 | -0.169075164 2 | -0.169078071 3 | -------------------------------------------------------------------------------- /src/test/golden/queen.golden: -------------------------------------------------------------------------------- 1 | GSTQ 2 | GSTU 3 | GSTE 4 | GSTE 5 | GSTN 6 | GSHQ 7 | GSHU 8 | GSHE 9 | GSHE 10 | GSHN 11 | GSEQ 12 | GSEU 13 | GSEE 14 | GSEE 15 | GSEN 16 | GATQ 17 | GATU 18 | GATE 19 | GATE 20 | GATN 21 | GAHQ 22 | GAHU 23 | GAHE 24 | GAHE 25 | GAHN 26 | GAEQ 27 | GAEU 28 | GAEE 29 | GAEE 30 | GAEN 31 | GVTQ 32 | GVTU 33 | GVTE 34 | GVTE 35 | GVTN 36 | GVHQ 37 | GVHU 38 | GVHE 39 | GVHE 40 | GVHN 41 | GVEQ 42 | GVEU 43 | GVEE 44 | GVEE 45 | GVEN 46 | GETQ 47 | GETU 48 | GETE 49 | GETE 50 | GETN 51 | GEHQ 52 | GEHU 53 | GEHE 54 | GEHE 55 | GEHN 56 | GEEQ 57 | GEEU 58 | GEEE 59 | GEEE 60 | GEEN 61 | OSTQ 62 | OSTU 63 | OSTE 64 | OSTE 65 | OSTN 66 | OSHQ 67 | OSHU 68 | OSHE 69 | OSHE 70 | OSHN 71 | OSEQ 72 | OSEU 73 | OSEE 74 | OSEE 75 | OSEN 76 | OATQ 77 | OATU 78 | OATE 79 | OATE 80 | OATN 81 | OAHQ 82 | OAHU 83 | OAHE 84 | OAHE 85 | OAHN 86 | OAEQ 87 | OAEU 88 | OAEE 89 | OAEE 90 | OAEN 91 | OVTQ 92 | OVTU 93 | OVTE 94 | OVTE 95 | OVTN 96 | OVHQ 97 | OVHU 98 | OVHE 99 | OVHE 100 | OVHN 101 | OVEQ 102 | OVEU 103 | OVEE 104 | OVEE 105 | OVEN 106 | OETQ 107 | OETU 108 | OETE 109 | OETE 110 | OETN 111 | OEHQ 112 | OEHU 113 | OEHE 114 | OEHE 115 | OEHN 116 | OEEQ 117 | OEEU 118 | OEEE 119 | OEEE 120 | OEEN 121 | DSTQ 122 | DSTU 123 | DSTE 124 | DSTE 125 | DSTN 126 | DSHQ 127 | DSHU 128 | DSHE 129 | DSHE 130 | DSHN 131 | DSEQ 132 | DSEU 133 | DSEE 134 | DSEE 135 | DSEN 136 | DATQ 137 | DATU 138 | DATE 139 | DATE 140 | DATN 141 | DAHQ 142 | DAHU 143 | DAHE 144 | DAHE 145 | DAHN 146 | DAEQ 147 | DAEU 148 | DAEE 149 | DAEE 150 | DAEN 151 | DVTQ 152 | DVTU 153 | DVTE 154 | DVTE 155 | DVTN 156 | DVHQ 157 | DVHU 158 | DVHE 159 | DVHE 160 | DVHN 161 | DVEQ 162 | DVEU 163 | DVEE 164 | DVEE 165 | DVEN 166 | DETQ 167 | DETU 168 | DETE 169 | DETE 170 | DETN 171 | DEHQ 172 | DEHU 173 | DEHE 174 | DEHE 175 | DEHN 176 | DEEQ 177 | DEEU 178 | DEEE 179 | DEEE 180 | DEEN 181 | -------------------------------------------------------------------------------- /src/test/golden/regex.golden: -------------------------------------------------------------------------------- 1 | true 2 | Haskell (consider Lasca instead of Haskell) or Python (consider Lasca instead of Python) 3 | -------------------------------------------------------------------------------- /src/test/golden/ski.golden: -------------------------------------------------------------------------------- 1 | Hello 2 | 
-------------------------------------------------------------------------------- /stack-shell.nix: -------------------------------------------------------------------------------- 1 | with (import <nixpkgs> {}); 2 | {ghc ? haskell.compiler.ghc822}: 3 | let 4 | lascart = pkgs.callPackage ./lascart.nix {}; 5 | in haskell.lib.buildStackProject { 6 | name = "lasca"; 7 | buildInputs = [ lascart boehmgc pcre2 ]; 8 | src = ./.; 9 | } -------------------------------------------------------------------------------- /stack.yaml: -------------------------------------------------------------------------------- 1 | resolver: lts-12.9 2 | extra-deps: 3 | - multiset-0.3.4 4 | nix: 5 | enable: false 6 | shell-file: stack-shell.nix --------------------------------------------------------------------------------
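Finally, as an illustration only (not a file in the repository), the compiler phases defined in Lasca.Compiler can also be driven programmatically, e.g. from GHCi; the option values below are illustrative:

import Lasca.Options (LascaOpts(..), emptyLascaOpts)
import Lasca.Compiler (processMainFile)

-- Compile examples/hello.lasca and run it immediately via the OrcJIT,
-- mirroring what `lasca -e` does on the command line.
demo :: IO ()
demo = processMainFile opts "examples/hello.lasca"
  where
    opts = emptyLascaOpts
        { lascaFiles = ["examples/hello.lasca"]
        , exec = True
        , verboseMode = True
        , optimization = 2
        }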