├── .cirrus.yml ├── .clang-format ├── .clang-tidy ├── .gitignore ├── LICENSE ├── README.md ├── THIRD_PARTY.md ├── examples ├── ISO │ ├── Hello.mod │ └── Mandelbrot.mod └── PIM4 │ └── gcdlcm.mod ├── include ├── asttool │ ├── ASTDefinition.h │ ├── Class.h │ ├── ClassBuilder.h │ ├── ClassEmitter.h │ ├── Diagnostic.h │ ├── Lexer.h │ ├── Main.h │ ├── Parser.h │ ├── VarStore.h │ ├── Variables.def │ ├── ast.inc │ └── asttool.inc ├── lltool │ ├── Algo.h │ ├── Diagnostic.h │ ├── Grammar.h │ ├── GrammarBuilder.h │ ├── Lexer.h │ ├── Main.h │ ├── Node.h │ ├── Parser.h │ ├── RDPEmitter.h │ ├── VarStore.h │ ├── Variables.def │ └── lltool.g.inc └── m2lang │ ├── AST │ ├── PervasiveFunctions.def │ └── PervasiveTypes.def │ └── Basic │ └── TokenKinds.def ├── lib ├── AST │ ├── AST.cppm │ ├── ASTContext.cppm │ ├── Module.cppm │ ├── PervasiveKinds.cppm │ ├── ast.ast │ ├── ast.inc │ └── xmake.lua ├── ASTtool │ ├── ClassBuilder.cpp │ ├── ClassEmitter.cpp │ ├── Diagnostic.cpp │ ├── Lexer.cpp │ ├── Main.cpp │ ├── Parser.cpp │ ├── README.md │ ├── VarStore.cpp │ ├── asttool.ast │ ├── asttool.g │ └── xmake.lua ├── Basic │ ├── Basic.cppm │ ├── Config.cppm │ ├── Diagnostic.cppm │ ├── Diagnostic.def │ ├── LangOptions.cppm │ ├── LangOptions.def │ ├── TargetInfo.cppm │ ├── TargetOptions.cppm │ ├── TokenKinds.cppm │ ├── Version.cppm │ ├── Version.inc.in │ └── xmake.lua ├── CodeGen │ ├── CGDebug.h │ ├── CGModule.cppm │ ├── CGProcedure.cppm │ ├── CGTBAA.cppm │ ├── CGUnit.cppm │ ├── CodeGenerator.cppm │ └── xmake.lua ├── LLVM │ ├── ADT.cppm │ ├── llvm.cppm │ └── xmake.lua ├── LLtool │ ├── Algo.cpp │ ├── Diagnostic.cpp │ ├── Grammar.cpp │ ├── GrammarBuilder.cpp │ ├── Lexer.cpp │ ├── Main.cpp │ ├── Parser.cpp │ ├── RDPEmitter.cpp │ ├── README.md │ ├── VarStore.cpp │ ├── lltool.g │ └── xmake.lua ├── Lexer │ ├── DirectiveParser.g │ ├── Lexer.cppm │ ├── Module.cppm │ ├── Preprocessor.cppm │ ├── Token.cppm │ └── xmake.lua ├── Parser │ ├── M2Parser.cppm │ ├── Modula-2.g │ ├── Parser.cppm │ └── xmake.lua └── 
Sema │ ├── Sema.cppm │ └── xmake.lua ├── scripts └── vcenv.bat ├── test ├── M2 │ ├── CodeGen │ │ ├── Array1.mod │ │ ├── Call.mod │ │ ├── CheckModVisibility.mod │ │ ├── CheckModVisibility2.mod │ │ ├── CheckModVisibility3.mod │ │ ├── Enum.mod │ │ ├── For.mod │ │ ├── IfElse.mod │ │ ├── LocalMod.mod │ │ ├── Loop.mod │ │ ├── OpenArray.mod │ │ ├── PassByRef.mod │ │ ├── Pointer.mod │ │ ├── Repeat.mod │ │ └── While.mod │ └── Directives │ │ └── Nested.mod ├── Unit │ ├── lit.cfg.py │ └── lit.site.cfg.py.in ├── lit.cfg.py └── lit.site.cfg.py.in ├── tools └── driver │ ├── driver.cpp │ └── xmake.lua ├── unittests ├── LLtool │ ├── AlgoTest.cpp │ └── FirstFollowSetTest.cpp └── Lexer │ └── LexerTest.cpp ├── utils ├── ASTtool │ ├── ASTtool.cpp │ └── xmake.lua ├── FileCheck-19.1 │ └── FileCheck.cpp ├── LLtool │ ├── LLtool.cpp │ └── xmake.lua ├── README.md ├── count │ └── count.c ├── m2lang-lit │ └── llvm-lit.in ├── not │ └── not.cpp └── vscode │ └── README.md ├── xmake.lua └── xmake ├── local-repo └── packages │ └── l │ └── llvm │ └── xmake.lua └── rules └── xmake.lua /.cirrus.yml: -------------------------------------------------------------------------------- 1 | task: 2 | name: FreeBSD 14.2 3 | freebsd_instance: 4 | image_family: freebsd-14-2 5 | environment: 6 | # The $CIRRUS_WORKING_DIR is /tmp/cirrus-ci-build. It contains a clone 7 | # of the repository. 
8 | LLVM_ROOT_DIR: /usr/local/llvm19 9 | CC: $LLVM_ROOT_DIR/bin/clang 10 | BUILD_DIR: $CIRRUS_WORKING_DIR/../build 11 | CACHE_DIR: $CIRRUS_WORKING_DIR/../cache 12 | XMAKE_GLOBALDIR: $CACHE_DIR/bin/.xmake 13 | XMAKE_ROOTDIR: $CACHE_DIR/bin 14 | XMAKE_PROGRAM_DIR: $CACHE_DIR/share/xmake 15 | XMAKE_ROOT: y 16 | PATH: $CACHE_DIR/bin:${LLVM_ROOT_DIR}/bin:${PATH} 17 | install_tools_script: | 18 | pkg install -y git llvm19 googletest bash gmake wget 19 | local_cache: 20 | folder: ../cache 21 | fingerprint_key: 20241229_b3_$CIRRUS_OS 22 | populate_script: | 23 | mkdir -p $CACHE_DIR 24 | wget https://github.com/xmake-io/xmake/releases/download/v2.9.7/xmake-v2.9.7.tar.gz 25 | tar xzf xmake-v2.9.7.tar.gz 26 | cd xmake-2.9.7 27 | ./configure 28 | gmake 29 | prefix=$CACHE_DIR ./scripts/get.sh __local__ __install_only__ 30 | xmake repo --update 31 | checkenv_script: | 32 | sysctl -n hw.ncpu 33 | xmake --version 34 | lit --version 35 | clang --version 36 | clang++ --version 37 | compile_script: | 38 | mkdir $BUILD_DIR 39 | xmake config --buildir=$BUILD_DIR --toolchain=clang -v 40 | xmake build 41 | xmake run driver --version 42 | test_script: | 43 | xmake run driver --version 44 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: LLVM 2 | -------------------------------------------------------------------------------- /.clang-tidy: -------------------------------------------------------------------------------- 1 | Checks: '-*,clang-diagnostic-*,llvm-*,misc-*,-misc-unused-parameters,-misc-non-private-member-variables-in-classes,-readability-identifier-naming' 2 | # Note that the readability-identifier-naming check is disabled, there are too 3 | # many violations in the codebase and they create too much noise in clang-tidy 4 | # results. 
5 | # Naming settings are kept for documentation purposes and allowing to run the 6 | # check if the users would override this file, e.g. via a command-line arg. 7 | CheckOptions: 8 | - key: readability-identifier-naming.ClassCase 9 | value: CamelCase 10 | - key: readability-identifier-naming.EnumCase 11 | value: CamelCase 12 | - key: readability-identifier-naming.FunctionCase 13 | value: camelBack 14 | - key: readability-identifier-naming.MemberCase 15 | value: CamelCase 16 | - key: readability-identifier-naming.ParameterCase 17 | value: CamelCase 18 | - key: readability-identifier-naming.UnionCase 19 | value: CamelCase 20 | - key: readability-identifier-naming.VariableCase 21 | value: CamelCase 22 | - key: readability-identifier-naming.IgnoreMainLikeFunctions 23 | value: 1 24 | 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | #==============================================================================# 2 | # This file specifies intentionally untracked files that git should ignore. 3 | # See: http://www.kernel.org/pub/software/scm/git/docs/gitignore.html 4 | #==============================================================================# 5 | 6 | #==============================================================================# 7 | # File extensions to be ignored anywhere in the tree. 8 | #==============================================================================# 9 | # Temp files created by most text editors. 10 | *~ 11 | # Merge files created by git. 12 | *.orig 13 | # Byte compiled python modules. 14 | *.pyc 15 | # vim swap files 16 | .*.sw? 17 | .sw? 18 | #OS X specific files. 19 | .DS_store 20 | 21 | # Nested build directory 22 | /build 23 | 24 | #==============================================================================# 25 | # Directories to ignore (do not add trailing '/'s, they skip symlinks). 
26 | #==============================================================================# 27 | # VS2017 and VSCode config files. 28 | .vscode 29 | .vs 30 | # clangd index 31 | .clangd 32 | # xmake config files. 33 | .xmake 34 | 35 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | m2lang - The LLVM-based Modula-2 compiler 2 | ========================================= 3 | 4 | [![Build Status](https://img.shields.io/cirrus/github/redstar/m2lang/master?logo=Cirrus%20CI&label=Cirrus%20CI)][1] 5 | 6 | Introduction 7 | ------------ 8 | 9 | See this [FOSDEM 2019 talk](https://fosdem.org/2019/schedule/event/llvm_irgen/) for some details. 10 | 11 | Implementation decisions 12 | ------------------------ 13 | 14 | - LLVM is the only external dependency 15 | - Uses C++20 modules 16 | - xmake for building 17 | - ISO Modula-2 language first 18 | - Uses a "declare-before-use" (single-pass) model 19 | - Goal is to target several platforms from the beginning 20 | 21 | Current status 22 | -------------- 23 | 24 | - The lexer is done. 25 | - The preprocessor (for handling of directives) is based on the draft 26 | technical report ["Interfacing Modula-2 to C", Annex B](http://www.zi.biologie.uni-muenchen.de/~enger/SC22WG13/im2c-981130.html#TR-AXI-PRAGMAS) 27 | and aims to be compatible with the [Macintosh p1 compiler](https://modula2.awiedemann.de/manual/comp4.html#L4_2). 28 | Parsing of directives is implemented, with mostly no functionality. 29 | - The parser is based on ISO Modula-2 with generic and OO additions. 30 | Some LL(1) conflicts are still left in the grammar, so not every source is parsed correctly. 31 | - The parser is generated by [LLtool](https://github.com/redstar/LLtool). A C++ 32 | port of LLtool is now integrated into this project, but not yet enabled. 
33 | - Error recovery in the parser is based on "panic mode", using the follow sets 34 | of the current and the active callers. 35 | - The AST nodes are generated by [ASTtool](lib/ASTtool). 36 | - Error messages are outputted using llvm::SourceMgr for nice presentation. 37 | - Only a dummy driver exists to see the parser in action. 38 | - A couple of IR statements are emitted to the console after successful parsing. 39 | 40 | [1]: https://cirrus-ci.com/github/redstar/m2lang "Cirrus CI Build Status" 41 | -------------------------------------------------------------------------------- /THIRD_PARTY.md: -------------------------------------------------------------------------------- 1 | Some of the test cases are based on the examples given in the book 2 | 3 | Modula-2: Abstractions for Data and 4 | Programming Structures 5 | (Using ISO-Standard Modula-2) 6 | 2004-2005 Edition 7 | by 8 | Richard J. Sutcliffe 9 | 10 | See https://www.arjay.bc.ca/Modula-2/Text/index.html 11 | -------------------------------------------------------------------------------- /examples/ISO/Hello.mod: -------------------------------------------------------------------------------- 1 | MODULE Hello; 2 | 3 | FROM STextIO IMPORT WriteString, WriteLn; 4 | 5 | BEGIN 6 | WriteString("Hello Modula-2 world"); 7 | WriteLn 8 | END Hello. 9 | -------------------------------------------------------------------------------- /examples/ISO/Mandelbrot.mod: -------------------------------------------------------------------------------- 1 | MODULE Mandelbrot; 2 | (* 3 | * Computes an ASCII-based picture of a specified part of the Mandelbrot set. 4 | * 5 | * Change define values, then recompile: 6 | * MAX_ITERATION, ..., RESOLUTION 7 | * 8 | * Based on the sample program mandelbrot2.c in the AIX 5L Porting Guide 9 | * http://www.redbooks.ibm.com/abstracts/sg246034.html?Open 10 | * 11 | *) 12 | 13 | IMPORT STextIO; 14 | 15 | (* Values can be changed for different output. 
*) 16 | CONST 17 | MAX_ITERATION = 262144; 18 | MAX_LENGTH = 100.0; 19 | X_MIN = -2.1; 20 | Y_MIN = -1.1; 21 | X_MAX = 0.7; 22 | Y_MAX = 1.1; 23 | RESOLUTION = 24; (* Vertical resolution, horizontal is then derived *) 24 | 25 | (* Do not change the following values. *) 26 | CONST 27 | COLORS = " -:=+oxOX@#"; 28 | xres = TRUNC(LFLOAT(RESOLUTION)*3.2)-1; 29 | yres = RESOLUTION-1; 30 | xmin = X_MIN; 31 | ymin = Y_MIN; 32 | xstep = (X_MAX - X_MIN) / (LFLOAT(RESOLUTION)*3.2); 33 | ystep = (Y_MAX - Y_MIN) / LFLOAT(RESOLUTION); 34 | 35 | VAR 36 | pixels: ARRAY [0..yres] OF ARRAY [0..xres] OF CARDINAL; 37 | 38 | (***************************************** 39 | ** Compute row specified in y 40 | ******************************************) 41 | 42 | PROCEDURE Row(y: CARDINAL); 43 | VAR 44 | x: CARDINAL; 45 | iteration: CARDINAL; 46 | z1, z2, t1: LONGREAL; 47 | cindx, exp : CARDINAL; 48 | BEGIN 49 | FOR x := 0 TO xres DO 50 | iteration := 0; 51 | z1 := 0.0; 52 | z2 := 0.0; 53 | REPEAT 54 | t1 := z1*z1 - z2*z2 + (xmin + LFLOAT(x)*xstep); 55 | z2 := 2.0 * z1*z2 + (ymin + LFLOAT(y)*ystep); 56 | z1 := t1; 57 | INC(iteration); 58 | UNTIL (iteration >= MAX_ITERATION) OR (z1*z1 + z2*z2 >= MAX_LENGTH); 59 | cindx := 0; 60 | exp := 1; 61 | REPEAT 62 | INC(cindx); 63 | exp := 2*exp; 64 | UNTIL exp >= iteration; 65 | IF iteration >= MAX_ITERATION THEN 66 | pixels[y][x] := 0; 67 | ELSE 68 | pixels[y][x] := cindx MOD LENGTH(COLORS); 69 | END 70 | END 71 | END Row; 72 | 73 | VAR 74 | x, y: CARDINAL; 75 | BEGIN 76 | (* Compute one row at a time *) 77 | FOR y := 0 TO yres DO 78 | Row(y); 79 | END; 80 | 81 | (* Print out fractal pixels *) 82 | FOR y := 0 TO yres DO 83 | FOR x := 0 TO xres DO 84 | STextIO.WriteChar(COLORS[pixels[y][x]]) 85 | END; 86 | STextIO.WriteLn 87 | END 88 | END Mandelbrot. 
89 | -------------------------------------------------------------------------------- /examples/PIM4/gcdlcm.mod: -------------------------------------------------------------------------------- 1 | MODULE gcdlcm; 2 | FROM InOut IMPORT ReadInt, WriteLn, WriteString, WriteInt; 3 | VAR x, y, u, v: INTEGER; 4 | BEGIN 5 | WriteString("x = "); ReadInt(x); WriteLn; 6 | WriteString("y = "); ReadInt(y); 7 | u := x; v := y; 8 | WHILE x # y DO 9 | IF x > y THEN 10 | x := x - y; u := u + v 11 | ELSE 12 | y := y - x; v := v + u 13 | END 14 | END; 15 | WriteInt(x, 6); WriteInt((u+v) DIV 2, 6); WriteLn 16 | END gcdlcm. 17 | -------------------------------------------------------------------------------- /include/asttool/ASTDefinition.h: -------------------------------------------------------------------------------- 1 | //===--- ASTDefinition.h - Definition of AST --------------------*- C++ -*-===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the class holding the complete definition of an AST. 
11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | #ifndef ASTTOOL_ASTDEFINITION_H 15 | #define ASTTOOL_ASTDEFINITION_H 16 | 17 | #include "asttool/Class.h" 18 | #include "llvm/ADT/DenseMap.h" 19 | #include "llvm/ADT/MapVector.h" 20 | 21 | namespace asttool { 22 | 23 | class ASTDefinition { 24 | llvm::DenseMap Typedefs; 25 | llvm::SmallMapVector Classes; 26 | 27 | public: 28 | ASTDefinition(llvm::DenseMap Typedefs, 29 | llvm::SmallMapVector Classes) 30 | : Typedefs(Typedefs), Classes(Classes) {} 31 | 32 | llvm::DenseMap &getTypedefs() { 33 | return Typedefs; 34 | }; 35 | llvm::SmallMapVector &getClasses() { 36 | return Classes; 37 | }; 38 | }; 39 | } // namespace asttool 40 | #endif 41 | -------------------------------------------------------------------------------- /include/asttool/Class.h: -------------------------------------------------------------------------------- 1 | //===--- Class.h - ASTtool data type for classes ----------------*- C++ -*-===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the Class data structure. 
11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | #ifndef ASTTOOL_CLASS_H 15 | #define ASTTOOL_CLASS_H 16 | 17 | #include "llvm/ADT/SmallVector.h" 18 | #include "llvm/ADT/StringRef.h" 19 | #include "llvm/Support/SMLoc.h" 20 | 21 | namespace asttool { 22 | 23 | #define AST_DECLARATION 24 | #include "asttool/ast.inc" 25 | 26 | using MemberList = llvm::SmallVector; 27 | using LetList = llvm::SmallVector; 28 | } // namespace asttool 29 | #endif 30 | -------------------------------------------------------------------------------- /include/asttool/ClassBuilder.h: -------------------------------------------------------------------------------- 1 | //===--- ClassBuilder.h - ASTtool builder class -----------------*- C++ -*-===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the ClassBuilder helper class. 
11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | #ifndef ASTTOOL_CLASSBUILDER_H 15 | #define ASTTOOL_CLASSBUILDER_H 16 | 17 | #include "asttool/ASTDefinition.h" 18 | #include "asttool/Diagnostic.h" 19 | #include "asttool/VarStore.h" 20 | #include "llvm/ADT/DenseMap.h" 21 | #include "llvm/ADT/SmallVector.h" 22 | #include "llvm/ADT/StringRef.h" 23 | #include "llvm/Support/SMLoc.h" 24 | 25 | namespace asttool { 26 | 27 | class ClassBuilder { 28 | private: 29 | Diagnostic &Diag; 30 | 31 | llvm::StringRef LanguageName; 32 | llvm::SMLoc LanguageLoc; 33 | 34 | llvm::DenseMap Typedefs; 35 | 36 | llvm::SmallMapVector Classes; 37 | 38 | VarStore Variables; 39 | 40 | void error(llvm::SMLoc Loc, llvm::Twine Msg); 41 | void warning(llvm::SMLoc Loc, llvm::Twine Msg); 42 | void note(llvm::SMLoc Loc, llvm::Twine Msg); 43 | 44 | public: 45 | ClassBuilder(Diagnostic &Diag) : Diag(Diag) {} 46 | ASTDefinition build(); 47 | const VarStore &varStore() { return Variables; } 48 | 49 | void actOnLanguage(Identifier Name); 50 | void actOnTypedef(Identifier Name, llvm::StringRef Code); 51 | void finalizeTypedefs(); 52 | void actOnTypedecl(Class::ClassType CType, Identifier Name, Class *SuperClass, 53 | MemberList &Body, LetList &LetDefintions); 54 | void actOnField(llvm::SmallVectorImpl &MemberList, 55 | unsigned Properties, Identifier Name, Identifier TypeName, 56 | bool TypeIsList, bool IsDefault, llvm::StringRef Code); 57 | void actOnEnum(llvm::SmallVectorImpl &MemberList, Identifier Name, 58 | llvm::StringRef Code); 59 | void actOnLet(llvm::SmallVectorImpl &LetList, Identifier Name, 60 | Class *SuperClass, bool IsDefault, llvm::StringRef Code); 61 | void actOnSuperClass(Class *&SuperClass, Identifier Name); 62 | void actOnPropertyIn(unsigned &Properties, llvm::SMLoc Loc); 63 | void actOnPropertyOut(unsigned &Properties, llvm::SMLoc Loc); 64 | void actOnDefine(const llvm::SMLoc Loc, llvm::StringRef Name, 65 | llvm::StringRef Value, 
var::VarType Type); 66 | }; 67 | } // namespace asttool 68 | #endif 69 | -------------------------------------------------------------------------------- /include/asttool/ClassEmitter.h: -------------------------------------------------------------------------------- 1 | //===--- ClassEmitter.h - ASTtool class source emitter ----------*- C++ -*-===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the emitter for ASTtool. 11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | #ifndef ASTTOOL_CLASSEMITTER_H 15 | #define ASTTOOL_CLASSEMITTER_H 16 | 17 | namespace llvm { 18 | class raw_ostream; 19 | } // namespace llvm 20 | namespace asttool { 21 | class ASTDefinition; 22 | class VarStore; 23 | 24 | void emitClass(ASTDefinition &ASTDef, const VarStore &Vars, 25 | llvm::raw_ostream &OS); 26 | 27 | } // namespace asttool 28 | #endif -------------------------------------------------------------------------------- /include/asttool/Diagnostic.h: -------------------------------------------------------------------------------- 1 | //===--- Diagnostic.h - ASTtool diagnostic output ---------------*- C++ -*-===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the error printing interface. 
11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | #ifndef ASTTOOL_DIAGNOSTIC_H 15 | #define ASTTOOL_DIAGNOSTIC_H 16 | 17 | #include "llvm/Support/SMLoc.h" 18 | 19 | namespace llvm { 20 | class SourceMgr; 21 | class Twine; 22 | } // namespace llvm 23 | 24 | namespace asttool { 25 | 26 | class Diagnostic { 27 | llvm::SourceMgr &SrcMgr; 28 | unsigned Errors; 29 | 30 | public: 31 | Diagnostic(llvm::SourceMgr &SrcMgr) : SrcMgr(SrcMgr), Errors(0) {} 32 | 33 | llvm::SourceMgr &srcMgr() { return SrcMgr; } 34 | 35 | bool errorsOccured() { return Errors > 0; } 36 | unsigned errorsPrinted() { return Errors; } 37 | 38 | void error(llvm::SMLoc Loc, const llvm::Twine &Msg); 39 | void error(const char *Loc, const llvm::Twine &Msg); 40 | void warning(llvm::SMLoc Loc, const llvm::Twine &Msg); 41 | void warning(const char *Loc, const llvm::Twine &Msg); 42 | void note(llvm::SMLoc Loc, const llvm::Twine &Msg); 43 | void note(const char *Loc, const llvm::Twine &Msg); 44 | }; 45 | } // namespace asttool 46 | #endif -------------------------------------------------------------------------------- /include/asttool/Lexer.h: -------------------------------------------------------------------------------- 1 | //===--- Lexer.h - ASTtool lexer --------------------------------*- C++ -*-===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the ASTtool lexer interface. 
11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | #ifndef ASTTOOL_LEXER_H 15 | #define ASTTOOL_LEXER_H 16 | 17 | #include "asttool/Diagnostic.h" 18 | #include "llvm/ADT/StringMap.h" 19 | #include "llvm/ADT/StringRef.h" 20 | #include "llvm/Support/SMLoc.h" 21 | #include "llvm/Support/SourceMgr.h" 22 | 23 | namespace asttool { 24 | 25 | class Lexer; 26 | 27 | namespace tok { 28 | enum TokenKind : unsigned short { 29 | unknown, 30 | eoi, 31 | identifier, 32 | qualidentifier, 33 | code, 34 | string, 35 | equal, 36 | comma, 37 | semi, 38 | colon, 39 | lesscolon, 40 | percentpercent, 41 | kw_base, 42 | kw_default, 43 | kw_define, 44 | kw_enum, 45 | kw_in, 46 | kw_language, 47 | kw_let, 48 | kw_list, 49 | kw_node, 50 | kw_out, 51 | kw_plain, 52 | kw_typedef, 53 | NUM_TOKENS 54 | }; 55 | } // end namespace tok 56 | 57 | class Token { 58 | friend class Lexer; 59 | 60 | /// The location of the token. 61 | const char *Ptr; 62 | 63 | /// The length of the token. 64 | size_t Length; 65 | 66 | /// Kind - The actual flavor of token this is. 67 | tok::TokenKind Kind; 68 | 69 | public: 70 | tok::TokenKind getKind() const { return Kind; } 71 | 72 | /// is/isNot - Predicates to check if this token is a specific kind, as in 73 | /// "if (Tok.is(tok::l_brace)) {...}". 74 | bool is(tok::TokenKind K) const { return Kind == K; } 75 | bool isNot(tok::TokenKind K) const { return Kind != K; } 76 | bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const { 77 | return is(K1) || is(K2); 78 | } 79 | template 80 | bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, Ts... 
Ks) const { 81 | return is(K1) || isOneOf(K2, Ks...); 82 | } 83 | 84 | llvm::SMLoc getLoc() const { return llvm::SMLoc::getFromPointer(Ptr); } 85 | size_t getLength() const { return Length; } 86 | 87 | llvm::StringRef getData() { return llvm::StringRef(Ptr, Length); } 88 | }; 89 | 90 | class Lexer { 91 | llvm::SourceMgr &SrcMgr; 92 | Diagnostic Diag; 93 | 94 | const char *CurPtr = nullptr; 95 | llvm::StringRef CurBuf; 96 | 97 | /// CurBuffer - This is the current buffer index we're lexing from as managed 98 | /// by the SourceMgr object. 99 | unsigned CurBuffer = 0; 100 | 101 | public: 102 | Lexer(llvm::SourceMgr &SrcMgr) : SrcMgr(SrcMgr), Diag(Diagnostic(SrcMgr)) { 103 | CurBuffer = SrcMgr.getMainFileID(); 104 | CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(); 105 | CurPtr = CurBuf.begin(); 106 | } 107 | 108 | Diagnostic &getDiagnostic() { return Diag; } 109 | 110 | void next(Token &Tok); 111 | 112 | private: 113 | void identifier(Token &Tok); 114 | void code(Token &Tok, char Open, const char Close, tok::TokenKind Kind); 115 | void keyword(Token &Tok); 116 | void string(Token &Tok); 117 | void multilinecomment(); 118 | void singlelinecomment(); 119 | 120 | void printError(const char *Loc, const llvm::Twine &Msg); 121 | void formToken(Token &Result, const char *TokEnd, tok::TokenKind Kind); 122 | }; 123 | } // namespace asttool 124 | #endif -------------------------------------------------------------------------------- /include/asttool/Main.h: -------------------------------------------------------------------------------- 1 | //===--- Main.h - ASTtool main entry point ----------------------*- C++ -*-===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the main entry point for ASTtool. 11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | #ifndef ASTTOOL_MAIN_H 15 | #define ASTTOOL_MAIN_H 16 | 17 | namespace asttool { 18 | 19 | int runASTtoolMain(const char *Argv0); 20 | 21 | } // namespace asttool 22 | #endif -------------------------------------------------------------------------------- /include/asttool/Parser.h: -------------------------------------------------------------------------------- 1 | //===--- Parser.h - ASTtool parser ------------------------------*- C++ -*-===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the ASTtool parser class. 11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | #ifndef ASTTOOL_PARSER_H 15 | #define ASTTOOL_PARSER_H 16 | 17 | #include "asttool/ClassBuilder.h" 18 | #include "asttool/Lexer.h" 19 | 20 | namespace asttool { 21 | class Node; 22 | class VarStore; 23 | 24 | class Parser { 25 | Lexer Lex; 26 | Token Tok; 27 | 28 | ClassBuilder Builder; 29 | 30 | void advance() { Lex.next(Tok); } 31 | 32 | bool consume(tok::TokenKind ExpectedTok) { 33 | if (expect(ExpectedTok)) 34 | return true; 35 | advance(); 36 | return false; 37 | } 38 | 39 | bool expect(tok::TokenKind ExpectedTok) { 40 | if (Tok.is(ExpectedTok)) { 41 | return false; 42 | } 43 | // There must be a better way! 
44 | error(); 45 | return true; 46 | } 47 | 48 | void error() { getDiag().error(Tok.getLoc(), "unexpected token"); } 49 | 50 | public: 51 | Parser(llvm::SourceMgr &SrcMgr) 52 | : Lex(Lexer(SrcMgr)), Builder(ClassBuilder(Lex.getDiagnostic())) { 53 | advance(); 54 | } 55 | 56 | Diagnostic &getDiag() { return Lex.getDiagnostic(); } 57 | 58 | ASTDefinition parse(VarStore &Vars); 59 | 60 | private: 61 | #define PARSER_DECLARATION 62 | #include "asttool/asttool.inc" 63 | #undef PARSER_DECLARATION 64 | }; 65 | } // namespace asttool 66 | #endif -------------------------------------------------------------------------------- /include/asttool/VarStore.h: -------------------------------------------------------------------------------- 1 | //===--- VarStore.h - ASTtool variable container ----------------*- C++ -*-===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the variable container. Variables can be defined in the AST file 11 | /// with the %define directive. They are mainly used to customize the code 12 | /// generation. 
13 | /// 14 | //===----------------------------------------------------------------------===// 15 | 16 | #ifndef ASTTOOL_VARSTORE_H 17 | #define ASTTOOL_VARSTORE_H 18 | 19 | #include "llvm/ADT/StringRef.h" 20 | #include "llvm/Support/Error.h" 21 | 22 | namespace asttool { 23 | 24 | namespace var { 25 | 26 | enum VarName { 27 | #define VAR(NAME, VAR, TYPE, DEFAULT) VAR, 28 | #include "asttool/Variables.def" 29 | NUM_VARIABLES 30 | }; 31 | 32 | enum VarType { Identifier, Code, String, Flag }; 33 | } // namespace var 34 | 35 | class VarStore { 36 | llvm::StringRef Vars[var::NUM_VARIABLES]; 37 | var::VarType getType(var::VarName) const; 38 | llvm::StringRef getDefault(var::VarName) const; 39 | 40 | public: 41 | VarStore(); 42 | 43 | llvm::Error add(llvm::StringRef Name, llvm::StringRef Value, 44 | var::VarType Type); 45 | 46 | void set(var::VarName Name, llvm::StringRef Value); 47 | 48 | llvm::StringRef getVar(var::VarName Name) const { 49 | assert(Name != var::NUM_VARIABLES); 50 | return Vars[Name].empty() ? getDefault(Name) : Vars[Name]; 51 | } 52 | 53 | bool getFlag(var::VarName Name) const { 54 | assert(getType(Name) == var::Flag && "getFlag() requires flag variable"); 55 | return Vars[Name] == "true"; 56 | } 57 | }; 58 | } // namespace asttool 59 | #endif 60 | -------------------------------------------------------------------------------- /include/asttool/Variables.def: -------------------------------------------------------------------------------- 1 | //===--- Variables.def - ASTtool variable definition ------------*- C++ -*-===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the variables known to ASTtool. 
11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | #ifndef VAR 15 | #define VAR(NAME, VAR, TYPE, DEFAULT) 16 | #endif 17 | 18 | // Only supported value is "c++". 19 | VAR("language", Language, String, "c++") 20 | 21 | // Prefix for fields and parameters. 22 | VAR("api.prefix", ApiPrefix, String, "_") 23 | 24 | // Name of the discriminator enum used for LLVM RTTI. 25 | VAR("api.rtti.type", ApiRTTIType, Code, "__KindType") 26 | 27 | // Base type of the discriminator enum. 28 | VAR("api.rtti.basetype", ApiRTTIBaseType, Code, "unsigned") 29 | 30 | // Name of the discriminator. 31 | VAR("api.rtti.member", ApiRTTIMember, Code, "__Kind") 32 | 33 | // Prefix for members of RTTI kind enum. 34 | VAR("api.rtti.memberprefix", ApiRTTIMemberPrefix, Code, "K_") 35 | 36 | #undef VAR 37 | -------------------------------------------------------------------------------- /include/lltool/Algo.h: -------------------------------------------------------------------------------- 1 | //===--- Algo.h - LLtool algorithms definition ------------------*- C++ -*-===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the graph algorithms. 11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | #ifndef LLTOOL_ALGO_H 15 | #define LLTOOL_ALGO_H 16 | 17 | #include "lltool/Grammar.h" 18 | 19 | namespace lltool { 20 | 21 | /** 22 | * Marks all symbols reachable from the start symbol. 
23 | * 24 | * Params: 25 | * G = grammar for which the reachability of the symbols is 26 | * computed 27 | */ 28 | void calculateReachable(Grammar &G); 29 | 30 | /** 31 | * Calculates the epsilon productivity for each element of the grammar. 32 | * 33 | * Params: 34 | * G = grammar for which the epsilon productivity of the 35 | * symbols is computed 36 | */ 37 | void calculateDerivesEpsilon(Grammar &G); 38 | 39 | /** 40 | * Calculates the productivity of each symbol of the grammar. 41 | * 42 | * Params: 43 | * G = grammar for which the productivity of the symbols is 44 | * computed 45 | */ 46 | void calculateProductive(Grammar &G); 47 | 48 | /** 49 | * Computes the epsilon-free first sets of the grammar. 50 | * 51 | * Params: 52 | * G = grammar for which the epsilon-free first sets are 53 | * computed 54 | */ 55 | void calculateFirstSets(Grammar &G); 56 | 57 | /** 58 | * Computes the follow sets of the grammar. 59 | * 60 | * Params: 61 | * G = grammar for which the follow sets are computed 62 | */ 63 | void calculateFollowSets(Grammar &G); 64 | } // namespace lltool 65 | #endif 66 | -------------------------------------------------------------------------------- /include/lltool/Diagnostic.h: -------------------------------------------------------------------------------- 1 | //===--- Diagnostic.h - LLtool diagnostic output ----------------*- C++ -*-===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the error printing interface. 
11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | #ifndef LLTOOL_DIAGNOSTIC_H 15 | #define LLTOOL_DIAGNOSTIC_H 16 | 17 | #include "llvm/Support/SMLoc.h" 18 | 19 | namespace llvm { 20 | class SourceMgr; 21 | class Twine; 22 | } // namespace llvm 23 | 24 | namespace lltool { 25 | 26 | class Diagnostic { 27 | llvm::SourceMgr &SrcMgr; 28 | unsigned Errors; 29 | 30 | public: 31 | Diagnostic(llvm::SourceMgr &SrcMgr) : SrcMgr(SrcMgr), Errors(0) {} 32 | 33 | llvm::SourceMgr &srcMgr() { return SrcMgr; } 34 | 35 | bool errorsOccured() { return Errors > 0; } 36 | unsigned errorsPrinted() { return Errors; } 37 | 38 | void error(llvm::SMLoc Loc, const llvm::Twine &Msg); 39 | void error(const char *Loc, const llvm::Twine &Msg); 40 | void warning(llvm::SMLoc Loc, const llvm::Twine &Msg); 41 | void warning(const char *Loc, const llvm::Twine &Msg); 42 | void note(llvm::SMLoc Loc, const llvm::Twine &Msg); 43 | void note(const char *Loc, const llvm::Twine &Msg); 44 | }; 45 | } // namespace lltool 46 | #endif -------------------------------------------------------------------------------- /include/lltool/Grammar.h: -------------------------------------------------------------------------------- 1 | //===--- Grammar.h - LLtool grammar definition ------------------*- C++ -*-===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the grammar. 
11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | #ifndef LLTOOL_GRAMMAR_H 15 | #define LLTOOL_GRAMMAR_H 16 | 17 | #include "lltool/Node.h" 18 | #include "llvm/ADT/IndexedMap.h" 19 | #include "llvm/ADT/iterator_range.h" 20 | #include <vector> 21 | 22 | namespace llvm { 23 | class raw_ostream; 24 | } 25 | 26 | namespace lltool { 27 | 28 | class Diagnostic; 29 | 30 | class Grammar { 31 | Nonterminal *Nonterminals; 32 | Nonterminal *SyntheticStartSymbol; 33 | Terminal *EoiTerminal; 34 | std::vector Nodes; 35 | llvm::IndexedMap<Terminal *> TerminalMap; 36 | 37 | public: 38 | using range_type = llvm::iterator_range::iterator>; 39 | 40 | Grammar() 41 | : Nonterminals(nullptr), SyntheticStartSymbol(nullptr), 42 | EoiTerminal(nullptr), Nodes(), TerminalMap() {} 43 | Grammar(Nonterminal *Nonterminals, Nonterminal *StartSymbol, 44 | Nonterminal *SyntheticStartSymbol, Terminal *EoiTerminal, 45 | std::vector &Nodes, llvm::IndexedMap<Terminal *> &TerminalMap) 46 | : Nonterminals(Nonterminals), SyntheticStartSymbol(SyntheticStartSymbol), 47 | EoiTerminal(EoiTerminal), Nodes(Nodes), TerminalMap(TerminalMap) {} 48 | 49 | Nonterminal *syntheticStartSymbol() const { return SyntheticStartSymbol; } 50 | Terminal *eoiTerminal() const { return EoiTerminal; } 51 | llvm::iterator_range::iterator> nodes() { 52 | return llvm::make_range(Nodes.begin(), Nodes.end()); 53 | } 54 | 55 | llvm::iterator_range< 56 | NodeIterator> 57 | nonterminals() { 58 | return llvm::iterator_range< 59 | NodeIterator>( 60 | NodeIterator( 61 | Nonterminals), 62 | nullptr); 63 | } 64 | 65 | Terminal *map(unsigned N) const { return TerminalMap[N]; } 66 | unsigned numberOfTerminals() const { 67 | return static_cast<unsigned>(TerminalMap.size()); 68 | } 69 | 70 | void writeYAML(llvm::raw_ostream &OS); 71 | void performAnalysis(Diagnostic &Diag); 72 | }; 73 | } // namespace lltool 74 | #endif 75 | -------------------------------------------------------------------------------- 
/include/lltool/GrammarBuilder.h: -------------------------------------------------------------------------------- 1 | //===--- GrammarBuilder.h - LLtool ast and graph construction ---*- C++ -*-===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the ast and graph construction grammar. 11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | #ifndef LLTOOL_GRAMMARBUILDER_H 15 | #define LLTOOL_GRAMMARBUILDER_H 16 | 17 | #include "lltool/Diagnostic.h" 18 | #include "lltool/Grammar.h" 19 | #include "lltool/VarStore.h" 20 | #include "llvm/ADT/StringRef.h" 21 | #include "llvm/ADT/StringMap.h" 22 | #include "llvm/Support/SMLoc.h" 23 | #include 24 | 25 | namespace lltool { 26 | 27 | class GrammarBuilder { 28 | Diagnostic &Diag; 29 | std::vector Nodes; 30 | llvm::StringMap Terminals; 31 | VarStore Variables; 32 | 33 | // List of all nonterminal symbols, in same order as in source file. 34 | Nonterminal *Nonterminals; 35 | 36 | // Last inserted nonterminal symbol (for list construction). 37 | Nonterminal *LastNT; 38 | 39 | // Number of next terminal symbol. 
40 | unsigned NextTerminalNo; 41 | 42 | llvm::StringRef StartName; 43 | llvm::SMLoc StartLoc; 44 | 45 | llvm::StringRef EoiName; 46 | llvm::SMLoc EoiLoc; 47 | 48 | llvm::StringRef LanguageName; 49 | llvm::SMLoc LanguageLoc; 50 | 51 | void error(llvm::SMLoc Loc, llvm::Twine Msg); 52 | void warning(llvm::SMLoc Loc, llvm::Twine Msg); 53 | void note(llvm::SMLoc Loc, llvm::Twine Msg); 54 | 55 | Nonterminal *addSyntheticStart(Nonterminal *StartSymbol, 56 | Terminal *EoiTerminal); 57 | Nonterminal *findStartSymbol(); 58 | void resolve(); 59 | 60 | public: 61 | GrammarBuilder(Diagnostic &Diag) 62 | : Diag(Diag), Nonterminals(nullptr), LastNT(nullptr), NextTerminalNo(0) {} 63 | Grammar build(); 64 | const VarStore &varStore() { return Variables; } 65 | Nonterminal *nonterminal(const llvm::SMLoc Loc, llvm::StringRef Name); 66 | Terminal *terminal(const llvm::SMLoc Loc, llvm::StringRef Name, 67 | llvm::StringRef ExternalName = ""); 68 | SymbolRef *symbol(const llvm::SMLoc Loc, llvm::StringRef Name, 69 | bool IsTerminal = false); 70 | Code *code(const llvm::SMLoc Loc, llvm::StringRef Code); 71 | Sequence *sequence(const llvm::SMLoc Loc); 72 | Group *group(const llvm::SMLoc Loc, Node *Enclosed, Group::CardinalityKind Cardinality); 73 | Alternative *alternative(const llvm::SMLoc Loc, Node *Seq); 74 | void argument(Node *Node, llvm::StringRef Arg); 75 | void startSymbol(const llvm::SMLoc Loc, llvm::StringRef Name); 76 | void eoiSymbol(const llvm::SMLoc Loc, llvm::StringRef Name); 77 | void language(const llvm::SMLoc Loc, llvm::StringRef Name); 78 | void define(const llvm::SMLoc Loc, llvm::StringRef Name, 79 | llvm::StringRef Value, var::VarType Type); 80 | }; 81 | } // namespace lltool 82 | #endif 83 | -------------------------------------------------------------------------------- /include/lltool/Lexer.h: -------------------------------------------------------------------------------- 1 | //===--- Lexer.h - LLtool lexer ---------------------------------*- C++ -*-===// 2 | // 3 | 
// Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the grammar. 11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | #ifndef LLTOOL_LEXER_H 15 | #define LLTOOL_LEXER_H 16 | 17 | #include "lltool/Diagnostic.h" 18 | #include "llvm/ADT/StringMap.h" 19 | #include "llvm/ADT/StringRef.h" 20 | #include "llvm/Support/SMLoc.h" 21 | #include "llvm/Support/SourceMgr.h" 22 | 23 | namespace lltool { 24 | 25 | class Lexer; 26 | 27 | namespace tok { 28 | enum TokenKind : unsigned short { 29 | unknown, 30 | eoi, 31 | identifier, 32 | qualidentifier, 33 | string, 34 | argument, 35 | code, 36 | equal, 37 | colon, 38 | comma, 39 | semi, 40 | pipe, 41 | l_paren, 42 | r_paren, 43 | r_parenquestion, 44 | r_parenstar, 45 | r_parenplus, 46 | percentpercent, 47 | kw_eoi, 48 | kw_define, 49 | kw_if, 50 | kw_language, 51 | kw_start, 52 | kw_token, 53 | NUM_TOKENS 54 | }; 55 | } // end namespace tok 56 | 57 | class Token { 58 | friend class Lexer; 59 | 60 | /// The location of the token. 61 | const char *Ptr; 62 | 63 | /// The length of the token. 64 | size_t Length; 65 | 66 | /// Kind - The actual flavor of token this is. 67 | tok::TokenKind Kind; 68 | 69 | public: 70 | tok::TokenKind getKind() const { return Kind; } 71 | 72 | /// is/isNot - Predicates to check if this token is a specific kind, as in 73 | /// "if (Tok.is(tok::l_brace)) {...}". 74 | bool is(tok::TokenKind K) const { return Kind == K; } 75 | bool isNot(tok::TokenKind K) const { return Kind != K; } 76 | bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const { 77 | return is(K1) || is(K2); 78 | } 79 | template 80 | bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, Ts... 
Ks) const { 81 | return is(K1) || isOneOf(K2, Ks...); 82 | } 83 | 84 | llvm::SMLoc getLoc() const { return llvm::SMLoc::getFromPointer(Ptr); } 85 | size_t getLength() const { return Length; } 86 | 87 | llvm::StringRef getData() { return llvm::StringRef(Ptr, Length); } 88 | }; 89 | 90 | class Lexer { 91 | llvm::SourceMgr &SrcMgr; 92 | Diagnostic Diag; 93 | 94 | const char *CurPtr = nullptr; 95 | llvm::StringRef CurBuf; 96 | 97 | /// CurBuffer - This is the current buffer index we're lexing from as managed 98 | /// by the SourceMgr object. 99 | unsigned CurBuffer = 0; 100 | 101 | public: 102 | Lexer(llvm::SourceMgr &SrcMgr) : SrcMgr(SrcMgr), Diag(Diagnostic(SrcMgr)) { 103 | CurBuffer = SrcMgr.getMainFileID(); 104 | CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(); 105 | CurPtr = CurBuf.begin(); 106 | } 107 | 108 | Diagnostic &getDiagnostic() { return Diag; } 109 | 110 | void next(Token &Tok); 111 | 112 | private: 113 | void identifier(Token &Tok); 114 | void code(Token &Tok, char Open, const char Close, tok::TokenKind Kind); 115 | void keyword(Token &Tok); 116 | void string(Token &Tok); 117 | void multilinecomment(); 118 | void singlelinecomment(); 119 | 120 | void printError(const char *Loc, const llvm::Twine &Msg); 121 | void formToken(Token &Result, const char *TokEnd, tok::TokenKind Kind); 122 | }; 123 | } // namespace lltool 124 | #endif -------------------------------------------------------------------------------- /include/lltool/Main.h: -------------------------------------------------------------------------------- 1 | //===--- Main.h - LLtool main entry point -----------------------*- C++ -*-===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the main entry point for LLtool. 11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | #ifndef LLTOOL_MAIN_H 15 | #define LLTOOL_MAIN_H 16 | 17 | namespace lltool { 18 | 19 | int runLLtoolMain(const char *Argv0); 20 | 21 | } // namespace lltool 22 | #endif -------------------------------------------------------------------------------- /include/lltool/Parser.h: -------------------------------------------------------------------------------- 1 | //===--- Parser.h - LLtool parser -------------------------------*- C++ -*-===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the LLtool parser class. 11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | #ifndef LLTOOL_PARSER_H 15 | #define LLTOOL_PARSER_H 16 | 17 | #include "lltool/GrammarBuilder.h" 18 | #include "lltool/Lexer.h" 19 | 20 | namespace lltool { 21 | class Node; 22 | 23 | class Parser { 24 | [[maybe_unused]] 25 | llvm::SourceMgr &SrcMgr; 26 | Lexer Lex; 27 | Token Tok; 28 | 29 | GrammarBuilder Builder; 30 | 31 | void advance() { Lex.next(Tok); } 32 | 33 | bool consume(tok::TokenKind ExpectedTok) { 34 | if (Tok.is(ExpectedTok)) { 35 | advance(); 36 | return false; 37 | } 38 | // There must be a better way! 
39 | error(); 40 | return true; 41 | } 42 | 43 | bool expect(tok::TokenKind ExpectedTok) { 44 | if (Tok.is(ExpectedTok)) { 45 | return false; 46 | } 47 | return true; 48 | } 49 | 50 | void error() { getDiag().error(Tok.getLoc(), "unexpected token"); } 51 | 52 | public: 53 | Parser(llvm::SourceMgr &SrcMgr) 54 | : SrcMgr(SrcMgr), Lex(Lexer(SrcMgr)), 55 | Builder(GrammarBuilder(Lex.getDiagnostic())) { 56 | advance(); 57 | } 58 | 59 | Diagnostic &getDiag() { return Lex.getDiagnostic(); } 60 | 61 | void parse(Grammar &G, VarStore &V); 62 | 63 | private: 64 | #define PARSER_DECLARATION 65 | #include "lltool/lltool.g.inc" 66 | #undef PARSER_DECLARATION 67 | }; 68 | } // namespace lltool 69 | #endif -------------------------------------------------------------------------------- /include/lltool/RDPEmitter.h: -------------------------------------------------------------------------------- 1 | //===--- RDPEmitter.h - LLtool recursive descent parser emitter -*- C++ -*-===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the emitter for LLtool. 
11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | #ifndef LLTOOL_RDPEMITTER_H 15 | #define LLTOOL_RDPEMITTER_H 16 | 17 | namespace llvm { 18 | class raw_ostream; 19 | } // namespace llvm 20 | namespace lltool { 21 | class Grammar; 22 | class VarStore; 23 | 24 | void emitRDP(Grammar &Grammar, VarStore &Vars, llvm::raw_ostream &OS); 25 | 26 | } // namespace lltool 27 | #endif -------------------------------------------------------------------------------- /include/lltool/VarStore.h: -------------------------------------------------------------------------------- 1 | //===--- VarStore.h - LLtool variable container -----------------*- C++ -*-===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the variable container. Variables can be defined in the grammar file 11 | /// with the %define directive. They are mainly used to customize the code 12 | /// generation. 
13 | /// 14 | //===----------------------------------------------------------------------===// 15 | 16 | #ifndef LLTOOL_VARSTORE_H 17 | #define LLTOOL_VARSTORE_H 18 | 19 | #include "llvm/ADT/StringRef.h" 20 | #include "llvm/Support/Error.h" 21 | 22 | namespace lltool { 23 | 24 | namespace var { 25 | 26 | enum VarName { 27 | #define VAR(NAME, VAR, TYPE, DEFAULT) VAR, 28 | #include "lltool/Variables.def" 29 | NUM_VARIABLES 30 | }; 31 | 32 | enum VarType { Identifier, Code, String, Flag }; 33 | } // namespace var 34 | 35 | class VarStore { 36 | llvm::StringRef Vars[var::NUM_VARIABLES]; 37 | var::VarType getType(var::VarName) const; 38 | llvm::StringRef getDefault(var::VarName) const; 39 | 40 | public: 41 | VarStore(); 42 | 43 | llvm::Error add(llvm::StringRef Name, llvm::StringRef Value, 44 | var::VarType Type); 45 | 46 | void set(var::VarName Name, llvm::StringRef Value); 47 | 48 | llvm::StringRef getVar(var::VarName Name) const { 49 | assert(Name != var::NUM_VARIABLES); 50 | return Vars[Name].empty() ? getDefault(Name) : Vars[Name]; 51 | } 52 | 53 | bool getFlag(var::VarName Name) const { 54 | assert(getType(Name) == var::Flag && "getFlag() requires flag variable"); 55 | return Vars[Name] == "true"; 56 | } 57 | }; 58 | } // namespace lltool 59 | #endif 60 | -------------------------------------------------------------------------------- /include/lltool/Variables.def: -------------------------------------------------------------------------------- 1 | //===--- Variables.def - LLtool variable definition -------------*- C++ -*-===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the variables known to LLtool. 
11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | #ifndef VAR 15 | #define VAR(NAME, VAR, TYPE, DEFAULT) 16 | #endif 17 | 18 | // Only supported value is "c++". 19 | VAR("language", Language, String, "c++") 20 | 21 | // Name of the parser class. 22 | VAR("api.parser.class", ApiParserClass, Code, "Parser") 23 | 24 | // The name of the variable holding the current token. 25 | VAR("api.token.name", ApiTokenName, Code, "Tok") 26 | 27 | // The name space used for tokens. 28 | VAR("api.token.namespace", ApiTokenNamespace, Code, "tok") 29 | 30 | // Prefix used for internal types, variables, and parameter. 31 | VAR("api.prefix", ApiPrefix, Code, "_") 32 | 33 | // Should a switch be generated instead of if/else? 34 | VAR("code.prefer.switch", CodePreferSwitch, Flag, "") 35 | 36 | /* Possible other useful variables: 37 | VAR("api.namespace", ApiNamespace, Code) 38 | VAR("api.symbol.prefix", ApiSymbolPrefix, Code) 39 | VAR("api.token.prefix", ApiTokenPrefix, Code) 40 | */ 41 | 42 | #undef VAR 43 | -------------------------------------------------------------------------------- /include/m2lang/AST/PervasiveFunctions.def: -------------------------------------------------------------------------------- 1 | //===--- PervasiveFunctions.def - M2 Language Family Pervasive Functions --===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the pervasive functions. 11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | // Types: 15 | // c - character type 16 | // ? 
- real number or whole number type 17 | // o - ordinal number 18 | 19 | #ifndef PROCEDURE 20 | #define PROCEDURE(Name) 21 | #endif 22 | 23 | #ifndef FUNCTION 24 | #define FUNCTION(Name) 25 | #endif 26 | 27 | // Procedures 28 | PROCEDURE(DEC) 29 | PROCEDURE(DISPOSE) 30 | PROCEDURE(EXCL) 31 | PROCEDURE(HALT) 32 | PROCEDURE(INC) 33 | PROCEDURE(INCL) 34 | PROCEDURE(NEW) 35 | 36 | // Functions 37 | FUNCTION(ABS) 38 | FUNCTION(CAP) 39 | FUNCTION(CHR) 40 | FUNCTION(CMPLX) 41 | FUNCTION(FLOAT) 42 | FUNCTION(HIGH) 43 | FUNCTION(IM) 44 | FUNCTION(INT) 45 | FUNCTION(LENGTH) 46 | FUNCTION(LFLOAT) 47 | FUNCTION(MAX) 48 | FUNCTION(MIN) 49 | FUNCTION(ODD) 50 | FUNCTION(ORD) 51 | FUNCTION(RE) 52 | FUNCTION(SIZE) 53 | FUNCTION(TRUNC) 54 | FUNCTION(VAL) 55 | 56 | #undef FUNCTION 57 | #undef PROCEDURE -------------------------------------------------------------------------------- /include/m2lang/AST/PervasiveTypes.def: -------------------------------------------------------------------------------- 1 | //===--- PervasiveTypes.def - M2 Language Family Pervasive Types ----------===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the pervasive types. 
11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | #ifndef BUILTIN_TYPE 15 | #define BUILTIN_TYPE(Id) 16 | #endif 17 | 18 | #ifndef PERVASIVE_TYPE 19 | #define PERVASIVE_TYPE(Id, Name) BUILTIN_TYPE(Id) 20 | #endif 21 | 22 | #ifndef ORDINAL_TYPE 23 | #define ORDINAL_TYPE(Id, Name) PERVASIVE_TYPE(Id, Name) 24 | #endif 25 | 26 | #ifndef WHOLENUMBER_TYPE 27 | #define WHOLENUMBER_TYPE(Id, Name) ORDINAL_TYPE(Id, Name) 28 | #endif 29 | 30 | #ifndef SIGNED_TYPE 31 | #define SIGNED_TYPE(Id, Name) WHOLENUMBER_TYPE(Id, Name) 32 | #endif 33 | 34 | #ifndef UNSIGNED_TYPE 35 | #define UNSIGNED_TYPE(Id, Name) WHOLENUMBER_TYPE(Id, Name) 36 | #endif 37 | 38 | #ifndef FLOATING_TYPE 39 | #define FLOATING_TYPE(Id, Name) PERVASIVE_TYPE(Id, Name) 40 | #endif 41 | 42 | #ifndef COMPLEX_TYPE 43 | #define COMPLEX_TYPE(Id, Name) PERVASIVE_TYPE(Id, Name) 44 | #endif 45 | 46 | // Only used for procedures. 47 | BUILTIN_TYPE(Void) 48 | 49 | // The whole number type for whole number literal values. 50 | BUILTIN_TYPE(WholeNumber) 51 | 52 | // The real number type for real number literal values. 53 | BUILTIN_TYPE(RealNumber) 54 | 55 | // The complex number type for complex number literal values. 56 | BUILTIN_TYPE(ComplexNumber) 57 | 58 | // The string type for string literal values. 59 | BUILTIN_TYPE(StringLiteral) 60 | 61 | // The nil type. 62 | BUILTIN_TYPE(Nil) 63 | 64 | // Pervasive types without shared properties. 
65 | PERVASIVE_TYPE(Bitset, "BITSET") 66 | PERVASIVE_TYPE(Proc, "PROC") 67 | PERVASIVE_TYPE(Protection, "PROTECTION") 68 | 69 | ORDINAL_TYPE(Boolean, "BOOLEAN") 70 | ORDINAL_TYPE(Char, "CHAR") 71 | 72 | SIGNED_TYPE(Integer, "INTEGER") 73 | 74 | UNSIGNED_TYPE(Cardinal, "CARDINAL") 75 | 76 | FLOATING_TYPE(Real, "REAL") 77 | FLOATING_TYPE(LongReal, "LONGREAL") 78 | 79 | COMPLEX_TYPE(Complex, "COMPLEX") 80 | COMPLEX_TYPE(LongComplex, "LONGCOMPLEX") 81 | 82 | #undef COMPLEX_TYPE 83 | #undef FLOATING_TYPE 84 | #undef SIGNED_TYPE 85 | #undef UNSIGNED_TYPE 86 | #undef WHOLENUMBER_TYPE 87 | #undef ORDINAL_TYPE 88 | #undef PERVASIVE_TYPE 89 | #undef BUILTIN_TYPE 90 | -------------------------------------------------------------------------------- /include/m2lang/Basic/TokenKinds.def: -------------------------------------------------------------------------------- 1 | //===--- TokenKinds.def - Modula-2 Token Kind Database --------------------===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | 9 | #ifndef TOK 10 | #define TOK(X) 11 | #endif 12 | #ifndef PUNCTUATOR 13 | #define PUNCTUATOR(X,Y) TOK(X) 14 | #endif 15 | #ifndef KEYWORD 16 | #define KEYWORD(X,Y) TOK(kw_ ## X) 17 | #endif 18 | #ifndef DIRECTIVE 19 | #define DIRECTIVE(X) TOK(kw_ ## X) 20 | #endif 21 | 22 | // These define members of the tok::* namespace. 23 | 24 | TOK(unknown) // Not a token. 25 | TOK(eof) // End of file. 
26 | TOK(comment) // Comment 27 | 28 | TOK(identifier) // abcde123 29 | 30 | TOK(integer_literal) // 123, 123B, 123H 31 | TOK(real_literal) // 123.4E+5 32 | TOK(char_literal) // 12C 33 | TOK(string_literal) // "foo", 'foo' 34 | 35 | PUNCTUATOR(plus, "+") 36 | PUNCTUATOR(minus, "-") 37 | PUNCTUATOR(star, "*") 38 | PUNCTUATOR(slash, "/") 39 | PUNCTUATOR(colonequal, ":=") 40 | PUNCTUATOR(period, ".") 41 | PUNCTUATOR(comma, ",") 42 | PUNCTUATOR(semi, ";") 43 | PUNCTUATOR(l_paren, "(") 44 | PUNCTUATOR(l_square, "[") 45 | PUNCTUATOR(l_brace, "{") 46 | PUNCTUATOR(caret, "^") 47 | PUNCTUATOR(equal, "=") 48 | PUNCTUATOR(hash, "#") 49 | PUNCTUATOR(less, "<") 50 | PUNCTUATOR(greater, ">") 51 | PUNCTUATOR(lessequal, "<=") 52 | PUNCTUATOR(greaterequal, ">=") 53 | PUNCTUATOR(ellipsis, "..") 54 | PUNCTUATOR(colon, ":") 55 | PUNCTUATOR(r_paren, ")") 56 | PUNCTUATOR(r_square, "]") 57 | PUNCTUATOR(r_brace, "}") 58 | PUNCTUATOR(pipe, "|") 59 | PUNCTUATOR(lessstar, "<*") // Begin of directive 60 | PUNCTUATOR(stargreater, "*>") // End of directive 61 | 62 | // Keywords. These turn into kw_* tokens. 63 | // Flags allowed: 64 | // KEYALL - This is a keyword in all variants of Modula-2 65 | // KEYPIM - This is a keyword from "Programming in Modula-2", 4th edition. 66 | // KEYISO - This is a keyword introduced in ISO 10514-1. 67 | // KEYISOGS - This is a keyword introduced in ISO 10514-2 (generics). 68 | // KEYISOOO - This is a keyword introduced in ISO 10514-3 (OO layer). 69 | // KEYR10 - This is a keyword introduced with R10. 70 | // KEYM2P - This is a keyword introduced with Modula-2+. 
71 | 72 | KEYWORD(ABSTRACT , KEYISOOO) 73 | KEYWORD(ALIAS , KEYR10) 74 | KEYWORD(AND , KEYALL) 75 | KEYWORD(ARGLIST , KEYR10) 76 | KEYWORD(ARRAY , KEYALL) 77 | KEYWORD(AS , KEYISOOO) 78 | KEYWORD(BEGIN , KEYALL) 79 | KEYWORD(BITS , KEYM2P) 80 | KEYWORD(BLUEPRINT , KEYR10) 81 | KEYWORD(BY , KEYALL) 82 | KEYWORD(CASE , KEYALL) 83 | KEYWORD(CLASS , KEYISOOO) 84 | KEYWORD(CONST , KEYALL) 85 | KEYWORD(COPY , KEYR10) 86 | KEYWORD(DEFINITION , KEYALL) 87 | KEYWORD(DIV , KEYALL) 88 | KEYWORD(DO , KEYALL) 89 | KEYWORD(ELSE , KEYALL) 90 | KEYWORD(ELSIF , KEYALL) 91 | KEYWORD(END , KEYALL) 92 | KEYWORD(EXCEPT , KEYISO | KEYM2P) 93 | KEYWORD(EXCEPTION , KEYM2P) 94 | KEYWORD(EXIT , KEYALL) 95 | KEYWORD(EXPORT , KEYPIM | KEYISO | KEYM2P) 96 | KEYWORD(FINALLY , KEYISO | KEYM2P) 97 | KEYWORD(FOR , KEYALL) 98 | KEYWORD(FORWARD , KEYISO) 99 | KEYWORD(FROM , KEYALL) 100 | KEYWORD(GENERIC , KEYISOGS) 101 | KEYWORD(GENLIB , KEYR10) 102 | KEYWORD(GUARD , KEYISOOO) 103 | KEYWORD(IF , KEYALL) 104 | KEYWORD(IMPLEMENTATION , KEYALL) 105 | KEYWORD(IMPORT , KEYALL) 106 | KEYWORD(IN , KEYALL) 107 | KEYWORD(INHERIT , KEYISOOO) 108 | KEYWORD(LOCK , KEYM2P) 109 | KEYWORD(LOOP , KEYALL) 110 | KEYWORD(MOD , KEYALL) 111 | KEYWORD(MODULE , KEYALL) 112 | KEYWORD(NEW , KEYR10) 113 | KEYWORD(NONE , KEYR10) 114 | KEYWORD(NOT , KEYALL) 115 | KEYWORD(OF , KEYALL) 116 | KEYWORD(OPAQUE , KEYM2P | KEYR10) 117 | KEYWORD(OR , KEYALL) 118 | KEYWORD(OVERRIDE , KEYISOOO) 119 | KEYWORD(PACKEDSET , KEYISO) 120 | KEYWORD(PASSING , KEYM2P) 121 | KEYWORD(POINTER , KEYALL) 122 | KEYWORD(PROCEDURE , KEYALL) 123 | KEYWORD(QUALIFIED , KEYALL) 124 | KEYWORD(RAISES , KEYM2P) 125 | KEYWORD(READONLY , KEYISOOO) 126 | KEYWORD(RECORD , KEYALL) 127 | KEYWORD(REF , KEYM2P) 128 | KEYWORD(REFERENTIAL , KEYR10) 129 | KEYWORD(RELEASE , KEYR10) 130 | KEYWORD(REM , KEYISO) 131 | KEYWORD(REPEAT , KEYALL) 132 | KEYWORD(RETAIN , KEYR10) 133 | KEYWORD(RETRY , KEYISO) 134 | KEYWORD(RETURN , KEYALL) 135 | KEYWORD(REVEAL , KEYISOOO) 136 | 
KEYWORD(SAFE , KEYM2P) 137 | KEYWORD(SET , KEYALL) 138 | KEYWORD(THEN , KEYALL) 139 | KEYWORD(TO , KEYALL) 140 | KEYWORD(TRACED , KEYISOOO) 141 | KEYWORD(TRY , KEYM2P) 142 | KEYWORD(TYPE , KEYALL) 143 | KEYWORD(TYPECASE , KEYM2P) 144 | KEYWORD(UNSAFEGUARDED , KEYISOOO) 145 | KEYWORD(UNTIL , KEYALL) 146 | KEYWORD(VAR , KEYALL) 147 | KEYWORD(WHILE , KEYALL) 148 | KEYWORD(WITH , KEYPIM | KEYISO | KEYM2P) 149 | KEYWORD(YIELD , KEYR10) 150 | 151 | // Additional keywords only recognized in directives. 152 | DIRECTIVE(ASSIGN) 153 | DIRECTIVE(DEFINE) 154 | DIRECTIVE(ENVIRON) 155 | DIRECTIVE(PUSH) 156 | DIRECTIVE(POP) 157 | 158 | #undef DIRECTIVE 159 | #undef KEYWORD 160 | #undef PUNCTUATOR 161 | #undef TOK -------------------------------------------------------------------------------- /lib/AST/AST.cppm: -------------------------------------------------------------------------------- 1 | //===--- AST.cppm - Modula-2 Abstract Syntax Tree -------------------------===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the AST implementation. 
11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | module; 15 | 16 | #include "llvm/ADT/APFloat.h" 17 | #include "llvm/ADT/APInt.h" 18 | #include "llvm/ADT/PointerUnion.h" 19 | #include "llvm/ADT/SmallVector.h" 20 | #include "llvm/ADT/StringMap.h" 21 | #include "llvm/ADT/StringRef.h" 22 | #include "llvm/Support/Debug.h" 23 | #include "llvm/Support/raw_ostream.h" 24 | #include "llvm/Support/SMLoc.h" 25 | 26 | export module m2lang.ast:AST; 27 | 28 | import :PervasiveKinds; 29 | import m2lang.basic; 30 | 31 | export namespace m2lang { 32 | 33 | class Constant; 34 | class Declaration; 35 | class Expression; 36 | class FixedRecordField; 37 | class FormalParameter; 38 | class FormalParameterType; 39 | class GuardedStatement; 40 | class ImportItem; 41 | class Selector; 42 | class Statement; 43 | class Type; 44 | class TypeDenoter; 45 | using ActualParameter = llvm::PointerUnion; 46 | 47 | class Scope { 48 | Scope *Parent; 49 | llvm::StringMap Symbols; 50 | 51 | public: 52 | Scope(Scope *Parent = nullptr) : Parent(Parent) {} 53 | 54 | bool insert(Declaration *Declaration); 55 | Declaration *lookup(llvm::StringRef Name, bool SearchParent = true); 56 | 57 | Scope *getParent() { return Parent; } 58 | 59 | void dump() const; 60 | }; 61 | 62 | // TODO Evaluate average size of these lists. 
63 | using ActualParameterList = llvm::SmallVector; 64 | using ConstantList = llvm::SmallVector; 65 | using DeclarationList = llvm::SmallVector; 66 | using FormalParameterList = llvm::SmallVector; 67 | using ExpressionList = llvm::SmallVector; 68 | using ImportItemList = llvm::SmallVector; 69 | using SelectorList = llvm::SmallVector; 70 | using StatementList = llvm::SmallVector; 71 | using GuardedStatementList = llvm::SmallVector; 72 | using TypeDenoterList = llvm::SmallVector; 73 | 74 | using FormalParameterTypeList = llvm::SmallVector; 75 | using RecordFieldList = llvm::SmallVector; 76 | 77 | using StringIndexMap = llvm::StringMap; 78 | 79 | class Identifier { 80 | llvm::SMLoc Loc; 81 | llvm::StringRef Name; 82 | 83 | public: 84 | Identifier() = default; 85 | Identifier(llvm::SMLoc Loc, llvm::StringRef Name) : Loc(Loc), Name(Name) {} 86 | 87 | llvm::SMLoc getLoc() const { return Loc; } 88 | llvm::StringRef getName() const { return Name; } 89 | }; 90 | 91 | using IdentifierList = llvm::SmallVector; 92 | 93 | using VariableIdentifierList = 94 | llvm::SmallVector, 4>; 95 | 96 | class OperatorInfo { 97 | llvm::SMLoc Loc; 98 | uint32_t Kind : 16; 99 | uint32_t IsUnspecified : 1; 100 | 101 | public: 102 | OperatorInfo() : Loc(), Kind(tok::unknown), IsUnspecified(true) {} 103 | OperatorInfo(llvm::SMLoc Loc, tok::TokenKind Kind, bool IsUnspecified = false) 104 | : Loc(Loc), Kind(Kind), IsUnspecified(IsUnspecified) {} 105 | 106 | llvm::SMLoc getLocation() const { return Loc; } 107 | tok::TokenKind getKind() const { return static_cast(Kind); } 108 | bool isUnspecified() const { return IsUnspecified; } 109 | }; 110 | 111 | #define AST_DECLARATION 112 | #define AST_DISPATCHER 113 | #include "ast.inc" 114 | 115 | } // namespace m2lang 116 | 117 | namespace m2lang { 118 | #define AST_DEFINITION 119 | #include "ast.inc" 120 | } // namespace m2lang 121 | 122 | using namespace m2lang; 123 | 124 | bool Scope::insert(Declaration *Decl) { 125 | return Symbols 126 | 
.insert(std::pair(Decl->getName(), Decl)) 127 | .second; 128 | } 129 | 130 | Declaration *Scope::lookup(llvm::StringRef Name, bool SearchParent) { 131 | Scope *S = this; 132 | while (S) { 133 | llvm::StringMap::const_iterator I = S->Symbols.find(Name); 134 | if (I != S->Symbols.end()) 135 | return I->second; 136 | if (SearchParent) 137 | S = S->getParent(); 138 | else 139 | break; 140 | } 141 | return nullptr; 142 | } 143 | 144 | void Scope::dump() const { 145 | llvm::dbgs() << "Scope<" << this << "> {\n"; 146 | for (auto Key : Symbols.keys()) 147 | llvm::dbgs() << " " << Key << "\n"; 148 | llvm::dbgs() << "}\n"; 149 | } 150 | -------------------------------------------------------------------------------- /lib/AST/Module.cppm: -------------------------------------------------------------------------------- 1 | //===--- Module.cppm - Modula-2 Abstract Syntax Tree ----------------------===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the AST implementation. 11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | export module m2lang.ast; 15 | 16 | export import :AST; 17 | export import :ASTContext; 18 | export import :PervasiveKinds; 19 | -------------------------------------------------------------------------------- /lib/AST/PervasiveKinds.cppm: -------------------------------------------------------------------------------- 1 | //===--- PervasiveKinds.cppm - Pervasive type enumerations ----------------===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Utility functions for pervasive types. 11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | export module m2lang.ast:PervasiveKinds; 15 | 16 | export namespace m2lang { 17 | 18 | namespace pervasive { 19 | enum PervasiveTypeKind { 20 | #define BUILTIN_TYPE(Id) Id, 21 | #include "m2lang/AST/PervasiveTypes.def" 22 | }; 23 | 24 | const char *getPervasiveTypeName(PervasiveTypeKind Kind); 25 | 26 | enum PervasiveFunctionKind { 27 | #define PROCEDURE(Name) Proc_##Name, 28 | #define FUNCTION(Name) Func_##Name, 29 | #include "m2lang/AST/PervasiveFunctions.def" 30 | }; 31 | 32 | const char *getPervasiveFunctionName(PervasiveFunctionKind Kind); 33 | 34 | } // namespace pervasive 35 | 36 | } // namespace m2lang 37 | 38 | using namespace m2lang; 39 | 40 | const char *pervasive::getPervasiveTypeName(PervasiveTypeKind Kind) { 41 | switch (Kind) { 42 | #define PERVASIVE_TYPE(Id, Name) \ 43 | case pervasive::Id: \ 44 | return #Name; 45 | #include "m2lang/AST/PervasiveTypes.def" 46 | default: 47 | return ""; 48 | } 49 | } 50 | 51 | /// Returns the name of the pervasive procedure or function \p Kind, or an 52 | /// empty string if \p Kind is not one of the enumerators. 53 | const char *pervasive::getPervasiveFunctionName(PervasiveFunctionKind Kind) { 54 | switch (Kind) { 55 | #define PROCEDURE(Name) \ 56 | case pervasive::Proc_##Name: \ 57 | return #Name; 58 | #define FUNCTION(Name) \ 59 | case pervasive::Func_##Name: \ 60 | return #Name; 61 | #include "m2lang/AST/PervasiveFunctions.def" 62 | } 63 | // Not reached for valid enumerators; mirrors getPervasiveTypeName() so that 64 | // an out-of-range value does not run off the end of the function. 65 | return ""; 66 | } 67 | -------------------------------------------------------------------------------- /lib/AST/xmake.lua: -------------------------------------------------------------------------------- 1 | target("ast") 2 | add_packages("llvm") 3 | set_kind("static") 4 | add_deps("basic") 5 | add_files("*.cppm", {public = true}) 6 |
-------------------------------------------------------------------------------- /lib/ASTtool/Diagnostic.cpp: -------------------------------------------------------------------------------- 1 | //===--- Diagnostic.cpp - ASTtool diagnostic output -------------*- C++ -*-===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Implements the error printing interface. 11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | #include "asttool/Diagnostic.h" 15 | #include "llvm/ADT/Twine.h" 16 | #include "llvm/Support/SMLoc.h" 17 | #include "llvm/Support/SourceMgr.h" 18 | 19 | using namespace asttool; 20 | 21 | void Diagnostic::error(llvm::SMLoc Loc, const llvm::Twine &Msg) { 22 | SrcMgr.PrintMessage(Loc, llvm::SourceMgr::DK_Error, Msg); 23 | ++Errors; 24 | } 25 | 26 | void Diagnostic::error(const char *Loc, const llvm::Twine &Msg) { 27 | error(llvm::SMLoc::getFromPointer(Loc), Msg); 28 | } 29 | 30 | void Diagnostic::warning(llvm::SMLoc Loc, const llvm::Twine &Msg) { 31 | SrcMgr.PrintMessage(Loc, llvm::SourceMgr::DK_Warning, Msg); 32 | } 33 | 34 | void Diagnostic::warning(const char *Loc, const llvm::Twine &Msg) { 35 | warning(llvm::SMLoc::getFromPointer(Loc), Msg); 36 | } 37 | 38 | void Diagnostic::note(llvm::SMLoc Loc, const llvm::Twine &Msg) { 39 | SrcMgr.PrintMessage(Loc, llvm::SourceMgr::DK_Note, Msg); 40 | } 41 | 42 | void Diagnostic::note(const char *Loc, const llvm::Twine &Msg) { 43 | note(llvm::SMLoc::getFromPointer(Loc), Msg); 44 | } 45 | -------------------------------------------------------------------------------- /lib/ASTtool/Lexer.cpp: -------------------------------------------------------------------------------- 1 | //===--- Lexer.cpp
- ASTtool lexer ------------------------------*- C++ -*-===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the ASTtool lexer implementation. 11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | #include "asttool/Lexer.h" 15 | #include "llvm/ADT/StringSwitch.h" 16 | 17 | using namespace asttool; 18 | 19 | namespace charinfo { 20 | LLVM_READNONE inline bool isDigit(char C) { return C >= '0' && C <= '9'; } 21 | 22 | LLVM_READNONE inline bool isLetter(char C) { 23 | return (C >= 'A' && C <= 'Z') || (C >= 'a' && C <= 'z'); 24 | } 25 | } // namespace charinfo 26 | 27 | void Lexer::next(Token &Tok) { 28 | repeat: 29 | while (*CurPtr == '\r' || *CurPtr == '\n' || *CurPtr == ' ' || 30 | *CurPtr == '\t' || *CurPtr == '\f' || *CurPtr == '\v') { 31 | ++CurPtr; 32 | } 33 | if (!*CurPtr) { 34 | Tok.Kind = tok::eoi; 35 | return; 36 | } 37 | if (charinfo::isLetter(*CurPtr)) { 38 | identifier(Tok); 39 | return; 40 | } 41 | switch (*CurPtr) { 42 | #define CASE(ch, tok) \ 43 | case ch: \ 44 | formToken(Tok, CurPtr + 1, tok); \ 45 | return 46 | CASE('=', tok::equal); 47 | CASE(':', tok::colon); 48 | CASE(',', tok::comma); 49 | CASE(';', tok::semi); 50 | #undef CASE 51 | case '<': 52 | if (*(CurPtr + 1) == ':') { 53 | formToken(Tok, CurPtr + 2, tok::lesscolon); 54 | return; 55 | } 56 | break; 57 | case '/': 58 | if (char Ch = *(CurPtr + 1)) { 59 | if (Ch == '*') { 60 | multilinecomment(); 61 | goto repeat; 62 | } 63 | if (Ch == '/') { 64 | singlelinecomment(); 65 | goto repeat; 66 | } 67 | } 68 | break; 69 | case '"': 70 | case '\'': 71 | string(Tok); 72 | return; 73 | case '{': 74 | code(Tok, '{', '}', tok::code); 75 | return; 76 | case '%': 77 |
if (*(CurPtr + 1) == '%') 78 | formToken(Tok, CurPtr + 2, tok::percentpercent); 79 | else 80 | keyword(Tok); 81 | return; 82 | default: 83 | break; 84 | } 85 | formToken(Tok, CurPtr + 1, tok::unknown); 86 | } 87 | 88 | void Lexer::identifier(Token &Tok) { 89 | bool Qualified = false; 90 | const char *Start = CurPtr; 91 | const char *End = CurPtr + 1; 92 | while (charinfo::isLetter(*End) || charinfo::isDigit(*End) || *End == '_' || 93 | *End == '.') { 94 | if (*End == '.') 95 | Qualified = true; 96 | ++End; 97 | } 98 | formToken(Tok, End, Qualified ? tok::qualidentifier : tok::identifier); 99 | Tok.Ptr = Start; 100 | } 101 | 102 | void Lexer::keyword(Token &Tok) { 103 | const char *Start = CurPtr; 104 | const char *End = CurPtr + 1; 105 | while (charinfo::isLetter(*End) || charinfo::isDigit(*End)) 106 | ++End; 107 | // Exclude '%' from compare. 108 | llvm::StringRef Keyword = llvm::StringRef(Start + 1, End - Start - 1); 109 | tok::TokenKind Kind = llvm::StringSwitch(Keyword) 110 | .Case("base", tok::kw_base) 111 | .Case("default", tok::kw_default) 112 | .Case("define", tok::kw_define) 113 | .Case("enum", tok::kw_enum) 114 | .Case("in", tok::kw_in) 115 | .Case("language", tok::kw_language) 116 | .Case("let", tok::kw_let) 117 | .Case("list", tok::kw_list) 118 | .Case("node", tok::kw_node) 119 | .Case("out", tok::kw_out) 120 | .Case("plain", tok::kw_plain) 121 | .Case("typedef", tok::kw_typedef) 122 | .Default(tok::unknown); 123 | if (Kind == tok::unknown) 124 | Diag.error(Start, "unrecognized keyword"); 125 | formToken(Tok, End, Kind); 126 | } 127 | 128 | void Lexer::code(Token &Tok, char Open, const char Close, tok::TokenKind Kind) { 129 | const char *Start = CurPtr; 130 | const char *End = CurPtr + 1; 131 | const bool Dot = *End == '.'; 132 | if (Dot) { 133 | do { 134 | ++End; 135 | while (*End && *End != Close) 136 | ++End; 137 | } while (Dot && *End && Start + 1 < End && End[-1] != '.'); 138 | } else { 139 | unsigned Level = 1; 140 | while (*End && (*End != Close
|| --Level)) { 141 | if (*End == Open) 142 | ++Level; 143 | ++End; 144 | } 145 | } 146 | if (!*End) 147 | Diag.error(Start, "unterminated code"); 148 | // Skip the closing delimiter, but never step past the terminating NUL. 149 | formToken(Tok, *End ? End + 1 : End, Kind); 150 | } 151 | 152 | /// Lexes a quoted string. A string must be closed by the quote character that 153 | /// opened it before the end of the line; otherwise an error is reported and 154 | /// the token ends at the line break or at the end of the input. 155 | void Lexer::string(Token &Tok) { 156 | const char *Start = CurPtr; 157 | const char *End = CurPtr + 1; 158 | while (*End && *End != *Start && *End != '\n' && *End != '\r') 159 | ++End; 160 | if (*End == *Start) 161 | ++End; // Consume the closing quote. 162 | else 163 | Diag.error(Start, "unterminated string"); 164 | formToken(Tok, End, tok::string); 165 | Tok.Ptr = Start; 166 | } 167 | 168 | /// Skips a block comment. CurPtr must point at the two-character comment 169 | /// opener. An unterminated comment is reported and leaves CurPtr at the end 170 | /// of the input. 171 | void Lexer::multilinecomment() { 172 | const char *Start = CurPtr; 173 | CurPtr += 2; 174 | while (*CurPtr && !(CurPtr[0] == '*' && CurPtr[1] == '/')) 175 | ++CurPtr; 176 | if (!*CurPtr) { 177 | Diag.error(Start, "unterminated comment"); 178 | return; 179 | } 180 | CurPtr += 2; 181 | } 182 | 183 | /// Skips a single-line comment including its line ending, if there is one. 184 | void Lexer::singlelinecomment() { 185 | // Line endings: Unix \n, Mac \r, Dos/Windows \r\n 186 | while (*CurPtr && *CurPtr != '\n' && *CurPtr != '\r') 187 | ++CurPtr; 188 | if (*CurPtr == '\r' && *(CurPtr + 1) == '\n') 189 | ++CurPtr; 190 | if (*CurPtr) // Stay on the terminating NUL at the end of the input. 191 | ++CurPtr; 192 | } 193 | 194 | /// Fills \p Tok with the text [CurPtr, TokEnd) and moves CurPtr to \p TokEnd. 195 | void Lexer::formToken(Token &Tok, const char *TokEnd, tok::TokenKind Kind) { 196 | size_t TokLen = TokEnd - CurPtr; 197 | Tok.Ptr = CurPtr; 198 | Tok.Length = TokLen; 199 | Tok.Kind = Kind; 200 | CurPtr = TokEnd; 201 | } -------------------------------------------------------------------------------- /lib/ASTtool/Main.cpp: -------------------------------------------------------------------------------- 1 | //===--- Main.cpp - ASTtool LLtool main entry point -------------*- C++ -*-===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information.
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Implements the main entry point for ASTtool. 11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | #include "asttool/Main.h" 15 | #include "asttool/ASTDefinition.h" 16 | #include "asttool/ClassEmitter.h" 17 | #include "asttool/Diagnostic.h" 18 | #include "asttool/Parser.h" 19 | #include "llvm/Support/CommandLine.h" 20 | #include "llvm/Support/FileSystem.h" 21 | #include "llvm/Support/InitLLVM.h" 22 | #include "llvm/Support/MemoryBuffer.h" 23 | #include "llvm/Support/ToolOutputFile.h" 24 | #include "llvm/Support/raw_ostream.h" 25 | 26 | using namespace asttool; 27 | 28 | static llvm::cl::opt 29 | OutputFilename("o", llvm::cl::desc("Output filename"), 30 | llvm::cl::value_desc("filename"), llvm::cl::init("-")); 31 | 32 | static llvm::cl::opt InputFilename(llvm::cl::Positional, 33 | llvm::cl::desc(""), 34 | llvm::cl::init("-")); 35 | 36 | static llvm::cl::opt 37 | WriteIfChanged("write-if-changed", 38 | llvm::cl::desc("Only write output if it changed")); 39 | 40 | static int reportError(const char *ProgName, llvm::Twine Msg) { 41 | llvm::errs() << ProgName << ": " << Msg; 42 | llvm::errs().flush(); 43 | return 1; 44 | } 45 | 46 | int asttool::runASTtoolMain(const char *Argv0) { 47 | // Read the input file. 48 | llvm::ErrorOr> FileOrErr = 49 | llvm::MemoryBuffer::getFileOrSTDIN(InputFilename); 50 | if (std::error_code EC = FileOrErr.getError()) 51 | return reportError(Argv0, "Could not open input file '" + InputFilename + 52 | "': " + EC.message() + "\n"); 53 | 54 | llvm::SourceMgr SrcMgr; 55 | 56 | // Tell SrcMgr about this buffer, which is what the parser will pick up. 57 | SrcMgr.AddNewSourceBuffer(std::move(*FileOrErr), llvm::SMLoc()); 58 | 59 | // Parse the input.
60 | Parser TheParser(SrcMgr); 61 | VarStore Vars; 62 | ASTDefinition ASTDef = TheParser.parse(Vars); 63 | 64 | // Do not generate output if syntactic or semantic errors occurred. 65 | if (TheParser.getDiag().errorsOccured()) 66 | return reportError(Argv0, llvm::Twine(TheParser.getDiag().errorsPrinted()) + 67 | " errors.\n"); 68 | 69 | // Write output to memory. 70 | std::string OutString; 71 | llvm::raw_string_ostream Out(OutString); 72 | emitClass(ASTDef, Vars, Out); 73 | 74 | if (WriteIfChanged) { 75 | // Only update the real output file if there are any differences. 76 | // This prevents recompilation of all the files depending on it if there 77 | // aren't any. 78 | if (auto ExistingOrErr = llvm::MemoryBuffer::getFile(OutputFilename)) 79 | if (std::move(ExistingOrErr.get())->getBuffer() == Out.str()) 80 | return 0; 81 | } 82 | 83 | std::error_code EC; 84 | llvm::ToolOutputFile OutFile(OutputFilename, EC, llvm::sys::fs::OF_None); 85 | if (EC) 86 | return reportError(Argv0, "error opening " + OutputFilename + ":" + 87 | EC.message() + "\n"); 88 | OutFile.os() << Out.str(); 89 | 90 | // Declare success. 91 | OutFile.keep(); 92 | return 0; 93 | } -------------------------------------------------------------------------------- /lib/ASTtool/Parser.cpp: -------------------------------------------------------------------------------- 1 | //===--- Parser.cpp - ASTtool parser ----------------------------*- C++ -*-===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Implements the ASTtool parser class.
11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | #include "asttool/Parser.h" 15 | 16 | namespace asttool { 17 | using llvm::SMLoc; 18 | using llvm::StringRef; 19 | } // namespace asttool 20 | 21 | using namespace asttool; 22 | 23 | namespace { 24 | template T tokenAs(Token Tok) { return T(Tok); } 25 | 26 | template <> Identifier tokenAs(Token Tok) { 27 | return Identifier(Tok.getLoc(), Tok.getData()); 28 | } 29 | } // namespace 30 | 31 | ASTDefinition Parser::parse(VarStore &Vars) { 32 | _TokenBitSet FollowSet{tok::eoi}; 33 | parseAsttool(FollowSet); 34 | Vars = Builder.varStore(); 35 | return Builder.build(); 36 | } 37 | 38 | #define PARSER_DEFINITION 39 | #include "asttool/asttool.inc" 40 | #undef PARSER_DEFINITION 41 | -------------------------------------------------------------------------------- /lib/ASTtool/VarStore.cpp: -------------------------------------------------------------------------------- 1 | //===--- VarStore.cpp - ASTtool variable container --------------*- C++ -*-===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the variable container. Variables can be defined in the AST file 11 | /// with the %define directive. They are mainly used to customize the code 12 | /// generation. 
13 | /// 14 | //===----------------------------------------------------------------------===// 15 | 16 | #include "asttool/VarStore.h" 17 | 18 | using namespace asttool; 19 | 20 | namespace { 21 | static const char *ExternalNames[] = { 22 | #define VAR(NAME, VAR, TYPE, DEFAULT) NAME, 23 | #include "asttool/Variables.def" 24 | }; 25 | static var::VarType Types[] = { 26 | #define VAR(NAME, VAR, TYPE, DEFAULT) var::TYPE, 27 | #include "asttool/Variables.def" 28 | }; 29 | static llvm::StringLiteral Defaults[] = { 30 | #define VAR(NAME, VAR, TYPE, DEFAULT) DEFAULT, 31 | #include "asttool/Variables.def" 32 | }; 33 | } // namespace 34 | 35 | var::VarType VarStore::getType(var::VarName Name) const { 36 | assert(Name != var::NUM_VARIABLES); 37 | return Types[Name]; 38 | } 39 | 40 | llvm::StringRef VarStore::getDefault(var::VarName Name) const { 41 | assert(Name != var::NUM_VARIABLES); 42 | return Defaults[Name]; 43 | } 44 | 45 | VarStore::VarStore() {} 46 | 47 | llvm::Error VarStore::add(llvm::StringRef Name, llvm::StringRef Value, 48 | var::VarType Type) { 49 | unsigned Idx = 0; 50 | for (; Idx < var::NUM_VARIABLES; ++Idx) 51 | if (ExternalNames[Idx] == Name) 52 | break; 53 | if (Idx >= var::NUM_VARIABLES) { 54 | return llvm::make_error( 55 | llvm::Twine("unknown variable name ").concat(Name), 56 | llvm::inconvertibleErrorCode()); 57 | } 58 | if (Types[Idx] != Type) { 59 | return llvm::make_error("wrong variable type", 60 | llvm::inconvertibleErrorCode()); 61 | } 62 | if (Type == var::Flag && (Value != "" && Value != "true")) { 63 | return llvm::make_error( 64 | llvm::Twine("wrong value for flag variable ").concat(Name), 65 | llvm::inconvertibleErrorCode()); 66 | } 67 | Vars[Idx] = Value; 68 | return llvm::Error::success(); 69 | } 70 | 71 | void VarStore::set(var::VarName Name, llvm::StringRef Value) { 72 | assert(Name != var::NUM_VARIABLES); 73 | var::VarType Ty = Types[Name]; 74 | assert(Ty != var::Flag || (Value == "" || Value == "true")); 75 | Vars[Name] = Value; 76 | 
} 77 | -------------------------------------------------------------------------------- /lib/ASTtool/asttool.ast: -------------------------------------------------------------------------------- 1 | /* AST for ASTtool */ 2 | %language "c++" 3 | %typedef Loc {llvm::SMLoc} 4 | %typedef StringRef {llvm::StringRef} 5 | %% 6 | 7 | %plain Identifier = 8 | %in Loc : Loc, 9 | %in String : StringRef 10 | ; 11 | 12 | %base Member = 13 | %in Name : Identifier 14 | ; 15 | 16 | %node Enum <: Member = 17 | %in Code : StringRef 18 | ; 19 | 20 | %node Field <: Member = 21 | %enum Property {In = 0x01, Out = 0x02}, 22 | %enum InitFlags {None = 0x00, Code = 0x01, Default = 0x03}, 23 | %in Properties : unsigned, 24 | %in Initializer : unsigned, 25 | %in TypeName : StringRef, 26 | %in TypeIsList : bool, 27 | %in Code : StringRef 28 | ; 29 | 30 | %base Declaration = 31 | %in Name : Identifier 32 | ; 33 | 34 | %node Let <: Declaration = 35 | %in Class : Class, 36 | %in Field : Field, 37 | %in Code : StringRef, 38 | %in Default : bool 39 | ; 40 | 41 | %node Class <: Declaration = 42 | %enum ClassType {Plain, Base, Node}, 43 | %in Type : ClassType, 44 | %in SuperClass : Class, 45 | %in Members : %list Member, 46 | %in LetDefaults : %list Let, 47 | SubClasses : %list Class, 48 | Defined : bool = {false} 49 | ; 50 | -------------------------------------------------------------------------------- /lib/ASTtool/asttool.g: -------------------------------------------------------------------------------- 1 | /* Grammar for ASTtool */ 2 | %language "c++" 3 | %define api.parser.class {Parser} 4 | %token identifier, qualidentifier, code, string 5 | %token "%typedef" = kw_typedef, "%node" = kw_node, "%base" = kw_base 6 | %token "%language" = kw_language, "%plain" = kw_plain, "%list" = kw_list 7 | %token "%enum" = kw_enum, "%in" = kw_in, "%out" = kw_out, "%let" = kw_let 8 | %token "%default" = kw_default, "%define" = kw_define 9 | %start asttool 10 | %% 11 | asttool 12 | : ( header )? 
( typedecl )+ ; 13 | 14 | header 15 | : ("%typedef" typedeflist 16 | | "%language" string { Builder.actOnLanguage(tokenAs(Tok)); } 17 | | "%define" { SMLoc Loc; StringRef Ident, Value; var::VarType Type = var::Flag; } 18 | ( { Loc = Tok.getLoc(); Ident = Tok.getData(); } 19 | ( identifier | qualidentifier ) 20 | ) 21 | ( { Value = Tok.getData(); } 22 | ( code { Type = var::Code; } 23 | | identifier { Type = var::Identifier; } 24 | | qualidentifier { Type = var::Identifier; } 25 | | string { Type = var::String; } 26 | ) 27 | )? { Builder.actOnDefine(Loc, Ident, Value, Type); } 28 | )* 29 | "%%" { Builder.finalizeTypedefs(); } 30 | ; 31 | 32 | typedeflist 33 | : typedef ("," typedef )* 34 | ; 35 | 36 | typedef 37 | : identifier { Identifier Name = tokenAs(Tok); } 38 | code { Builder.actOnTypedef(Name, Tok.getData()); } 39 | ; 40 | 41 | typedecl 42 | : { Class::ClassType CType; } 43 | ( "%node" { CType = Class::Node; } 44 | | "%base" { CType = Class::Base; } 45 | | "%plain" { CType = Class::Plain; } 46 | ) 47 | identifier { Identifier Name = tokenAs(Tok); } 48 | { Class *Super = nullptr; } 49 | ( super )? 50 | { MemberList MemberList; LetList LetList; } 51 | ( "=" body )? 52 | ";" { Builder.actOnTypedecl(CType, Name, Super, MemberList, LetList); } 53 | ; 54 | 55 | super 56 | : "<:" identifier { Builder.actOnSuperClass(Super, tokenAs(Tok)); } 57 | ; 58 | 59 | body &MemberList, llvm::SmallVectorImpl &LetList, Class *Super> 60 | : ( decl ( "," decl )* )? 61 | ; 62 | 63 | decl &MemberList, llvm::SmallVectorImpl &LetList, Class *Super> 64 | : { unsigned Properties = 0; } 65 | ( property )? 66 | identifier { Identifier Name = tokenAs(Tok); } 67 | ":" { bool TypeIsList = false; } 68 | ( "%list" { TypeIsList = true; } 69 | )? 70 | identifier { Identifier TypeName = tokenAs(Tok); } 71 | { bool IsDefault = false; llvm::StringRef Code; } 72 | ( init )? 
73 | { Builder.actOnField(MemberList, Properties, Name, TypeName, TypeIsList, IsDefault, Code); } 74 | | "%enum" identifier { Identifier Name = tokenAs(Tok); } 75 | code { Builder.actOnEnum(MemberList, Name, Tok.getData()); } 76 | | "%let" identifier { Identifier Name = tokenAs(Tok); } 77 | { bool IsDefault; llvm::StringRef Code; } 78 | init { Builder.actOnLet(LetList, Name, Super, IsDefault, Code); } 79 | ; 80 | 81 | init 82 | : "=" ( "%default" { IsDefault = true; } 83 | | code { IsDefault = false; Code = Tok.getData(); } 84 | ) 85 | ; 86 | 87 | property 88 | : ( "%in" { Builder.actOnPropertyIn(Properties, Tok.getLoc()); } 89 | | "%out" { Builder.actOnPropertyOut(Properties, Tok.getLoc()); } 90 | )+ 91 | ; -------------------------------------------------------------------------------- /lib/ASTtool/xmake.lua: -------------------------------------------------------------------------------- 1 | target("asttool") 2 | add_packages("llvm") 3 | set_kind("static") 4 | add_files("*.cpp") 5 | -------------------------------------------------------------------------------- /lib/Basic/Basic.cppm: -------------------------------------------------------------------------------- 1 | //===--- Basic.cppm - Basic module defintion ------------------------------===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the module m2lang.basic; 11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | module; 15 | 16 | export module m2lang.basic; 17 | 18 | export import :Config; 19 | export import :Diagnostic; 20 | export import :LangOptions; 21 | export import :TargetInfo; 22 | export import :TargetOptions; 23 | export import :TokenKinds; 24 | export import :Version; 25 | -------------------------------------------------------------------------------- /lib/Basic/Config.cppm: -------------------------------------------------------------------------------- 1 | //===--- Config.cppm - Global configuration -------------------------------===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines configuration flags. 
11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | module; 15 | 16 | #include "llvm/Support/Compiler.h" 17 | 18 | export module m2lang.basic:Config; 19 | 20 | export namespace m2lang { 21 | namespace cfg { 22 | constexpr bool hasAArch64Target = LLVM_HAS_AARCH64_TARGET; 23 | constexpr bool hasARMTarget = LLVM_HAS_ARM_TARGET; 24 | constexpr bool hasPowerPCTarget = LLVM_HAS_POWERPC_TARGET; 25 | constexpr bool hasSystemZTarget = LLVM_HAS_SYSTEMZ_TARGET; 26 | constexpr bool hasX86Target = LLVM_HAS_X86_TARGET; 27 | } // namespace cfg 28 | } // namespace m2lang 29 | -------------------------------------------------------------------------------- /lib/Basic/Diagnostic.cppm: -------------------------------------------------------------------------------- 1 | //===--- Diagnostic.cppm - M2 Language Family Diagnostic Handling ---------===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | // Implements the Diagnostic-related interfaces. 
11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | module; 15 | 16 | #include "llvm/ADT/IntrusiveRefCntPtr.h" 17 | #include "llvm/ADT/SmallVector.h" 18 | #include "llvm/ADT/StringRef.h" 19 | #include "llvm/Support/SMLoc.h" 20 | #include "llvm/Support/SourceMgr.h" 21 | 22 | export module m2lang.basic:Diagnostic; 23 | 24 | export namespace m2lang { 25 | 26 | namespace diag { 27 | enum { 28 | #define DIAG(X, Y, Z) X, 29 | #include "Diagnostic.def" 30 | #undef DIAG 31 | }; 32 | } // namespace diag 33 | 34 | class DiagnosticBuilder; 35 | 36 | class DiagnosticsEngine : public llvm::RefCountedBase { 37 | friend class DiagnosticBuilder; 38 | 39 | /// The location of the current diagnostic that is in flight. 40 | llvm::SMLoc CurDiagLoc; 41 | 42 | /// The ID of the current diagnostic that is in flight. 43 | /// 44 | /// This is set to std::numeric_limits::max() when there is no 45 | /// diagnostic in flight. 46 | unsigned CurDiagID; 47 | 48 | enum { 49 | /// The maximum number of arguments we can hold. 50 | /// 51 | /// We currently only support up to 10 arguments (%0-%9). A single 52 | /// diagnostic with more than that almost certainly has to be simplified 53 | /// anyway. 54 | MaxArguments = 10, 55 | }; 56 | 57 | /// The source manager associated with this diagnostics engine. 
58 | llvm::SourceMgr &SrcMgr; 59 | 60 | /// Number of errors reported 61 | unsigned NumErrors; 62 | 63 | llvm::SmallVector Args; 64 | 65 | void emitDiagnostics(); 66 | 67 | void formatDiagnostic(llvm::StringRef DiagStr, 68 | llvm::SmallVectorImpl &OutStr) const; 69 | 70 | public: 71 | explicit DiagnosticsEngine(llvm::SourceMgr &SrcMgr); 72 | DiagnosticsEngine(const DiagnosticsEngine &) = delete; 73 | DiagnosticsEngine &operator=(const DiagnosticsEngine &) = delete; 74 | 75 | void clear() { 76 | CurDiagID = std::numeric_limits::max(); 77 | Args.clear(); 78 | } 79 | 80 | DiagnosticBuilder report(llvm::SMLoc Loc, unsigned DiagID); 81 | 82 | unsigned getNumErrors() { return NumErrors; } 83 | }; 84 | 85 | class DiagnosticBuilder { 86 | friend DiagnosticsEngine; 87 | 88 | DiagnosticsEngine *Diag; 89 | 90 | explicit DiagnosticBuilder(DiagnosticsEngine *Diag) : Diag(Diag) {} 91 | 92 | void emit() { Diag->emitDiagnostics(); } 93 | 94 | public: 95 | ~DiagnosticBuilder() { emit(); } 96 | 97 | void addArg(llvm::StringRef Arg) const { Diag->Args.push_back(Arg); } 98 | }; 99 | 100 | inline const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, 101 | llvm::StringRef S) { 102 | DB.addArg(S); 103 | return DB; 104 | } 105 | 106 | inline const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, 107 | const char *Str) { 108 | DB.addArg(Str); 109 | return DB; 110 | } 111 | 112 | } // namespace m2lang 113 | 114 | using namespace m2lang; 115 | 116 | namespace { 117 | const char *DiagnosticText[] = { 118 | #define DIAG(X, Y, Z) Z, 119 | #include "Diagnostic.def" 120 | #undef DIAG 121 | }; 122 | const char *getDiagnosticText(unsigned DiagID) { 123 | return DiagnosticText[DiagID]; 124 | } 125 | 126 | llvm::SourceMgr::DiagKind DiagnosticKind[] = { 127 | #define DIAG(X, Y, Z) llvm::SourceMgr::DK_##Y, 128 | #include "Diagnostic.def" 129 | }; 130 | llvm::SourceMgr::DiagKind getDiagnosticKind(unsigned DiagID) { 131 | return DiagnosticKind[DiagID]; 132 | } 133 | } // namespace 
134 | 135 | DiagnosticsEngine::DiagnosticsEngine(llvm::SourceMgr &SrcMgr) 136 | : SrcMgr(SrcMgr), NumErrors(0) { 137 | clear(); 138 | } 139 | 140 | void DiagnosticsEngine::formatDiagnostic( 141 | llvm::StringRef DiagStr, llvm::SmallVectorImpl &OutStr) const { 142 | // Very hacky. A more robust implemantation is required. 143 | const char *Ptr = DiagStr.begin(); 144 | const char *DiagEnd = DiagStr.end(); 145 | while (Ptr != DiagEnd) { 146 | if (Ptr[0] != '%') { 147 | const char *StrEnd = std::find(Ptr, DiagEnd, '%'); 148 | OutStr.append(Ptr, StrEnd); 149 | Ptr = StrEnd; 150 | continue; 151 | } else { 152 | ++Ptr; 153 | // Needs a check! 154 | unsigned ArgNo = *Ptr++ - '0'; 155 | OutStr.append(Args[ArgNo].begin(), Args[ArgNo].end()); 156 | } 157 | } 158 | } 159 | 160 | void DiagnosticsEngine::emitDiagnostics() { 161 | const char *DiagText = getDiagnosticText(CurDiagID); 162 | const llvm::SourceMgr::DiagKind Kind = getDiagnosticKind(CurDiagID); 163 | llvm::SmallVector Msg; 164 | formatDiagnostic(DiagText, Msg); 165 | SrcMgr.PrintMessage(CurDiagLoc, Kind, Msg); 166 | if (Kind == llvm::SourceMgr::DK_Error) 167 | ++NumErrors; 168 | clear(); 169 | } 170 | 171 | DiagnosticBuilder DiagnosticsEngine::report(llvm::SMLoc Loc, unsigned DiagID) { 172 | assert(CurDiagID == std::numeric_limits::max() && 173 | "Multiple diagnostics in flight at once!"); 174 | CurDiagLoc = Loc; 175 | CurDiagID = DiagID; 176 | return DiagnosticBuilder(this); 177 | } 178 | -------------------------------------------------------------------------------- /lib/Basic/Diagnostic.def: -------------------------------------------------------------------------------- 1 | //===--- Diagnostic.def - Modula-2 Diagnostic Messages --------------------===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | 9 | #ifndef DIAG 10 | #define DIAG(X,Y,Z) 11 | #endif 12 | 13 | // Messages from Lexer 14 | DIAG(err_unterminated_block_comment, Error, "unterminated (* comment") 15 | DIAG(err_unterminated_directive, Error, "unterminated <* directive") 16 | DIAG(err_unterminated_char_or_string, Error, "missing terminating character") 17 | DIAG(err_exponent_has_no_digits, Error, "exponent has no digits") 18 | DIAG(err_non_octal_digit_in_number, Error, "octal number has non-octal digits") 19 | DIAG(err_non_octal_digit_in_char, Error, "char literal has non-octal digits") 20 | DIAG(err_hex_digit_in_decimal, Error, "decimal number contains hex digit") 21 | DIAG(err_not_allowed_in_r10, Error, "symbol not allowed in R10") 22 | DIAG(err_requires_iso, Error, "symbol only allowed in ISO") 23 | 24 | // Messages from Preprocessor 25 | DIAG(err_unexpected_elseif_in_directive, Error, "unexpected ELSIF in directive") 26 | DIAG(err_unexpected_else_in_directive, Error, "unexpected ELSE in directive") 27 | DIAG(err_unexpected_end_in_directive, Error, "unexpected END in directive") 28 | DIAG(warn_version_tag_not_found, Warning, "version tag %0 not found, assuming FALSE") 29 | 30 | // Messages from Parser 31 | DIAG(err_expected, Error, "expected %0 but found %1") 32 | DIAG(err_unexpected_symbol, Error, "unexpected symbol") 33 | 34 | // Messages from Sema 35 | DIAG(err_symbol_already_declared, Error, "symbol %0 is already declared in this scope") 36 | DIAG(note_symbol_already_declared, Note, "symbol %0 was already declared here") 37 | DIAG(err_module_identifier_not_equal, Error, "module identifier at begin and end not equal - expected %0 but found %1") 38 | DIAG(err_proc_identifier_not_equal, Error, "procedure identifier at begin and end not equal - expected %0 but found %1") 39 | DIAG(err_symbol_not_declared, Error, "symbol %0 is not declared") 
40 | DIAG(err_undeclared_type, Error, "undeclared type %0") 41 | DIAG(err_type_expected, Error, "expected %0 to be a type") 42 | DIAG(err_ordinal_type_expected, Error, "ordinal type expected") 43 | 44 | DIAG(err_duplicate_field, Error, "duplicate field %0") 45 | DIAG(note_previous_declaration, Note, "previous declaration is here") 46 | 47 | DIAG(err_exit_not_inside_loop, Error, "EXIT not inside LOOP") 48 | DIAG(err_with_requires_record_type, Error, "WITH statement requires a record type") 49 | 50 | DIAG(err_symbol_already_in_export_list, Error, "symbol %0 already appeared in the export list") 51 | DIAG(note_symbol_already_in_export_list, Note, "symbol %0 appeared here in the export list") 52 | DIAG(err_exported_symbol_undeclared, Error, "exported symbol %0 is not declared in module %1") 53 | 54 | DIAG(err_symbol_already_in_import_list, Error, "symbol %0 already appeared in the import list") 55 | DIAG(note_symbol_already_in_import_list, Note, "symbol %0 appeared here in the import list") 56 | DIAG(err_imported_symbol_undeclared, Error, "imported symbol %0 is not declared in module %1") 57 | DIAG(err_module_expected, Error, "module name expected") 58 | 59 | DIAG(warn_ambigous_negation, Warning, "Negation is ambigous. 
Please consider using parenthesis.") 60 | 61 | DIAG(err_module_requires_simple_return, Error, "Only simple RETURN allowed in module block") 62 | DIAG(err_procedure_requires_simple_return, Error, "Only simple RETURN allowed in proper procedure") 63 | DIAG(err_function_requires_return_expression, Error, "RETURN in function requires expression") 64 | 65 | DIAG(err_not_requires_boolean_expression, Error, "NOT requires a boolean expression") 66 | DIAG(err_condition_requires_boolean_expression, Error, "condition requires a boolean expression") 67 | DIAG(err_expressions_are_not_compatible, Error, "left and right expression are not compatible") 68 | DIAG(err_expressions_are_not_assignable, Error, "left and right expression are not assignable") 69 | DIAG(err_ordinal_expressions_required, Error, "expression must be of ordinal type") 70 | DIAG(err_simple_variable_required, Error, "identifier must be simple variable") 71 | DIAG(err_whole_number_type_required, Error, "whole number type required") 72 | DIAG(err_constant_expected, Error, "constant expected") 73 | 74 | #undef DIAG -------------------------------------------------------------------------------- /lib/Basic/LangOptions.cppm: -------------------------------------------------------------------------------- 1 | //===--- LangOptions.cppm - M2 Language Family Language Options -----------===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the m2lang::LangOptions interface. 
11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | export module m2lang.basic:LangOptions; 15 | 16 | export namespace m2lang { 17 | 18 | /// Bitfields of LangOptions, split out from LangOptions in order to ensure that 19 | /// this large collection of bitfields is a trivial class type. 20 | class LangOptionsBase { 21 | public: 22 | // Define simple language options (with no accessors). 23 | #define LANGOPT(Name, Bits, Default, Description) unsigned Name : Bits; 24 | #define ENUM_LANGOPT(Name, Type, Bits, Default, Description) 25 | #include "LangOptions.def" 26 | 27 | protected: 28 | // Define language options of enumeration type. These are protected, and will 29 | // have accessors (below). 30 | #define LANGOPT(Name, Bits, Default, Description) 31 | #define ENUM_LANGOPT(Name, Type, Bits, Default, Description) \ 32 | unsigned Name : Bits; 33 | #include "LangOptions.def" 34 | }; 35 | 36 | /// Keeps track of the various options that can be 37 | /// enabled, which controls the dialect of Modula-2 that is accepted. 38 | class LangOptions : public LangOptionsBase { 39 | public: 40 | }; 41 | 42 | } // namespace m2lang 43 | -------------------------------------------------------------------------------- /lib/Basic/LangOptions.def: -------------------------------------------------------------------------------- 1 | //===--- LangOptions.def - M2 Language option database --------------------===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | // 9 | // This file defines the language options. Users of this file must 10 | // define the LANGOPT macro to make use of this information.
11 | // 12 | // Optionally, the user may also define: 13 | // 14 | // ENUM_LANGOPT: for options that have enumeration, rather than unsigned, type. 15 | // 16 | //===----------------------------------------------------------------------===// 17 | 18 | #ifndef LANGOPT 19 | # error Define the LANGOPT macro to handle language options 20 | #endif 21 | 22 | #ifndef ENUM_LANGOPT 23 | # define ENUM_LANGOPT(Name, Type, Bits, Default, Description) \ 24 | LANGOPT(Name, Bits, Default, Description) 25 | #endif 26 | 27 | LANGOPT(PIM , 1, 0, "Modula-2") 28 | LANGOPT(ISO , 1, 0, "ISO Modula-2") 29 | LANGOPT(ISOGenerics , 1, 0, "ISO Modula-2 with generics") 30 | LANGOPT(ISOObjects , 1, 0, "ISO Modula-2 with objects") 31 | LANGOPT(M2Plus , 1, 0, "SRC Modula-2+") 32 | LANGOPT(M2R10 , 1, 0, "Modula-2 R10") 33 | 34 | #undef LANGOPT 35 | #undef ENUM_LANGOPT 36 | -------------------------------------------------------------------------------- /lib/Basic/TargetInfo.cppm: -------------------------------------------------------------------------------- 1 | //===--- TargetInfo.cppm - Target-specific information --------------------===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines target-specific information. 
11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | module; 15 | 16 | #include "llvm/ADT/ArrayRef.h" 17 | #include "llvm/ADT/StringRef.h" 18 | #include "llvm/TargetParser/Triple.h" 19 | #include 20 | 21 | export module m2lang.basic:TargetInfo; 22 | 23 | import :Config; 24 | 25 | namespace m2lang { 26 | /// Describes a compilation target: its triple, data layout string and endianness. 27 | export class TargetInfo { 28 | llvm::Triple Triple; 29 | 30 | protected: 31 | std::string DataLayout; 32 | bool IsBigEndian; // No default here; every subclass constructor in this file assigns it. 33 | 34 | TargetInfo(llvm::Triple Triple) : Triple(Triple) {} 35 | 36 | public: 37 | virtual ~TargetInfo(); 38 | 39 | static TargetInfo *get(llvm::Triple); // Returns an instance allocated with new for the handled Linux triples, nullptr otherwise. 40 | 41 | const llvm::Triple &getTriple() const { return Triple; } 42 | const std::string &getDataLayout() const { return DataLayout; } 43 | virtual const llvm::ArrayRef> 44 | getTargetDefines() const = 0; // NOTE(review): the overrides in this file return a braced list; an ArrayRef built from such a temporary list may dangle once the return statement ends - confirm the lifetime. 45 | bool isLittleEndian() const { return !IsBigEndian; } 46 | bool isBigEndian() const { return IsBigEndian; } 47 | }; 48 | 49 | } // namespace m2lang 50 | 51 | using namespace m2lang; 52 | 53 | namespace { 54 | class AArch64TargetInfo : public TargetInfo { 55 | public: 56 | AArch64TargetInfo(llvm::Triple Triple) : TargetInfo(Triple) { 57 | DataLayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-" 58 | "i64:64-i128:128-n32:64-S128-Fn32"; 59 | IsBigEndian = false; 60 | } 61 | 62 | const llvm::ArrayRef> 63 | getTargetDefines() const override { 64 | return {{"ARCH", "aarch64"}}; 65 | } 66 | }; 67 | 68 | class ARMTargetInfo : public TargetInfo { 69 | public: 70 | ARMTargetInfo(llvm::Triple Triple) : TargetInfo(Triple) { 71 | DataLayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"; 72 | IsBigEndian = false; 73 | } 74 | 75 | const llvm::ArrayRef> 76 | getTargetDefines() const override { 77 | return {{"ARCH", "arm"}}; 78 | } 79 | }; 80 | 81 | class PowerPCTargetInfo : public TargetInfo { 82 | public: 83 | PowerPCTargetInfo(llvm::Triple Triple) : TargetInfo(Triple) { 84 | // ppc64le.
85 | DataLayout = "e-m:e-Fn32-i64:64-i128:128-n32:64"; 86 | IsBigEndian = false; 87 | } 88 | 89 | const llvm::ArrayRef> 90 | getTargetDefines() const override { 91 | return {{"ARCH", "ppc64le"}}; 92 | } 93 | }; 94 | 95 | class SystemZTargetInfo : public TargetInfo { 96 | public: 97 | SystemZTargetInfo(llvm::Triple Triple) : TargetInfo(Triple) { 98 | DataLayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64" 99 | "-v128:64-a:8:16-n32:64"; 100 | IsBigEndian = true; 101 | } 102 | 103 | const llvm::ArrayRef> 104 | getTargetDefines() const override { 105 | return {{"ARCH", "systemz"}}; 106 | } 107 | }; 108 | 109 | class X86TargetInfo : public TargetInfo { 110 | public: 111 | X86TargetInfo(llvm::Triple Triple) : TargetInfo(Triple) { 112 | DataLayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:" 113 | "64-i128:128-f80:128-n8:16:32:64-S128"; 114 | IsBigEndian = false; 115 | } 116 | 117 | const llvm::ArrayRef> 118 | getTargetDefines() const override { 119 | return {{"ARCH", "x86_64"}}; 120 | } 121 | }; 122 | } // namespace 123 | 124 | TargetInfo::~TargetInfo() = default; 125 | 126 | TargetInfo *TargetInfo::get(llvm::Triple Triple) { 127 | switch (Triple.getArch()) { 128 | case llvm::Triple::aarch64: 129 | if constexpr (cfg::hasAArch64Target) { 130 | if (Triple.isOSLinux()) 131 | return new AArch64TargetInfo(Triple); 132 | } 133 | break; 134 | case llvm::Triple::arm: 135 | if constexpr (cfg::hasARMTarget) { 136 | if (Triple.isOSLinux()) 137 | return new ARMTargetInfo(Triple); 138 | } 139 | break; 140 | case llvm::Triple::ppc64le: 141 | if constexpr (cfg::hasPowerPCTarget) { 142 | if (Triple.isOSLinux()) 143 | return new PowerPCTargetInfo(Triple); 144 | } 145 | break; 146 | case llvm::Triple::systemz: 147 | if constexpr (cfg::hasSystemZTarget) { 148 | if (Triple.isOSLinux()) 149 | return new SystemZTargetInfo(Triple); 150 | } 151 | break; 152 | case llvm::Triple::x86_64: 153 | if constexpr (cfg::hasX86Target) { 154 | if (Triple.isOSLinux()) 155 | return new X86TargetInfo(Triple); 
156 | } 157 | break; 158 | default: 159 | break; 160 | } 161 | return nullptr; 162 | } 163 | -------------------------------------------------------------------------------- /lib/Basic/TargetOptions.cppm: -------------------------------------------------------------------------------- 1 | //===--- TargetOptions.cppm - M2 Language Family Target Options -----------===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the m2lang::TargetOptions class. 11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | module; 15 | 16 | #include 17 | #include 18 | 19 | export module m2lang.basic:TargetOptions; 20 | 21 | namespace m2lang { 22 | /// Options for controlling the target. 23 | export class TargetOptions { 24 | public: 25 | /// The name of the target triple to compile for. 26 | std::string Triple; 27 | 28 | /// When compiling for the device side, contains the triple used to compile 29 | /// for the host. 30 | std::string HostTriple; 31 | 32 | /// If given, the name of the target CPU to generate code for. 33 | std::string CPU; 34 | 35 | /// The list of target specific features to enable or disable, as written on the command line. 36 | std::vector FeaturesAsWritten; 37 | 38 | /// The list of target specific features to enable or disable -- this should 39 | /// be a list of strings, each starting with '+' or '-'. 40 | std::vector Features; 41 | 42 | // The code model to be used as specified by the user. Corresponds to 43 | // CodeModel::Model enum defined in include/llvm/Support/CodeGen.h, plus 44 | // "default" for the case when the user has not explicitly specified a 45 | // code model.
46 | std::string CodeModel; 47 | }; 48 | 49 | } // namespace m2lang 50 | -------------------------------------------------------------------------------- /lib/Basic/TokenKinds.cppm: -------------------------------------------------------------------------------- 1 | //===--- TokenKinds.cppm - Token Kinds Support ----------------------------===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Implements the TokenKind enum and support functions. 11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | module; 15 | 16 | #include "llvm/Support/Compiler.h" 17 | #include "llvm/Support/ErrorHandling.h" 18 | 19 | export module m2lang.basic:TokenKinds; 20 | 21 | export namespace m2lang { 22 | 23 | namespace tok { 24 | enum TokenKind : unsigned short { 25 | #define TOK(X) X, 26 | #include "m2lang/Basic/TokenKinds.def" 27 | NUM_TOKENS 28 | }; 29 | 30 | const char *getTokenName(TokenKind Kind) LLVM_READNONE; 31 | 32 | const char *getPunctuatorSpelling(TokenKind Kind) LLVM_READNONE; 33 | 34 | const char *getKeywordSpelling(TokenKind Kind) LLVM_READNONE; 35 | } // end namespace tok 36 | } // end namespace m2lang 37 | 38 | using namespace m2lang; 39 | 40 | static const char *const TokNames[] = { 41 | #define TOK(X) #X, 42 | #define KEYWORD(X, Y) #X, 43 | #include "m2lang/Basic/TokenKinds.def" 44 | nullptr}; 45 | 46 | const char *tok::getTokenName(TokenKind Kind) { 47 | if (Kind < tok::NUM_TOKENS) 48 | return TokNames[Kind]; 49 | llvm_unreachable("unknown TokenKind"); 50 | return nullptr; 51 | } 52 | 53 | const char *tok::getPunctuatorSpelling(TokenKind Kind) { 54 | switch (Kind) { 55 | #define PUNCTUATOR(X, Y) \ 56 | case X: \ 57 | return Y; 58 | #include 
"m2lang/Basic/TokenKinds.def" 59 | default: 60 | break; 61 | } 62 | return nullptr; 63 | } 64 | 65 | const char *tok::getKeywordSpelling(TokenKind Kind) { 66 | switch (Kind) { 67 | #define KEYWORD(X, Y) \ 68 | case kw_##X: \ 69 | return #X; 70 | #include "m2lang/Basic/TokenKinds.def" 71 | default: 72 | break; 73 | } 74 | return nullptr; 75 | } -------------------------------------------------------------------------------- /lib/Basic/Version.cppm: -------------------------------------------------------------------------------- 1 | //===--- Version.cppm - M2lang version number -----------------------------===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the m2lang version numbers. 11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | module; 15 | 16 | #include "llvm/ADT/StringRef.h" 17 | 18 | export module m2lang.basic:Version; 19 | 20 | #include "generated/Basic/Version.inc" 21 | 22 | export namespace m2lang { 23 | /// Retrieves a string representing the complete m2lang version, 24 | /// which includes the m2lang version number, and the repository version.
25 | llvm::StringRef getM2langFullVersion() { 26 | return M2LANG_VERSION; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /lib/Basic/Version.inc.in: -------------------------------------------------------------------------------- 1 | #define M2LANG_VERSION "${CONFIG_APP_NAME} ${VERSION} - Modula-2 language compiler (git ${GIT_COMMIT})" 2 | #define M2LANG_VERSION_STRING "${VERSION}" 3 | #define M2LANG_VERSION_MAJOR ${VERSION_MAJOR} 4 | #define M2LANG_VERSION_MINOR ${VERSION_MINOR} 5 | #define M2LANG_VERSION_PATCHLEVEL ${VERSION_ALTER} 6 | -------------------------------------------------------------------------------- /lib/Basic/xmake.lua: -------------------------------------------------------------------------------- 1 | target("basic") 2 | add_packages("llvm") 3 | set_kind("static") 4 | -- Adding a local include dir seems not to be honored by scan-deps. 5 | set_configdir("$(buildir)/$(plat)/$(arch)/$(mode)/generated/Basic") 6 | add_configfiles("$(projectdir)/lib/Basic/Version.inc.in") 7 | add_files("*.cppm", {public = true}) 8 | -------------------------------------------------------------------------------- /lib/CodeGen/CGDebug.h: -------------------------------------------------------------------------------- 1 | //===--- CGDebug.h - Debug information ------------------------------------===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Adds debug information. 
11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | #ifndef M2LANG_CODEGEN_DEBUG_H 15 | #define M2LANG_CODEGEN_DEBUG_H 16 | 17 | #include "llvm/IR/DIBuilder.h" 18 | 19 | namespace m2lang { 20 | 21 | class CGDebug { 22 | llvm::DIBuilder DBuilder; 23 | public: 24 | CGDebug(llvm::Module &Mod) 25 | : DBuilder(Mod) {} 26 | 27 | void demo(); 28 | }; 29 | 30 | } // namespace m2lang 31 | #endif -------------------------------------------------------------------------------- /lib/CodeGen/CGModule.cppm: -------------------------------------------------------------------------------- 1 | //===--- CGModule.cppm - Code Generator for Modules -----------------------===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the code generator implementation for modules. 
11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | module; 15 | 16 | #include "llvm/ADT/SmallVector.h" 17 | #include "llvm/IR/CFG.h" 18 | #include "llvm/IR/Constants.h" 19 | #include "llvm/IR/DerivedTypes.h" 20 | #include "llvm/IR/Type.h" 21 | #include "llvm/IR/Value.h" 22 | #include "llvm/Support/Casting.h" 23 | #include 24 | 25 | export module m2lang.codegen:CGModule; 26 | 27 | import :CGProcedure; 28 | import :CGUnit; 29 | import m2lang.ast; 30 | 31 | namespace m2lang { 32 | class CGModule : public CGUnit { 33 | public: 34 | CGModule(ASTContext &ASTCtx, llvm::Module *M) : CGUnit(ASTCtx, M) {} 35 | 36 | void emitDecls(Declaration *Mod, DeclarationList &Decls, const Block &InitBlk, 37 | const Block &FinalBlk); 38 | void run(CompilationModule *CM); 39 | }; 40 | } // namespace m2lang 41 | 42 | using namespace m2lang; 43 | /// Walks Decls: creates a global per variable (recorded in Globals), runs CGProcedure per procedure, recurses into nested modules; then emits each non-empty init/final block as a procedure named mangleName(Mod, "Init"/"Final"). 44 | void CGModule::emitDecls(Declaration *Mod, DeclarationList &Decls, 45 | const Block &InitBlk, const Block &FinalBlk) { 46 | for (auto *Decl : Decls) { 47 | if (auto *Var = llvm::dyn_cast(Decl)) { 48 | llvm::GlobalVariable *V = new llvm::GlobalVariable( 49 | *M, convertType(Var->getTypeDenoter()), 50 | /*isConstant=*/false, llvm::GlobalValue::PrivateLinkage, nullptr, // NOTE(review): a null initializer makes this global a declaration, and LLVM's verifier expects declarations to have external or extern_weak linkage - confirm whether an explicit (e.g. zero) initializer is intended here. 51 | utils::mangleName(Var)); 52 | Globals[Var] = V; 53 | } else if (auto *Proc = llvm::dyn_cast(Decl)) { 54 | CGProcedure CGP(*this); 55 | CGP.run(Proc); 56 | } else if (auto *LM = llvm::dyn_cast(Decl)) { 57 | emitDecls(LM, LM->getDecls(), LM->getInitBlk(), LM->getFinalBlk()); 58 | } 59 | } 60 | if (!InitBlk.getStmts().empty()) { 61 | CGProcedure CGP(*this); 62 | CGP.run(InitBlk, utils::mangleName(Mod, "Init")); 63 | } 64 | if (!FinalBlk.getStmts().empty()) { 65 | CGProcedure CGP(*this); 66 | CGP.run(FinalBlk, utils::mangleName(Mod, "Final")); 67 | } 68 | } 69 | 70 | void CGModule::run(CompilationModule *CM) { 71 | ImplementationModule *PM = llvm::cast(CM); 72 | emitDecls(PM, PM->getDecls(), PM->getInitBlk(),
PM->getFinalBlk()); 73 | } 74 | -------------------------------------------------------------------------------- /lib/CodeGen/CGTBAA.cppm: -------------------------------------------------------------------------------- 1 | //===--- CGTBAA.cppm - Type Based Alias Analysis Metadata -----------------===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Adds the definition of metadata for TBAA. 11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | module; 15 | 16 | #include "llvm/ADT/DenseMap.h" 17 | #include "llvm/ADT/StringRef.h" 18 | #include "llvm/IR/MDBuilder.h" 19 | #include "llvm/IR/Metadata.h" 20 | 21 | export module m2lang.codegen:CGTBAA; 22 | 23 | import m2lang.ast; 24 | 25 | export namespace m2lang { 26 | 27 | class CGTBAA { 28 | // MDHelper - Helper for creating metadata. 
29 | llvm::MDBuilder MDHelper; 30 | 31 | // The root node of the TBAA hierarchy 32 | llvm::MDNode *Root; 33 | 34 | llvm::DenseMap MetadataCache; 35 | 36 | llvm::MDNode *createScalarTypeNode(TypeDenoter *TyDe, llvm::StringRef Name, 37 | llvm::MDNode *Parent); 38 | llvm::MDNode *createStructTypeNode(TypeDenoter *TyDe, llvm::StringRef Name, 39 | llvm::MDNode *Parent); 40 | 41 | public: 42 | CGTBAA(llvm::LLVMContext &Ctx) 43 | : MDHelper(llvm::MDBuilder(Ctx)), Root(nullptr) {} 44 | 45 | llvm::MDNode *getRoot(); 46 | llvm::MDNode *getTypeInfo(TypeDenoter *TyDe); 47 | llvm::MDNode *getAccessTagInfo(TypeDenoter *TyDe); 48 | }; 49 | 50 | } // namespace m2lang 51 | 52 | using namespace m2lang; 53 | 54 | llvm::MDNode *CGTBAA::getRoot() { 55 | if (!Root) 56 | Root = MDHelper.createTBAARoot("Simple Modula-2 TBAA"); 57 | 58 | return Root; 59 | } 60 | 61 | llvm::MDNode *CGTBAA::createScalarTypeNode(TypeDenoter *TyDe, 62 | llvm::StringRef Name, 63 | llvm::MDNode *Parent) { 64 | llvm::MDNode *N = MDHelper.createTBAAScalarTypeNode(Name, Parent); 65 | return MetadataCache[TyDe] = N; 66 | } 67 | 68 | llvm::MDNode *CGTBAA::createStructTypeNode(TypeDenoter *TyDe, 69 | llvm::StringRef Name, 70 | llvm::MDNode *Parent) { 71 | llvm::MDNode *N = MDHelper.createTBAAScalarTypeNode(Name, Parent); // NOTE(review): builds a scalar node even for struct types - presumably a placeholder until record members are described; confirm. 72 | return MetadataCache[TyDe] = N; 73 | } 74 | /// Returns the TBAA type node for TyDe, creating and caching it on first use; yields nullptr for type denoters that are not handled. 75 | llvm::MDNode *CGTBAA::getTypeInfo(TypeDenoter *TyDe) { 76 | if (llvm::MDNode *N = MetadataCache.lookup(TyDe)) // lookup() probes without inserting an empty entry on a miss 77 | return N; 78 | 79 | if (auto *Pervasive = llvm::dyn_cast(TyDe)) { 80 | llvm::StringRef Name = 81 | pervasive::getPervasiveTypeName(Pervasive->getTypeKind()); 82 | return createScalarTypeNode(Pervasive, Name, getRoot()); 83 | } 84 | if (auto *Enum = llvm::dyn_cast(TyDe)) { 85 | // TODO Implement 86 | llvm::StringRef Name = "enum"; 87 | return createScalarTypeNode(Enum, Name, getRoot()); 88 | } 89 | if (auto *Pointer = llvm::dyn_cast(TyDe)) { 90 | // TODO Implement 91 | llvm::StringRef Name = "any pointer"; 92 | return
createScalarTypeNode(Pointer, Name, getRoot()); 93 | } 94 | if (auto *Array = llvm::dyn_cast(TyDe)) { 95 | // TODO Implement 96 | llvm::StringRef Name = "array"; 97 | return createScalarTypeNode(Array, Name, getRoot()); 98 | } 99 | if (auto *Record = llvm::dyn_cast(TyDe)) { 100 | // TODO Implement 101 | llvm::StringRef Name = "record"; 102 | return createStructTypeNode(Record, Name, getRoot()); 103 | } 104 | return nullptr; 105 | } 106 | 107 | llvm::MDNode *CGTBAA::getAccessTagInfo(TypeDenoter *TyDe) { 108 | if (auto *Pointer = llvm::dyn_cast(TyDe)) { 109 | return getTypeInfo(Pointer->getTyDen()); 110 | } 111 | return nullptr; 112 | } -------------------------------------------------------------------------------- /lib/CodeGen/CGUnit.cppm: -------------------------------------------------------------------------------- 1 | //===--- CGUnit.cppm - Code Generator for CUs -----------------------------===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the code generator implementation for compilation units.
11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | module; 15 | 16 | #include "llvm/ADT/DenseMap.h" 17 | #include "llvm/ADT/SmallVector.h" 18 | #include "llvm/IR/CFG.h" 19 | #include "llvm/IR/Constants.h" 20 | #include "llvm/IR/DerivedTypes.h" 21 | #include "llvm/IR/Module.h" 22 | #include "llvm/IR/Type.h" 23 | #include "llvm/IR/Value.h" 24 | #include "llvm/Support/Casting.h" 25 | #include 26 | 27 | export module m2lang.codegen:CGUnit; 28 | 29 | import :CGTBAA; 30 | import m2lang.ast; 31 | 32 | namespace m2lang { 33 | namespace utils { 34 | 35 | export std::string mangleName(const Declaration *Decl, 36 | const llvm::StringRef Suffix = ""); 37 | 38 | } 39 | 40 | export class CGUnit { 41 | protected: 42 | ASTContext &ASTCtx; 43 | llvm::Module *M; 44 | CGTBAA TBAA; 45 | 46 | // Repository of global objects. 47 | llvm::DenseMap Globals; 48 | 49 | // Cache converted types. 50 | llvm::DenseMap TypeCache; 51 | 52 | public: 53 | llvm::Type *VoidTy; 54 | llvm::Type *Int1Ty; 55 | llvm::Type *Int8Ty; 56 | llvm::Type *Int32Ty; 57 | llvm::Type *Int64Ty; 58 | llvm::Type *FloatTy; 59 | llvm::Type *DoubleTy; 60 | llvm::PointerType *PtrTy; 61 | llvm::Constant *Int32Zero; 62 | 63 | public: 64 | CGUnit(ASTContext &ASTCtx, llvm::Module *M) 65 | : ASTCtx(ASTCtx), M(M), TBAA(M->getContext()) { 66 | initialize(); 67 | } 68 | 69 | void initialize(); 70 | 71 | llvm::LLVMContext &getLLVMCtx() { return M->getContext(); } 72 | llvm::Module *getModule() { return M; } 73 | 74 | ASTContext &getASTCtx() { return ASTCtx; } 75 | 76 | llvm::Type *convertType(TypeDenoter *TyDe); 77 | llvm::Type *convertType(Type *Ty); 78 | 79 | void decorateInst(llvm::Instruction *Inst, TypeDenoter *TyDe); 80 | 81 | llvm::GlobalObject *getGlobal(Declaration *Decl) { 82 | return Globals.lookup(Decl); 83 | } 84 | }; 85 | } // namespace m2lang 86 | 87 | using namespace m2lang; 88 | 89 | std::string utils::mangleName(const Declaration *Decl, 90 | const llvm::StringRef 
Suffix) { 91 | std::string Mangled("_m"); 92 | llvm::SmallVector Parts; 93 | for (; Decl; Decl = Decl->getEnclosingDecl()) 94 | Parts.push_back(Decl->getName()); 95 | while (!Parts.empty()) { 96 | llvm::StringRef Name = Parts.pop_back_val(); 97 | Mangled.append(llvm::Twine(Name.size()).concat(Name).str()); 98 | } 99 | if (!Suffix.empty()) 100 | Mangled.append("_").append(Suffix); 101 | return Mangled; 102 | } 103 | 104 | void CGUnit::initialize() { 105 | VoidTy = llvm::Type::getVoidTy(getLLVMCtx()); 106 | Int1Ty = llvm::Type::getInt1Ty(getLLVMCtx()); 107 | Int8Ty = llvm::Type::getInt8Ty(getLLVMCtx()); 108 | Int32Ty = llvm::Type::getInt32Ty(getLLVMCtx()); 109 | Int64Ty = llvm::Type::getInt64Ty(getLLVMCtx()); 110 | FloatTy = llvm::Type::getFloatTy(getLLVMCtx()); 111 | DoubleTy = llvm::Type::getDoubleTy(getLLVMCtx()); 112 | PtrTy = llvm::PointerType::get(getLLVMCtx(), /*AddressSpace=*/0); 113 | Int32Zero = llvm::ConstantInt::get(Int32Ty, 0, /*isSigned*/ true); 114 | } 115 | 116 | llvm::Type *CGUnit::convertType(TypeDenoter *TyDe) { 117 | auto *Cached = TypeCache.lookup(TyDe); 118 | if (Cached != nullptr) 119 | return Cached; 120 | if (auto *P = llvm::dyn_cast(TyDe)) { 121 | switch (P->getTypeKind()) { 122 | case pervasive::Void: 123 | return VoidTy; 124 | case pervasive::Boolean: 125 | return Int1Ty; 126 | case pervasive::Char: 127 | return Int8Ty; 128 | case pervasive::Cardinal: 129 | case pervasive::Integer: 130 | case pervasive::WholeNumber: 131 | return Int64Ty; 132 | case pervasive::Real: 133 | return FloatTy; 134 | case pervasive::LongReal: 135 | case pervasive::RealNumber: 136 | return DoubleTy; 137 | default: 138 | return Int32Ty; 139 | } 140 | } 141 | if (auto *Rec = llvm::dyn_cast(TyDe)) { 142 | // An enumeration is currently always mapped to an i64 type. 143 | // This must be in sync with Sema::actOnEnumerationType(). 
144 | TypeCache[TyDe] = Int64Ty; 145 | return Int64Ty; 146 | } 147 | if (auto *A = llvm::dyn_cast(TyDe)) { 148 | llvm::Type *Component = convertType(A->getComponentType()); 149 | // IndexType is an ordinal type. 150 | TypeDenoter *IndexType = A->getIndexType(); 151 | uint64_t NumElements; 152 | if (auto *EnumTy = llvm::dyn_cast(IndexType)) { 153 | NumElements = EnumTy->getMembers().size(); 154 | } else if (llvm::dyn_cast(IndexType)) { 155 | // For LLVM, we need to compute MAX(IndexType) - MIN(IndexType) + 1, 156 | // e.g. [1..5] has 5-1+1 = 5 elements. 157 | // TODO Implement. The challenge here is that getTo() and getFrom() are 158 | // constant expressions, but the value is not available. 159 | NumElements = 5; 160 | } else { 161 | // A whole number type. 162 | // TODO Implement. 163 | NumElements = 6; 164 | } 165 | llvm::Type *Ty = llvm::ArrayType::get(Component, NumElements); 166 | TypeCache[TyDe] = Ty; 167 | return Ty; 168 | } 169 | if (auto *Rec = llvm::dyn_cast(TyDe)) { 170 | llvm::Type *Ty = llvm::StructType::get(getLLVMCtx(), false); 171 | // TODO Fill in members. 172 | TypeCache[TyDe] = Ty; 173 | return Ty; 174 | } 175 | if (auto *Ptr = llvm::dyn_cast(TyDe)) { 176 | TypeCache[TyDe] = PtrTy; 177 | return PtrTy; 178 | } 179 | // TODO Implement. 180 | return Int32Ty; 181 | } 182 | 183 | llvm::Type *CGUnit::convertType(Type *Ty) { 184 | return convertType(Ty->getTypeDenoter()); 185 | } 186 | 187 | void CGUnit::decorateInst(llvm::Instruction *Inst, TypeDenoter *TyDe) { 188 | if (auto *N = TBAA.getAccessTagInfo(TyDe)) 189 | Inst->setMetadata(llvm::LLVMContext::MD_tbaa, N); 190 | } 191 | -------------------------------------------------------------------------------- /lib/CodeGen/CodeGenerator.cppm: -------------------------------------------------------------------------------- 1 | //===--- CodeGenerator.cppm - Modula-2 Language Code Generator ------------===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. 
See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the code generator implementation. 11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | module; 15 | 16 | #include "llvm/IR/LLVMContext.h" 17 | #include "llvm/IR/Module.h" 18 | #include "llvm/IR/Verifier.h" 19 | #include "llvm/Support/raw_ostream.h" 20 | #include "llvm/Target/TargetMachine.h" 21 | #include 22 | 23 | export module m2lang.codegen; 24 | 25 | import :CGUnit; 26 | import :CGModule; 27 | import :CGProcedure; 28 | import :CGTBAA; 29 | 30 | import m2lang.ast; 31 | 32 | namespace m2lang { 33 | 34 | export class CodeGenerator { 35 | llvm::LLVMContext &Ctx; 36 | ASTContext &ASTCtx; 37 | llvm::TargetMachine *TM; 38 | CompilationModule *CM; 39 | 40 | protected: 41 | CodeGenerator(llvm::LLVMContext &Ctx, ASTContext &ASTCtx, 42 | llvm::TargetMachine *TM) 43 | : Ctx(Ctx), ASTCtx(ASTCtx), TM(TM), CM(nullptr) {} 44 | 45 | public: 46 | static CodeGenerator *create(llvm::LLVMContext &Ctx, ASTContext &ASTCtx, 47 | llvm::TargetMachine *TM); 48 | 49 | std::unique_ptr run(CompilationModule *CM, 50 | std::string FileName); 51 | }; 52 | 53 | } // end namespace m2lang 54 | 55 | 56 | using namespace m2lang; 57 | 58 | CodeGenerator *CodeGenerator::create(llvm::LLVMContext &Ctx, ASTContext &ASTCtx, llvm::TargetMachine *TM) { 59 | return new CodeGenerator(Ctx, ASTCtx, TM); 60 | } 61 | 62 | std::unique_ptr CodeGenerator::run(CompilationModule *CM, std::string FileName) { 63 | std::unique_ptr M = std::make_unique(FileName, Ctx); 64 | M->setTargetTriple(TM->getTargetTriple().getTriple()); 65 | M->setDataLayout(TM->createDataLayout()); 66 | 67 | CGModule CGM(ASTCtx, M.get()); 68 | CGM.run(CM); 69 | llvm::verifyModule(*M, &llvm::errs()); 70 | //M->print(llvm::outs(), nullptr); 71 | return M; 72 | } 73 
| -------------------------------------------------------------------------------- /lib/CodeGen/xmake.lua: -------------------------------------------------------------------------------- 1 | target("codegen") 2 | add_packages("llvm") 3 | set_kind("static") 4 | add_deps("basic", "ast") 5 | add_files("*.cppm", {public = true}) 6 | -------------------------------------------------------------------------------- /lib/LLVM/llvm.cppm: -------------------------------------------------------------------------------- 1 | export module llvm; 2 | 3 | export import :ADT; 4 | 5 | -------------------------------------------------------------------------------- /lib/LLVM/xmake.lua: -------------------------------------------------------------------------------- 1 | target("llvm-modules") 2 | add_packages("llvm") 3 | set_kind("moduleonly") 4 | add_files("*.cppm", {public = true}) 5 | -------------------------------------------------------------------------------- /lib/LLtool/Diagnostic.cpp: -------------------------------------------------------------------------------- 1 | //===--- Diagnostic.h - LLtool diagnostic output ----------------*- C++ -*-===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the error printing interface. 
11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | #include "lltool/Diagnostic.h" 15 | #include "llvm/ADT/Twine.h" 16 | #include "llvm/Support/SMLoc.h" 17 | #include "llvm/Support/SourceMgr.h" 18 | 19 | using namespace lltool; 20 | 21 | void Diagnostic::error(llvm::SMLoc Loc, const llvm::Twine &Msg) { 22 | SrcMgr.PrintMessage(Loc, llvm::SourceMgr::DK_Error, Msg); 23 | ++Errors; 24 | } 25 | 26 | void Diagnostic::error(const char *Loc, const llvm::Twine &Msg) { 27 | error(llvm::SMLoc::getFromPointer(Loc), Msg); 28 | } 29 | 30 | void Diagnostic::warning(llvm::SMLoc Loc, const llvm::Twine &Msg) { 31 | SrcMgr.PrintMessage(Loc, llvm::SourceMgr::DK_Warning, Msg); 32 | } 33 | 34 | void Diagnostic::warning(const char *Loc, const llvm::Twine &Msg) { 35 | warning(llvm::SMLoc::getFromPointer(Loc), Msg); 36 | } 37 | 38 | void Diagnostic::note(llvm::SMLoc Loc, const llvm::Twine &Msg) { 39 | SrcMgr.PrintMessage(Loc, llvm::SourceMgr::DK_Note, Msg); 40 | } 41 | 42 | void Diagnostic::note(const char *Loc, const llvm::Twine &Msg) { 43 | note(llvm::SMLoc::getFromPointer(Loc), Msg); 44 | } 45 | -------------------------------------------------------------------------------- /lib/LLtool/Lexer.cpp: -------------------------------------------------------------------------------- 1 | //===--- Lexer.cpp - LLtool lexer -------------------------------*- C++ -*-===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the lexer implementation. 
11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | #include "lltool/Lexer.h" 15 | #include "llvm/ADT/StringSwitch.h" 16 | 17 | using namespace lltool; 18 | 19 | namespace charinfo { 20 | LLVM_READNONE inline bool isDigit(char C) { return C >= '0' && C <= '9'; } 21 | 22 | LLVM_READNONE inline bool isLetter(char C) { 23 | return (C >= 'A' && C <= 'Z') || (C >= 'a' && C <= 'z'); 24 | } 25 | } // namespace charinfo 26 | 27 | void Lexer::next(Token &Tok) { 28 | repeat: 29 | while (*CurPtr == '\r' || *CurPtr == '\n' || *CurPtr == ' ' || 30 | *CurPtr == '\t' || *CurPtr == '\f' || *CurPtr == '\v') { 31 | ++CurPtr; 32 | } 33 | if (!*CurPtr) { 34 | Tok.Kind = tok::eoi; 35 | return; 36 | } 37 | if (charinfo::isLetter(*CurPtr)) { 38 | identifier(Tok); 39 | return; 40 | } 41 | switch (*CurPtr) { 42 | #define CASE(Ch, tok) \ 43 | case Ch: \ 44 | formToken(Tok, CurPtr + 1, tok); \ 45 | return 46 | CASE('=', tok::equal); 47 | CASE(':', tok::colon); 48 | CASE(',', tok::comma); 49 | CASE(';', tok::semi); 50 | CASE('|', tok::pipe); 51 | CASE('(', tok::l_paren); 52 | #undef CASE 53 | case '/': 54 | if (char Ch = *(CurPtr + 1)) { 55 | if (Ch == '*') { 56 | multilinecomment(); 57 | goto repeat; 58 | } 59 | if (Ch == '/') { 60 | singlelinecomment(); 61 | goto repeat; 62 | } 63 | } 64 | break; 65 | case '"': 66 | case '\'': 67 | string(Tok); 68 | return; 69 | case '<': 70 | code(Tok, '<', '>', tok::argument); 71 | return; 72 | case '{': 73 | code(Tok, '{', '}', tok::code); 74 | return; 75 | case ')': 76 | if (char Ch = *(CurPtr + 1)) { 77 | switch (Ch) { 78 | case '?': 79 | formToken(Tok, CurPtr + 2, tok::r_parenquestion); 80 | return; 81 | case '*': 82 | formToken(Tok, CurPtr + 2, tok::r_parenstar); 83 | return; 84 | case '+': 85 | formToken(Tok, CurPtr + 2, tok::r_parenplus); 86 | return; 87 | default: 88 | formToken(Tok, CurPtr + 1, tok::r_paren); 89 | return; 90 | } 91 | } else 92 | formToken(Tok, CurPtr + 1, tok::r_paren); 93 | 
return; 94 | case '%': 95 | if (*(CurPtr + 1) == '%') 96 | formToken(Tok, CurPtr + 2, tok::percentpercent); 97 | else 98 | keyword(Tok); 99 | return; 100 | default: 101 | break; 102 | } 103 | formToken(Tok, CurPtr + 1, tok::unknown); 104 | } 105 | 106 | void Lexer::identifier(Token &Tok) { 107 | bool Qualified = false; 108 | const char *Start = CurPtr; 109 | const char *End = CurPtr + 1; 110 | while (charinfo::isLetter(*End) || charinfo::isDigit(*End) || *End == '_' || 111 | *End == '.') { 112 | if (*End == '.') 113 | Qualified = true; 114 | ++End; 115 | } 116 | formToken(Tok, End, Qualified ? tok::qualidentifier : tok::identifier); 117 | Tok.Ptr = Start; 118 | } 119 | 120 | void Lexer::keyword(Token &Tok) { 121 | const char *Start = CurPtr; 122 | const char *End = CurPtr + 1; 123 | while (charinfo::isLetter(*End) || charinfo::isDigit(*End)) 124 | ++End; 125 | // Exclude '%' from compare. 126 | llvm::StringRef Keyword = llvm::StringRef(Start + 1, End - Start - 1); 127 | tok::TokenKind Kind = llvm::StringSwitch(Keyword) 128 | .Case("eoi", tok::kw_eoi) 129 | .Case("define", tok::kw_define) 130 | .Case("if", tok::kw_if) 131 | .Case("language", tok::kw_language) 132 | .Case("start", tok::kw_start) 133 | .Case("token", tok::kw_token) 134 | .Default(tok::unknown); 135 | if (Kind == tok::unknown) 136 | Diag.error(Start, "unrecognized keyword"); 137 | formToken(Tok, End, Kind); 138 | } 139 | 140 | void Lexer::code(Token &Tok, char Open, const char Close, tok::TokenKind Kind) { 141 | const char *Start = CurPtr; 142 | const char *End = CurPtr + 1; 143 | const bool Dot = *End == '.'; 144 | if (Dot) { 145 | do { 146 | ++End; 147 | while (*End && *End != Close) 148 | ++End; 149 | } while (Dot && *End && Start + 1 < End && End[-1] != '.'); 150 | } else { 151 | unsigned Level = 1; 152 | while (*End && (*End != Close || --Level)) { 153 | if (*End == Open) 154 | ++Level; 155 | ++End; 156 | } 157 | } 158 | if (!*End) 159 | Diag.error(Start, "unterminated code"); 160 | formToken(Tok, 
End + 1, Kind); 161 | } 162 | 163 | void Lexer::string(Token &Tok) { 164 | const char *Start = CurPtr; 165 | const char *End = CurPtr + 1; 166 | while (*End && *End != *Start && *CurPtr != '\n' && *CurPtr != '\r') 167 | ++End; 168 | if (*CurPtr == '\n' || *CurPtr == '\r') { 169 | Diag.error(Start, "unterminated string"); 170 | } 171 | formToken(Tok, End + 1, tok::string); 172 | Tok.Ptr = Start; 173 | } 174 | 175 | void Lexer::multilinecomment() { 176 | const char *Start = CurPtr; 177 | CurPtr += 2; 178 | do { 179 | while (*CurPtr && *CurPtr != '*') 180 | ++CurPtr; 181 | ++CurPtr; 182 | } while (*CurPtr && *CurPtr != '/'); 183 | if (!*CurPtr) 184 | Diag.error(Start, "unterminated comment"); 185 | ++CurPtr; 186 | } 187 | 188 | void Lexer::singlelinecomment() { 189 | // Line endings: Unix \n, Mac \r, Dos/Windows \r\n 190 | while (*CurPtr && *CurPtr != '\n' && *CurPtr != '\r') 191 | ++CurPtr; 192 | if (*(CurPtr + 1) && *CurPtr == '\r' && *(CurPtr + 1) == '\n') 193 | ++CurPtr; 194 | ++CurPtr; 195 | } 196 | 197 | void Lexer::formToken(Token &Tok, const char *TokEnd, tok::TokenKind Kind) { 198 | size_t TokLen = TokEnd - CurPtr; 199 | Tok.Ptr = CurPtr; 200 | Tok.Length = TokLen; 201 | Tok.Kind = Kind; 202 | CurPtr = TokEnd; 203 | } -------------------------------------------------------------------------------- /lib/LLtool/Main.cpp: -------------------------------------------------------------------------------- 1 | //===--- Main.cpp - LLtool main entry point ---------------------*- C++ -*-===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Implements the main entry point for LLtool. 
11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | #include "lltool/Main.h" 15 | #include "lltool/Algo.h" 16 | #include "lltool/Diagnostic.h" 17 | #include "lltool/Parser.h" 18 | #include "lltool/RDPEmitter.h" 19 | #include "llvm/Support/CommandLine.h" 20 | #include "llvm/Support/FileSystem.h" 21 | #include "llvm/Support/InitLLVM.h" 22 | #include "llvm/Support/MemoryBuffer.h" 23 | #include "llvm/Support/ToolOutputFile.h" 24 | #include "llvm/Support/raw_ostream.h" 25 | 26 | using namespace lltool; 27 | 28 | static llvm::cl::opt 29 | OutputFilename("o", llvm::cl::desc("Output filename"), 30 | llvm::cl::value_desc("filename"), llvm::cl::init("-")); 31 | 32 | static llvm::cl::opt InputFilename(llvm::cl::Positional, 33 | llvm::cl::desc(""), 34 | llvm::cl::init("-")); 35 | 36 | static llvm::cl::opt 37 | WriteIfChanged("write-if-changed", 38 | llvm::cl::desc("Only write output if it changed")); 39 | 40 | static llvm::cl::opt 41 | DumpYAML("dump-yaml", 42 | llvm::cl::desc("Write YAML file and stop processing")); 43 | 44 | static int reportError(const char *ProgName, llvm::Twine Msg) { 45 | llvm::errs() << ProgName << ": " << Msg; 46 | llvm::errs().flush(); 47 | return 1; 48 | } 49 | 50 | int lltool::runLLtoolMain(const char *Argv0) { 51 | // Read the input file. 52 | llvm::ErrorOr> FileOrErr = 53 | llvm::MemoryBuffer::getFileOrSTDIN(InputFilename); 54 | if (std::error_code EC = FileOrErr.getError()) 55 | return reportError(Argv0, "Could not open input file '" + InputFilename + 56 | "': " + EC.message() + "\n"); 57 | 58 | llvm::SourceMgr SrcMgr; 59 | 60 | // Tell SrcMgr about this buffer, which is what the parser will pick up. 61 | SrcMgr.AddNewSourceBuffer(std::move(*FileOrErr), llvm::SMLoc()); 62 | 63 | // Parser the grammar and calculate all LL(1) values. 
64 | Grammar Grammar; 65 | VarStore Vars; 66 | Parser TheParser(SrcMgr); 67 | TheParser.parse(Grammar, Vars); 68 | Grammar.performAnalysis(TheParser.getDiag()); 69 | 70 | if (DumpYAML) 71 | Grammar.writeYAML(llvm::dbgs()); 72 | 73 | // Do not generate output, if syntactically or semantically errors occured. 74 | if (TheParser.getDiag().errorsOccured()) 75 | return reportError(Argv0, llvm::Twine(TheParser.getDiag().errorsPrinted()) + 76 | " errors.\n"); 77 | 78 | if (DumpYAML) 79 | return 0; 80 | 81 | // Write output to memory. 82 | std::string OutString; 83 | llvm::raw_string_ostream Out(OutString); 84 | emitRDP(Grammar, Vars, Out); 85 | 86 | if (WriteIfChanged) { 87 | // Only updates the real output file if there are any differences. 88 | // This prevents recompilation of all the files depending on it if there 89 | // aren't any. 90 | if (auto ExistingOrErr = llvm::MemoryBuffer::getFile(OutputFilename)) 91 | if (std::move(ExistingOrErr.get())->getBuffer() == Out.str()) 92 | return 0; 93 | } 94 | 95 | std::error_code EC; 96 | llvm::ToolOutputFile OutFile(OutputFilename, EC, llvm::sys::fs::OF_None); 97 | if (EC) 98 | return reportError(Argv0, "error opening " + OutputFilename + ":" + 99 | EC.message() + "\n"); 100 | OutFile.os() << Out.str(); 101 | 102 | // Declare success. 103 | OutFile.keep(); 104 | return 0; 105 | } -------------------------------------------------------------------------------- /lib/LLtool/Parser.cpp: -------------------------------------------------------------------------------- 1 | //===--- Parser.cpp - LLtool parser -----------------------------*- C++ -*-===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Implements the LLtool parser class. 
11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | #include "lltool/Parser.h" 15 | #include "lltool/Node.h" 16 | #include "lltool/VarStore.h" 17 | 18 | namespace lltool { 19 | using llvm::cast; 20 | using llvm::SMLoc; 21 | using llvm::StringRef; 22 | } // namespace lltool 23 | 24 | using namespace lltool; 25 | 26 | void Parser::parse(Grammar &G, VarStore &V) { 27 | _TokenBitSet FollowSet{tok::eoi}; 28 | parseLltool(FollowSet); 29 | G = Builder.build(); 30 | V = Builder.varStore(); 31 | } 32 | 33 | #define PARSER_DEFINITION 34 | #include "lltool/lltool.g.inc" 35 | #undef PARSER_DEFINITION 36 | -------------------------------------------------------------------------------- /lib/LLtool/README.md: -------------------------------------------------------------------------------- 1 | # LLtool 2 | A recursive-descent parser generator for C++. 3 | 4 | ## Purpose 5 | 6 | LLtool generates the body of a parser from a context-free grammar written in EBNF. 7 | The generated code fragment can be mixed into C++ source. 8 | 9 | ## Interface 10 | 11 | The generated code makes some assumptions about the environment: 12 | 13 | - It assumes there are functions `expect()`, `consume()` and `advance()`. 14 | - It assumes that functions `expect()` and `consume()` return `true` in case of error. 15 | - It assumes there exists an enumeration `TokenKind`. 16 | - It assumes there is a member/variable `tok`. The type of `tok` is not important. 17 | Only a member/property `kind` (of type `TokenKind`) is required. 18 | 19 | ## Syntax 20 | 21 | The input syntax for LLtool is similar to yacc/bison. It has the following specification: 22 | 23 | %token identifier, code, argument, string 24 | %start lltool 25 | %% 26 | lltool : ( header )?
( rule )+ ; 27 | 28 | header : ( "%start" identifier | "%token" tokenlist | "%language" string | "%eoi" identifier )* "%%" ; 29 | 30 | tokenlist : tokendecl ("," tokendecl )* ; 31 | 32 | tokendecl : (identifier | string) ( "=" identifier )? ; 33 | 34 | rule : nonterminal ":" rhs ";" ; 35 | 36 | nonterminal : identifier ( argument )? ; 37 | 38 | rhs : sequence ( "|" sequence )* ; 39 | 40 | sequence : ( group | identifier ( argument)? | string | code | "%if" code )* ; 41 | 42 | group : "(" rhs ( ")" | ")?" | ")*" | ")+" ) ; 43 | 44 | This specification uses the following tokens: 45 | 46 | - `identifier`: a sequence of letters, digits and underscores. First element must be a letter. 47 | Only ASCII characters are supported. 48 | - `string`: an arbitrary sequence of characters, enclosed by `"` and `"` or `'` and `'`. 49 | - `code`: an arbitrary sequence of characters, enclosed by `{` and `}` (nested braces must be balanced) or `{.` and `.}`. 50 | - `argument`: an arbitrary sequence of characters, enclosed by `<` and `>` or `<.` and `.>`. 51 | 52 | Single-line comments start with `//` and run until the end of line. 53 | Multi-line comments use `/*` and `*/` as delimiters. Multi-line comments may not 54 | be nested. 55 | 56 | ## Influencing the parsing process 57 | 58 | Consider the following example which is a simple version of an `import` statement 59 | with the possibility to use an alias: 60 | 61 | %token id 62 | %start import 63 | %% 64 | import : 65 | "import" (id ":=")? id; 66 | 67 | Because the optional group `(id ":=")?` begins with the same token as the symbol 68 | after it (both are `id`), the parser generator can't decide if parsing must 69 | continue with the optional group or with the symbol after the group if the next 70 | token is `id`. This is an example of an LL(1) conflict. To solve this conflict 71 | it is possible to insert a resolver. A resolver is a `bool` expression, e.g. 72 | `bool isAlias()`.
The resolver is inserted at the place of the LL(1) conflict: 73 | 74 | %token id 75 | %start import 76 | %% 77 | import : 78 | "import" (%if {. isAlias() .} id ":=")? id; 79 | 80 | In this case the implementation of the resolver is trivial. It only has to look 81 | one token further in the token range: 82 | 83 | bool isAlias() 84 | { 85 | // LL(1) conflict can be resolved with a look ahead of two 86 | return lexer.save.moveFront.kind == TokenKind.ColonEqual; 87 | } 88 | 89 | Other LL(1) conflicts can be solved in a similar way. The resolver can be as 90 | complex as required, as long as a `bool` value is returned. 91 | 92 | Now consider that the language has evolved over time. The original version did 93 | not support the alias name. That was later introduced in version 2. To support 94 | both versions, the grammar now looks like: 95 | 96 | %token id 97 | %start import 98 | %% 99 | import : 100 | "import" id (":=" id)?; 101 | 102 | This rule can parse an import with or without an alias name. To support both 103 | language versions with one parser, you can add a predicate to differentiate 104 | between both versions. Like a resolver, a predicate must return a `bool` value. 105 | Here the flag `isV2` is used as predicate: 106 | 107 | %token id 108 | %start import 109 | %% 110 | import : 111 | "import" id (%if {. isV2 .} ":=" id)?; 112 | 113 | A predicate can be inserted at the beginning of an optional group or at the 114 | beginning of a sequence in case the sequence itself can derive epsilon or is 115 | embedded in an optional group. 116 | 117 | Another approach to solving this task is to deliberately create an LL(1) 118 | conflict and then use a resolver: 119 | 120 | %token id 121 | %start import 122 | %% 123 | import : 124 | "import" ( %if {. isAlias() && isV2 .} id ":=" id 125 | | id 126 | ) ; 127 | 128 | LLtool checks whether resolvers and predicates are placed correctly. Incorrectly 129 | placed resolvers and predicates are ignored and a warning is printed.
130 | 131 | ## Error handling 132 | 133 | A simple local error handling scheme based on _FOLLOW_ sets is implemented. It 134 | uses the so-called panic mode approach. 135 | 136 | ## Open tasks 137 | 138 | - Integration with ASTtool 139 | -------------------------------------------------------------------------------- /lib/LLtool/VarStore.cpp: -------------------------------------------------------------------------------- 1 | //===--- VarStore.cpp - LLtool variable container ---------------*- C++ -*-===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the variable container. Variables can be defined in the grammar file 11 | /// with the %define directive. They are mainly used to customize the code 12 | /// generation.
13 | /// 14 | //===----------------------------------------------------------------------===// 15 | 16 | #include "lltool/VarStore.h" 17 | 18 | using namespace lltool; 19 | 20 | namespace { 21 | static const char *ExternalNames[] = { 22 | #define VAR(NAME, VAR, TYPE, DEFAULT) NAME, 23 | #include "lltool/Variables.def" 24 | }; 25 | static var::VarType Types[] = { 26 | #define VAR(NAME, VAR, TYPE, DEFAULT) var::TYPE, 27 | #include "lltool/Variables.def" 28 | }; 29 | static llvm::StringLiteral Defaults[] = { 30 | #define VAR(NAME, VAR, TYPE, DEFAULT) DEFAULT, 31 | #include "lltool/Variables.def" 32 | }; 33 | } // namespace 34 | 35 | var::VarType VarStore::getType(var::VarName Name) const { 36 | assert(Name != var::NUM_VARIABLES); 37 | return Types[Name]; 38 | } 39 | 40 | llvm::StringRef VarStore::getDefault(var::VarName Name) const { 41 | assert(Name != var::NUM_VARIABLES); 42 | return Defaults[Name]; 43 | } 44 | 45 | VarStore::VarStore() {} 46 | 47 | llvm::Error VarStore::add(llvm::StringRef Name, llvm::StringRef Value, 48 | var::VarType Type) { 49 | unsigned Idx = 0; 50 | for (; Idx < var::NUM_VARIABLES; ++Idx) 51 | if (ExternalNames[Idx] == Name) 52 | break; 53 | if (Idx >= var::NUM_VARIABLES) { 54 | return llvm::make_error( 55 | llvm::Twine("unknown variable name ").concat(Name), 56 | llvm::inconvertibleErrorCode()); 57 | } 58 | if (Types[Idx] != Type) { 59 | return llvm::make_error("wrong variable type", 60 | llvm::inconvertibleErrorCode()); 61 | } 62 | if (Type == var::Flag && (Value != "" && Value != "true")) { 63 | return llvm::make_error( 64 | llvm::Twine("wrong value for flag variable ").concat(Name), 65 | llvm::inconvertibleErrorCode()); 66 | } 67 | Vars[Idx] = Value; 68 | return llvm::Error::success(); 69 | } 70 | 71 | void VarStore::set(var::VarName Name, llvm::StringRef Value) { 72 | assert(Name != var::NUM_VARIABLES); 73 | var::VarType Ty = Types[Name]; 74 | assert(Ty != var::Flag || (Value == "" || Value == "true")); 75 | Vars[Name] = Value; 76 | } 77 
| -------------------------------------------------------------------------------- /lib/LLtool/lltool.g: -------------------------------------------------------------------------------- 1 | /* Grammar for LLtool */ 2 | %language "c++" 3 | %define api.parser.class {Parser} 4 | %token identifier, qualidentifier, code, argument, string 5 | %token "%token" = kw_token, "%start" = kw_start, "%eoi" = kw_eoi 6 | %token "%language" = kw_language, "%define" = kw_define, "%if" = kw_if 7 | %start lltool 8 | %% 9 | lltool 10 | : ( header )? ( rule )+ ; 11 | 12 | header 13 | : ("%start" identifier { Builder.startSymbol(Tok.getLoc(), Tok.getData()); } 14 | | "%token" tokenlist 15 | | "%language" string { Builder.language(Tok.getLoc(), Tok.getData()); } 16 | | "%define" { SMLoc Loc; StringRef ident, value; var::VarType type = var::Flag; } 17 | ( { Loc = Tok.getLoc(); ident = Tok.getData(); } 18 | ( identifier | qualidentifier ) 19 | ) 20 | ( { value = Tok.getData(); } 21 | ( code { type = var::Code; } 22 | | identifier { type = var::Identifier; } 23 | | qualidentifier { type = var::Identifier; } 24 | | string { type = var::String; } 25 | ) 26 | )? 27 | { Builder.define(Loc, ident, value, type); } 28 | | "%eoi" identifier { Builder.eoiSymbol(Tok.getLoc(), Tok.getData()); } 29 | )* 30 | "%%" 31 | ; 32 | 33 | tokenlist 34 | : tokendecl ("," tokendecl )* 35 | ; 36 | 37 | tokendecl 38 | : { SMLoc Loc; StringRef val, ext; } 39 | (identifier { Loc = Tok.getLoc(); val = Tok.getData(); } 40 | | string { Loc = Tok.getLoc(); val = Tok.getData(); } 41 | ) 42 | ( "=" identifier { ext = Tok.getData(); } 43 | )? 44 | { Builder.terminal(Loc, val, ext); } 45 | ; 46 | 47 | rule 48 | : { Node *node = nullptr; } 49 | nonterminal ":" 50 | rhs<.node->Link.> { node->Link->Back = node; } 51 | ";" 52 | ; 53 | 54 | nonterminal 55 | : identifier { node = Builder.nonterminal(Tok.getLoc(), Tok.getData()); } 56 | ( argument { Builder.argument(node, Tok.getData()); } 57 | )? 58 | ( code 59 | )? 
60 | ; 61 | 62 | rhs 63 | : sequence 64 | ( { node = Builder.alternative(node->Loc, node); 65 | Node *alt = node->Link; alt->Back = node; } 66 | ( "|" sequence<.alt->Link.> { alt = alt->Link; alt->Back = node; } 67 | )+ 68 | )? 69 | ; 70 | 71 | sequence 72 | : { Node *last = nullptr; node = Builder.sequence(Tok.getLoc()); } 73 | ( { Node *n = nullptr; } 74 | ( group 75 | | identifier { n = Builder.symbol(Tok.getLoc(), Tok.getData()); } 76 | ( argument { Builder.argument(n, Tok.getData()); } 77 | )? 78 | | string { n = Builder.symbol(Tok.getLoc(), Tok.getData(), true); } 79 | | code { n = Builder.code(Tok.getLoc(), Tok.getData()); } 80 | | "%if" code { n = Builder.code(Tok.getLoc(), Tok.getData()); 81 | cast(n)->Type = Code::Condition; } 82 | ) 83 | { if (!last) node->Inner = last = n; 84 | else last->Next = n, last = n; } 85 | )* 86 | { if (last) last->Back = node; } 87 | ; 88 | 89 | group 90 | : "(" { Node *n = nullptr; Group::CardinalityKind Cardinality; } 91 | rhs<.n.> 92 | ( ")" { Cardinality = Group::One; } 93 | | ")?" { Cardinality = Group::ZeroOrOne; } 94 | | ")*" { Cardinality = Group::ZeroOrMore; } 95 | | ")+" { Cardinality = Group::OneOrMore; } 96 | ) { node = Builder.group(Tok.getLoc(), n, Cardinality); } 97 | ; -------------------------------------------------------------------------------- /lib/LLtool/xmake.lua: -------------------------------------------------------------------------------- 1 | target("lltool") 2 | add_packages("llvm") 3 | set_kind("static") 4 | add_files("*.cpp") 5 | -------------------------------------------------------------------------------- /lib/Lexer/DirectiveParser.g: -------------------------------------------------------------------------------- 1 | //===--- DirectiveParers.g - Modula-2 Language Directive parser -----------===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the grammar for Modula-2 directives. 11 | /// The grammar is based on the draft technical report "Interfacing Modula-2 to 12 | /// C", Annex B: http://www.zi.biologie.uni-muenchen.de/~enger/SC22WG13/im2c-981130.html#TR-AXI-PRAGMAS 13 | /// and is compatible to the Macintosh p1 compiler, 14 | /// https://modula2.awiedemann.de/manual/comp4.html#L4_2 15 | /// 16 | //===----------------------------------------------------------------------===// 17 | %language "c++" 18 | %define api.parser.class {DirectiveParser} 19 | %token identifier, string_literal 20 | %start directive 21 | %eoi eof 22 | %% 23 | directive 24 | : "<*" singleDirective (";" singleDirective )* "*>" 25 | ; 26 | singleDirective 27 | : ( assignment | environment | definition 28 | | save_restore | condition )? 29 | ; 30 | assignment 31 | : { StringRef Val; } 32 | ( identifier { StringRef Identifier = Tok.getIdentifier(); SMLoc Loc = Tok.getLocation(); } 33 | "(" value ")" { actOnAssignment(Loc, Identifier, Val); } 34 | | "ASSIGN" "(" identifier { StringRef Identifier = Tok.getIdentifier(); SMLoc Loc = Tok.getLocation(); } 35 | "," value ")" { actOnAssignment(Loc, Identifier, Val); } 36 | ) 37 | ; 38 | environment 39 | : "ENVIRON" "(" identifier { StringRef Identifier = Tok.getIdentifier(); SMLoc Loc = Tok.getLocation(); } 40 | "," { StringRef Val; } 41 | value ")" { actOnEnvironment(Loc, Identifier, Val); } 42 | ; 43 | definition 44 | : "DEFINE" "(" identifier { StringRef Identifier = Tok.getIdentifier(); SMLoc Loc = Tok.getLocation(); } 45 | "," { StringRef Val; } 46 | value ")" { actOnDefinition(Loc, Identifier, Val); } 47 | ; 48 | save_restore 49 | : "PUSH" | "POP" ; 50 | condition 51 | : ifpart | elsifpart | elsepart | endifpart ; 52 | ifpart 53 | : "IF" { SMLoc Loc = Tok.getLocation(); StringRef Val; } 54 | expr 
"THEN" { actOnIf(Loc, Val); } 55 | ; 56 | elsifpart 57 | : "ELSIF" { SMLoc Loc = Tok.getLocation(); StringRef Val; } 58 | { actOnElsIf(Loc); } 59 | expr "THEN" { actOnElsIf(Loc, Val); } 60 | ; 61 | elsepart 62 | : "ELSE" { actOnElse(Tok.getLocation()); } 63 | ; 64 | endifpart 65 | : "END" { actOnEnd(Tok.getLocation()); } 66 | ; 67 | expr 68 | : condExpr 69 | ( { tok::TokenKind Op = tok::unknown; } 70 | ( "=" { Op = Tok.getKind(); } 71 | | "#" { Op = Tok.getKind(); } 72 | ) { StringRef RightVal; } 73 | condExpr { Val = actOnRelation(Op, Val, RightVal); } 74 | )? 75 | ; 76 | condExpr 77 | : condTerm 78 | ( "OR" { StringRef RightVal; } 79 | condTerm { Val = actOnOr(Val, RightVal); } 80 | )* 81 | ; 82 | condTerm 83 | : condFactor 84 | ( "AND" { StringRef RightVal; } 85 | condFactor { Val = actOnAnd(Val, RightVal); } 86 | )* 87 | ; 88 | condFactor 89 | : "NOT" condFactor { actOnNot(Val); } 90 | | value 91 | | "(" expr ")" 92 | ; 93 | value 94 | : string_literal { Val = Tok.getLiteralData().substr(1, Tok.getLiteralData().size()-2); } 95 | | identifier { Val = actOnIdentifierValue(Tok.getIdentifier()); } 96 | ; 97 | -------------------------------------------------------------------------------- /lib/Lexer/Module.cppm: -------------------------------------------------------------------------------- 1 | //===--- Module.cppm - Modula-2 Language Lexer ----------------------------===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the module lexer. 
11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | 15 | export module m2lang.lexer; 16 | 17 | export import :Lexer; 18 | export import :Preprocessor; 19 | export import :Token; 20 | -------------------------------------------------------------------------------- /lib/Lexer/Token.cppm: -------------------------------------------------------------------------------- 1 | //===--- TokenKinds.cppm - Enum values for Modula-2 Token Kinds -----------===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the m2lang::TokenKind enum and support functions. 11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | module; 15 | 16 | #include "llvm/ADT/StringRef.h" 17 | #include "llvm/Support/SMLoc.h" 18 | 19 | export module m2lang.lexer:Token; 20 | 21 | import m2lang.basic; 22 | 23 | namespace m2lang { 24 | 25 | export class Token { 26 | friend class Lexer; 27 | 28 | /// The begin of the token. 29 | const char *Ptr; 30 | 31 | /// The length of the token. 32 | size_t Length; 33 | 34 | /// Kind - The actual flavor of token this is. 35 | tok::TokenKind Kind; 36 | 37 | public: 38 | tok::TokenKind getKind() const { return Kind; } 39 | void setKind(tok::TokenKind K) { Kind = K; } 40 | 41 | /// is/isNot - Predicates to check if this token is a specific kind, as in 42 | /// "if (Tok.is(tok::l_brace)) {...}". 43 | bool is(tok::TokenKind K) const { return Kind == K; } 44 | bool isNot(tok::TokenKind K) const { return Kind != K; } 45 | template bool isOneOf(TokenKind &&...Tks) const { 46 | return (... 
|| is(Tks)); 47 | } 48 | const char *getName() const { return tok::getTokenName(Kind); } 49 | 50 | llvm::SMLoc getLocation() const { return llvm::SMLoc::getFromPointer(Ptr); } 51 | 52 | size_t getLength() const { return Length; } 53 | 54 | llvm::StringRef getIdentifier() { 55 | assert(is(tok::identifier) && "Cannot get identfier of non-identifier"); 56 | return llvm::StringRef(Ptr, Length); 57 | } 58 | 59 | llvm::StringRef getLiteralData() { 60 | assert(isOneOf(tok::integer_literal, tok::real_literal, tok::char_literal, 61 | tok::string_literal) && 62 | "Cannot get literal data of non-literal"); 63 | return llvm::StringRef(Ptr, Length); 64 | } 65 | }; 66 | 67 | } // end namespace m2lang 68 | -------------------------------------------------------------------------------- /lib/Lexer/xmake.lua: -------------------------------------------------------------------------------- 1 | target("lexer") 2 | add_packages("llvm", {components = "base"}) 3 | set_kind("static") 4 | add_deps("LLtool", "basic") 5 | add_rules("lltool") 6 | add_files("DirectiveParser.g") 7 | add_files("*.cppm", {public = true}) 8 | -------------------------------------------------------------------------------- /lib/Parser/M2Parser.cppm: -------------------------------------------------------------------------------- 1 | //===--- M2Parser.cppm - Modula-2 Language parser -------------------------===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the parser implementation. 
11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | module; 15 | 16 | #include "llvm/ADT/StringRef.h" 17 | #include "llvm/Support/raw_ostream.h" 18 | #include "llvm/Support/SMLoc.h" 19 | 20 | module m2lang.parser:M2Parser; 21 | 22 | import m2lang.ast; 23 | import m2lang.basic; 24 | import m2lang.sema; 25 | import m2lang.lexer; 26 | 27 | namespace m2lang { 28 | 29 | // TODO These are used in the grammar file. 30 | using llvm::StringRef; 31 | using llvm::SMLoc; 32 | 33 | export class M2Parser { 34 | 35 | Preprocessor &PP; 36 | 37 | /// Actions - These are the callbacks we invoke as we parse various constructs 38 | /// in the file. 39 | Sema &Actions; 40 | 41 | /// Tok - The current token we are peeking ahead. All parsing methods assume 42 | /// that this is valid. 43 | Token Tok; 44 | 45 | DiagnosticsEngine &getDiagnostics() const { return PP.getDiagnostics(); } 46 | 47 | /// nextToken - This peeks ahead one token and returns it without 48 | /// consuming it. 49 | const Token &nextToken() { 50 | PP.next(Tok); 51 | StringRef str = StringRef(Tok.getLocation().getPointer(), Tok.getLength()); 52 | llvm::outs() << "Token: " << Tok.getName() << ": '" << str << "'\n"; 53 | return Tok; 54 | } 55 | 56 | SMLoc consumeToken() { 57 | SMLoc PrevLoc = Tok.getLocation(); 58 | nextToken(); 59 | return PrevLoc; 60 | } 61 | 62 | void consumeAnyToken() { nextToken(); } 63 | 64 | void consumeSemi() {} 65 | 66 | /// Expects and consume the token. 67 | /// Returns true in case of syntax error 68 | bool expectAndConsume(tok::TokenKind ExpectedTok, llvm::StringRef Msg = "") { 69 | if (Tok.is(ExpectedTok)) { 70 | consumeToken(); 71 | return false; 72 | } 73 | // There must be a better way! 
74 | const char *Expected = tok::getPunctuatorSpelling(ExpectedTok); 75 | if (!Expected) 76 | Expected = tok::getKeywordSpelling(ExpectedTok); 77 | StringRef Actual = 78 | StringRef(Tok.getLocation().getPointer(), Tok.getLength()); 79 | getDiagnostics().report(Tok.getLocation(), diag::err_expected) 80 | << Expected << Actual; 81 | return true; 82 | } 83 | 84 | void error() { 85 | getDiagnostics().report(Tok.getLocation(), diag::err_unexpected_symbol); 86 | } 87 | 88 | void advance() { nextToken(); } 89 | 90 | bool consume(tok::TokenKind ExpectedTok) { 91 | return expectAndConsume(ExpectedTok); 92 | } 93 | 94 | bool expect(tok::TokenKind ExpectedTok) { 95 | if (Tok.is(ExpectedTok)) { 96 | return false; 97 | } 98 | return true; 99 | } 100 | 101 | #define M2PARSER_DECLARATION 102 | #include "Modula-2.g.inc" 103 | #undef M2PARSER_DECLARATION 104 | 105 | public: 106 | M2Parser(Preprocessor &PP, Sema &Actions); 107 | 108 | void initialize(); 109 | 110 | const LangOptions &getLangOpts() const { return PP.getLangOpts(); } 111 | 112 | CompilationModule *parse() { 113 | __TokenBitSet Eof{tok::eof}; 114 | CompilationModule *CM = nullptr; 115 | parseCompilationModule(Eof, CM); 116 | return CM; 117 | } 118 | }; 119 | } // end namespace m2lang 120 | 121 | using namespace m2lang; 122 | 123 | namespace { 124 | template T tokenAs(Token Tok) { return T(Tok); } 125 | 126 | template <> Identifier tokenAs(Token Tok) { 127 | return Identifier(Tok.getLocation(), Tok.getIdentifier()); 128 | } 129 | 130 | template <> OperatorInfo tokenAs(Token Tok) { 131 | return OperatorInfo(Tok.getLocation(), Tok.getKind()); 132 | } 133 | } // namespace 134 | 135 | M2Parser::M2Parser(Preprocessor &PP, Sema &Actions) : PP(PP), Actions(Actions) { 136 | nextToken(); 137 | } 138 | 139 | void M2Parser::initialize() {} 140 | 141 | #define M2PARSER_DEFINITION 142 | #include "Modula-2.g.inc" 143 | #undef M2PARSER_DEFINITION 144 | -------------------------------------------------------------------------------- 
/lib/Parser/Parser.cppm: -------------------------------------------------------------------------------- 1 | //===--- Parser.cppm - Modula-2 Language parser ---------------------------===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Defines the public Parser facade, which forwards to the M2Parser partition. 11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | module; 15 | 16 | #include <memory> 17 | 18 | export module m2lang.parser; 19 | 20 | import :M2Parser; 21 | import m2lang.ast; 22 | import m2lang.lexer; 23 | import m2lang.sema; 24 | 25 | namespace m2lang { 26 | export class Parser { 27 | std::unique_ptr<M2Parser> Impl; 28 | 29 | const M2Parser *get() const { return Impl.get(); } 30 | 31 | M2Parser *get() { return Impl.get(); } 32 | 33 | public: 34 | Parser(Preprocessor &PP, Sema &Actions); 35 | ~Parser(); 36 | Parser(Parser &&) noexcept; 37 | Parser(Parser const &) = delete; 38 | Parser &operator=(Parser &&) noexcept; 39 | Parser &operator=(Parser const &) = delete; 40 | 41 | CompilationModule *parse(); 42 | }; 43 | } // end namespace m2lang 44 | 45 | using namespace m2lang; 46 | 47 | Parser::Parser(Preprocessor &PP, Sema &Actions) : Impl(std::make_unique<M2Parser>(PP, Actions)) { 48 | } 49 | 50 | Parser::Parser(Parser &&) noexcept = default; 51 | Parser &Parser::operator=(Parser &&) noexcept = default; 52 | Parser::~Parser() = default; 53 | 54 | CompilationModule *Parser::parse() { 55 | return get()->parse(); 56 | } -------------------------------------------------------------------------------- /lib/Parser/xmake.lua: -------------------------------------------------------------------------------- 1 | target("parser") 2 | add_packages("llvm") 3 | set_kind("static") 4 | add_deps("LLtool",
"basic", "ast", "lexer", "sema") 5 | add_rules("lltool") 6 | add_files("Modula-2.g") 7 | add_files("Parser.cppm", {public = true}) 8 | add_files("M2Parser.cppm", {public = true}) 9 | -------------------------------------------------------------------------------- /lib/Sema/xmake.lua: -------------------------------------------------------------------------------- 1 | target("sema") 2 | add_packages("llvm", {components = "base"}) 3 | set_kind("static") 4 | add_deps("basic", "ast", "lexer") 5 | add_files("*.cppm", {public = true}) 6 | -------------------------------------------------------------------------------- /scripts/vcenv.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | if NOT DEFINED VCINSTALLDIR ( 3 | if exist "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat" ( 4 | call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64 > nul 5 | echo # USING VISUAL STUDIO 16 6 | ) 7 | ) 8 | 9 | if NOT DEFINED VCINSTALLDIR ( 10 | if exist "C:\Program Files (x86)\Microsoft Visual Studio 15.0\VC\vcvarsall.bat" ( 11 | call "C:\Program Files (x86)\Microsoft Visual Studio 15.0\VC\vcvarsall.bat" amd64 12 | echo # USING VISUAL STUDIO 15 13 | ) 14 | ) 15 | 16 | if NOT DEFINED VCINSTALLDIR ( 17 | if exist "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" ( 18 | call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 19 | echo # USING VISUAL STUDIO 14 20 | ) 21 | ) 22 | 23 | if NOT DEFINED VCINSTALLDIR ( 24 | if exist "C:\Program Files (x86)\Microsoft Visual Studio 13.0\VC\vcvarsall.bat" ( 25 | call "C:\Program Files (x86)\Microsoft Visual Studio 13.0\VC\vcvarsall.bat" amd64 26 | echo # USING VISUAL STUDIO 13 27 | ) 28 | ) 29 | 30 | if NOT DEFINED VCINSTALLDIR ( 31 | if exist "C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\vcvarsall.bat" ( 32 | call "C:\Program Files 
(x86)\Microsoft Visual Studio 12.0\VC\vcvarsall.bat" amd64 33 | echo # USING VISUAL STUDIO 12 34 | ) 35 | ) 36 | 37 | if NOT DEFINED VCINSTALLDIR ( 38 | echo # No compatible visual studio found! run vcvarsall.bat first! 39 | ) 40 | 41 | echo export PATH="%PATH%" 42 | echo export INCLUDE="%INCLUDE%" 43 | echo export LIB="%LIB%" 44 | echo export LIBPATH="%LIBPATH%" 45 | -------------------------------------------------------------------------------- /test/M2/CodeGen/Array1.mod: -------------------------------------------------------------------------------- 1 | (* 2 | RUN: m2lang -filetype=asm -emit-llvm -o - %s | FileCheck %s 3 | *) 4 | MODULE Array1; 5 | 6 | TYPE 7 | Color = (Red, Green, Blue, Cyan, Yellow, White, Purple, Black); 8 | 9 | VAR 10 | ColorWeight : ARRAY Color OF CARDINAL; 11 | 12 | PROCEDURE SetValue(Index: Color; Weight: CARDINAL); 13 | BEGIN 14 | ColorWeight[Index] := Weight; 15 | END SetValue; 16 | (* IR is wrong. *) 17 | 18 | PROCEDURE GetValue(Index: Color; VAR Weight: CARDINAL); 19 | BEGIN 20 | Weight := ColorWeight[Index]; 21 | END GetValue; 22 | (* IR is correct, assuming a 64 bit platform. *) 23 | (* 24 | CHECK: define void @_m6Array18GetValue(i64 %Index, ptr nocapture dereferenceable(8) %Weight) { 25 | CHECK: entry: 26 | CHECK: %0 = getelementptr inbounds ptr, ptr @_m6Array111ColorWeight, i64 %Index 27 | CHECK: %1 = load i64, ptr %0, align 8 28 | CHECK: store i64 %1, ptr %Weight, align 8 29 | CHECK: } 30 | *) 31 | 32 | END Array1. 33 | -------------------------------------------------------------------------------- /test/M2/CodeGen/Call.mod: -------------------------------------------------------------------------------- 1 | (* 2 | RUN: m2lang -filetype=asm -emit-llvm -o - %s | FileCheck %s 3 | Still crashes because function calls are not yet implemented! 
4 | XFAIL: * 5 | *) 6 | MODULE Call; 7 | 8 | PROCEDURE Get():INTEGER; 9 | BEGIN 10 | RETURN 5; 11 | END Get; 12 | 13 | PROCEDURE Test1():INTEGER; 14 | BEGIN 15 | IF Get() > 0 THEN 16 | RETURN 1; 17 | ELSE 18 | RETURN 0; 19 | END; 20 | END Test1; 21 | (* 22 | CHECK-LABEL: 23 | CHECK: 24 | *) 25 | 26 | END Call. 27 | -------------------------------------------------------------------------------- /test/M2/CodeGen/CheckModVisibility.mod: -------------------------------------------------------------------------------- 1 | (* 2 | RUN: m2lang -filetype=asm -emit-llvm -o - %s | FileCheck %s 3 | *) 4 | (* See https://www.arjay.bc.ca/Modula-2/Text/index.html?https://www.arjay.bc.ca/Modula-2/Text/Ch10/Ch10.15.html *) 5 | MODULE CheckModVisibility; 6 | 7 | MODULE Inner; 8 | EXPORT number; 9 | VAR 10 | number : REAL; 11 | 12 | END Inner; 13 | 14 | BEGIN 15 | number := 5.0; 16 | Inner.number := 9.8; (* both references legal *) 17 | END CheckModVisibility. -------------------------------------------------------------------------------- /test/M2/CodeGen/CheckModVisibility2.mod: -------------------------------------------------------------------------------- 1 | (* 2 | RUN: m2lang -filetype=asm -emit-llvm -o - %s | FileCheck %s 3 | *) 4 | (* See https://www.arjay.bc.ca/Modula-2/Text/index.html?https://www.arjay.bc.ca/Modula-2/Text/Ch10/Ch10.15.html *) 5 | MODULE CheckModVisibility2; 6 | 7 | MODULE Inside1; 8 | EXPORT number1; 9 | VAR 10 | number1 : REAL; 11 | 12 | BEGIN (* Only number1 is visible here *) 13 | number1 := 5.4; (* number1 can be initialized here *) 14 | END Inside1; 15 | 16 | MODULE Inside2; 17 | IMPORT Inside1; 18 | VAR 19 | number2 : REAL; 20 | 21 | BEGIN (* number2, number1 both visible here. *) 22 | number1 := 3.4; 23 | Inside1.number1 := 5.7; 24 | number2 := 3.9 25 | END Inside2; 26 | 27 | BEGIN (* Test *) 28 | (* only number1 visible here *) 29 | END CheckModVisibility2. 
-------------------------------------------------------------------------------- /test/M2/CodeGen/CheckModVisibility3.mod: -------------------------------------------------------------------------------- 1 | (* 2 | RUN: m2lang -filetype=asm -emit-llvm -o - %s | FileCheck %s 3 | *) 4 | (* See https://www.arjay.bc.ca/Modula-2/Text/index.html?https://www.arjay.bc.ca/Modula-2/Text/Ch10/Ch10.15.html *) 5 | MODULE CheckModVisibility3; 6 | 7 | MODULE Shell1; 8 | EXPORT 9 | Inner1; 10 | 11 | MODULE Inner1; 12 | EXPORT thing; 13 | VAR 14 | thing: CARDINAL; 15 | END Inner1; 16 | 17 | END Shell1; 18 | 19 | MODULE Shell2; 20 | EXPORT Inner2; 21 | 22 | MODULE Inner2; 23 | EXPORT QUALIFIED thing; 24 | VAR 25 | thing: CARDINAL; 26 | END Inner2; 27 | 28 | END Shell2; 29 | 30 | MODULE Shell3; 31 | EXPORT QUALIFIED Inner3; 32 | 33 | MODULE Inner3; 34 | EXPORT QUALIFIED thing; 35 | VAR 36 | thing: CARDINAL; 37 | END Inner3; 38 | 39 | END Shell3; 40 | 41 | (* here in this outer one, Inner1 is visible, so its exports may be qualified *) 42 | (* Inner2 is visible, so its qualified exports may be qualified *) 43 | (* however, Inner3 itself must be qualified *) 44 | BEGIN 45 | Inner1.thing := 5; 46 | Inner2.thing := 4; 47 | Shell3.Inner3.thing := 8; 48 | END CheckModVisibility3. -------------------------------------------------------------------------------- /test/M2/CodeGen/Enum.mod: -------------------------------------------------------------------------------- 1 | (* 2 | RUN: m2lang -filetype=asm -emit-llvm -o - %s | FileCheck %s 3 | TODO The array dereferencing is still wrong. 
4 | *) 5 | MODULE Enum; 6 | 7 | TYPE 8 | WeekDay = (Mon, Tue, Wed, Thu, Fri, Sat, Sun); 9 | Month = (Jan, Feb, Mar, Apr, May, Jun, Jul, Aug, Sep, Oct, Nov, Dec); 10 | 11 | DayInMonth = ARRAY Month OF WeekDay; 12 | 13 | VAR 14 | Special: DayInMonth; 15 | 16 | PROCEDURE SetDay(VAR d: WeekDay); 17 | BEGIN 18 | d := Sun 19 | END SetDay; 20 | (* 21 | CHECK-LABEL: _m4Enum6SetDay 22 | CHECK: store i64 6, ptr %d, align 8 23 | *) 24 | 25 | PROCEDURE SetMonth(VAR m: DayInMonth); 26 | VAR 27 | i: Month; 28 | j: WeekDay; 29 | BEGIN 30 | j := Mon; 31 | FOR i := Jan TO Dec DO 32 | m[i] := j; 33 | END; 34 | END SetMonth; 35 | (* 36 | CHECK-LABEL: _m4Enum8SetMonth 37 | TODO Array access still wrong! 38 | *) 39 | 40 | PROCEDURE SetSpecial; 41 | VAR 42 | i: Month; 43 | j: WeekDay; 44 | BEGIN 45 | j := Mon; 46 | FOR i := Jan TO Dec DO 47 | Special[i] := j; 48 | END; 49 | END SetSpecial; 50 | (* 51 | CHECK-LABEL: _m4Enum10SetSpecial 52 | TODO Array access still wrong! 53 | *) 54 | 55 | END Enum. 56 | -------------------------------------------------------------------------------- /test/M2/CodeGen/For.mod: -------------------------------------------------------------------------------- 1 | (* 2 | RUN: m2lang -filetype=asm -emit-llvm -o - %s | FileCheck %s 3 | *) 4 | MODULE For; 5 | 6 | PROCEDURE Test1():INTEGER; 7 | VAR x, y: INTEGER; 8 | BEGIN 9 | y := 0; 10 | FOR x := 1 TO 10 DO 11 | y := y + 2; 12 | END; 13 | RETURN y; 14 | END Test1; 15 | (* 16 | CHECK-LABEL: _m3For5Test1 17 | CHECK: %0 = phi i64 [ %3, %for.body ], [ 0, %entry ] 18 | CHECK: %1 = phi i64 [ %4, %for.body ], [ 1, %entry ] 19 | *) 20 | 21 | PROCEDURE Test2():INTEGER; 22 | VAR x, y: INTEGER; 23 | BEGIN 24 | y := 0; 25 | FOR x := 1 TO 10 BY 2 DO 26 | y := y + 2; 27 | END; 28 | RETURN y; 29 | END Test2; 30 | (* 31 | CHECK-LABEL: _m3For5Test2 32 | CHECK: %0 = phi i64 [ %3, %for.body ], [ 0, %entry ] 33 | CHECK: %1 = phi i64 [ %4, %for.body ], [ 1, %entry ] 34 | *) 35 | 36 | END For. 
37 | -------------------------------------------------------------------------------- /test/M2/CodeGen/IfElse.mod: -------------------------------------------------------------------------------- 1 | (* 2 | RUN: m2lang -filetype=asm -emit-llvm -o - %s | FileCheck %s 3 | *) 4 | MODULE IfElse; 5 | 6 | PROCEDURE Test1(x: INTEGER):INTEGER; 7 | BEGIN 8 | IF x >= 10 THEN 9 | RETURN 10; 10 | ELSIF x = 9 THEN 11 | RETURN 9; 12 | ELSIF x = 5 THEN 13 | RETURN 5; 14 | ELSE 15 | RETURN 0; 16 | END; 17 | END Test1; 18 | (* 19 | CHECK-LABEL: _m6IfElse5Test1 20 | *) 21 | 22 | PROCEDURE Test2(x: INTEGER):INTEGER; 23 | BEGIN 24 | IF x >= 10 THEN 25 | RETURN 10; 26 | ELSIF x = 9 THEN 27 | RETURN 9; 28 | ELSIF x = 5 THEN 29 | RETURN 5; 30 | END; 31 | RETURN 0; 32 | END Test2; 33 | (* 34 | CHECK-LABEL: _m6IfElse5Test2 35 | *) 36 | 37 | END IfElse. 38 | -------------------------------------------------------------------------------- /test/M2/CodeGen/LocalMod.mod: -------------------------------------------------------------------------------- 1 | (* 2 | RUN: m2lang -filetype=asm -emit-llvm -o - %s | FileCheck %s 3 | *) 4 | MODULE LocalMod; 5 | 6 | (* 7 | CHECK: @_m8LocalMod1A6AColor = private global i64 8 | CHECK: @_m8LocalMod8TheColor = private global i64 9 | CHECK: @_m8LocalMod1B12CurrentColor = private global i64 10 | *) 11 | 12 | MODULE A; 13 | EXPORT Color, AColor; 14 | TYPE 15 | Color = (Yellow, Red, Blue, Purple); 16 | VAR 17 | AColor: Color; 18 | END A; 19 | 20 | VAR 21 | TheColor: Color; 22 | 23 | PROCEDURE InitColor; 24 | BEGIN 25 | TheColor := Yellow; 26 | AColor := Purple; 27 | END InitColor; 28 | (* 29 | CHECK-LABEL: _m8LocalMod9InitColor 30 | CHECK: store i64 0, ptr @_m8LocalMod8TheColor, align 8 31 | CHECK: store i64 3, ptr @_m8LocalMod1A6AColor, align 8 32 | *) 33 | 34 | MODULE B; 35 | IMPORT Color; 36 | EXPORT CurrentColor; 37 | VAR 38 | CurrentColor: Color; 39 | END B; 40 | 41 | MODULE C; 42 | IMPORT Color, CurrentColor; 43 | EXPORT SetColor; 44 | PROCEDURE 
SetColor(C: Color); 45 | BEGIN 46 | (* Replace C with Color to produce crash in gm2. *) 47 | CurrentColor := C; 48 | END SetColor; 49 | END C; 50 | (* 51 | CHECK-LABEL: _m8LocalMod1C8SetColor 52 | CHECK: store i64 %C, ptr @_m8LocalMod1B12CurrentColor, align 8 53 | *) 54 | 55 | END LocalMod. 56 | -------------------------------------------------------------------------------- /test/M2/CodeGen/Loop.mod: -------------------------------------------------------------------------------- 1 | (* 2 | RUN: m2lang -filetype=asm -emit-llvm -o - %s | FileCheck %s 3 | *) 4 | MODULE Loop; 5 | 6 | PROCEDURE Test1(num: INTEGER):INTEGER; 7 | VAR sum: INTEGER; 8 | BEGIN 9 | sum := 0; 10 | LOOP 11 | sum := sum + num; 12 | num := num - 2; 13 | IF num <= 0 THEN EXIT END; 14 | END; 15 | RETURN sum; 16 | END Test1; 17 | (* 18 | CHECK-LABEL: _m4Loop5Test1 19 | CHECK: loop.body: 20 | CHECK-NEXT: %0 = phi i64 [ %3, %after.if ], [ %num, %entry ] 21 | CHECK-NEXT: %1 = phi i64 [ %2, %after.if ], [ 0, %entry ] 22 | *) 23 | 24 | END Loop. 25 | -------------------------------------------------------------------------------- /test/M2/CodeGen/OpenArray.mod: -------------------------------------------------------------------------------- 1 | (* 2 | RUN: m2lang -filetype=asm -emit-llvm -o - %s | FileCheck %s 3 | XFAIL: * 4 | *) 5 | MODULE OpenArray; 6 | 7 | (* Arrays should be passed as length + pointer to data. 8 | E.g. %array = type { i32, ptr } 9 | *) 10 | 11 | (* How are arrays passed? Pointer to local copy? *) 12 | PROCEDURE Test1(x: ARRAY OF CHAR):CHAR; 13 | BEGIN 14 | RETURN x[1]; 15 | END Test1; 16 | 17 | (* CodeGen is completely broken for this case. 
*) 18 | (* Expected IR (bounds check missing): 19 | 20 | define signext i8 @_m9OpenArray5Test2(i32 %len, ptr %data) { 21 | %ptr = getelementptr inbounds i8, ptr %data, i64 1 22 | %res = load i8, ptr %ptr, align 1 23 | ret i8 %res 24 | } 25 | 26 | *) 27 | PROCEDURE Test2(VAR x: ARRAY OF CHAR):CHAR; 28 | BEGIN 29 | RETURN x[1]; 30 | END Test2; 31 | 32 | END OpenArray. 33 | -------------------------------------------------------------------------------- /test/M2/CodeGen/PassByRef.mod: -------------------------------------------------------------------------------- 1 | (* 2 | COM: RUN: m2lang -filetype=asm -emit-llvm -o - %s | FileCheck %s 3 | Selectors for records are wrong! 4 | *) 5 | MODULE PassByRef; 6 | 7 | PROCEDURE SetInt(VAR i: INTEGER); 8 | BEGIN 9 | i := 0; 10 | END SetInt; 11 | (* 12 | CHECK-LABEL: _m9PassByRef6SetInt 13 | CHECK: store i64 0, ptr %i, align 8 14 | *) 15 | 16 | TYPE 17 | Person = RECORD 18 | Height, Weight, Age: INTEGER; 19 | END; 20 | 21 | PROCEDURE SetPerson(VAR p: Person); 22 | BEGIN 23 | p.Height := 180; 24 | p.Weight := 80; 25 | p.Age := 18; 26 | END SetPerson; 27 | (* 28 | CHECK-LABEL: _m9PassByRef9SetPerson 29 | TODO Selectors are wrong! 30 | *) 31 | 32 | END PassByRef. 
33 | -------------------------------------------------------------------------------- /test/M2/CodeGen/Pointer.mod: -------------------------------------------------------------------------------- 1 | (* 2 | RUN: m2lang -filetype=asm -emit-llvm -o - %s | FileCheck %s 3 | *) 4 | MODULE Pointer; 5 | 6 | TYPE 7 | IntPtr = POINTER TO INTEGER; 8 | CardPtr = POINTER TO CARDINAL; 9 | PointPtr = POINTER TO Point; 10 | Point = RECORD 11 | x, y: CARDINAL 12 | END; 13 | 14 | PROCEDURE InitInt():IntPtr; 15 | BEGIN 16 | RETURN NIL; 17 | END InitInt; 18 | (* 19 | CHECK-LABEL: _m7Pointer7InitInt 20 | CHECK: ret ptr null 21 | *) 22 | 23 | PROCEDURE InitCard():CardPtr; 24 | BEGIN 25 | RETURN NIL; 26 | END InitCard; 27 | (* 28 | CHECK-LABEL: _m7Pointer8InitCard 29 | CHECK: ret ptr null 30 | *) 31 | 32 | PROCEDURE InitPoint():PointPtr; 33 | BEGIN 34 | RETURN NIL; 35 | END InitPoint; 36 | (* 37 | CHECK-LABEL: _m7Pointer9InitPoint 38 | CHECK: ret ptr null 39 | *) 40 | 41 | END Pointer. 42 | -------------------------------------------------------------------------------- /test/M2/CodeGen/Repeat.mod: -------------------------------------------------------------------------------- 1 | (* 2 | RUN: m2lang -filetype=asm -emit-llvm -o - %s | FileCheck %s 3 | *) 4 | MODULE Repeat; 5 | 6 | PROCEDURE Test1(num: INTEGER):INTEGER; 7 | VAR sum: INTEGER; 8 | BEGIN 9 | sum := 0; 10 | REPEAT 11 | sum := sum + num; 12 | num := num - 2; 13 | UNTIL num = 0; 14 | RETURN sum; 15 | END Test1; 16 | (* 17 | CHECK-LABEL: _m6Repeat5Test1 18 | CHECK: %0 = phi i64 [ %3, %repeat.body ], [ %num, %entry ] 19 | CHECK: %1 = phi i64 [ %2, %repeat.body ], [ 0, %entry ] 20 | *) 21 | 22 | END Repeat. 
23 | -------------------------------------------------------------------------------- /test/M2/CodeGen/While.mod: -------------------------------------------------------------------------------- 1 | (* 2 | RUN: m2lang -filetype=asm -emit-llvm -o - %s | FileCheck %s 3 | *) 4 | MODULE While; 5 | 6 | PROCEDURE Test1(a, b: INTEGER):INTEGER; 7 | VAR t: INTEGER; 8 | BEGIN 9 | IF b = 0 THEN 10 | RETURN a; 11 | END; 12 | WHILE b # 0 DO 13 | t := a MOD b; 14 | a := b; 15 | b := t; 16 | END; 17 | RETURN a; 18 | END Test1; 19 | (* 20 | CHECK-LABEL: _m5While5Test1 21 | CHECK: %1 = phi i64 [ %2, %while.body ], [ %a, %after.if ] 22 | CHECK: %2 = phi i64 [ %4, %while.body ], [ %b, %after.if ] 23 | *) 24 | 25 | PROCEDURE Test2(VAR a: INTEGER; b: INTEGER); 26 | VAR t: INTEGER; 27 | BEGIN 28 | IF b = 0 THEN 29 | RETURN; 30 | END; 31 | WHILE b # 0 DO 32 | t := a MOD b; 33 | a := b; 34 | b := t; 35 | END; 36 | END Test2; 37 | (* 38 | CHECK-LABEL: _m5While5Test2 39 | CHECK: %1 = phi i64 [ %4, %while.body ], [ %b, %after.if ] 40 | CHECK: %3 = load i64, ptr %a, align 8 41 | CHECK: store i64 %1, ptr %a, align 8 42 | *) 43 | 44 | END While. 
45 | -------------------------------------------------------------------------------- /test/M2/Directives/Nested.mod: -------------------------------------------------------------------------------- 1 | (* 2 | RUN: m2lang -DBRANCH=b1 -filetype=asm -emit-llvm -o - %s | FileCheck --check-prefix=BRANCH1 %s 3 | RUN: m2lang -DBRANCH=b2 -filetype=asm -emit-llvm -o - %s | FileCheck --check-prefix=BRANCH2 %s 4 | RUN: m2lang -DBRANCH=b3 -filetype=asm -emit-llvm -o - %s | FileCheck --check-prefix=BRANCH3 %s 5 | RUN: m2lang -DBRANCH=b2 -DNESTED=Y1 -filetype=asm -emit-llvm -o - %s | FileCheck --check-prefix=NESTED %s 6 | *) 7 | MODULE Nested; 8 | 9 | <* ENVIRON(BRANCH, "b4") *> 10 | <* ENVIRON(NESTED, FALSE) *> 11 | <* IF BRANCH="b1" THEN*> 12 | VAR X : INTEGER; 13 | <* ELSIF BRANCH="b2" THEN*> 14 | VAR 15 | <* IF NESTED="Y1" THEN*> 16 | Y1 17 | <*ELSE*> 18 | Y2 19 | <*END*> 20 | : INTEGER; 21 | <*ELSE*> 22 | VAR Z : INTEGER; 23 | <*END*> 24 | 25 | (* 26 | BRANCH1: @_m6Nested1X 27 | BRANCH2: @_m6Nested2Y2 28 | BRANCH3: @_m6Nested1Z 29 | NESTED: @_m6Nested2Y1 30 | *) 31 | END Nested. 32 | -------------------------------------------------------------------------------- /test/Unit/lit.cfg.py: -------------------------------------------------------------------------------- 1 | # -*- Python -*- 2 | 3 | # Configuration file for the 'lit' test runner. 4 | 5 | import os 6 | import platform 7 | import subprocess 8 | 9 | import lit.formats 10 | import lit.util 11 | 12 | # name: The name of this test suite. 13 | config.name = 'M2lang-Unit' 14 | 15 | # suffixes: A list of file extensions to treat as test files. 16 | config.suffixes = [] 17 | 18 | # test_source_root: The root path where tests are located. 19 | # test_exec_root: The root path where tests should be run. 20 | config.test_exec_root = os.path.join(config.m2lang_obj_root, 'unittests') 21 | config.test_source_root = config.test_exec_root 22 | 23 | # testFormat: The test format to use to interpret tests. 
24 | config.test_format = lit.formats.GoogleTest(config.llvm_build_mode, 'Tests') 25 | 26 | # Propagate the temp directory. Windows requires this because it uses \Windows\ 27 | # if none of these are present. 28 | if 'TMP' in os.environ: 29 | config.environment['TMP'] = os.environ['TMP'] 30 | if 'TEMP' in os.environ: 31 | config.environment['TEMP'] = os.environ['TEMP'] 32 | 33 | # Propagate path to symbolizer for ASan/MSan. 34 | for symbolizer in ['ASAN_SYMBOLIZER_PATH', 'MSAN_SYMBOLIZER_PATH']: 35 | if symbolizer in os.environ: 36 | config.environment[symbolizer] = os.environ[symbolizer] 37 | 38 | def find_shlibpath_var(): 39 | if platform.system() in ['Linux', 'FreeBSD', 'NetBSD', 'SunOS']: 40 | yield 'LD_LIBRARY_PATH' 41 | elif platform.system() == 'Darwin': 42 | yield 'DYLD_LIBRARY_PATH' 43 | elif platform.system() == 'Windows': 44 | yield 'PATH' 45 | 46 | for shlibpath_var in find_shlibpath_var(): 47 | # in stand-alone builds, shlibdir is m2lang's build tree 48 | # while llvm_libs_dir is installed LLVM (and possibly older m2lang) 49 | shlibpath = os.path.pathsep.join( 50 | (config.shlibdir, 51 | config.llvm_libs_dir, 52 | config.environment.get(shlibpath_var, ''))) 53 | config.environment[shlibpath_var] = shlibpath 54 | break 55 | else: 56 | lit_config.warning("unable to inject shared library path on '{}'" 57 | .format(platform.system())) 58 | -------------------------------------------------------------------------------- /test/Unit/lit.site.cfg.py.in: -------------------------------------------------------------------------------- 1 | @LIT_SITE_CFG_IN_HEADER@ 2 | 3 | import sys 4 | 5 | config.llvm_src_root = "@LLVM_SOURCE_DIR@" 6 | config.llvm_obj_root = "@LLVM_BINARY_DIR@" 7 | config.llvm_tools_dir = "@LLVM_TOOLS_DIR@" 8 | config.llvm_libs_dir = "@LLVM_LIBS_DIR@" 9 | config.llvm_build_mode = "@LLVM_BUILD_MODE@" 10 | config.m2lang_obj_root = "@M2LANG_BINARY_DIR@" 11 | config.enable_shared = @ENABLE_SHARED@ 12 | config.shlibdir = "@SHLIBDIR@" 13 | 
config.target_triple = "@TARGET_TRIPLE@" 14 | 15 | # Support substitution of the tools_dir, libs_dirs, and build_mode with user 16 | # parameters. This is used when we can't determine the tool dir at 17 | # configuration time. 18 | try: 19 | config.llvm_tools_dir = config.llvm_tools_dir % lit_config.params 20 | config.llvm_libs_dir = config.llvm_libs_dir % lit_config.params 21 | config.llvm_build_mode = config.llvm_build_mode % lit_config.params 22 | except KeyError: 23 | e = sys.exc_info()[1] 24 | key, = e.args 25 | lit_config.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key)) 26 | 27 | # Let the main config do the real work. 28 | lit_config.load_config(config, "@M2LANG_SOURCE_DIR@/test/Unit/lit.cfg.py") 29 | -------------------------------------------------------------------------------- /test/lit.site.cfg.py.in: -------------------------------------------------------------------------------- 1 | @LIT_SITE_CFG_IN_HEADER@ 2 | 3 | import sys 4 | 5 | config.llvm_src_root = "@LLVM_SOURCE_DIR@" 6 | config.llvm_obj_root = "@LLVM_BINARY_DIR@" 7 | config.llvm_tools_dir = "@LLVM_TOOLS_DIR@" 8 | config.llvm_libs_dir = "@LLVM_LIBS_DIR@" 9 | config.llvm_shlib_dir = "@SHLIBDIR@" 10 | config.llvm_plugin_ext = "@LLVM_PLUGIN_EXT@" 11 | config.lit_tools_dir = "@LLVM_LIT_TOOLS_DIR@" 12 | config.m2lang_obj_root = "@M2LANG_BINARY_DIR@" 13 | config.m2lang_src_dir = "@M2LANG_SOURCE_DIR@" 14 | config.m2lang_tools_dir = "@M2LANG_TOOLS_DIR@" 15 | config.host_triple = "@LLVM_HOST_TRIPLE@" 16 | config.target_triple = "@TARGET_TRIPLE@" 17 | config.host_cxx = "@CMAKE_CXX_COMPILER@" 18 | config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@" 19 | config.have_zlib = @HAVE_LIBZ@ 20 | config.enable_shared = @ENABLE_SHARED@ 21 | config.enable_backtrace = @ENABLE_BACKTRACES@ 22 | config.host_arch = "@HOST_ARCH@" 23 | config.python_executable = "@PYTHON_EXECUTABLE@" 24 | 25 | # Support substitution of the tools and libs dirs with user parameters. 
This is 26 | # used when we can't determine the tool dir at configuration time. 27 | try: 28 | config.m2lang_tools_dir = config.m2lang_tools_dir % lit_config.params 29 | config.llvm_tools_dir = config.llvm_tools_dir % lit_config.params 30 | config.llvm_shlib_dir = config.llvm_shlib_dir % lit_config.params 31 | config.llvm_libs_dir = config.llvm_libs_dir % lit_config.params 32 | except KeyError: 33 | e = sys.exc_info()[1] 34 | key, = e.args 35 | lit_config.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key)) 36 | 37 | import lit.llvm 38 | lit.llvm.initialize(lit_config, config) 39 | 40 | # Let the main config do the real work. 41 | lit_config.load_config(config, "@M2LANG_SOURCE_DIR@/test/lit.cfg.py") 42 | -------------------------------------------------------------------------------- /tools/driver/xmake.lua: -------------------------------------------------------------------------------- 1 | target("driver") 2 | add_packages("llvm") 3 | set_kind("binary") 4 | add_deps("basic", "ast", "lexer", "sema", "parser", "codegen") 5 | add_files("driver.cpp") 6 | set_basename("m2lang") -------------------------------------------------------------------------------- /unittests/LLtool/FirstFollowSetTest.cpp: -------------------------------------------------------------------------------- 1 | //===- unittests/LLtool/FirstFollowSetTest.cpp --- LLtool Algo tests ------===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information.
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | 9 | #include "lltool/Algo.h" 10 | #include "lltool/Diagnostic.h" 11 | #include "lltool/Parser.h" 12 | #include "lltool/VarStore.h" 13 | #include "llvm/ADT/StringRef.h" 14 | #include "llvm/Support/MemoryBuffer.h" 15 | #include "llvm/Support/SourceMgr.h" 16 | #include "gtest/gtest.h" 17 | 18 | namespace { 19 | 20 | using namespace lltool; 21 | 22 | namespace { 23 | struct InitBitVector { 24 | template 25 | InitBitVector(unsigned NBits, Ts... BitsToSet) : Bits(NBits) { 26 | set(BitsToSet...); 27 | } 28 | 29 | template void set(T Idx) { Bits.set(Idx); } 30 | 31 | template void set(T Idx, Ts... Idxs) { 32 | Bits.set(Idx); 33 | set(Idxs...); 34 | } 35 | 36 | llvm::BitVector operator()() { return Bits; } 37 | 38 | private: 39 | llvm::BitVector Bits; 40 | }; 41 | 42 | } // namespace 43 | 44 | TEST(FirstFollowSetTest, firstSet1Test) { 45 | // Wilhelm, Maurer; p. 311 46 | // Nonterminals Eq, Tq derives epsilon. 47 | llvm::StringRef Input = "%token id\n" 48 | "%%\n" 49 | "S : E ;\n" 50 | "E : T Eq ;\n" 51 | "Eq : ( '+' E )? ;\n" 52 | "T : F Tq ;\n" 53 | "Tq : ( '*' T )? 
;\n" 54 | "F : id | '(' E ')' ;\n"; 55 | 56 | llvm::SourceMgr SrcMgr; 57 | Diagnostic Diag(SrcMgr); 58 | std::unique_ptr Content = 59 | llvm::MemoryBuffer::getMemBuffer(Input); 60 | SrcMgr.AddNewSourceBuffer(std::move(Content), llvm::SMLoc()); 61 | Grammar G; 62 | VarStore Vars; 63 | Parser TheParser(SrcMgr); 64 | TheParser.parse(G, Vars); 65 | calculateReachable(G); 66 | calculateDerivesEpsilon(G); 67 | calculateFirstSets(G); 68 | llvm::StringMap MapNT; 69 | llvm::StringMap MapT; 70 | for (Node *N : G.nodes()) { 71 | if (auto *NT = llvm::dyn_cast(N)) { 72 | MapNT[NT->name()] = NT; 73 | } else if (auto *T = llvm::dyn_cast(N)) { 74 | MapT[T->name()] = T; 75 | } 76 | } 77 | 78 | const unsigned NumT = G.numberOfTerminals(); 79 | ASSERT_EQ(MapNT["S"]->FirstSet, 80 | InitBitVector(NumT, MapT["id"]->No, MapT["'('"]->No)()); 81 | ASSERT_FALSE(MapNT["S"]->derivesEpsilon()); 82 | ASSERT_EQ(MapNT["E"]->FirstSet, 83 | InitBitVector(NumT, MapT["id"]->No, MapT["'('"]->No)()); 84 | ASSERT_FALSE(MapNT["E"]->derivesEpsilon()); 85 | ASSERT_EQ(MapNT["Eq"]->FirstSet, InitBitVector(NumT, MapT["'+'"]->No)()); 86 | ASSERT_TRUE(MapNT["Eq"]->derivesEpsilon()); 87 | ASSERT_EQ(MapNT["T"]->FirstSet, 88 | InitBitVector(NumT, MapT["id"]->No, MapT["'('"]->No)()); 89 | ASSERT_FALSE(MapNT["T"]->derivesEpsilon()); 90 | ASSERT_EQ(MapNT["Tq"]->FirstSet, InitBitVector(NumT, MapT["'*'"]->No)()); 91 | ASSERT_TRUE(MapNT["Tq"]->derivesEpsilon()); 92 | ASSERT_EQ(MapNT["F"]->FirstSet, 93 | InitBitVector(NumT, MapT["id"]->No, MapT["'('"]->No)()); 94 | ASSERT_FALSE(MapNT["F"]->derivesEpsilon()); 95 | } 96 | 97 | TEST(FirstFollowSetTest, followSet1Test) { 98 | // This is basically the same example as from Wilhelm, Maurer. 99 | // Please note the ";" at the end of production stmt. 100 | llvm::StringRef Input = "%token number\n" 101 | "%%\n" 102 | "stmt : expr ';' ;\n" 103 | "expr : ( term exprq )? ;\n" 104 | "exprq : ( '+' term exprq )? ;\n" 105 | "term : factor termq ;\n" 106 | "termq : ( '*' factor termq )? 
;\n" 107 | "factor : '(' expr ')' | number ;\n"; 108 | 109 | llvm::SourceMgr SrcMgr; 110 | Diagnostic Diag(SrcMgr); 111 | std::unique_ptr Content = 112 | llvm::MemoryBuffer::getMemBuffer(Input); 113 | SrcMgr.AddNewSourceBuffer(std::move(Content), llvm::SMLoc()); 114 | Grammar G; 115 | VarStore Vars; 116 | Parser TheParser(SrcMgr); 117 | TheParser.parse(G, Vars); 118 | calculateReachable(G); 119 | calculateDerivesEpsilon(G); 120 | calculateFirstSets(G); 121 | calculateFollowSets(G); 122 | llvm::StringMap MapNT; 123 | llvm::StringMap MapT; 124 | for (Node *N : G.nodes()) { 125 | if (auto *NT = llvm::dyn_cast(N)) { 126 | MapNT[NT->name()] = NT; 127 | } else if (auto *T = llvm::dyn_cast(N)) { 128 | MapT[T->name()] = T; 129 | } 130 | } 131 | 132 | const unsigned NumT = G.numberOfTerminals(); 133 | ASSERT_EQ(MapNT["stmt"]->FirstSet, 134 | InitBitVector(NumT, MapT["number"]->No, MapT["'('"]->No, 135 | MapT["';'"]->No)()); 136 | ASSERT_EQ(MapNT["stmt"]->Link->FollowSet, 137 | InitBitVector(NumT, MapT["_eoi"]->No)()); 138 | ASSERT_FALSE(MapNT["stmt"]->derivesEpsilon()); 139 | 140 | ASSERT_EQ(MapNT["expr"]->FirstSet, 141 | InitBitVector(NumT, MapT["number"]->No, MapT["'('"]->No)()); 142 | ASSERT_EQ(MapNT["expr"]->Link->FollowSet, 143 | InitBitVector(NumT, MapT["';'"]->No, MapT["')'"]->No)()); 144 | ASSERT_TRUE(MapNT["expr"]->derivesEpsilon()); 145 | 146 | ASSERT_EQ(MapNT["exprq"]->FirstSet, InitBitVector(NumT, MapT["'+'"]->No)()); 147 | ASSERT_EQ(MapNT["exprq"]->Link->FollowSet, 148 | InitBitVector(NumT, MapT["';'"]->No, MapT["')'"]->No)()); 149 | ASSERT_TRUE(MapNT["exprq"]->derivesEpsilon()); 150 | 151 | ASSERT_EQ(MapNT["term"]->FirstSet, 152 | InitBitVector(NumT, MapT["number"]->No, MapT["'('"]->No)()); 153 | ASSERT_EQ( 154 | MapNT["term"]->Link->FollowSet, 155 | InitBitVector(NumT, MapT["'+'"]->No, MapT["';'"]->No, MapT["')'"]->No)()); 156 | ASSERT_FALSE(MapNT["term"]->derivesEpsilon()); 157 | 158 | ASSERT_EQ(MapNT["termq"]->FirstSet, InitBitVector(NumT, 
MapT["'*'"]->No)()); 159 | ASSERT_EQ( 160 | MapNT["termq"]->Link->FollowSet, 161 | InitBitVector(NumT, MapT["'+'"]->No, MapT["';'"]->No, MapT["')'"]->No)()); 162 | ASSERT_TRUE(MapNT["termq"]->derivesEpsilon()); 163 | 164 | ASSERT_EQ(MapNT["factor"]->FirstSet, 165 | InitBitVector(NumT, MapT["number"]->No, MapT["'('"]->No)()); 166 | ASSERT_EQ(MapNT["factor"]->Link->FollowSet, 167 | InitBitVector(NumT, MapT["'*'"]->No, MapT["'+'"]->No, 168 | MapT["';'"]->No, MapT["')'"]->No)()); 169 | ASSERT_FALSE(MapNT["factor"]->derivesEpsilon()); 170 | } 171 | 172 | } // anonymous namespace 173 | -------------------------------------------------------------------------------- /unittests/Lexer/LexerTest.cpp: -------------------------------------------------------------------------------- 1 | //===- unittests/Lexer/LexerTest.cpp ------ Lexer tests -------------------===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | 9 | #include "m2lang/Lexer/Lexer.h" 10 | #include "gtest/gtest.h" 11 | 12 | using namespace m2lang; 13 | 14 | namespace { 15 | 16 | TEST(LexerTest, operatorTest) { 17 | LangOptions langOpts; 18 | langOpts.PIM = 1; 19 | llvm::StringRef Input("+ - * / := . , ; ( [ { ^ = # < > <= >= .. 
: ) ] } |"); 20 | std::unique_ptr InputBuffer = llvm::MemoryBuffer::getMemBuffer(Input); 21 | SourceMgr SrcMgr; 22 | DiagnosticsEngine Diags(SrcMgr); 23 | SrcMgr.AddNewSourceBuffer(std::move(InputBuffer), llvm::SMLoc()); 24 | auto lexer = Lexer(SrcMgr, Diags, langOpts); 25 | Token token; 26 | lexer.next(token); EXPECT_EQ(tok::plus, token.getKind()); 27 | lexer.next(token); EXPECT_EQ(tok::minus, token.getKind()); 28 | lexer.next(token); EXPECT_EQ(tok::star, token.getKind()); 29 | lexer.next(token); EXPECT_EQ(tok::slash, token.getKind()); 30 | lexer.next(token); EXPECT_EQ(tok::colonequal, token.getKind()); 31 | lexer.next(token); EXPECT_EQ(tok::period, token.getKind()); 32 | lexer.next(token); EXPECT_EQ(tok::comma, token.getKind()); 33 | lexer.next(token); EXPECT_EQ(tok::semi, token.getKind()); 34 | lexer.next(token); EXPECT_EQ(tok::l_paren, token.getKind()); 35 | lexer.next(token); EXPECT_EQ(tok::l_square, token.getKind()); 36 | lexer.next(token); EXPECT_EQ(tok::l_brace, token.getKind()); 37 | lexer.next(token); EXPECT_EQ(tok::caret, token.getKind()); 38 | lexer.next(token); EXPECT_EQ(tok::equal, token.getKind()); 39 | lexer.next(token); EXPECT_EQ(tok::hash, token.getKind()); 40 | lexer.next(token); EXPECT_EQ(tok::less, token.getKind()); 41 | lexer.next(token); EXPECT_EQ(tok::greater, token.getKind()); 42 | lexer.next(token); EXPECT_EQ(tok::lessequal, token.getKind()); 43 | lexer.next(token); EXPECT_EQ(tok::greaterequal, token.getKind()); 44 | lexer.next(token); EXPECT_EQ(tok::ellipsis, token.getKind()); 45 | lexer.next(token); EXPECT_EQ(tok::colon, token.getKind()); 46 | lexer.next(token); EXPECT_EQ(tok::r_paren, token.getKind()); 47 | lexer.next(token); EXPECT_EQ(tok::r_square, token.getKind()); 48 | lexer.next(token); EXPECT_EQ(tok::r_brace, token.getKind()); 49 | lexer.next(token); EXPECT_EQ(tok::pipe, token.getKind()); 50 | lexer.next(token); EXPECT_EQ(tok::eof, token.getKind()); 51 | } 52 | 53 | TEST(LexerTest, trigraphs1Test) { 54 | LangOptions langOpts; 55 | 
langOpts.ISO = 1; 56 | llvm::StringRef Input("(! !) (: :)"); 57 | std::unique_ptr InputBuffer = llvm::MemoryBuffer::getMemBuffer(Input); 58 | SourceMgr SrcMgr; 59 | DiagnosticsEngine Diags(SrcMgr); 60 | SrcMgr.AddNewSourceBuffer(std::move(InputBuffer), llvm::SMLoc()); 61 | auto lexer = Lexer(SrcMgr, Diags, langOpts); 62 | Token token; 63 | lexer.next(token); EXPECT_EQ(tok::l_square, token.getKind()); 64 | lexer.next(token); EXPECT_EQ(tok::r_square, token.getKind()); 65 | lexer.next(token); EXPECT_EQ(tok::l_brace, token.getKind()); 66 | lexer.next(token); EXPECT_EQ(tok::r_brace, token.getKind()); 67 | lexer.next(token); EXPECT_EQ(tok::eof, token.getKind()); 68 | } 69 | 70 | TEST(LexerTest, trigraphs2Test) { 71 | LangOptions langOpts; 72 | langOpts.ISO = 1; 73 | llvm::StringRef Input("(!!)(::)"); 74 | std::unique_ptr InputBuffer = llvm::MemoryBuffer::getMemBuffer(Input); 75 | SourceMgr SrcMgr; 76 | DiagnosticsEngine Diags(SrcMgr); 77 | SrcMgr.AddNewSourceBuffer(std::move(InputBuffer), llvm::SMLoc()); 78 | auto lexer = Lexer(SrcMgr, Diags, langOpts); 79 | Token token; 80 | lexer.next(token); EXPECT_EQ(tok::l_square, token.getKind()); 81 | lexer.next(token); EXPECT_EQ(tok::r_square, token.getKind()); 82 | lexer.next(token); EXPECT_EQ(tok::l_brace, token.getKind()); 83 | lexer.next(token); EXPECT_EQ(tok::r_brace, token.getKind()); 84 | lexer.next(token); EXPECT_EQ(tok::eof, token.getKind()); 85 | } 86 | 87 | TEST(LexerTest, aliasTest) { 88 | LangOptions langOpts; 89 | langOpts.ISO = 1; 90 | llvm::StringRef Input("&~!@"); 91 | std::unique_ptr InputBuffer = llvm::MemoryBuffer::getMemBuffer(Input); 92 | SourceMgr SrcMgr; 93 | DiagnosticsEngine Diags(SrcMgr); 94 | SrcMgr.AddNewSourceBuffer(std::move(InputBuffer), llvm::SMLoc()); 95 | auto lexer = Lexer(SrcMgr, Diags, langOpts); 96 | Token token; 97 | lexer.next(token); EXPECT_EQ(tok::kw_AND, token.getKind()); 98 | lexer.next(token); EXPECT_EQ(tok::kw_NOT, token.getKind()); 99 | lexer.next(token); EXPECT_EQ(tok::pipe, 
token.getKind()); 100 | lexer.next(token); EXPECT_EQ(tok::caret, token.getKind()); 101 | lexer.next(token); EXPECT_EQ(tok::eof, token.getKind()); 102 | } 103 | 104 | TEST(LexerTest, numberTest) { 105 | LangOptions langOpts; 106 | langOpts.ISO = 1; 107 | llvm::StringRef Input("42 42H 42B 42C 42.42E+3"); 108 | std::unique_ptr InputBuffer = llvm::MemoryBuffer::getMemBuffer(Input); 109 | SourceMgr SrcMgr; 110 | DiagnosticsEngine Diags(SrcMgr); 111 | SrcMgr.AddNewSourceBuffer(std::move(InputBuffer), llvm::SMLoc()); 112 | auto lexer = Lexer(SrcMgr, Diags, langOpts); 113 | Token token; 114 | lexer.next(token); EXPECT_EQ(tok::integer_literal, token.getKind()); 115 | lexer.next(token); EXPECT_EQ(tok::integer_literal, token.getKind()); 116 | lexer.next(token); EXPECT_EQ(tok::integer_literal, token.getKind()); 117 | lexer.next(token); EXPECT_EQ(tok::char_literal, token.getKind()); 118 | lexer.next(token); EXPECT_EQ(tok::real_literal, token.getKind()); 119 | lexer.next(token); EXPECT_EQ(tok::eof, token.getKind()); 120 | } 121 | 122 | TEST(LexerTest, ellipsisTest) { 123 | LangOptions langOpts; 124 | langOpts.ISO = 1; 125 | llvm::StringRef Input("0..100"); 126 | std::unique_ptr InputBuffer = llvm::MemoryBuffer::getMemBuffer(Input); 127 | SourceMgr SrcMgr; 128 | DiagnosticsEngine Diags(SrcMgr); 129 | SrcMgr.AddNewSourceBuffer(std::move(InputBuffer), llvm::SMLoc()); 130 | auto lexer = Lexer(SrcMgr, Diags, langOpts); 131 | Token token; 132 | lexer.next(token); EXPECT_EQ(tok::integer_literal, token.getKind()); 133 | lexer.next(token); EXPECT_EQ(tok::ellipsis, token.getKind()); 134 | lexer.next(token); EXPECT_EQ(tok::integer_literal, token.getKind()); 135 | lexer.next(token); EXPECT_EQ(tok::eof, token.getKind()); 136 | } 137 | 138 | TEST(LexerTest, keywordsSorted) { 139 | const char *keywords[] = { 140 | #define KEYWORD(NAME, FLAGS) \ 141 | #NAME, 142 | #include "m2lang/Basic/TokenKinds.def" 143 | nullptr, 144 | }; 145 | for (size_t i = 1; keywords[i]; ++i) 146 | { 147 | int cmp = 
strcmp(keywords[i-1], keywords[i]); 148 | ASSERT_LT(cmp, 0); 149 | } 150 | } 151 | 152 | } // anonymous namespace 153 | -------------------------------------------------------------------------------- /utils/ASTtool/ASTtool.cpp: -------------------------------------------------------------------------------- 1 | //===--- ASTtool.cpp - ASTtool driver ----------------------------*- C++ -*-===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Implements driver of ASTtool. 11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | #include "asttool/Main.h" 15 | #include "llvm/Support/CommandLine.h" 16 | #include "llvm/Support/InitLLVM.h" 17 | 18 | int main(int argc, const char **argv) { 19 | llvm::InitLLVM X(argc, argv); 20 | llvm::cl::ParseCommandLineOptions( 21 | argc, argv, "ASTtool - abstract syntax tree generator\n"); 22 | // Hand over to the library; its return value becomes the process exit code. 23 | return asttool::runASTtoolMain(argv[0]); 24 | } -------------------------------------------------------------------------------- /utils/ASTtool/xmake.lua: -------------------------------------------------------------------------------- 1 | target("ASTtool") 2 | add_packages("llvm") 3 | set_kind("binary") 4 | add_deps("asttool") 5 | add_files("ASTtool.cpp") 6 | -------------------------------------------------------------------------------- /utils/LLtool/LLtool.cpp: -------------------------------------------------------------------------------- 1 | //===--- LLtool.cpp - LLtool driver -----------------------------*- C++ -*-===// 2 | // 3 | // Part of the M2Lang Project, under the Apache License v2.0 with 4 | // LLVM Exceptions. See LICENSE file for license information.
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | /// 9 | /// \file 10 | /// Implements driver of LLtool. 11 | /// 12 | //===----------------------------------------------------------------------===// 13 | 14 | #include "lltool/Main.h" 15 | #include "llvm/Support/CommandLine.h" 16 | #include "llvm/Support/InitLLVM.h" 17 | 18 | using namespace lltool; 19 | 20 | int main(int argc, const char **argv) { 21 | llvm::InitLLVM X(argc, argv); 22 | llvm::cl::ParseCommandLineOptions( 23 | argc, argv, "LLtool - recursive descent parser generator\n"); 24 | 25 | return lltool::runLLtoolMain(argv[0]); 26 | } -------------------------------------------------------------------------------- /utils/LLtool/xmake.lua: -------------------------------------------------------------------------------- 1 | target("LLtool") 2 | add_packages("llvm") 3 | set_kind("binary") 4 | add_deps("lltool") 5 | add_files("LLtool.cpp") 6 | -------------------------------------------------------------------------------- /utils/README.md: -------------------------------------------------------------------------------- 1 | LLtool is an LL(1) parser generator and is used to generate the m2lang parser. 2 | 3 | Most of the other utils are copied from LLVM and are required for standalone build. 4 | 5 | - count: from LLVM 10 6 | - not: from LLVM 10 7 | - FileCheck-x.y: from LLVM x.y 8 | - unittest: from LLVM 9 9 | - m2lang-lit: from LLVM9 (llvm-lit) -------------------------------------------------------------------------------- /utils/count/count.c: -------------------------------------------------------------------------------- 1 | /*===- count.c - The 'count' testing tool ---------------------------------===*\ 2 | * 3 | * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 | * See https://llvm.org/LICENSE.txt for license information. 
5 | * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | * 7 | \*===----------------------------------------------------------------------===*/ 8 | 9 | #include 10 | #include 11 | 12 | int main(int argc, char **argv) { 13 | size_t Count, NumLines, NumRead; 14 | char Buffer[4096], *End; 15 | 16 | if (argc != 2) { 17 | fprintf(stderr, "usage: %s \n", argv[0]); 18 | return 2; 19 | } 20 | 21 | Count = strtoul(argv[1], &End, 10); 22 | if (*End != '\0' && End != argv[1]) { 23 | fprintf(stderr, "%s: invalid count argument '%s'\n", argv[0], argv[1]); 24 | return 2; 25 | } 26 | 27 | NumLines = 0; 28 | do { 29 | size_t i; 30 | 31 | NumRead = fread(Buffer, 1, sizeof(Buffer), stdin); 32 | 33 | for (i = 0; i != NumRead; ++i) 34 | if (Buffer[i] == '\n') 35 | ++NumLines; 36 | } while (NumRead == sizeof(Buffer)); 37 | 38 | if (!feof(stdin)) { 39 | fprintf(stderr, "%s: error reading stdin\n", argv[0]); 40 | return 3; 41 | } 42 | 43 | if (Count != NumLines) { 44 | fprintf(stderr, "Expected %zu lines, got %zu.\n", Count, NumLines); 45 | return 1; 46 | } 47 | 48 | return 0; 49 | } 50 | -------------------------------------------------------------------------------- /utils/m2lang-lit/llvm-lit.in: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import os 5 | import sys 6 | 7 | config_map = {} 8 | 9 | def map_config(source_dir, site_config): 10 | global config_map 11 | source_dir = os.path.realpath(source_dir) 12 | source_dir = os.path.normcase(source_dir) 13 | site_config = os.path.normpath(site_config) 14 | config_map[source_dir] = site_config 15 | 16 | # Variables configured at build time. 17 | llvm_source_root = "@LLVM_SOURCE_DIR@" 18 | llvm_obj_root = "@LLVM_BINARY_DIR@" 19 | 20 | # Make sure we can find the lit package. 
21 | sys.path.insert(0, os.path.join(llvm_source_root, 'utils', 'lit')) 22 | 23 | # Set up some builtin parameters, so that by default the LLVM test suite 24 | # configuration file knows how to find the object tree. 25 | builtin_parameters = { 'build_mode' : "@BUILD_MODE@" } 26 | 27 | @LLVM_LIT_CONFIG_MAP@ 28 | 29 | builtin_parameters['config_map'] = config_map 30 | 31 | if __name__=='__main__': 32 | from lit.main import main 33 | main(builtin_parameters) 34 | -------------------------------------------------------------------------------- /utils/not/not.cpp: -------------------------------------------------------------------------------- 1 | //===- not.cpp - The 'not' testing tool -----------------------------------===// 2 | // 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 | // See https://llvm.org/LICENSE.txt for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | // Usage: 9 | // not cmd 10 | // Will return true if cmd doesn't crash and returns false. 11 | // not --crash cmd 12 | // Will return true if cmd crashes (e.g. for testing crash reporting). 13 | 14 | #include "llvm/Support/Program.h" 15 | #include "llvm/Support/WithColor.h" 16 | #include "llvm/Support/raw_ostream.h" 17 | 18 | #ifdef _WIN32 19 | #include 20 | #endif 21 | 22 | using namespace llvm; 23 | 24 | int main(int argc, const char **argv) { 25 | bool ExpectCrash = false; 26 | 27 | ++argv; 28 | --argc; 29 | 30 | if (argc > 0 && StringRef(argv[0]) == "--crash") { 31 | ++argv; 32 | --argc; 33 | ExpectCrash = true; 34 | 35 | // Crash is expected, so disable crash report and symbolization to reduce 36 | // output and avoid potentially slow symbolization. 
37 | #ifdef _WIN32 38 | SetEnvironmentVariableA("LLVM_DISABLE_CRASH_REPORT", "1"); 39 | SetEnvironmentVariableA("LLVM_DISABLE_SYMBOLIZATION", "1"); 40 | #else 41 | setenv("LLVM_DISABLE_CRASH_REPORT", "1", 0); 42 | setenv("LLVM_DISABLE_SYMBOLIZATION", "1", 0); 43 | #endif 44 | } 45 | 46 | if (argc == 0) 47 | return 1; 48 | 49 | auto Program = sys::findProgramByName(argv[0]); 50 | if (!Program) { 51 | WithColor::error() << "unable to find `" << argv[0] 52 | << "' in PATH: " << Program.getError().message() << "\n"; 53 | return 1; 54 | } 55 | 56 | std::vector Argv; 57 | Argv.reserve(argc); 58 | for (int i = 0; i < argc; ++i) 59 | Argv.push_back(argv[i]); 60 | std::string ErrMsg; 61 | int Result = 62 | sys::ExecuteAndWait(*Program, Argv, std::nullopt, {}, 0, 0, &ErrMsg); 63 | #ifdef _WIN32 64 | // Handle abort() in msvcrt -- It has exit code as 3. abort(), aka 65 | // unreachable, should be recognized as a crash. However, some binaries use 66 | // exit code 3 on non-crash failure paths, so only do this if we expect a 67 | // crash. 68 | if (ExpectCrash && Result == 3) 69 | Result = -3; 70 | #endif 71 | if (Result < 0) { 72 | WithColor::error() << ErrMsg << "\n"; 73 | if (ExpectCrash) 74 | return 0; 75 | return 1; 76 | } 77 | 78 | if (ExpectCrash) 79 | return 1; 80 | 81 | return Result == 0; 82 | } 83 | -------------------------------------------------------------------------------- /utils/vscode/README.md: -------------------------------------------------------------------------------- 1 | Syntax highlighting and snippets for Visual Studio Code 2 | ======================================================= 3 | 4 | The [m2-vscode](https://github.com/redstar/m2-vscode) project provides syntax 5 | highlighting and code snippets for Visual Studio Code.
-------------------------------------------------------------------------------- /xmake.lua: -------------------------------------------------------------------------------- 1 | set_project("m2lang") 2 | set_xmakever("2.9.4") 3 | 4 | set_version("0.0.1") 5 | set_configvar("CONFIG_APP_NAME", "m2lang") 6 | 7 | add_rules("mode.release", "mode.debug") 8 | 9 | -- Hard-code the toolchain. 10 | --set_toolchains("clang") 11 | 12 | -- The project uses C++ 20 modules. 13 | set_languages("c++20") 14 | 15 | -- Trigger update of compile_commands.json in the build directory. 16 | add_rules("plugin.compile_commands.autoupdate", {outputdir = "$(buildir)", lsp = "clangd"}) 17 | 18 | -- Use local repository for package LLVM. 19 | add_repositories("local-repo xmake/local-repo") 20 | add_requires("llvm >= 19", {system = true, configs = {shared = not is_plat("macosx")}}) 21 | 22 | -- Include custom rules. 23 | includes("xmake/rules") 24 | 25 | add_includedirs("include") 26 | add_includedirs("$(buildir)/$(plat)/$(arch)/$(mode)") 27 | -- These should be added by the rules. Looks like a bug in xmake. 
28 | add_includedirs("$(buildir)/.gens/lexer/$(plat)/$(arch)/$(mode)/rules/lltool") 29 | add_includedirs("$(buildir)/.gens/parser/$(plat)/$(arch)/$(mode)/rules/lltool") 30 | 31 | includes("lib/LLtool") 32 | includes("utils/LLtool") 33 | includes("lib/ASTtool") 34 | includes("utils/ASTtool") 35 | 36 | includes("lib/Basic") 37 | includes("lib/Lexer") 38 | includes("lib/AST") 39 | includes("lib/Sema") 40 | includes("lib/Parser") 41 | includes("lib/CodeGen") 42 | includes("tools/driver") 43 | 44 | -------------------------------------------------------------------------------- /xmake/local-repo/packages/l/llvm/xmake.lua: -------------------------------------------------------------------------------- 1 | package("llvm") 2 | set_homepage("https://llvm.org/") 3 | set_description("The LLVM Compiler Infrastructure") 4 | set_kind("library") 5 | set_policy("package.fetch_only", true) 6 | set_policy("package.include_external_headers", false) 7 | 8 | on_fetch(function (package, opt) 9 | if opt.system then 10 | local llvm_config = "llvm-config" 11 | local version = try {function() return os.iorunv(llvm_config, {"--version"}) end} 12 | if version then 13 | version = version:trim() 14 | local libopt 15 | if package:config("shared") then 16 | libopt = "--link-shared" 17 | else 18 | libopt = "--link-static" 19 | end 20 | local includedir = try {function() return os.iorunv(llvm_config, {libopt, "--includedir"}) end} 21 | local libs = try {function() return os.iorunv(llvm_config, {libopt, "--libs"}) end} 22 | local linkdir = try {function() return os.iorunv(llvm_config, {libopt, "--libdir"}) end} 23 | local cxxflags = try {function() return os.iorunv(llvm_config, {libopt, "--cxxflags"}) end} 24 | local ldflags = try {function() return os.iorunv(llvm_config, {libopt, "--ldflags"}) end} 25 | local result = {} 26 | result.version = version 27 | if includedir then 28 | result.includedirs = includedir:trim() 29 | end 30 | if libs then 31 | local links = {} 32 | for _, item in 
ipairs(libs:trim():split(" ")) do 33 | table.insert(links, item:sub(3)) -- Remove -l prefix 34 | end 35 | if not package:config("shared") then 36 | table.insert(links, "zstd") 37 | table.insert(links, "z") 38 | end 39 | result.links = table.unwrap(links) 40 | end 41 | if linkdir then 42 | result.linkdirs = package:is_plat("macosx") and { linkdir:trim(), "/opt/local/lib" } or { linkdir:trim() } -- /opt/local/lib is macOS-specific, so only add it there 43 | end 44 | if cxxflags then 45 | local flags = {} 46 | local defines = {} 47 | for _, item in ipairs(cxxflags:trim():split(" ")) do 48 | -- Filter out options -std= and -I, and put macro 49 | -- definitions in separate table. 50 | if not (item:startswith("-std=") or item:startswith("-I")) then 51 | if item:startswith("-D") then 52 | table.insert(defines, item:sub(3)) 53 | else 54 | table.insert(flags, item) 55 | end 56 | end 57 | end 58 | result.cxxflags = table.unwrap(flags) 59 | result.defines = table.unwrap(defines) 60 | end 61 | if ldflags then 62 | local flags = {} 63 | for _, item in ipairs(ldflags:trim():split(" ")) do 64 | -- Filter out options -L. 65 | -- (The link directory is queried separately with --libdir.) 66 | if not item:startswith("-L") then 67 | table.insert(flags, item) 68 | end 69 | end 70 | result.ldflags = table.unwrap(flags) 71 | end 72 | -- print("Result: ", result) 73 | return result 74 | end 75 | end 76 | end) -------------------------------------------------------------------------------- /xmake/rules/xmake.lua: -------------------------------------------------------------------------------- 1 | rule("lltool") 2 | add_deps("c++") 3 | set_extensions(".g") 4 | 5 | on_config(function (target) 6 | -- Create empty file for each grammar file. Set time to 0 to trigger 7 | -- build first time. 8 | local includes 9 | for _, file in pairs(target:sourcefiles()) do 10 | local extension = path.extension(file) 11 | if extension == ".g" then 12 | local gendir = path.join(target:autogendir(), "rules", 'lltool') 13 | print("gendir: %s", gendir) 14 | local fragmentfile = path.join(gendir, path.basename(file) ..
".g.inc") 15 | includes = gendir -- Export the directory on every configure run, not only when the stub was just created. 16 | if not os.exists(fragmentfile) then 17 | io.writefile(fragmentfile, "") 18 | os.touch(fragmentfile, {atime = 1, mtime = 1}) 19 | cprint("${dim}touching %s", fragmentfile) 20 | table.insert(target:headerfiles(), fragmentfile) 21 | end 22 | end 23 | end 24 | if includes then 25 | target:add("includedirs", includes) 26 | end 27 | end) 28 | 29 | before_build(function (target, opt) 30 | import("lib.detect.find_tool") 31 | import("utils.progress") 32 | local lltool = assert(find_tool("LLtool", {paths = {"$(buildir)/$(plat)/$(arch)/$(mode)"}}), "LLtool not found!") 33 | 34 | -- Run LLtool on every grammar file of the target to (re)generate its fragment. 35 | for _, file in pairs(target:sourcefiles()) do 36 | local extension = path.extension(file) 37 | if extension == ".g" then 38 | local gendir = path.join(target:autogendir(), "rules", 'lltool') 39 | local fragmentfile = path.join(gendir, path.basename(file) .. ".g.inc") 40 | local params = {"-o", path(fragmentfile), path(file)} 41 | progress.show(opt.progress, "${color.build.target}<%s> ${clear}${color.build.object}generating.lltool %s", target:name(), file) 42 | local ok = try 43 | { 44 | function () 45 | os.vrunv(lltool.program, params) 46 | return true 47 | end 48 | } 49 | if not ok then 50 | local command = lltool.program .. " " ..os.args(params) 51 | cprint("\r${bright color.error}error: ${clear}run `%s` failed", command) 52 | end 53 | end 54 | end 55 | end) 56 | 57 | -- This does not work. It seems that the dependency between the include in 58 | -- the C++ file and the generated fragment is missing. 59 | -- Same with before_build_file. 60 | -- before_buildcmd_file(function (target, batchcmds, sourcefile_lltool, opt) 61 | -- -- Get path to LLtool. 62 | -- import("lib.detect.find_tool") 63 | -- local lltool = assert(find_tool("LLtool", {paths = {"$(buildir)/$(plat)/$(arch)/$(mode)"}}), "LLtool not found!") 64 | 65 | -- -- Get path of fragment source file.
66 | -- local gendir = path.join(target:autogendir(), "rules", 'lltool') 67 | -- local fragmentfile = path.join(gendir, path.basename(file) .. ".g.inc") 68 | -- target:add("includedirs", includes) 69 | 70 | -- -- Add fragment. 71 | -- table.insert(target:headerfiles(), fragmentfile) 72 | 73 | -- -- Add commands. 74 | -- batchcmds:show_progress(opt.progress, "${color.build.target}<%s> ${clear}${color.build.object}generating.lltool 2 %s", target:name(), sourcefile_lltool) 75 | -- batchcmds:mkdir(path.directory(fragmentfile)) 76 | -- batchcmds:vrunv(lltool.program, {"-o", path(fragmentfile), path(sourcefile_lltool)}) 77 | 78 | -- -- Add dependencies. 79 | -- batchcmds:add_depfiles(sourcefile_lltool) 80 | -- batchcmds:set_depmtime(os.mtime(fragmentfile)) 81 | -- batchcmds:set_depcache(target:dependfile(fragmentfile)) 82 | -- end) 83 | rule_end() 84 | --------------------------------------------------------------------------------