├── README.md ├── include ├── Code.h ├── Error.h ├── Identifier.h ├── Lexer.h ├── Num.h ├── ObjCode.h ├── Parse.h ├── Symbol.h ├── Tag.h ├── Token.h ├── Vm.h └── Word.h ├── makefile ├── sample ├── test1.pl0 └── test2.pl0 └── src ├── Code.cpp ├── Error.cpp ├── Identifier.cpp ├── Lexer.cpp ├── Num.cpp ├── ObjCode.cpp ├── Parse.cpp ├── Symbol.cpp ├── Token.cpp ├── Vm.cpp ├── Word.cpp └── main.cpp /README.md: -------------------------------------------------------------------------------- 1 | # pl0-compiler 2 | > This is a compiler for the pl0 programming language written in C++ 3 | 4 | ## Introduction of pl0 5 | > pl0 is similar to but much simpler than the general-purpose programming language Pascal, intended as an educational programming language. 6 | > It serves as an example of how to construct a compiler. 7 | 8 | ### Grammar 9 | ``` 10 | program = block "." . 11 | 12 | block = [ "const" ident "=" number {"," ident "=" number} ";"] 13 | [ "var" ident {"," ident} ";"] 14 | { "procedure" ident ";" block ";" } statement . 15 | 16 | statement = [ ident ":=" expression | "call" ident 17 | | "?" ident | "!" expression 18 | | "begin" statement {";" statement } "end" 19 | | "if" condition "then" statement 20 | | "while" condition "do" statement ]. 21 | 22 | condition = "odd" expression | 23 | expression ("="|"#"|"<"|"<="|">"|">=") expression . 24 | 25 | expression = [ "+"|"-"] term { ("+"|"-") term}. 26 | 27 | term = factor {("*"|"/") factor}. 28 | 29 | factor = ident | number | "(" expression ")". 
30 | ``` 31 | 32 | ## Usage 33 | - make 34 | - ./pl0 "filepath" 35 | - "exec the program" 36 | - make clean 37 | 38 | ## LICENSE 39 | MIT © [duduscript](https://github.com/duduscript) 40 | -------------------------------------------------------------------------------- /include/Code.h: -------------------------------------------------------------------------------- 1 | #ifndef CODE_H 2 | #define CODE_H 3 | 4 | #include 5 | 6 | class Code 7 | { 8 | public: 9 | Code(int _op, int _l, int _m); 10 | void printCode() const; 11 | void changeAdrr(int); 12 | std::string getOp(int) const; 13 | std::string getALUOp(int) const; 14 | const int getOp() const; 15 | const int getL() const; 16 | const int getM() const; 17 | private: 18 | const int op; //Operation 19 | int l; //L is lexicographical level 20 | int m; //M is an address, data or ALU operatorion 21 | private: 22 | Code()=delete; 23 | Code(const Code&)=delete; 24 | Code& operator=(const Code&)=delete; 25 | }; 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /include/Error.h: -------------------------------------------------------------------------------- 1 | #ifndef ERROR_H 2 | #define ERROR_H 3 | 4 | #include 5 | 6 | extern const std::string parseErrors[]; 7 | 8 | void printError(int errorId, int line); 9 | 10 | #endif 11 | -------------------------------------------------------------------------------- /include/Identifier.h: -------------------------------------------------------------------------------- 1 | #ifndef IDENTIFIER_H 2 | #define IDENTIFIER_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | enum KIND {VAR = 1, CONST, PROC}; 9 | 10 | struct Id 11 | { 12 | Id(); 13 | Id(KIND, int, int); 14 | Id& operator=(const Id&); 15 | KIND kind; 16 | int value; 17 | int level; 18 | int addr; 19 | }; 20 | 21 | class Identifier 22 | { 23 | public: 24 | Identifier(); 25 | public: 26 | std::map id; 27 | std::vector currM; 28 | int currentLevel; 29 | private: 30 | 
Identifier(const Identifier&)=delete; 31 | Identifier& operator=(const Identifier&)=delete; 32 | }; 33 | 34 | #endif // IDENTIFIER_H 35 | -------------------------------------------------------------------------------- /include/Lexer.h: -------------------------------------------------------------------------------- 1 | #ifndef LEXER_H 2 | #define LEXER_H 3 | 4 | #include 5 | #include 6 | 7 | class Token; 8 | 9 | class Lexer 10 | { 11 | public: 12 | Lexer(); 13 | Lexer(std::string FileName); 14 | ~Lexer(); 15 | public: 16 | void Tokenizer(); 17 | void PrintList(); 18 | void JudgeError(); 19 | Token* GetToken(); 20 | void PutToken(Token*& p); 21 | void PlusLine(); 22 | private: 23 | std::fstream in; 24 | std::list TokenList; 25 | int line; 26 | private: 27 | Lexer(const Lexer&)=delete; 28 | Lexer& operator=(const Lexer&)=delete; 29 | }; 30 | 31 | 32 | #endif // LEXER_H 33 | -------------------------------------------------------------------------------- /include/Num.h: -------------------------------------------------------------------------------- 1 | #ifndef NUM_H 2 | #define NUM_H 3 | 4 | #include "Token.h" 5 | 6 | class Num : public Token 7 | { 8 | public: 9 | Num(); 10 | Num(std::string, int); 11 | ~Num(); 12 | public: 13 | void Print(); 14 | int GetValue(); 15 | private: 16 | int value; 17 | }; 18 | 19 | #endif // NUM_H 20 | -------------------------------------------------------------------------------- /include/ObjCode.h: -------------------------------------------------------------------------------- 1 | #ifndef OBJCODE_H 2 | #define OBJCODE_H 3 | 4 | #include 5 | class Code; 6 | 7 | class ObjCode 8 | { 9 | public: 10 | ObjCode(); 11 | ObjCode(ObjCode&); 12 | ~ObjCode(); 13 | void emitCode(int, int, int); 14 | void printCode() const; 15 | const int size() const; 16 | void changeAdrr(int, int); 17 | Code*& at(int); 18 | //const Code*& at(int) const; 19 | private: 20 | std::vector icode; 21 | }; 22 | 23 | enum { 24 | LIT = 1, OPR, LOD, STO, CAL, INC, JMP, 
JPC, SIO_OUT, SIO_IN 25 | }; 26 | 27 | enum { 28 | OPR_RET, OPR_NEG, OPR_ADD, OPR_SUB, OPR_MUL, OPR_DIV, OPR_ODD, OPR_MOD, 29 | OPR_EQL, OPR_NEQ, OPR_LSS, OPR_LEQ, OPR_GTR, OPR_GEQ 30 | }; 31 | 32 | #endif // OBJCODE_H 33 | -------------------------------------------------------------------------------- /include/Parse.h: -------------------------------------------------------------------------------- 1 | #ifndef PARSE_H 2 | #define PARSE_H 3 | 4 | #include "Lexer.h" 5 | #include "ObjCode.h" 6 | #include "Identifier.h" 7 | 8 | class Token; 9 | 10 | class Parse 11 | { 12 | public: 13 | Parse(); 14 | Parse(std::string); 15 | ~Parse(); 16 | public: 17 | void move(); 18 | void program(); 19 | void block(); 20 | void decls(); 21 | void stmts(); 22 | void cond(); 23 | void expr(); 24 | void term(); 25 | void factor(); 26 | void GrammerAnalyzier(); 27 | void PrintTemineCode(); 28 | ObjCode& getCode(); 29 | private: 30 | Parse(const Parse&)=delete; 31 | Parse& operator=(const Parse&)=delete; 32 | Lexer lex; 33 | Token* look; 34 | ObjCode icode; 35 | Identifier ident; 36 | }; 37 | 38 | #endif 39 | -------------------------------------------------------------------------------- /include/Symbol.h: -------------------------------------------------------------------------------- 1 | #ifndef SYMBOL_H 2 | #define SYMBOL_H 3 | 4 | #include "Token.h" 5 | 6 | const int SymbolNum = 16; 7 | extern const std::string SymbolTable[]; 8 | 9 | class Symbol : public Token 10 | { 11 | public: 12 | Symbol(); 13 | Symbol(std::string s,int l); 14 | ~Symbol(); 15 | public: 16 | void Print(); 17 | public: 18 | static int GetSymbolNum(std::string); 19 | static Tag GetSymbolTag(int); 20 | }; 21 | 22 | #endif // SYMBOL_H 23 | -------------------------------------------------------------------------------- /include/Tag.h: -------------------------------------------------------------------------------- 1 | #ifndef TAG_H 2 | #define TAG_H 3 | 4 | enum Tag 5 | { 6 | BADTOKEN, // 7 | IDENTSYM, // 8 | 
NUMBERSYM, // 9 | PLUSSYM, // + 10 | MINUSYM, // - 11 | MULSYM, // * 12 | SLASHSYM, // / 13 | ODDSYM, //odd 14 | EQLSYM, // = 15 | NEQSYM, // <> 16 | LESSYM, // < 17 | LEQSYM, // <= 18 | GTRSYM, // > 19 | GEQSYM, // >= 20 | LPARENTSYM, // ( 21 | RPARENTSYM, // ) 22 | COMMASYM, // , 23 | SEMICOLOMSYM, // ; 24 | PERIODSYM, // . 25 | BECOMESSYM, // := 26 | BEGINSYM, // begin 27 | ENDSYM, // end 28 | IFSYM, // if 29 | THENSYM, // then 30 | WHILESYM, // while 31 | DOSYM, // do 32 | CALLSYM, // call 33 | CONSTSYM, // const 34 | VARSYM, // var 35 | PROCSYM, // procedure 36 | WRITESYM, // write 37 | READSYM, // read 38 | ELSESYM // else 39 | }; 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /include/Token.h: -------------------------------------------------------------------------------- 1 | #ifndef TOKEN_H 2 | #define TOKEN_H 3 | 4 | #include 5 | #include "Tag.h" 6 | 7 | class Token 8 | { 9 | public: 10 | Token(); 11 | Token(std::string s, Tag t, int l); 12 | virtual ~Token(); 13 | public: 14 | Tag GetTag(); 15 | int GetLine(); 16 | void ChangeTag(Tag t); 17 | std::string GetLexeme(); 18 | public: 19 | virtual void Print(); 20 | private: 21 | Token(const Token&) = delete; 22 | Token& operator=(const Token&) = delete; 23 | std::string lexeme; 24 | Tag tag; 25 | int line; 26 | }; 27 | 28 | #endif // TOKEN_H 29 | -------------------------------------------------------------------------------- /include/Vm.h: -------------------------------------------------------------------------------- 1 | #ifndef VM_H 2 | #define VM_H 3 | 4 | class ObjCode; 5 | 6 | class Vm 7 | { 8 | public: 9 | Vm(ObjCode&); 10 | ~Vm(); 11 | void pl0(); 12 | private: 13 | Vm(const Vm&)=delete; 14 | Vm& operator=(const Vm&)=delete; 15 | ObjCode& objCode; 16 | }; 17 | 18 | #endif // VM_H 19 | -------------------------------------------------------------------------------- /include/Word.h: 
-------------------------------------------------------------------------------- 1 | #ifndef WORD_H 2 | #define WORD_H 3 | 4 | #include "Token.h" 5 | 6 | const int NUMBER_OF_KEYWORD = 14; 7 | 8 | extern const std::string KeywordTable[]; 9 | 10 | class Word :public Token 11 | { 12 | public: 13 | Word(); 14 | Word(std::string v, int l); 15 | ~Word(); 16 | void Print(); 17 | public: 18 | static int GetKeywordNum(std::string); 19 | static Tag GetKeywordTag(int t); 20 | }; 21 | 22 | 23 | #endif // WORD_H 24 | -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | INCDIR = ./include 2 | SRCDIR = ./src 3 | OBJDIR = ./obj 4 | BINDIR = ./bin 5 | 6 | CC = clang++ 7 | CFLAGS = -std=c++11 -I $(INCDIR) 8 | TARGET = $(BINDIR)/pl0 9 | 10 | _OBJS = main.o Parse.o Vm.o Error.o Token.o Code.o Identifier.o \ 11 | Lexer.o Symbol.o ObjCode.o Word.o Num.o 12 | 13 | OBJS = $(patsubst %, $(OBJDIR)/%, $(_OBJS)) 14 | 15 | all: $(TARGET) 16 | 17 | $(TARGET) : $(OBJS) 18 | $(CC) -o $@ $^ 19 | 20 | $(OBJDIR)/%.o: $(SRCDIR)/%.cpp 21 | $(CC) -c -o $@ $< $(CFLAGS) 22 | 23 | .PHONY : clean 24 | clean: 25 | rm -rf $(BINDIR)/* $(OBJDIR)/* 26 | -------------------------------------------------------------------------------- /sample/test1.pl0: -------------------------------------------------------------------------------- 1 | const ADD = 1, SUB = 2,MULT = 3,DIV = 4; 2 | var op,x,y,done; 3 | 4 | procedure calculate; 5 | procedure add; 6 | begin 7 | x := x + y; 8 | end; 9 | procedure sub; 10 | begin 11 | x := x - y; 12 | end; 13 | procedure mult; 14 | var c; 15 | begin 16 | c := y - 1; 17 | y := x; 18 | while c > 0 do 19 | begin 20 | call add; 21 | c :=c -1; 22 | end; 23 | end; 24 | procedure div; 25 | begin 26 | if y <> 0 then 27 | begin 28 | x := x / y; 29 | end 30 | else done := 1; 31 | end; 32 | begin 33 | if op = ADD then call add 34 | else if op = SUB then call sub 35 | else 
if op = MULT then call mult 36 | else if op = DIV then call div 37 | else done := 1; 38 | if done = 0 then write x; 39 | end; 40 | begin 41 | done := 0; 42 | read x; 43 | while done = 0 do 44 | begin 45 | read op; 46 | if op < 1 then done := 1 47 | else if op > 4 then done := 1; 48 | 49 | if done = 0 then 50 | begin 51 | read y; 52 | call calculate; 53 | end; 54 | end; 55 | end. -------------------------------------------------------------------------------- /sample/test2.pl0: -------------------------------------------------------------------------------- 1 | var f,n; 2 | procedure fact; 3 | var ans1; 4 | begin 5 | ans1:=n; 6 | n:=n-1; 7 | if n < 0 then f := -1 8 | else if n = 0 then f := 1 9 | else call fact; 10 | f := f*ans1; 11 | end; 12 | begin 13 | read n; 14 | call fact; 15 | write f; 16 | end. 17 | -------------------------------------------------------------------------------- /src/Code.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "Code.h" 4 | 5 | Code::Code(int _op, int _l, int _m) :op(_op), l(_l), m(_m){} 6 | 7 | void Code::printCode() const 8 | { 9 | std::cout << getOp(op) << " " << l << " "; 10 | if (op == 2) 11 | std::cout << getALUOp(m) << std::endl; 12 | else std::cout << m << std::endl; 13 | } 14 | void Code::changeAdrr(int x) 15 | { 16 | m = x; 17 | } 18 | std::string Code::getOp(int op) const 19 | { 20 | switch (op) { 21 | case 1: 22 | return "LIT"; 23 | break; 24 | case 2: 25 | return "OPR"; 26 | break; 27 | case 3: 28 | return "LOD"; 29 | break; 30 | case 4: 31 | return "STO"; 32 | break; 33 | case 5: 34 | return "CAL"; 35 | break; 36 | case 6: 37 | return "INC"; 38 | break; 39 | case 7: 40 | return "JMP"; 41 | break; 42 | case 8: 43 | return "JPC"; 44 | break; 45 | case 9: 46 | return "SIO_OUT"; 47 | break; 48 | case 10: 49 | return "SIO_IN"; 50 | break; 51 | default: 52 | return ""; 53 | break; 54 | } 55 | } 56 | 57 | std::string Code::getALUOp(int m) const 58 | { 59 | 
switch (m) { 60 | case 0: 61 | return "OPR_RET"; 62 | break; 63 | case 1: 64 | return "OPR_NEG"; 65 | break; 66 | case 2: 67 | return "OPR_ADD"; 68 | break; 69 | case 3: 70 | return "OPR_SUB"; 71 | break; 72 | case 4: 73 | return "OPR_MUL"; 74 | break; 75 | case 5: 76 | return "OPR_DIV"; 77 | break; 78 | case 6: 79 | return "OPR_ODD"; 80 | break; 81 | case 7: 82 | return "OPR_MOD"; 83 | break; 84 | case 8: 85 | return "OPR_EQL"; 86 | break; 87 | case 9: 88 | return "OPR_NEQ"; 89 | break; 90 | case 10: 91 | return "OPR_LSS"; 92 | break; 93 | case 11: 94 | return "OPR_LEQ"; 95 | break; 96 | case 12: 97 | return "OPR_GTR"; 98 | break; 99 | case 13: 100 | return "OPR_GEQ"; 101 | break; 102 | default: 103 | return ""; 104 | break; 105 | } 106 | } 107 | 108 | const int Code::getOp() const 109 | { 110 | return op; 111 | } 112 | 113 | const int Code::getL() const 114 | { 115 | return l; 116 | } 117 | 118 | const int Code::getM() const 119 | { 120 | return m; 121 | } 122 | -------------------------------------------------------------------------------- /src/Error.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "Error.h" 3 | 4 | const std::string parseErrors[] = { 5 | /* 0. */ "No errors, program is syntactically correct.", 6 | /* 1. */ "Use = instead of :=.", 7 | /* 2. */ "= must be followed by a number.", 8 | /* 3. */ "Identifier must be followed by =.", 9 | /* 4. */ "const, int, procedure must be followed by identifier.", 10 | /* 5. */ "Semicolon or comma missing.", 11 | 12 | /* XXX ??? */ 13 | /* 6. */ "Incorrect symbol after procedure declaration.", 14 | /* 7. */ "Statement expected.", 15 | /* 8. */ "Incorrect symbol after statement part in block.", 16 | /* XXX ??? */ 17 | 18 | /* 9. */ "Period expected.", 19 | /* 10. */ "Semicolon between statements missing.", 20 | /* 11. */ "Undeclared identifier.", 21 | /* 12. */ "Assignment to constant or procedure is not allowed.", 22 | /* 13. 
*/ "Assignment operator expected.", 23 | /* 14. */ "call must be followed by an identifier.", 24 | /* 15. */ "Call of a constant or variable is meaningless.", 25 | /* 16. */ "then expected.", 26 | 27 | /* XXX ??? */ 28 | /* 17. */ "Semicolon or end expected.", 29 | /* XXX ??? */ 30 | 31 | /* 18. */ "do expected.", 32 | 33 | /* XXX ??? */ 34 | /* 19. */ "Incorrect symbol following statement.", 35 | /* XXX ??? */ 36 | 37 | /* 20. */ "Relational operator expected.", 38 | /* 21. */ "Expression must not contain a procedure identifier.", 39 | /* 22. */ "Right parenthesis missing.", 40 | /* 23. */ "The preceding factor cannot begin with this symbol.", 41 | 42 | /* XXX ??? */ 43 | /* 24. */ "An expression cannot begin with this symbol.", 44 | /* XXX ??? */ 45 | 46 | /* 25. */ "This number is too large.", 47 | 48 | /* extra errors! */ 49 | /* 26. */ "out must be followed by an expression.", 50 | /* 27. */ "in must be followed by an identifier.", 51 | /* 28. */ "Cannot reuse this symbol here.", 52 | /* 29. */ "Cannot redefine an ident.", 53 | /* 30 */ "Unexperted Token in a declaration.", 54 | /* 31 */ "The symbol do not exist in pl0.", 55 | /* 32 */ "Unrecognized token.", 56 | /* 33 */ "Empty file.", 57 | /* 34 */ "Unacceptale token in front of a statement.", 58 | /* 35 */ "Read a procedure is meaningless." 
59 | }; 60 | 61 | void printError(int errorId, int line) 62 | { 63 | std::cout << parseErrors[errorId] << "in line" << line << std::endl; 64 | system("pause"); 65 | exit(0); 66 | } 67 | -------------------------------------------------------------------------------- /src/Identifier.cpp: -------------------------------------------------------------------------------- 1 | #include "Identifier.h" 2 | 3 | Id::Id(){} 4 | 5 | Id::Id(KIND _kind, int _level, int _addr) : 6 | kind(_kind), level(_level), addr(_addr) 7 | { 8 | } 9 | 10 | Id& Id::operator=(const Id& _id) 11 | { 12 | if (this == &_id) return *this; 13 | this->kind = _id.kind; 14 | this->level = _id.level; 15 | this->addr = _id.addr; 16 | return *this; 17 | } 18 | 19 | Identifier::Identifier() :currentLevel(0) 20 | { 21 | currM.resize(10, 0); 22 | } 23 | -------------------------------------------------------------------------------- /src/Lexer.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "Lexer.h" 5 | #include "Num.h" 6 | #include "Word.h" 7 | #include "Symbol.h" 8 | #include "Error.h" 9 | 10 | Lexer::Lexer() : line(1) {} 11 | 12 | Lexer::Lexer(std::string FileName) : line(1) 13 | { 14 | const char* FileN = FileName.c_str(); 15 | in.open(FileN); 16 | if (!in){ 17 | std::cerr << "error: unable to open input file -- " 18 | << FileN 19 | << std::endl; 20 | exit(0); 21 | } 22 | } 23 | 24 | Lexer::~Lexer() 25 | { 26 | for (auto token : TokenList) 27 | delete token; 28 | in.close(); 29 | } 30 | 31 | void Lexer::PlusLine() 32 | { 33 | ++line; 34 | } 35 | 36 | void Lexer::Tokenizer() 37 | { 38 | Token* p = 0; 39 | char nowChar = 0; 40 | auto putNumber = [&] { 41 | std::string s; 42 | do{ 43 | s += nowChar; 44 | if (in.peek() == EOF) { 45 | p = new Num(s, line); 46 | return; 47 | } 48 | in.get(nowChar); 49 | } while (isdigit(nowChar)); 50 | in.putback(nowChar); 51 | p = new Num(s, line); 52 | }; 53 | auto putWord = [&] { 54 | std::string 
s; 55 | do{ 56 | s += nowChar; 57 | if (in.peek() == EOF) { 58 | p = new Word(s, line); 59 | return; 60 | } 61 | in.get(nowChar); 62 | } while (isalnum(nowChar)); 63 | in.putback(nowChar); 64 | p = new Word(s, line); 65 | }; 66 | auto putSymbol = [&] { 67 | std::string s(1, nowChar); 68 | if (s[0] == ':') { 69 | if (in.peek() == EOF) { 70 | p = new Symbol(s, line); 71 | return; 72 | } 73 | in.get(nowChar); 74 | if (nowChar == '=') s = ":="; 75 | else in.putback(nowChar); 76 | } 77 | if (s == "<" || s == ">") { 78 | if (in.peek() == EOF) { 79 | p = new Symbol(s, line); 80 | return; 81 | } 82 | in.get(nowChar); 83 | if (s == "<" && nowChar == '>') s = "<>"; 84 | else if (nowChar == '=') s += "="; 85 | else in.putback(nowChar); 86 | } 87 | p = new Symbol(s, line); 88 | }; 89 | 90 | while (in.peek() != EOF) 91 | { 92 | in.get(nowChar); 93 | if (nowChar == '\n') { 94 | PlusLine(); 95 | continue; 96 | } 97 | else if (isspace(nowChar)) { 98 | continue; 99 | } 100 | else if (isdigit(nowChar)) { 101 | putNumber(); 102 | } 103 | else if (isalpha(nowChar)) { 104 | putWord(); 105 | } 106 | else { 107 | putSymbol(); 108 | } 109 | TokenList.push_back(p); 110 | } 111 | } 112 | 113 | void Lexer::PrintList() 114 | { 115 | for (auto token : TokenList) 116 | token->Print(); 117 | } 118 | 119 | Token* Lexer::GetToken() 120 | { 121 | if (TokenList.size()){ 122 | Token* token = TokenList.front(); 123 | TokenList.pop_front(); 124 | return token; 125 | } 126 | return NULL; 127 | } 128 | 129 | void Lexer::PutToken(Token*& p) 130 | { 131 | TokenList.push_front(p); 132 | p = 0; 133 | } 134 | 135 | void Lexer::JudgeError() 136 | { 137 | if (!TokenList.size()) 138 | printError(33, 1); 139 | std::list::iterator it = TokenList.begin(); 140 | for (; it != TokenList.end(); ++it){ 141 | if ((*it)->GetTag() == BADTOKEN) 142 | printError(32, (*it)->GetLine()); 143 | } 144 | --it; 145 | if ((*it)->GetTag() != PERIODSYM) 146 | printError(9, (*it)->GetLine()); 147 | } 148 | 
-------------------------------------------------------------------------------- /src/Num.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "Num.h" 6 | #include "Token.h" 7 | #include "Error.h" 8 | 9 | Num::Num(){} 10 | 11 | Num::Num(std::string s, int l) :Token(s, NUMBERSYM, l), value(0) 12 | { 13 | if (GetLexeme().size() > 8){ 14 | printError(25, GetLine()); 15 | } 16 | std::istringstream tmp(GetLexeme()); 17 | tmp >> value; 18 | } 19 | 20 | Num::~Num(){} 21 | 22 | int Num::GetValue() 23 | { 24 | return value; 25 | } 26 | 27 | void Num::Print() 28 | { 29 | std::cout << value 30 | << " (number)(" 31 | << Num::GetLine() 32 | << ")" 33 | << std::endl; 34 | } 35 | -------------------------------------------------------------------------------- /src/ObjCode.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "ObjCode.h" 6 | #include "Code.h" 7 | 8 | ObjCode::ObjCode() 9 | { 10 | } 11 | 12 | ObjCode::ObjCode(ObjCode &obj) 13 | { 14 | std::cout << "ObjCode Copy Construct" << std::endl; 15 | Code* pCode; 16 | for (int i = 0; i != obj.size(); ++i){ 17 | pCode = new Code(obj.at(i)->getOp(), obj.at(i)->getL(), obj.at(i)->getM()); 18 | icode.push_back(pCode); 19 | pCode = NULL; 20 | } 21 | } 22 | 23 | ObjCode::~ObjCode() 24 | { 25 | for (size_t i = 0; i != icode.size(); ++i) 26 | delete icode[i]; 27 | } 28 | 29 | void ObjCode::printCode() const 30 | { 31 | std::cout << "**************************************************" << std::endl; 32 | std::cout << " n op l m" << std::endl; 33 | std::cout << "**************************************************" << std::endl; 34 | for (size_t i = 0; i != icode.size(); ++i){ 35 | std::cout << std::setw(3) << i << " "; 36 | icode[i]->printCode(); 37 | } 38 | } 39 | void ObjCode::emitCode(int op, int l, int m) 40 | { 41 | icode.push_back(new Code(op, l, m)); 42 | 
} 43 | 44 | const int ObjCode::size() const 45 | { 46 | return icode.size(); 47 | } 48 | void ObjCode::changeAdrr(int a, int x) 49 | { 50 | icode[a]->changeAdrr(x); 51 | } 52 | 53 | Code*& ObjCode::at(int index) 54 | { 55 | return icode[index]; 56 | } 57 | 58 | //const Code*& ObjCode::at(int index) const 59 | //{ 60 | // return icode[index]; 61 | //} 62 | -------------------------------------------------------------------------------- /src/Parse.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "Parse.h" 6 | #include "Error.h" 7 | #include "Token.h" 8 | #include "Identifier.h" 9 | 10 | Parse::Parse() :look(0){} 11 | 12 | Parse::Parse(std::string FileName) :lex(FileName), look(0) 13 | { 14 | lex.Tokenizer(); 15 | } 16 | 17 | Parse::~Parse(){} 18 | 19 | void Parse::move() 20 | { 21 | if (look != 0) { 22 | delete look; 23 | look = NULL; 24 | } 25 | look = lex.GetToken(); 26 | } 27 | 28 | void Parse::GrammerAnalyzier() 29 | { 30 | lex.JudgeError(); 31 | program(); 32 | //icode.printCode(); 33 | //std::cout << "Over!" 
<< std::endl; 34 | } 35 | 36 | ObjCode& Parse::getCode() 37 | { 38 | return this->icode; 39 | } 40 | 41 | void Parse::program() 42 | { 43 | block(); 44 | icode.emitCode(OPR, 0, OPR_RET); 45 | move(); 46 | if (look->GetTag() != PERIODSYM){ 47 | printError(9, look->GetLine()); 48 | } 49 | } 50 | 51 | void Parse::block() 52 | { 53 | ident.currM[ident.currentLevel] += 3; 54 | icode.emitCode(INC, 0, 3); 55 | decls(); 56 | stmts(); 57 | } 58 | 59 | void Parse::decls() 60 | { 61 | auto vardecl = [&](){ 62 | int numOfVarDecl = 0; 63 | while (true){ 64 | move(); 65 | if (look->GetTag() != IDENTSYM) 66 | printError(4, look->GetLine()); 67 | if (ident.id.find(look->GetLexeme()) != ident.id.end()) 68 | printError(29, look->GetLine()); 69 | 70 | ident.id[look->GetLexeme()] = Id(VAR, 71 | ident.currentLevel, 72 | ident.currM[ident.currentLevel]++); 73 | numOfVarDecl++; 74 | 75 | move(); 76 | if (look->GetTag() == SEMICOLOMSYM) 77 | break; 78 | else if (look->GetTag() == COMMASYM) 79 | continue; 80 | else 81 | printError(30, look->GetLine()); 82 | } 83 | icode.emitCode(INC, 0, numOfVarDecl); 84 | }; 85 | 86 | auto condecl = [&](){ 87 | while (true){ 88 | move(); 89 | if (look->GetTag() != IDENTSYM) 90 | printError(4, look->GetLine()); 91 | if (ident.id.find(look->GetLexeme()) != ident.id.end()) 92 | printError(29, look->GetLine()); 93 | std::string idName = look->GetLexeme(); 94 | 95 | move(); 96 | if (look->GetTag() == BECOMESSYM) 97 | printError(1, look->GetLine()); 98 | if (look->GetTag() != EQLSYM) 99 | printError(2, look->GetLine()); 100 | 101 | move(); 102 | if (look->GetTag() != NUMBERSYM) 103 | printError(2, look->GetLine()); 104 | ident.id[idName].kind = CONST; 105 | ident.id[idName].value = atoi(look->GetLexeme().c_str()); 106 | 107 | move(); 108 | if (look->GetTag() == COMMASYM) 109 | continue; 110 | else if (look->GetTag() == SEMICOLOMSYM) 111 | break; 112 | else 113 | printError(30, look->GetLine()); 114 | } 115 | }; 116 | 117 | auto procdecl = [&](){ 118 | while 
(true){ 119 | move(); 120 | if (look->GetTag() != IDENTSYM) 121 | printError(4, look->GetLine()); 122 | if (ident.id.find(look->GetLexeme()) != ident.id.end()) 123 | printError(29, look->GetLine()); 124 | ident.id[look->GetLexeme()] = Id(PROC, 125 | ident.currentLevel++, 126 | icode.size() + 1); 127 | int tmpAdrr1 = icode.size(); 128 | icode.emitCode(JMP, 0, 0); 129 | 130 | move(); 131 | if (look->GetTag() != SEMICOLOMSYM) 132 | printError(5, look->GetLine()); 133 | block(); 134 | icode.emitCode(OPR, 0, OPR_RET); 135 | ident.currentLevel--; 136 | icode.changeAdrr(tmpAdrr1, icode.size()); 137 | 138 | move(); 139 | if (look->GetTag() != SEMICOLOMSYM) 140 | printError(5, look->GetLine()); 141 | 142 | move(); 143 | if (look->GetTag() != PROCSYM){ 144 | lex.PutToken(look); 145 | break; 146 | } else { 147 | continue; 148 | } 149 | } 150 | }; 151 | 152 | move(); 153 | switch (look->GetTag()){ 154 | case VARSYM: vardecl(); break; 155 | case CONSTSYM: condecl(); break; 156 | case PROCSYM: procdecl(); break; 157 | default: lex.PutToken(look); return; 158 | } 159 | decls(); 160 | } 161 | 162 | void Parse::stmts() 163 | { 164 | move(); 165 | std::string tmp; 166 | int tmpIfAdrr1, tmpIfAdrr2; 167 | int tmpWhileAdrr1, tmpWhileAdrr2; 168 | switch (look->GetTag()) 169 | { 170 | case IDENTSYM://assign 171 | if (ident.id.find(look->GetLexeme()) == ident.id.end()) 172 | printError(11, look->GetLine()); 173 | if (ident.id[look->GetLexeme()].kind != VAR) 174 | printError(12, look->GetLine()); 175 | tmp = look->GetLexeme(); 176 | 177 | move(); 178 | if (look->GetTag() != BECOMESSYM) 179 | printError(13, look->GetLine()); 180 | expr(); 181 | icode.emitCode(STO, 182 | abs(ident.id[tmp].level - ident.currentLevel), 183 | ident.id[tmp].addr); 184 | break; 185 | case CALLSYM://call 186 | move(); 187 | if (look->GetTag() != IDENTSYM) 188 | printError(14, look->GetLine()); 189 | if (ident.id.find(look->GetLexeme()) == ident.id.end()) 190 | printError(11, look->GetLine()); 191 | if 
(ident.id[look->GetLexeme()].kind != PROC) 192 | printError(15, (*look).GetLine()); 193 | tmp = look->GetLexeme(); 194 | icode.emitCode(CAL, 195 | abs(ident.id[tmp].level - ident.currentLevel), 196 | ident.id[tmp].addr); 197 | break; 198 | case BEGINSYM://begin 199 | while (true){ 200 | stmts(); 201 | move(); 202 | if (look->GetTag() != ENDSYM && look->GetTag() != SEMICOLOMSYM) 203 | printError(5, look->GetLine()); 204 | if (look->GetTag() == ENDSYM) 205 | break; 206 | else 207 | continue; 208 | } 209 | break; 210 | case IFSYM://if 211 | cond(); 212 | 213 | move(); 214 | if (look->GetTag() != THENSYM) 215 | printError(16, look->GetLine()); 216 | tmpIfAdrr1 = icode.size(); 217 | icode.emitCode(JPC, 0, 0); 218 | stmts(); 219 | tmpIfAdrr2 = icode.size(); 220 | icode.emitCode(JMP, 0, 0); 221 | icode.changeAdrr(tmpIfAdrr1, icode.size()); 222 | 223 | move(); 224 | if (look->GetTag() != ELSESYM) 225 | lex.PutToken(look); 226 | else 227 | stmts(); 228 | icode.changeAdrr(tmpIfAdrr2, icode.size()); 229 | break; 230 | case WHILESYM://while 231 | tmpWhileAdrr1 = icode.size(); 232 | cond(); 233 | tmpWhileAdrr2 = icode.size(); 234 | 235 | move(); 236 | icode.emitCode(JPC, 0, 0); 237 | if (look->GetTag() != DOSYM) 238 | printError(18, look->GetLine()); 239 | stmts(); 240 | icode.emitCode(JMP, 0, tmpWhileAdrr1); 241 | icode.changeAdrr(tmpWhileAdrr2, icode.size()); 242 | break; 243 | case READSYM://read 244 | move(); 245 | if (look->GetTag() != IDENTSYM) 246 | printError(27, look->GetLine()); 247 | if (ident.id.find(look->GetLexeme()) == ident.id.end()) 248 | printError(11, look->GetLine()); 249 | if (ident.id[look->GetLexeme()].kind == PROC) 250 | printError(35, look->GetLine()); 251 | icode.emitCode(SIO_IN, 0, 2); 252 | tmp = look->GetLexeme(); 253 | icode.emitCode(STO, 254 | abs(ident.id[tmp].level - ident.currentLevel), 255 | ident.id[tmp].addr); 256 | break; 257 | case WRITESYM://write 258 | expr(); 259 | icode.emitCode(SIO_OUT, 0, 1); 260 | break; 261 | default: 262 | 
lex.PutToken(look); 263 | break; 264 | } 265 | } 266 | 267 | void Parse::cond() 268 | { 269 | move(); 270 | if (look->GetTag() != ODDSYM){ 271 | lex.PutToken(look); 272 | expr(); 273 | 274 | move(); 275 | auto tmp = look->GetTag(); 276 | expr(); 277 | switch (tmp) { 278 | case EQLSYM: 279 | icode.emitCode(OPR, 0, OPR_EQL); 280 | break; 281 | case NEQSYM: 282 | icode.emitCode(OPR, 0, OPR_NEQ); 283 | break; 284 | case LESSYM: 285 | icode.emitCode(OPR, 0, OPR_LSS); 286 | break; 287 | case LEQSYM: 288 | icode.emitCode(OPR, 0, OPR_LEQ); 289 | break; 290 | case GTRSYM: 291 | icode.emitCode(OPR, 0, OPR_GTR); 292 | break; 293 | case GEQSYM: 294 | icode.emitCode(OPR, 0, OPR_GEQ); 295 | break; 296 | default: 297 | printError(20, look->GetLine()); 298 | break; 299 | } 300 | } else { 301 | expr(); 302 | } 303 | } 304 | 305 | void Parse::expr() 306 | { 307 | int op; 308 | move(); 309 | if (look->GetTag() == MINUSYM) 310 | icode.emitCode(OPR, 0, OPR_NEG); 311 | if (look->GetTag() != PLUSSYM && 312 | look->GetTag() != MINUSYM) 313 | lex.PutToken(look); 314 | term(); 315 | while (true){ 316 | move(); 317 | if (look->GetTag() == PLUSSYM || 318 | look->GetTag() == MINUSYM){ 319 | op = look->GetTag(); 320 | } else { 321 | lex.PutToken(look); 322 | break; 323 | } 324 | term(); 325 | if (op == PLUSSYM) 326 | icode.emitCode(OPR, 0, OPR_ADD); 327 | if (op == MINUSYM) 328 | icode.emitCode(OPR, 0, OPR_SUB); 329 | } 330 | } 331 | 332 | void Parse::term() 333 | { 334 | int op; 335 | factor(); 336 | while (true){ 337 | move(); 338 | if (look->GetTag() == MULSYM || 339 | look->GetTag() == SLASHSYM){ 340 | op = look->GetTag(); 341 | } else { 342 | lex.PutToken(look); 343 | break; 344 | } 345 | factor(); 346 | if (op == MULSYM) 347 | icode.emitCode(OPR, 0, OPR_MUL); 348 | if (op == SLASHSYM) 349 | icode.emitCode(OPR, 0, OPR_DIV); 350 | } 351 | } 352 | 353 | void Parse::factor() 354 | { 355 | move(); 356 | if (look->GetTag() == LPARENTSYM){ 357 | expr(); 358 | move(); 359 | if (look->GetTag() != 
RPARENTSYM) 360 | printError(22, look->GetLine()); 361 | } else { 362 | if (look->GetTag() != IDENTSYM && look->GetTag() != NUMBERSYM) 363 | printError(23, look->GetLine()); 364 | } 365 | if (look->GetTag() == IDENTSYM && ident.id[look->GetLexeme()].kind == CONST) 366 | icode.emitCode(LIT, 0, ident.id[look->GetLexeme()].value); 367 | if (look->GetTag() == IDENTSYM && ident.id[look->GetLexeme()].kind == VAR) 368 | icode.emitCode(LOD, 369 | abs(ident.id[look->GetLexeme()].level - ident.currentLevel), 370 | ident.id[look->GetLexeme()].addr); 371 | if (look->GetTag() == NUMBERSYM) 372 | icode.emitCode(LIT,0,atoi(look->GetLexeme().c_str())); 373 | } 374 | -------------------------------------------------------------------------------- /src/Symbol.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "Symbol.h" 4 | 5 | Symbol::Symbol(){} 6 | 7 | Symbol::Symbol(std::string s,int l) :Token(s,GetSymbolTag(GetSymbolNum(s)),l){} 8 | 9 | Symbol::~Symbol(){} 10 | 11 | void Symbol::Print() 12 | { 13 | if (BADTOKEN == GetTag()) 14 | std::cout << GetLexeme() 15 | << " (BADTOKEN)(" 16 | << GetLine () 17 | << ")" 18 | << std::endl; 19 | else 20 | std::cout << GetLexeme() 21 | << " (symbol)(" 22 | << GetLine() 23 | << ")" 24 | << std::endl; 25 | } 26 | 27 | int Symbol::GetSymbolNum(std::string s) 28 | { 29 | for (int i = 0; i != SymbolNum; ++i) 30 | { 31 | if (SymbolTable[i] == s) 32 | return i; 33 | } 34 | return -1; 35 | } 36 | Tag Symbol::GetSymbolTag(int t) 37 | { 38 | if (t == 0) return PLUSSYM; 39 | else if (t == 1) return MINUSYM; 40 | else if (t == 2) return MULSYM; 41 | else if (t == 3) return SLASHSYM; 42 | else if (t == 4) return EQLSYM; 43 | else if (t == 5) return NEQSYM; 44 | else if (t == 6) return LESSYM; 45 | else if (t == 7) return LEQSYM; 46 | else if (t == 8) return GTRSYM; 47 | else if (t == 9) return GEQSYM; 48 | else if (t == 10) return LPARENTSYM; 49 | else if (t == 11) return RPARENTSYM; 50 | else 
if (t == 12) return COMMASYM; 51 | else if (t == 13) return SEMICOLOMSYM; 52 | else if (t == 14) return PERIODSYM; 53 | else if (t == 15) return BECOMESSYM; 54 | else return BADTOKEN; 55 | } 56 | 57 | const std::string SymbolTable[] = 58 | { 59 | "+", "-", "*", "/", 60 | "=", "<>", "<", "<=", 61 | ">", ">=", "(", ")", 62 | ",", ";", ".", ":=" 63 | }; 64 | -------------------------------------------------------------------------------- /src/Token.cpp: -------------------------------------------------------------------------------- 1 | #include "Token.h" 2 | 3 | Token::Token(){} 4 | 5 | Token::Token(std::string s, Tag t, int l) :lexeme(s), tag(t), line(l){} 6 | 7 | Token::~Token(){} 8 | 9 | Tag Token::GetTag() 10 | { 11 | return tag; 12 | } 13 | 14 | int Token::GetLine() 15 | { 16 | return line; 17 | } 18 | 19 | void Token::ChangeTag(Tag t) 20 | { 21 | tag = t; 22 | } 23 | 24 | void Token::Print() 25 | {} 26 | 27 | std::string Token::GetLexeme() 28 | { 29 | return lexeme; 30 | } 31 | -------------------------------------------------------------------------------- /src/Vm.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duduscript/pl0-compiler/5813a351222ffd862492b2c9c4f951e03acb8d93/src/Vm.cpp -------------------------------------------------------------------------------- /src/Word.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "Word.h" 4 | 5 | Word::Word(){} 6 | 7 | Word::Word(std::string v, int l) :Token(v, GetKeywordTag(GetKeywordNum(v)), l){} 8 | 9 | Word::~Word(){} 10 | 11 | void Word::Print() 12 | { 13 | Tag t = GetTag(); 14 | if (t != IDENTSYM) 15 | std::cout << GetLexeme() 16 | << " (Keyword)(" 17 | << GetLine() 18 | << ")" 19 | << std::endl; 20 | else 21 | std::cout << GetLexeme() 22 | << " (IDENT)(" 23 | << GetLine() 24 | << ")" 25 | << std::endl; 26 | } 27 | 28 | Tag Word::GetKeywordTag(int t) 29 | { 30 | if (t == 0) 
return BEGINSYM; 31 | else if (t == 1) return ENDSYM; 32 | else if (t == 2) return IFSYM; 33 | else if (t == 3) return THENSYM; 34 | else if (t == 4) return WHILESYM; 35 | else if (t == 5) return DOSYM; 36 | else if (t == 6) return CALLSYM; 37 | else if (t == 7) return CONSTSYM; 38 | else if (t == 8) return VARSYM; 39 | else if (t == 9) return PROCSYM; 40 | else if (t == 10) return WRITESYM; 41 | else if (t == 11) return READSYM; 42 | else if (t == 12) return ELSESYM; 43 | else if (t == 13) return ODDSYM; 44 | else return IDENTSYM; 45 | } 46 | 47 | int Word::GetKeywordNum(std::string s) 48 | { 49 | for (int i = 0; i != NUMBER_OF_KEYWORD; ++i) 50 | if (KeywordTable[i] == s) 51 | return i; 52 | return -1; 53 | } 54 | 55 | const std::string KeywordTable[NUMBER_OF_KEYWORD] = 56 | { 57 | "begin", "end", "if", "then", "while", 58 | "do", "call", "const", "var", "procedure", 59 | "write", "read", "else", "odd" 60 | }; 61 | -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | #include "Parse.h" 2 | #include "Vm.h" 3 | 4 | #include 5 | #include 6 | 7 | int main(int argc, char *argv[]){ 8 | if (argc > 2 || argc == 1){ 9 | std::cerr<<"Usage: ./a.out filepath"<