├── .gitignore ├── CONTRIBUTE.md ├── DOCS.md ├── KWD.md ├── PR.md ├── README.md ├── STRUCT.md ├── TODO.md ├── TUTORIAL.md ├── analyzer ├── lexical.c ├── lexical.h └── lexical │ ├── recognizer.c │ ├── recognizer.h │ ├── scanner.c │ ├── scanner.h │ ├── sym_tbl.c │ └── sym_tbl.h ├── build ├── build ├── file.txt ├── lexical.o ├── p ├── scanner.o └── sym_tbl.o ├── conf.h ├── example.pndx ├── latest ├── no_README.md └── tests ├── GNUmakefile ├── README.rst ├── makefile ├── scan__scan_get_CharType_CharNum_CharTok_is_Alpha_Numeric_fn_s_test.c ├── scanner_scan_kwd_fn_test.c └── table.c /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | *.o 3 | *.exe 4 | *.out -------------------------------------------------------------------------------- /CONTRIBUTE.md: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | The only prerequisite for contributing to Pandex is understanding how Pandex works. 3 | 4 | If you want to open a pull request, see the [pull request notes](PR.md) file. Pull requests that don't follow the contribution instructions will be left open or potentially closed. Before a pull request is closed, I'll send a message that fully explains why it was closed. 5 | 6 | So first read the [Pandex structure (how Pandex works)](STRUCT.md), and then read [PR.md](PR.md). 7 | 8 | The following kinds of pull requests will never be accepted: 9 | 10 | * pull requests that add bugs 11 | * pull requests that remove necessary features 12 | * etc. 13 | 14 | # NOTE 15 | [before you open a pull request. 
please send me an email:](mailto:pooiaferdowsiisdeveloper@gmail.com) 16 | 17 | [](mailto:pooiaferdowsiisdeveloper@gmail.com) 18 | -------------------------------------------------------------------------------- /DOCS.md: -------------------------------------------------------------------------------- 1 | # Pandex docs -------------------------------------------------------------------------------- /KWD.md: -------------------------------------------------------------------------------- 1 | # RET 2 | 3 | # CALL 4 | 5 | # INC 6 | 7 | # DEC 8 | 9 | # VAR 10 | 11 | # IF 12 | 13 | # ELSE 14 | 15 | # THEN 16 | 17 | # FOR 18 | 19 | # WHILE 20 | -------------------------------------------------------------------------------- /PR.md: -------------------------------------------------------------------------------- 1 | chapters for a PR -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pandex is coming in the near future! 
2 | 3 | _PLEASE_ __WAIT__ 4 | 36 | -------------------------------------------------------------------------------- /STRUCT.md: -------------------------------------------------------------------------------- 1 | Pandex development is very disciplined, so you should have a deep understanding of the following topics: 2 | 3 | * lexical analyzer 4 | * syntax analyzer 5 | * semantic analyzer 6 | * scanner 7 | * symbol table 8 | * tokens 9 | * Q ∪ I where Q = {1, 2} and I = {a, b} 10 | 11 | This language is developed based on the `اصول طراحی کامپایلر ها` tutorials by `جعفر پور امینی`, but you can learn the topics from the [tutorialspoint.com course](https://www.tutorialspoint.com/compiler_design/index.htm) 12 | 13 | # Structure 14 | 15 | * The `conf.h` file contains the configuration 16 | * The `analyzer` directory contains the lexical, syntax and semantic analyzers 17 | * The `analyzer/lexical` directory contains the lexical analyzer components (e.g. `scanner`) 18 | * The `analyzer/syntax` directory contains the syntax analyzer components 19 | * The `analyzer/semantic` directory contains the semantic analyzer components 20 | 21 | # Part I|1 Scanner 22 | The scanner checks the source and reads each character of it, then returns the characters one by one in `alpha` format. 23 | 24 | *example* 25 | 26 | ```bash 27 | for $times 28 | ``` 29 | The scanner returns: 30 | 31 | ``` 32 | alpha(f)-alpha(o)-alpha(r)-whitespace-dollar_tok($)-alpha(t)-alpha(i)-alpha(m)-alpha(e)-alpha(s)-whitespace``` 33 | **note: this is only an example** 34 | 35 | The `scanner.c` file implements the `scanner.h` declarations, which are listed below 36 | * Token types, etc. 37 | ```c 38 | #define alpha 0 // if character is alpha like a:b:c:d 39 | #define number 1 // if character is number 1:2:3 40 | #define token 2 // if character is token \ 41 | ( e.g ~@#$%*()+_=-`'":{[]}") 42 | 43 | #define T_TILDE 10 // ~ 44 | #define T_ACUTE 11 // ` 45 | #define T_BANG 12 // ! 
46 | #define T_AT 13 // @ 47 | #define T_SHARP 14 // # 48 | #define T_DOLLAR 15 // $ 49 | #define T_PERCENT 16 // % 50 | #define T_CARET 17 // ^ 51 | #define T_AND 18 // & 52 | #define T_STAR 19 // * 53 | #define T_LPAREN 20 // ( LPAREN = LEFT PARANTHES 54 | #define T_RPAREN 21 // ) RPAREN = RIGHT PARANTHES 55 | #define T_MINUS 22 // - (HYPHEN . MINUS . DASH) 56 | #define T_UNDERSCORE 23 // _ 57 | #define T_PLUS 24 // + 58 | #define T_EQ 25 // = 59 | #define T_OBRACKET 26 // [ OBRACKET = OPEN BRACKET 60 | #define T_CBRACKET 27 // ] CBRACKET = CLOSE BRACKET 61 | #define T_OBRACE 28 // { OBRACE = OPEN BRACE 62 | #define T_CBRACE 29 // } CBRACE = CLOSE BRACE 63 | #define T_OR 30 // | 64 | #define T_BACKSLASH 31 // \ back slash 65 | #define T_FORWARDSLASH 32 // / forward slash 66 | #define T_COLON 33 // : 67 | #define T_SEMICOLON 34 68 | #define T_SINGLECOUTE 35 // ' SINGLE COUTE 69 | #define T_DOUBLEQOUTE 36 // " DOUBLE COUTE 70 | #define T_QUES 37 // ? 71 | #define T_LESSER 38 // < 72 | #define T_GREATER 39 // > 73 | #define T_COMMA 40 // , 74 | #define T_DOT 41 // . 
75 | ``` 76 | 77 | ```c 78 | * _Bool isAlpha(char); // check if ch is in alphabet 79 | * _Bool isNumeric(char); // check if ch is a number 80 | * int getCharTok(char); // get the char token id ( e.g if ch=='~' return T_TILDE=10) 81 | * int getCharType(char); // check if is alpha , numeric or token 82 | ``` 83 | 84 | The `scanner` headers are used in the recognizer (which is not a deterministic finite automaton) 85 | 86 | # Symbol table 87 | * In the `analyzer` directory 88 | 89 | **Location: [lexical/sym_tbl(.h|.c)](analyzer/lexical/sym_tbl.h)** 90 | 91 | 92 | 93 | The symbol table holds each variable's `name`, `address` and `scope`, because an identifier can contain more than one character but the lexer returns only one token for each identifier. The symbol table therefore stores the identifiers: the relation from characters to identifier token is `> 1` --- `1`, i.e. `more than one` to `one` 94 | 95 | Please research more about symbol tables 96 | 97 | # Recognizer (instead of a DFA (Deterministic Finite Automaton)) 98 | 99 | 100 | -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | # TODO: work I should do 2 | 3 | * [x] - rename `recognizer/dfa.(h|c)` to `recognizer.(h|c)` 4 | * [x] - add keywords to lexical analyzer 5 | * [ ] - add keywords to recognizer (open-source community) 6 | * [ ] - complete symbol table 7 | * [ ] - use a hash data structure instead of simple linked lists 8 | * [ ] - complete lexer 9 | -------------------------------------------------------------------------------- /TUTORIAL.md: -------------------------------------------------------------------------------- 1 | # a simple tutorial -------------------------------------------------------------------------------- /analyzer/lexical.c: -------------------------------------------------------------------------------- 1 | #ifndef LEXICAL_H 2 | #include "lexical.h" 3 | #endif 4 | 5 | #ifndef _STDBOOL_H 6 | # include 7 | #endif 8 | 9 | #ifndef _STDIO_H 10 | # 
include 11 | #endif 12 | 13 | #ifndef _STRING_H 14 | # include 15 | #endif 16 | 17 | #ifndef _STDLIB_H 18 | # include 19 | #endif 20 | 21 | #ifndef SCANNER_H 22 | # include "lexical/scanner.h" 23 | #endif 24 | 25 | size_t cur_line; // current line 26 | 27 | void swap_tokenizer(int tok_id , 28 | struct Tokenizer *ptr){ 29 | ptr->TOKEN = tok_id; 30 | ptr->prev->next = ptr; 31 | ptr->prev = ptr; 32 | ptr = ptr->next; 33 | } 34 | 35 | bool lex1line_str(char*str , 36 | /*struct Symbol_table *sym_tbl , */ 37 | struct Tokenizer *tokenner){ 38 | char copy[strlen(str)]; // you don't can use pointers \ 39 | you must use arrays instead :( 40 | 41 | strcpy(copy , str); // copy $str into $copy 42 | 43 | bool isKwd = true; 44 | bool isVar = false; 45 | bool mightPrintOrUnCompleteStringOnEmpty = false; 46 | 47 | int cur_kwd; // current keyword 48 | 49 | char *tok = strtok(copy , " "); 50 | while(tok != NULL) 51 | { 52 | // lex keywords 53 | if(tok[0] == '\"' || mightPrintOrUnCompleteStringOnEmpty){ 54 | for(size_t i = 1; i < strlen(tok); i++){ 55 | if(tok[i] != '"') 56 | fprintf(stdout,"%c",tok[i]); 57 | } 58 | 59 | 60 | printf("\n"); 61 | goto endLoop; 62 | } 63 | 64 | if(scan_kwd(tok) == -1) 65 | isKwd = false; 66 | 67 | if(isKwd) 68 | { 69 | cur_kwd = scan_kwd(tok); 70 | switch(cur_kwd){ 71 | case RET_KWD: 72 | swap_tokenizer(51 , tokenner); // 51 = RET 73 | break; 74 | 75 | case CALL_KWD: 76 | swap_tokenizer(52 , tokenner); // 52 = CALL 77 | break; 78 | 79 | case INC_KWD: 80 | swap_tokenizer(53 , tokenner); // INC = 53 81 | break; 82 | 83 | case DEC_KWD: 84 | swap_tokenizer(54 , tokenner); // DEC = 54 85 | break; 86 | 87 | case VAR_KWD: 88 | swap_tokenizer(55 , tokenner); // VAR = 55 89 | break; 90 | 91 | case IF_KWD: 92 | swap_tokenizer(56 , tokenner); // IF = 56 93 | break; 94 | 95 | case ELSE_KWD: 96 | swap_tokenizer(57 , tokenner); // ELSE = 57 97 | break; 98 | 99 | case THEN_KWD: 100 | swap_tokenizer(58 , tokenner); // THEN = 58 101 | break; 102 | 103 | case FOR_KWD: 104 
| swap_tokenizer(59 , tokenner); // FOR = 59 105 | break; 106 | 107 | case WHILE_KWD: 108 | swap_tokenizer(60 , tokenner); // WHILE = 60 109 | break; 110 | 111 | } 112 | } 113 | 114 | // end lex keywords 115 | 116 | endLoop: 117 | cur_kwd = 0; // empty cur_kwd variable 118 | tok = strtok(NULL , " "); 119 | } // end while 120 | } 121 | 122 | int main(int argc , char *argv[]) 123 | { 124 | struct Tokenizer tokenizer; 125 | 126 | if(argc != 2) 127 | return -1; 128 | 129 | FILE *fp = fopen(argv[1],"r"); 130 | fseek(fp , 0 , SEEK_END); 131 | unsigned long int length = ftell(fp); 132 | rewind(fp); 133 | char *buf = malloc(length); 134 | fread(buf , 1 , length , fp); 135 | 136 | lex1line_str(buf , &tokenizer); 137 | perror("Lexer"); 138 | puts("created but not executed else of print"); 139 | return 0; 140 | } 141 | -------------------------------------------------------------------------------- /analyzer/lexical.h: -------------------------------------------------------------------------------- 1 | #ifndef LEXICAL_H // header guard 2 | #define LEXICAL_H 3 | 4 | #ifndef SCANNER_H 5 | # include "lexical/scanner.h" 6 | #endif 7 | 8 | #ifndef SYM_TBL_H 9 | # include "lexical/sym_tbl.h" 10 | #endif 11 | 12 | #ifndef DFA_H 13 | # include "lexical/recognizer.h" 14 | #endif 15 | 16 | enum the_token { 17 | // tokens 18 | TILDE=T_TILDE, 19 | ACUTE , BANG , 20 | AT , SHARP , 21 | DOLLAR , 22 | PERCENT , CARET 23 | , AND , STAR , 24 | LPAREN , RPAREN , 25 | MINUS , UNDERSCORE 26 | , PLUS , EQ , 27 | OBRACKET , 28 | CBRACKET , 29 | OBRACE , CBRACE , 30 | OR , BACKSLASH , 31 | FORWARDSLASH , 32 | COLON , SEMICOLON 33 | , SINGLECOUTE , 34 | DOUBLEQOUTE , 35 | QUES , LESSER , 36 | GREATER , COMMA 37 | , DOT // end tokens 38 | 39 | // keywords 40 | , RET = 51, CALL 41 | , INC , DEC , 42 | VAR , IF , ELSE 43 | , THEN , FOR , 44 | WHILE , /* end keywords */ 45 | 46 | /* numbers */ 47 | NUM0 = 70 , 48 | NUM1 , NUM2 49 | ,NUM3, NUM4 50 | ,NUM5, NUM6 51 | ,NUM7, NUM8 52 | ,NUM9 /* 53 | end 54 
| positive 55 | numbers*/ 56 | 57 | /* negative numbers */ 58 | , 59 | NNUM0 = 80 , 60 | NNUM1 , NNUM2 , 61 | NNUM3 , NNUM4 , 62 | NNUM5 , NNUM6 , 63 | NNUM7 , NNUM8 , 64 | NNUM9 , 65 | /* end negative 66 | numbers */ 67 | NEW_LINE = 99 68 | }; 69 | 70 | 71 | struct Tokenizer 72 | { 73 | enum the_token TOKEN; 74 | struct Tokenizer *next; 75 | struct Tokenizer *prev; 76 | }; 77 | void swap_tokenizer(int /* token id */ , struct Tokenizer *); 78 | _Bool lex1line_str(char* ,/* struct Symbol_table * ,*/ struct Tokenizer *); 79 | 80 | #endif 81 | -------------------------------------------------------------------------------- /analyzer/lexical/recognizer.c: -------------------------------------------------------------------------------- 1 | #ifndef RECOGNIZER_H 2 | # include "recognizer.h" 3 | #endif 4 | 5 | #ifndef SCANNER_H 6 | # include "scanner.c" 7 | #endif 8 | 9 | #ifndef _STDBOOL_H 10 | # include 11 | #endif 12 | 13 | bool recognize(char source[]){ 14 | for(int iter = 0; 15 | source[iter] != '\0'; 16 | iter += 1){ 17 | // check tokens 18 | if(getCharType(source[iter]) != token); 19 | else 20 | if(getCharType(source[iter]) == -1) 21 | return false; 22 | } 23 | 24 | return true; 25 | } 26 | -------------------------------------------------------------------------------- /analyzer/lexical/recognizer.h: -------------------------------------------------------------------------------- 1 | // rename `recognizer/dfa.(c|h)` to recognizer.(c|h) 2 | #ifndef RECOGNIZER_H 3 | #define RECOGNIZER_H 4 | 5 | // recognizer check if is this string \ 6 | this language code 7 | 8 | _Bool recognize(char[]); 9 | 10 | #endif -------------------------------------------------------------------------------- /analyzer/lexical/scanner.c: -------------------------------------------------------------------------------- 1 | // implemention of declurations in `scanner.h` file 2 | 3 | #ifndef SCANNER_H 4 | # include "scanner.h" 5 | #endif 6 | 7 | #ifndef _STDBOOL_H 8 | # include 9 | #endif 10 | 11 
| #ifndef _STDLIB_H 12 | # include 13 | #endif 14 | 15 | #ifndef _STDIO_H 16 | # include 17 | #endif 18 | 19 | #ifndef _STRING_H 20 | # include 21 | #endif 22 | 23 | bool isAlpha(const char ch) 24 | { 25 | if(ch >= 48 && ch <= 57) 26 | return false; 27 | 28 | if ((ch >= 97 && ch <= 122) || 29 | (ch >= 65 && ch <= 90) ){ 30 | return true; 31 | } 32 | 33 | return false; 34 | } 35 | 36 | bool isNumeric(const char ch) 37 | { 38 | if( (ch >= 97 && ch <= 122) || 39 | ( ch >= 65 && ch <= 90)){ 40 | return false; 41 | } 42 | if(ch >= 48 && ch <= 57) 43 | return true; 44 | 45 | return false; 46 | } 47 | 48 | int getCharTok(const char ch) 49 | { 50 | if(getCharType(ch) == token){ 51 | switch(ch){ 52 | // NOTE : cases don't needs break because when a \ 53 | case executed return a thing that means end of function 54 | 55 | case '~': 56 | return T_TILDE; 57 | case '`': 58 | return T_ACUTE; 59 | case '!': 60 | return T_BANG; 61 | case '@': 62 | return T_AT; 63 | case '#': 64 | return T_SHARP; 65 | case '$': 66 | return T_DOLLAR; 67 | case '%': 68 | return T_PERCENT; 69 | case '^': 70 | return T_CARET; 71 | case '&': 72 | return T_AND; 73 | case '*': 74 | return T_STAR; 75 | case '(': 76 | return T_LPAREN; 77 | case ')': 78 | return T_RPAREN; 79 | case '-': 80 | return T_MINUS; 81 | case '_': 82 | return T_UNDERSCORE; 83 | case '+': 84 | return T_PLUS; 85 | case '=': 86 | return T_EQ; 87 | case '[': 88 | return T_OBRACKET; 89 | case ']': 90 | return T_CBRACKET; 91 | case '{': 92 | return T_OBRACE; 93 | case '}': 94 | return T_CBRACE; 95 | case '|': 96 | return T_OR; 97 | case '\\': 98 | return T_BACKSLASH; 99 | case '/': 100 | return T_FORWARDSLASH; 101 | case ':': 102 | return T_COLON; 103 | case ';': 104 | return T_SEMICOLON; 105 | case '\'': 106 | return T_SINGLECOUTE; 107 | case '"': 108 | return T_DOUBLEQOUTE; 109 | case '?': 110 | return T_QUES; 111 | case '<': 112 | return T_LESSER; 113 | case '>': 114 | return T_GREATER; 115 | case ',': 116 | return T_COMMA; 117 | case 
'.': 118 | return T_DOT; 119 | default: 120 | return -1; // EOF (End-Of-File) 121 | } 122 | } 123 | return EOF; 124 | } 125 | 126 | int getCharNum(const char ch , const bool isNegative){ 127 | switch(ch){ 128 | case '0': 129 | if(isNegative) 130 | return NNUM_0; 131 | else 132 | return NUM_0; 133 | 134 | case '1': 135 | if(isNegative) 136 | return NNUM_1; 137 | else 138 | return NUM_1; 139 | 140 | case '2': 141 | if(isNegative) 142 | return NNUM_2; 143 | else 144 | return NUM_2; 145 | 146 | case '3': 147 | if(isNegative) 148 | return NNUM_3; 149 | else 150 | return NUM_3; 151 | 152 | case '4': 153 | if(isNegative) 154 | return NNUM_4; 155 | else 156 | return NUM_4; 157 | 158 | case '5': 159 | if(isNegative) 160 | return NNUM_5; 161 | else 162 | return NUM_5; 163 | 164 | case '6': 165 | if(isNegative) 166 | return NNUM_6; 167 | else 168 | return NUM_6; 169 | 170 | case '7': 171 | if(isNegative) 172 | return NNUM_7; 173 | else 174 | return NUM_7; 175 | 176 | case '8': 177 | if(isNegative) 178 | return NNUM_8; 179 | else 180 | return NUM_8; 181 | 182 | case '9': 183 | if(isNegative) 184 | return NNUM_9; 185 | else 186 | return NUM_9; 187 | 188 | default: return -1; // EOF 189 | 190 | } 191 | } 192 | 193 | int getCharType(const char ch) 194 | { 195 | if(isAlpha(ch)) 196 | return alpha; 197 | else if(isNumeric(ch)) 198 | return number; 199 | else 200 | return token; 201 | } 202 | 203 | /* 204 | @_scan:(char):int 205 | It's scanner main function 206 | that read the source and 207 | scan source and return the 208 | id of token/kwd/var . 
209 | */ 210 | int _scan(const char src , bool isItIsNumber_Type) 211 | { 212 | // 213 | size_t type = getCharType(src); // getCharType return the type to integer : \ 214 | alpha=0 , number=1 , token2 215 | 216 | if(type == token) 217 | return getCharTok(src); 218 | if (type == number ) 219 | return getCharNum(src , isItIsNumber_Type); 220 | if (type == alpha ) 221 | return alpha; 222 | 223 | } 224 | 225 | /* 226 | @scan:(char):int 227 | the scan function 228 | scan the source and 229 | help to `analyer/lexical` 230 | to create a token lists 231 | But What's difference between 232 | _sacn and scan(this) function ? 233 | The _scan function used in scan 234 | but _scan cannot lex alpha but can 235 | lex numbers and tokens So the scan 236 | function available lexing alpha and 237 | use of _scan for lexing numbers and 238 | tokens 239 | */ 240 | 241 | int scan(const char source){ 242 | size_t result = _scan(source,false); 243 | if ( result == alpha){ 244 | return source; 245 | 246 | }else 247 | return result; // may be hard to understand ? :/ or :\ 248 | which of them :) 249 | } 250 | 251 | 252 | /* 253 | this function 254 | return keywords 255 | id | lex keyword. 
256 | this function when 257 | detect first keyword 258 | in source , return 259 | the keyword so function 260 | will stopped and do not 261 | return next keywords 262 | */ 263 | int scan_kwd(const char source[]){ 264 | size_t length = strlen(source); 265 | 266 | char *ptr = malloc(length); // new string 267 | int ptr_cur_pos = 0; // position of new string 268 | 269 | for(size_t i = 0; i < length; i++) 270 | { 271 | if ( source[i] != ' ' 272 | && source[i] != '\n' 273 | && source[i] != '\t') 274 | ptr[ptr_cur_pos++] = source[i]; 275 | } 276 | 277 | ptr[ptr_cur_pos++] ='\0'; 278 | 279 | // delete whitespaces 280 | 281 | /* 282 | if ( strcmp ( 283 | tok , 284 | "keyword" , 285 | length 286 | ) 287 | ) 288 | 289 | check is tok is equal to kywords and 290 | pass the length of keyword to latest 291 | strncmp argument 292 | */ 293 | 294 | if ( strncmp (ptr , "ret" , 3) == 0) 295 | return RET_KWD; 296 | 297 | if ( strncmp (ptr , "call" , 4) == 0) 298 | return CALL_KWD; 299 | 300 | if ( strncmp (ptr , "inc" , 3) == 0) 301 | return INC_KWD; 302 | 303 | if ( strncmp (ptr , "dec" , 3) == 0) 304 | return DEC_KWD; 305 | 306 | if ( strncmp (ptr , "var" , 3) == 0) 307 | return VAR_KWD; 308 | 309 | if ( strncmp (ptr , "if" , 2) == 0) 310 | return IF_KWD; 311 | 312 | if ( strncmp (ptr , "else" , 4) == 0) 313 | return ELSE_KWD; 314 | 315 | if ( strncmp (ptr , "then" , 4) == 0) 316 | return THEN_KWD; 317 | 318 | if ( strncmp (ptr , "for" , 3) == 0) 319 | return FOR_KWD; 320 | 321 | if ( strncmp (ptr , "while" , 5) == 0) 322 | return WHILE_KWD; 323 | 324 | return EOF; // if was not keyword 325 | } 326 | -------------------------------------------------------------------------------- /analyzer/lexical/scanner.h: -------------------------------------------------------------------------------- 1 | #ifndef SCANNER_H 2 | #define SCANNER_H 3 | 4 | #ifndef _STDBOOL_H 5 | # include 6 | #endif 7 | 8 | #ifndef _STDDEF_H 9 | # include 10 | #endif 11 | 12 | #define alpha 0 // if character is 
alpha like a:b:c:d 13 | #define number 1 // if character is number 1:2:3 14 | #define token 2 // if character is token \ 15 | ( e.g ~@#$%*()+_=-`'":{[]}") 16 | 17 | #define T_TILDE 10 // ~ 18 | #define T_ACUTE 11 // ` 19 | #define T_BANG 12 // ! 20 | #define T_AT 13 // @ 21 | #define T_SHARP 14 // # 22 | #define T_DOLLAR 15 // $ 23 | #define T_PERCENT 16 // % 24 | #define T_CARET 17 // ^ 25 | #define T_AND 18 // & 26 | #define T_STAR 19 // * 27 | #define T_LPAREN 20 // ( LPAREN = LEFT PARANTHES 28 | #define T_RPAREN 21 // ) RPAREN = RIGHT PARANTHES 29 | #define T_MINUS 22 // - (HYPHEN . MINUS . DASH) 30 | #define T_UNDERSCORE 23 // _ 31 | #define T_PLUS 24 // + 32 | #define T_EQ 25 // = 33 | #define T_OBRACKET 26 // [ OBRACKET = OPEN BRACKET 34 | #define T_CBRACKET 27 // ] CBRACKET = CLOSE BRACKET 35 | #define T_OBRACE 28 // { OBRACE = OPEN BRACE 36 | #define T_CBRACE 29 // } CBRACE = CLOSE BRACE 37 | #define T_OR 30 // | 38 | #define T_BACKSLASH 31 // \ back slash 39 | #define T_FORWARDSLASH 32 // / forward slash 40 | #define T_COLON 33 // : 41 | #define T_SEMICOLON 34 // ; 42 | #define T_SINGLECOUTE 35 // ' SINGLE COUTE 43 | #define T_DOUBLEQOUTE 36 // " DOUBLE COUTE 44 | #define T_QUES 37 // ? 45 | #define T_LESSER 38 // < 46 | #define T_GREATER 39 // > 47 | #define T_COMMA 40 // , 48 | #define T_DOT 41 // . 
49 | 50 | // keywords 51 | #define RET_KWD 51 // ret kwds 52 | #define CALL_KWD 52 53 | #define INC_KWD 53 54 | #define DEC_KWD 54 55 | #define VAR_KWD 55 56 | #define IF_KWD 56 57 | #define ELSE_KWD 57 58 | #define THEN_KWD 58 59 | #define FOR_KWD 59 60 | #define WHILE_KWD 60 61 | 62 | // number 63 | // + 64 | #define NUM_0 70 // 0 65 | #define NUM_1 71 // 1 66 | #define NUM_2 72 // 2 67 | #define NUM_3 73 // 3 68 | #define NUM_4 74 // 4 69 | #define NUM_5 75 // 5 70 | #define NUM_6 76 // 6 71 | #define NUM_7 77 // 7 72 | #define NUM_8 78 // 8 73 | #define NUM_9 79 // 9 74 | // - 75 | #define NNUM_0 80 76 | #define NNUM_1 81 77 | #define NNUM_2 82 78 | #define NNUM_3 83 79 | #define NNUM_4 84 80 | #define NNUM_5 85 81 | #define NNUM_6 86 82 | #define NNUM_7 87 83 | #define NNUM_8 88 84 | #define NNUM_9 89 85 | 86 | bool isAlpha(const char); // check if ch is in alphabet 87 | bool isNumeric(const char); // check if ch is a number 88 | 89 | int getCharTok(const char); // get the char token id \ 90 | ( e.g if ch=='~' return T_TILDE=10) 91 | int getCharType(const char); // check if is alpha , numeric or token 92 | int getCharNum(const char , const bool isNegative); 93 | 94 | int _scan(const char , bool); 95 | 96 | int scan(const char); 97 | 98 | int scan_kwd(const char[]); 99 | 100 | #endif 101 | -------------------------------------------------------------------------------- /analyzer/lexical/sym_tbl.c: -------------------------------------------------------------------------------- 1 | // implementation of sym_tbl.h 2 | #ifndef SYM_TBL_H 3 | # include "sym_tbl.h" 4 | #endif 5 | 6 | #ifndef _STDBOOL_H 7 | # include 8 | #endif 9 | 10 | #ifndef _STRING_H 11 | # include 12 | #endif 13 | 14 | #ifndef _STDLIB_H 15 | # include 16 | #endif 17 | 18 | bool lookup_by_name(char*name , 19 | struct Symbol_table *table) 20 | { 21 | struct Symbol_table *copied 22 | = table; // copy second argument into copied 23 | 24 | 25 | /* colorize the source */ 26 | 27 | do 28 | if (strcmp 
(copied->name , name) == 0) 29 | return true; 30 | while(/* Assign */ (copied = (*copied).next) /* end */); 31 | 32 | return false; 33 | } 34 | 35 | bool check_scope(struct Symbol_table *ptr , 36 | unsigned short int scope){ 37 | if(ptr->scope == scope) 38 | return true; 39 | 40 | return false; 41 | } 42 | 43 | bool insert_table( 44 | struct Symbol_table * MAIN , 45 | struct Symbol_table insert_it){ 46 | 47 | MAIN->next = &insert_it; 48 | 49 | if(MAIN->next); // segment fault if MAIN->next is not exits 50 | return true; 51 | } 52 | 53 | struct Symbol_table make_table(char*name,char*value){ 54 | struct Symbol_table *TABLE = (struct Symbol_table *) malloc(sizeof(struct Symbol_table)); 55 | 56 | TABLE->name = name; 57 | TABLE->value = value; 58 | TABLE->scope = 0; 59 | TABLE->addr = calc_addr(); 60 | 61 | return *TABLE; 62 | } 63 | 64 | unsigned long long int calc_addr(){ 65 | static unsigned long long int 66 | addr = 999; // a thosound is \ 67 | primary address for each variable 68 | 69 | return ++addr; 70 | } 71 | -------------------------------------------------------------------------------- /analyzer/lexical/sym_tbl.h: -------------------------------------------------------------------------------- 1 | // declurations of sym_tbl.c 2 | #ifndef SYM_TBL_H // header guard 3 | #define SYM_TBL_H 4 | 5 | #ifndef _STDBOOL_h 6 | # include 7 | #endif 8 | 9 | struct Symbol_table 10 | { 11 | unsigned short int scope; 12 | char *name; 13 | char *value; 14 | unsigned long long int addr; 15 | struct Symbol_table *next; 16 | }; 17 | 18 | 19 | bool lookup_by_name(char* 20 | , struct Symbol_table *); 21 | 22 | bool check_scope 23 | (struct Symbol_table * 24 | , unsigned short int); // \ 25 | check is variable is in same scope 26 | 27 | 28 | bool insert_table( 29 | struct Symbol_table * /* MAIN */ , 30 | struct Symbol_table /* insert_it*/ 31 | ); 32 | 33 | struct Symbol_table make_table(char*,char*); 34 | 35 | // calculate address of variable \ 36 | for symbol table 37 | unsigned 
long long int 38 | calc_addr(); 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /build/build: -------------------------------------------------------------------------------- 1 | gcc -c ../analyzer/lexical/scanner.c ../analyzer/lexical/sym_tbl.c ../analyzer/lexical.c 2 | gcc lexical.o scanner.o sym_tbl.o -o p 3 | -------------------------------------------------------------------------------- /build/file.txt: -------------------------------------------------------------------------------- 1 | "Hello" 2 | -------------------------------------------------------------------------------- /build/lexical.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pandex-lang/Pandex/39eecb297321b4deb133eefef2c30032b6a44cbf/build/lexical.o -------------------------------------------------------------------------------- /build/p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pandex-lang/Pandex/39eecb297321b4deb133eefef2c30032b6a44cbf/build/p -------------------------------------------------------------------------------- /build/scanner.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pandex-lang/Pandex/39eecb297321b4deb133eefef2c30032b6a44cbf/build/scanner.o -------------------------------------------------------------------------------- /build/sym_tbl.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pandex-lang/Pandex/39eecb297321b4deb133eefef2c30032b6a44cbf/build/sym_tbl.o -------------------------------------------------------------------------------- /conf.h: -------------------------------------------------------------------------------- 1 | #ifndef CONF_H 2 | #define CONF_H 3 | /* begin */ 4 | 5 | #define VERSION 1004 6 | /* 7 | // comments 8 | // asm 9 
| #define START_COMMENT ";" 10 | #define END_COMMENT "\n" 11 | // c 12 | #define START_COMMENT "//" 13 | #define END_COMMENT "\n" 14 | // cpp 15 | #define START_COMMENT "/*" 16 | #define END_COMMENT "* /" 17 | // python 18 | #define START_COMMENT "#" 19 | #define END_COMMENT "\n" 20 | // ada 21 | #define START_COMMENT "--" 22 | #define END_COMMENT "\n" 23 | // lua 24 | #define START_COMMENT "--[[" 25 | #define END_COMMENT "]]" 26 | */ 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /example.pndx: -------------------------------------------------------------------------------- 1 | ; an example Pandex source 2 | 3 | .import `string`::puts,strlen 4 | 5 | main: ; main function 6 | var result%b ; %b = boolean 7 | var length%d ; %d = integer 8 | call strlen ($1 , &length) 9 | if result then call puts ($ , $length) 10 | 11 | another: 12 | ; new version 13 | call puts ( $0 ) ; 'another` -------------------------------------------------------------------------------- /latest: -------------------------------------------------------------------------------- 1 | this file is never will changed 2 | it's hold latest time that an file modified in particular repo 3 | -------------------------------------------------------------------------------- /no_README.md: -------------------------------------------------------------------------------- 1 | # Pandex 2 | Pandex is a light but FAST programming language written in C . Pandex goal is that be hard & it's good for eductional goals 3 | 4 | ![badge](https://tokei.rs/b1/github/Pandex-lang/Pandex) 5 | 6 | **How To Memebr In Pandex-lang** 7 | * 10 first person star this repository (step 1) 8 | * 15 other peoples that send pull request ( step 2) 9 | * anyone send a merged pull request 10 | 11 | # ***Three (now Two) week delay in the project*** 12 | 13 | School quizzes starts & I am student that means three week delay. 
14 | 15 | # comments are after ; 16 | 17 | # Why Pandex 18 | When I think to Yandex I understand that it's stand for "Yet Another INDEXer" 19 | and before it, the Pandex name was index But I like my name ( first name ) , 20 | so my language name should starts with the letter that my first name starts 21 | with this letter and it's short for "Pooia Index". 22 | But I can point to following causes. 23 | * if you call epidemic instead Pandex , it's cause it that Pandex created a year after COVID-19 VIRUS epidemic 24 | * Englishes , Russians and Persians can say Pandex correctly 25 | * Pandex can write beautiful in cyrillic < пандекс > 26 | -------------------------------------------------------------------------------- /tests/GNUmakefile: -------------------------------------------------------------------------------- 1 | CC = cc # gcc for me 2 | 3 | ROOT_PATH = .. 4 | 5 | TEST_OUTPUT = m 6 | 7 | # main files 8 | OBJECTS = scanner.o 9 | OBJECTS += sym_tbl.o 10 | 11 | # test files 12 | OBJECTS += scanner_scan_kwd_fn_test.o 13 | OBJECTS += scan__scan_get_CharType_CharNum_CharTok_is_Alpha_Numeric_fn_s_test.o 14 | OBJECTS += table.o 15 | 16 | # main files 17 | MAIN_TARGET_DEPS = scanner.o # 18 | MAIN_TARGET_DEPS += sym_tbl.o 19 | 20 | # test files 21 | MAIN_TARGET_DEPS += scanner_scan_kwd_fn_test.o # 22 | MAIN_TARGET_DEPS += scan__scan_get_CharType_CharNum_CharTok_is_Alpha_Numeric_fn_s_test.o # 23 | MAIN_TARGET_DEPS += table.o 24 | 25 | #TEST_STATUS = scanner_scan_kwd_fn_test 26 | #TEST_STATUS = scan__scan_get_CharType_CharNum_CharTok_is_Alpha_Numeric_fn_s_test 27 | TEST_STATUS = table 28 | 29 | all: $(MAIN_TARGET_DEPS) 30 | ifeq (${TEST_STATUS},scanner_scan_kwd_fn_test) 31 | $(CC) scanner.o ${TEST_STATUS}.o -o $(TEST_OUTPUT) 32 | endif 33 | 34 | ifeq (${TEST_STATUS},scan__scan_get_CharType_CharNum_CharTok_is_Alpha_Numeric_fn_s_test) 35 | $(CC) scanner.o ${TEST_STATUS}.o -o $(TEST_OUTPUT) 36 | endif 37 | 38 | ifeq (${TEST_STATUS},table) 39 | $(CC) sym_tbl.o 
${TEST_STATUS}.o -o $(TEST_OUTPUT) 40 | endif 41 | 42 | 43 | # main files 44 | scanner.o: 45 | $(CC) -c $(ROOT_PATH)/analyzer/lexical/${@:.o=.c} 46 | 47 | sym_tbl.o: 48 | $(CC) -c $(ROOT_PATH)/analyzer/lexical/${@:.o=.c} 49 | 50 | # test files 51 | scanner_scan_kwd_fn_test.o: 52 | $(CC) -c $(ROOT_PATH)/tests/${@:.o=.c} 53 | 54 | scan__scan_getCharType_getCharNum_getCharTok_isAlpha_isNumeric_fn_s_test.o: 55 | $(CC) -c $(ROOT_PATH)/tests/${@:.o=.c} 56 | 57 | table.o: 58 | $(CC) -c $(ROOT_PATH)/tests/${@:.o=.c} 59 | 60 | clean: 61 | rm $(OBJECTS) 62 | rm $(TEST_OUTPUT) 63 | -------------------------------------------------------------------------------- /tests/README.rst: -------------------------------------------------------------------------------- 1 | Pandex tests 2 | ============= 3 | 4 | * in the :code:`test` directory I push Pandex tests 5 | 6 | but How their works ? How use them ? 7 | ------------------------------------- 8 | 9 | so look at structure . 10 | 11 | structure 12 | ~~~~~~~~~~ 13 | this directory hold some of files check the functions and etc 14 | 15 | 1) check the scanner ( starts by scan ) 16 | 2) 17 | 18 | 19 | The makefile 20 | ------------- 21 | I use makefile to run and check tests . 
22 | 23 | about makefile 24 | ~~~~~~~~~~~~~~~ 25 | 26 | 1) variables 27 | - :code:`$(CC)` : holds the compiler to use ( e.g. CC=gcc or CC=clang ) 28 | - :code:`$(ROOT_PATH)` : holds the relative path from this directory to the repository root 29 | - :code:`$(TEST_OUTPUT)` : name of the executable file , as in :code:`$(CC) files.(o|c) -o $(TEST_OUTPUT)` 30 | - :code:`$(OBJECTS)` : names of the :code:`.o` files that CC produces 31 | - :code:`$(MAIN_TARGET_DEPS)` : targets that must be built before the makefile's main ( default , all ) target 32 | - :code:`$(TEST_STATUS)` : name of the test to build 33 | -------------------------------------------------------------------------------- /tests/makefile: -------------------------------------------------------------------------------- 1 | CC = cc # gcc for me 2 | 3 | ROOT_PATH = .. 4 | 5 | TEST_OUTPUT = m 6 | 7 | # main files 8 | OBJECTS = scanner.o 9 | OBJECTS += sym_tbl.o 10 | 11 | # test files 12 | OBJECTS += scanner_scan_kwd_fn_test.o 13 | OBJECTS += scan__scan_get_CharType_CharNum_CharTok_is_Alpha_Numeric_fn_s_test.o 14 | OBJECTS += table.o 15 | 16 | # main files 17 | MAIN_TARGET_DEPS = scanner.o # 18 | MAIN_TARGET_DEPS += sym_tbl.o 19 | 20 | # test files 21 | MAIN_TARGET_DEPS += scanner_scan_kwd_fn_test.o # 22 | MAIN_TARGET_DEPS += scan__scan_get_CharType_CharNum_CharTok_is_Alpha_Numeric_fn_s_test.o # 23 | MAIN_TARGET_DEPS += table.o 24 | 25 | #TEST_STATUS = scanner_scan_kwd_fn_test 26 | #TEST_STATUS = scan__scan_get_CharType_CharNum_CharTok_is_Alpha_Numeric_fn_s_test 27 | TEST_STATUS = table 28 | 29 | all: $(MAIN_TARGET_DEPS) 30 | ifeq (${TEST_STATUS},scanner_scan_kwd_fn_test) 31 | $(CC) scanner.o ${TEST_STATUS}.o -o $(TEST_OUTPUT) 32 | endif 33 | 34 | ifeq (${TEST_STATUS},scan__scan_get_CharType_CharNum_CharTok_is_Alpha_Numeric_fn_s_test) 35 | $(CC) scanner.o ${TEST_STATUS}.o -o $(TEST_OUTPUT) 36 | endif 37 | 38 | ifeq (${TEST_STATUS},table) 39 | $(CC) sym_tbl.o ${TEST_STATUS}.o -o $(TEST_OUTPUT) 40 | endif 41 | 42 | 43 | # main files 44 |
scanner.o: 45 | $(CC) -c $(ROOT_PATH)/analyzer/lexical/${@:.o=.c} 46 | 47 | sym_tbl.o: 48 | $(CC) -c $(ROOT_PATH)/analyzer/lexical/${@:.o=.c} 49 | 50 | # test files 51 | scanner_scan_kwd_fn_test.o: 52 | $(CC) -c $(ROOT_PATH)/tests/${@:.o=.c} 53 | 54 | scan__scan_get_CharType_CharNum_CharTok_is_Alpha_Numeric_fn_s_test.o: # name must match the entry in OBJECTS / MAIN_TARGET_DEPS 55 | $(CC) -c $(ROOT_PATH)/tests/${@:.o=.c} 56 | 57 | table.o: 58 | $(CC) -c $(ROOT_PATH)/tests/${@:.o=.c} 59 | 60 | clean: # -f keeps clean from failing when some files were never built 61 | rm -f $(OBJECTS) 62 | rm -f $(TEST_OUTPUT) 63 | -------------------------------------------------------------------------------- /tests/scan__scan_get_CharType_CharNum_CharTok_is_Alpha_Numeric_fn_s_test.c: -------------------------------------------------------------------------------- 1 | #include "../analyzer/lexical/scanner.h" 2 | 3 | #include <stdio.h> 4 | #include <stdbool.h> 5 | 6 | int main(){ 7 | // isNumeric() function 8 | bool n0 = isNumeric('0'); 9 | bool n1 = isNumeric('1'); 10 | bool n2 = isNumeric('2'); 11 | bool n3 = isNumeric('3'); 12 | bool n4 = isNumeric('4'); 13 | bool n5 = isNumeric('5'); 14 | bool n6 = isNumeric('6'); 15 | bool n7 = isNumeric('7'); 16 | bool n8 = isNumeric('8'); 17 | bool n9 = isNumeric('9'); 18 | bool nn = isNumeric('a'); 19 | bool nnn = isNumeric('z'); 20 | bool nnnn = isNumeric('A'); 21 | bool nnnnn = isNumeric('Z'); 22 | 23 | if ( n0 && n1 && n2 && n3 && n4 24 | && n5 && n6 && n7 && n8 && n9 ) 25 | puts("isNumeric function check numbers to correctly (YES)"); 26 | 27 | if ( nn || nnn || nnnn || nnnnn) 28 | puts("isNumeric function think alpha characters are number (ERROR)"); 29 | // end isNumeric 30 | 31 | // isAlpha 32 | bool a0 = isAlpha('a'); 33 | bool a1 = isAlpha('z'); 34 | bool a2 = isAlpha('A'); 35 | bool a3 = isAlpha('Z'); 36 | bool a4 = isAlpha('s'); 37 | bool a5 = isAlpha('Q'); 38 | bool a6 = isAlpha('b'); 39 | bool a7 = isAlpha('B'); 40 | bool a8 = isAlpha('Y'); 41 | bool a9 = isAlpha('y'); 42 | bool a = isAlpha('0'); 43 | bool aa = isAlpha('9'); 44 | bool aaa = isAlpha('8'); 45 | 46 | if ( a0
&& a1 && a2 && a3 && a4 47 | && a5 && a6 && a7 && a8 && a9) 48 | puts("isAlpha() works correctly (YES)"); 49 | 50 | if ( a || aa || aaa) 51 | puts("isAlpha works uncorrect ( ERROR )"); 52 | // end isAlpha 53 | 54 | int type; 55 | type = getCharType('~'); 56 | if ( type == token ) 57 | puts ( "getCharType(tok) works correct (YES)"); 58 | else 59 | puts("getCharType(tok) works uncorrect (ERROR)"); 60 | 61 | type = getCharType('0'); 62 | if ( type == number) 63 | puts("Yes"); 64 | else 65 | puts("no"); 66 | 67 | type = getCharType('9'); 68 | if ( type == number) 69 | puts("Yes"); 70 | else 71 | puts("no"); 72 | 73 | type = getCharType('5'); 74 | if ( type == number) 75 | puts("Yes"); 76 | else 77 | puts("no"); 78 | 79 | type = getCharType('A'); 80 | if(type != alpha) 81 | puts("getCharType(alpha) works uncorrec"); 82 | 83 | // puts("testing getCharNum function"); 84 | 85 | puts("testing scan() function"); 86 | 87 | if ( scan('a') == 'a' 88 | && scan('A') == 'A' 89 | && scan('Z') == 'Z' 90 | && scan('z') == 'z' 91 | && scan('q') == 'q' 92 | && scan('Q') == 'Q') 93 | { 94 | puts("scan() works correctly (YES)"); 95 | } 96 | else 97 | { 98 | puts("scan() work uncorrect ( ERRRO)"); 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /tests/scanner_scan_kwd_fn_test.c: -------------------------------------------------------------------------------- 1 | #include "../analyzer/lexical.h" 2 | #include <stdio.h> 3 | 4 | void test_kwd(char src[]){ 5 | int quantity = scan_kwd(src); 6 | switch(quantity){ 7 | case RET_KWD: 8 | puts("ret"); 9 | break; 10 | case CALL_KWD: 11 | puts("call"); 12 | break; 13 | case INC_KWD: 14 | puts("inc"); 15 | break; 16 | case DEC_KWD: 17 | puts("dec"); 18 | break; 19 | case VAR_KWD: 20 | puts("var"); 21 | break; 22 | case IF_KWD: 23 | puts("if"); 24 | break; 25 | case ELSE_KWD: 26 | puts("else"); 27 | break; 28 | case THEN_KWD: 29 | puts("then"); 30 | break; 31 | case WHILE_KWD: 32 | puts("while"); 33 | break;
34 | case FOR_KWD: 35 | puts("for"); 36 | } 37 | } 38 | 39 | int main(){ 40 | 41 | test_kwd(" inc"); 42 | test_kwd("\tdec"); 43 | test_kwd("\tcall\t"); 44 | test_kwd("var"); 45 | test_kwd("\nif"); 46 | test_kwd(" else "); 47 | test_kwd("\nthen\n"); 48 | test_kwd("\twhile"); 49 | test_kwd("for\n"); 50 | } 51 | -------------------------------------------------------------------------------- /tests/table.c: -------------------------------------------------------------------------------- 1 | #include "../analyzer/lexical/sym_tbl.h" 2 | 3 | 4 | #include <stdio.h> 5 | #include <stdbool.h> 6 | 7 | int main(){ 8 | // no check_scope function test exists here 9 | 10 | // test make_table function 11 | struct Symbol_table base = make_table("var" , "Hello , World"); 12 | 13 | struct Symbol_table after = make_table("var2" , "Hello , world too"); 14 | 15 | // test insert function 16 | 17 | insert_table(&base /* base is main table */ , 18 | after /* after insert into main(base) */); 19 | 20 | // test lookup_by_name function 21 | bool r = lookup_by_name("var2",&base); 22 | 23 | if(r) 24 | puts("Yes :)"); 25 | 26 | return 0; 27 | 28 | } 29 | --------------------------------------------------------------------------------