├── .gitattributes ├── .gitignore ├── grammer └── grammer.ebnf ├── example ├── focus.sk └── main.sk ├── include ├── error_codes.h ├── utils.h ├── token.h ├── lexer.h ├── stretchy_buffer.h └── token_type.h ├── Makefile ├── playground └── hashtable │ ├── Makefile │ ├── main.c │ ├── hashtable.h │ └── hashtable.c ├── src ├── main.c ├── token.c ├── utils.c └── lexer.c └── README.md /.gitattributes: -------------------------------------------------------------------------------- 1 | *.h linguist-language=C 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/**.o 2 | sk 3 | .DS_Store 4 | -------------------------------------------------------------------------------- /grammer/grammer.ebnf: -------------------------------------------------------------------------------- 1 | (* We will define grammers rules here *) 2 | -------------------------------------------------------------------------------- /example/focus.sk: -------------------------------------------------------------------------------- 1 | fn main(argc: int, argv: []str): int { 2 | return 0; 3 | } 4 | -------------------------------------------------------------------------------- /include/error_codes.h: -------------------------------------------------------------------------------- 1 | #ifndef SK_ERROR_CODES_H 2 | #define SK_ERROR_CODES_H 3 | 4 | typedef enum 5 | { 6 | NO_SOURCE_FILE, 7 | 8 | FILE_READ, 9 | FILE_CLOSE, 10 | 11 | INVALID_CHARACTER 12 | } error_code_T; 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /include/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef SK_UTILS_H 2 | #define SK_UTILS_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "token.h" 9 | #include "error_codes.h" 10 | 11 | char *append(const char *, const char *); 12 | 13 | char *read_from_file(const char *); 14 | 15 | void fatalf(const int, const char *, ...); 16 | 17 | void printToken(token_T); 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /example/main.sk: -------------------------------------------------------------------------------- 1 | fn add(a: int, b: int): int { 2 | return a + b; 3 | } 4 | 5 | fn main(): void { 6 | let a: int = 10; 7 | b := 20; 8 | b += 10; 9 | b -= 10; 10 | b *= 2; 11 | b /= 5; 12 | add(a, b); // This is an example of comment 13 | 14 | for { 15 | 16 | } 17 | 18 | if true { 19 | 20 | } else if a >= 10 { 21 | 22 | } else { 23 | 24 | } 25 | 26 | let c: str = "hello world"; 27 | } 28 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | TARGET = sk 2 | 3 | CC = clang 4 | INCLUDES = -I ./include 5 | CFLAGS = -Wall -Werror -std=c99 $(INCLUDES) 6 | 7 | HEADERS=$(shell find ./include -type f -name *.h) 8 | SOURCE=$(shell find ./src -type f -name *.c) 9 | OBJS=$(SOURCE:.c=.o) 10 | 11 | %.o: %.c 12 | @$(CC) $(CFLAGS) -o $@ -c $< 13 | 14 | run: $(OBJS) $(HEADERS) 15 | @$(CC) -o $(TARGET) $(OBJS) 16 | 17 | .PHONY: clean 18 | clean: 19 | @rm $(OBJS) 20 | @rm ./$(TARGET) 21 | 22 | -------------------------------------------------------------------------------- /include/token.h: -------------------------------------------------------------------------------- 1 | #ifndef SK_TOKEN_H 2 | #define SK_TOKEN_H 3 | 4 | #include 5 | 6 | #include "token_type.h" 7 | 8 | typedef struct 9 | { 10 | token_type_T type; 11 | union 12 | { 13 | int64_t integer_value; 14 | char *string_value; 15 | char *token_value; 16 | }; 17 | } token_T; 18 | 19 | token_T integer_token(token_type_T, int64_t); 20 | 21 | token_T string_token(token_type_T, char *); 22 | 23 | token_T new_token(token_type_T, char *); 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /include/lexer.h: -------------------------------------------------------------------------------- 1 | #ifndef SK_LEXER_H 2 | #define SK_LEXER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "utils.h" 11 | #include "token.h" 12 | #include "error_codes.h" 13 | 14 | typedef struct 15 | { 16 | char *input; 17 | size_t position; 18 | size_t read_position; 19 | char character; 20 | } lexer_T; 21 | 22 | lexer_T *new_lexer(char *); 23 | 24 | token_T next_token(lexer_T *); 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /playground/hashtable/Makefile: -------------------------------------------------------------------------------- 1 | TARGET = main 2 | 3 | CC = clang 4 | INCLUDES = -I . 5 | CFLAGS = -Wall -Werror -std=c99 $(INCLUDES) 6 | 7 | HEADERS=$(shell find . -type f -name "*.h") 8 | SOURCE=$(shell find . -type f -name "*.c") 9 | OBJS=$(SOURCE:.c=.o) 10 | 11 | %.o: %.c 12 | @$(CC) $(CFLAGS) -o $@ -c $< 13 | 14 | run: $(OBJS) $(HEADERS) 15 | @$(CC) -o $(TARGET) $(OBJS) 16 | 17 | debug: $(SOURCE) $(HEADERS) 18 | @$(CC) -g $(CFLAGS) -o $(TARGET) $(SOURCE) 19 | 20 | .PHONY: clean 21 | clean: 22 | @rm -f ./$(TARGET) 23 | @rm -f $(OBJS) 24 | @rm -rf ./*.dSYM 25 | -------------------------------------------------------------------------------- /src/main.c: -------------------------------------------------------------------------------- 1 | #include "token.h" 2 | #include "lexer.h" 3 | #include "utils.h" 4 | 5 | int main(int argc, char **argv) 6 | { 7 | char *fileName = argv[1]; 8 | 9 | if (argc < 2 && fileName == NULL) 10 | { 11 | fatalf(NO_SOURCE_FILE, "no sanskrit file listed\n"); 12 | } 13 | 14 | char *input = read_from_file(fileName); 15 | 16 | lexer_T *lex = new_lexer(input); 17 | 18 | while (true) 19 | { 20 | token_T token = next_token(lex); 21 | printToken(token); 22 | if (token.type == TOKEN_EOF) 23 | { 24 | break; 25 | } 26 | } 27 | 28 | return 0; 29 | } 30 | -------------------------------------------------------------------------------- /src/token.c: -------------------------------------------------------------------------------- 1 | #include "token.h" 2 | 3 | token_T integer_token(token_type_T type, int64_t value) 4 | { 5 | token_T token; 6 | token.type = type; 7 | token.integer_value = value; 8 | return token; 9 | } 10 | 11 | token_T string_token(token_type_T type, char *value) 12 | { 13 | token_T token; 14 | token.type = type; 15 | token.string_value = value; 16 | return token; 17 | } 18 | 19 | // TODO: give more meaningful name 20 | token_T new_token(token_type_T type, char *value) 21 | { 22 | token_T token; 23 | token.type = type; 24 | token.token_value = value; 25 | return token; 26 | } 27 | -------------------------------------------------------------------------------- /playground/hashtable/main.c: -------------------------------------------------------------------------------- 1 | #include "hashtable.h" 2 | 3 | #include 4 | 5 | int main(int argc, char *argv[]) 6 | { 7 | hash_table_T *hash_table = init_hash(2); 8 | 9 | printf("%d\n", (int)hash_insert(hash_table, "rat", 10)); 10 | printf("%d\n", (int)hash_insert(hash_table, "cat", 20)); 11 | if (hash_insert(hash_table, "Cat", 30) == NULL) 12 | { 13 | printf("Can't insert\n"); 14 | }; 15 | 16 | printf("%d\n", (int)hash_get(hash_table, "rat")); 17 | printf("%d\n", (int)hash_get(hash_table, "cat")); 18 | 19 | printf("%d\n", (int)hash_delete(hash_table, "cat")); 20 | if (hash_get(hash_table, "cat") == NULL) 21 | { 22 | printf("Value is null\n"); 23 | } 24 | 25 | return 0; 26 | } 27 | -------------------------------------------------------------------------------- /playground/hashtable/hashtable.h: -------------------------------------------------------------------------------- 1 | #ifndef HASH_TABLE_H 2 | #define HASH_TABLE_H 3 | 4 | #include 5 | #include 6 | 7 | typedef struct HASH_ELEMENT 8 | { 9 | char *key; 10 | void *value; 11 | int deleted; 12 | } hash_element_T; 13 | 14 | typedef struct HASH_TABLE 15 | { 16 | struct HASH_ELEMENT **elements; 17 | size_t capacity; 18 | } hash_table_T; 19 | 20 | // Public functions 21 | 22 | hash_table_T *init_hash(size_t capacity); 23 | 24 | #define hash_insert(table, key, value) \ 25 | _hash_insert((table), (key), (void *)(value)) 26 | 27 | void *hash_get(hash_table_T *table, char *key); 28 | 29 | void *hash_delete(hash_table_T *table, char *key); 30 | 31 | // Private functions 32 | void *_hash_insert(hash_table_T *table, char *key, void *value); 33 | 34 | void *_hash_insert_element(hash_table_T *table, hash_element_T *element); 35 | 36 | hash_element_T *_hash_get_element(hash_table_T *table, char *key); 37 | 38 | size_t _hash_function(hash_table_T *table, char *key); 39 | 40 | void _resize_hash_table(hash_table_T *table); 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /include/stretchy_buffer.h: -------------------------------------------------------------------------------- 1 | #ifndef SK_STRETCHY_BUFFER_H 2 | #define SK_STRETCHY_BUFFER_H 3 | 4 | #include 5 | #include 6 | 7 | #define max(x, y) ((x) >= (y) ? (x) : (y)) 8 | 9 | // Public API 10 | #define buf_len(b) ((b) ? buf__raw(b)[0] : 0) 11 | #define buf_cap(b) ((b) ? buf__raw(b)[1] : 0) 12 | #define buf_push(b, x) (buf__fit(b, 1), b[buf_len(b)] = x, buf__raw(b)[0]++) 13 | #define buf_free(b) ((b) ? free(buf__raw(b)) : 0) 14 | 15 | // Private API 16 | #define buf__raw(b) ((b) ? ((int *)(void *)(b)) - 2 : 0) 17 | #define buf__fits(b, n) (buf_len(b) + (n) <= buf_cap(b)) 18 | #define buf__fit(b, n) (buf__fits(b, n) ? 0 : ((b) = buf__grow((b), buf_len(b) + (n), sizeof(*(b))))) 19 | 20 | void *buf__grow(const void *buf, int new_len, int elem_size) 21 | { 22 | int new_cap = max(1 + 2 * buf_cap(buf), new_len); 23 | int new_size = sizeof(int) * 2 + new_cap * elem_size; 24 | int *new_buf; 25 | if (buf) 26 | { 27 | new_buf = (int *)realloc(buf__raw(buf), new_size); 28 | } 29 | else 30 | { 31 | new_buf = (int *)malloc(new_size); 32 | new_buf[0] = 0; 33 | } 34 | new_buf[1] = new_cap; 35 | return new_buf + 2; 36 | } 37 | 38 | #endif 39 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Sanskrit 2 | 3 | Sanskrit is a low level general purpose programming language 4 | 5 | _Note: This programming language is in development phase, trying to write parser right now_ 6 | 7 | # Language Pipeline (for now) 8 | 9 | Source Code -> Lexer -> Parser(recursive descent) -> LLVM -> Executable 10 | 11 | .sk file -> Tokens -> AST -> .o file -> executable 12 | 13 | # Parser implementation 14 | 15 | Parser will be handwritten recursive descent parser, although for better understanding of parser rules I will write and add EBNF grammers rules. 16 | 17 | # Language Syntax 18 | 19 | Variable Declaration 20 | 21 | ``` 22 | let a: int = 10; 23 | let b = 20; 24 | c := 20 // this can be only done inside a function 25 | ``` 26 | 27 | Conditional 28 | 29 | ``` 30 | if true { 31 | // do something 32 | } 33 | ``` 34 | 35 | ``` 36 | let a = 10; 37 | if a == 10 { 38 | // do something 39 | } else if a == 20 { 40 | // do something 41 | } else { 42 | // do something 43 | } 44 | ``` 45 | 46 | Loops 47 | 48 | ``` 49 | for { 50 | // this is an infinite for loop 51 | } 52 | ``` 53 | 54 | ``` 55 | for value <= 10 { 56 | // this is a for loop 57 | value++; 58 | } 59 | ``` 60 | 61 | ``` 62 | for i := 0; i < 10; i++ { 63 | // this is a for loop 64 | } 65 | ``` 66 | 67 | ``` 68 | for index, number := range 10 { 69 | // this is a loop 70 | } 71 | ``` 72 | 73 | Function Definition 74 | 75 | ``` 76 | 77 | fn add(a:int, b:int): int { 78 | return a + b; 79 | } 80 | 81 | ``` 82 | -------------------------------------------------------------------------------- /src/utils.c: -------------------------------------------------------------------------------- 1 | #include "utils.h" 2 | 3 | char *append(const char *dest, const char *source) 4 | { 5 | char *result = NULL; 6 | asprintf(&result, "%s%s", dest, source); 7 | return result; 8 | } 9 | 10 | char *read_from_file(const char *file_path) 11 | { 12 | FILE *f = fopen(file_path, "r"); 13 | if (f == NULL) 14 | { 15 | fatalf(FILE_READ, "Can't read file: %s\n", file_path); 16 | } 17 | 18 | if (fseek(f, 0, SEEK_END) != 0) 19 | { 20 | fatalf(FILE_READ, "Can't read file: %s\n", file_path); 21 | }; 22 | 23 | long fsize = ftell(f); 24 | rewind(f); 25 | 26 | char *string = malloc(fsize + 1); 27 | fread(string, 1, fsize, f); 28 | if (ferror(f) != 0) 29 | { 30 | fatalf(FILE_READ, "Can't read file: %s\n", file_path); 31 | } 32 | 33 | if (fclose(f) != 0) 34 | { 35 | fatalf(FILE_CLOSE, "Can't close file: %s\n", file_path); 36 | }; 37 | 38 | return string; 39 | } 40 | 41 | void fatalf(const int exit_code, const char *fmt, ...) 42 | { 43 | va_list args; 44 | va_start(args, fmt); 45 | vprintf(fmt, args); 46 | exit(exit_code); 47 | va_end(args); 48 | } 49 | 50 | void printToken(token_T token) 51 | { 52 | switch (token.type) 53 | { 54 | case TOKEN_INTEGER: 55 | { 56 | printf("Token type: %-25s Token literal: %lld\n", token_type_string[token.type], token.integer_value); 57 | break; 58 | } 59 | case TOKEN_STRING: 60 | { 61 | printf("Token type: %-25s Token literal: %s\n", token_type_string[token.type], token.string_value); 62 | break; 63 | } 64 | default: 65 | { 66 | printf("Token type: %-25s Token literal: %s\n", token_type_string[token.type], token.token_value); 67 | break; 68 | } 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /include/token_type.h: -------------------------------------------------------------------------------- 1 | #ifndef SK_TOKEN_TYPE_H 2 | #define SK_TOKEN_TYPE_H 3 | 4 | typedef enum 5 | { 6 | TOKEN_ASSIGN, 7 | TOKEN_COLON_ASSIGN, 8 | TOKEN_EQUAL, 9 | TOKEN_NOT_EQUAL, 10 | TOKEN_AND, 11 | TOKEN_OR, 12 | 13 | TOKEN_PLUS, 14 | TOKEN_PLUS_ASSIGN, 15 | TOKEN_MINUS, 16 | TOKEN_MINUS_ASSIGN, 17 | TOKEN_EXCLAMATION, 18 | TOKEN_ASTERISK, 19 | TOKEN_ASTERISK_ASSIGN, 20 | TOKEN_SLASH, 21 | TOKEN_SLASH_ASSIGN, 22 | TOKEN_INCREMENT, 23 | TOKEN_DECREMENT, 24 | 25 | TOKEN_COMMA, 26 | TOKEN_SEMICOLON, 27 | TOKEN_COLON, 28 | TOKEN_LESS_THEN, 29 | TOKEN_LESS_THEN_EQUAL, 30 | TOKEN_GREATER_THEN, 31 | TOKEN_GREATER_THEN_EQUAL, 32 | 33 | TOKEN_LPAREN, 34 | TOKEN_RPAREN, 35 | TOKEN_LBRACE, 36 | TOKEN_RBRACE, 37 | 38 | TOKEN_IDENT, 39 | TOKEN_INTEGER, 40 | TOKEN_STRING, 41 | 42 | TOKEN_FUNCTION, 43 | TOKEN_LET, 44 | TOKEN_INT, 45 | TOKEN_RETURN, 46 | TOKEN_VOID, 47 | TOKEN_FOR, 48 | TOKEN_BOOL, 49 | TOKEN_IF, 50 | TOKEN_ELSE, 51 | TOKEN_RANGE, 52 | TOKEN_STR, 53 | 54 | TOKEN_ILLEGAL, 55 | TOKEN_EOF, 56 | } token_type_T; 57 | 58 | static char *token_type_string[] = { 59 | "TOKEN_ASSIGN", 60 | "TOKEN_COLON_ASSIGN", 61 | "TOKEN_EQUAL", 62 | "TOKEN_NOT_EQUAL", 63 | "TOKEN_AND", 64 | "TOKEN_OR", 65 | 66 | "TOKEN_PLUS", 67 | "TOKEN_PLUS_ASSIGN", 68 | "TOKEN_MINUS", 69 | "TOKEN_MINUS_ASSIGN", 70 | "TOKEN_EXCLAMATION", 71 | "TOKEN_ASTERISK", 72 | "TOKEN_ASTERISK_ASSIGN", 73 | "TOKEN_SLASH", 74 | "TOKEN_SLASH_ASSIGN", 75 | "TOKEN_INCREMENT", 76 | "TOKEN_DECREMENT", 77 | 78 | "TOKEN_COMMA", 79 | "TOKEN_SEMICOLON", 80 | "TOKEN_COLON", 81 | "TOKEN_LESS_THEN", 82 | "TOKEN_LESS_THEN_EQUAL", 83 | "TOKEN_GREATER_THEN", 84 | "TOKEN_GREATER_THEN_EQUAL", 85 | 86 | "TOKEN_LPAREN", 87 | "TOKEN_RPAREN", 88 | "TOKEN_LBRACE", 89 | "TOKEN_RBRACE", 90 | 91 | "TOKEN_IDENT", 92 | "TOKEN_INTEGER", 93 | "TOKEN_STRING", 94 | 95 | "TOKEN_FUNCTION", 96 | "TOKEN_LET", 97 | "TOKEN_INT", 98 | "TOKEN_RETURN", 99 | "TOKEN_VOID", 100 | "TOKEN_FOR", 101 | "TOKEN_BOOL", 102 | "TOKEN_IF", 103 | "TOKEN_ELSE", 104 | "TOKEN_RANGE", 105 | "TOKEN_STR", 106 | 107 | "TOKEN_ILLEGAL", 108 | "TOKEN_EOF"}; 109 | 110 | #endif 111 | -------------------------------------------------------------------------------- /playground/hashtable/hashtable.c: -------------------------------------------------------------------------------- 1 | // TODO: ADD ability to set int as key 2 | 3 | #include "hashtable.h" 4 | 5 | hash_table_T *init_hash(size_t capacity) 6 | { 7 | hash_table_T *hash_table = calloc(1, sizeof(struct HASH_TABLE)); 8 | hash_table->elements = calloc(capacity, sizeof(struct HASH_ELEMENT)); 9 | hash_table->capacity = capacity; 10 | 11 | return hash_table; 12 | } 13 | 14 | void *hash_get(hash_table_T *table, char *key) 15 | { 16 | void *value = NULL; 17 | 18 | hash_element_T *element = _hash_get_element(table, key); 19 | if (element) 20 | { 21 | value = element->value; 22 | } 23 | 24 | return value; 25 | } 26 | 27 | void *hash_delete(hash_table_T *table, char *key) 28 | { 29 | void *value = NULL; 30 | 31 | hash_element_T *element = _hash_get_element(table, key); 32 | if (element) 33 | { 34 | element->deleted = 1; 35 | value = element->value; 36 | } 37 | 38 | return value; 39 | } 40 | 41 | // Private function 42 | 43 | void *_hash_insert(hash_table_T *table, char *key, void *value) 44 | { 45 | hash_element_T *element = calloc(1, sizeof(struct HASH_ELEMENT)); 46 | element->key = key; 47 | element->value = value; 48 | element->deleted = 0; 49 | 50 | void *inserted = _hash_insert_element(table, element); 51 | if (inserted == NULL) 52 | { 53 | _resize_hash_table(table); 54 | inserted = _hash_insert_element(table, element); 55 | } 56 | 57 | return inserted; 58 | } 59 | 60 | void *_hash_insert_element(hash_table_T *table, hash_element_T *element) 61 | { 62 | void *value = NULL; 63 | 64 | size_t hash_value = _hash_function(table, element->key); 65 | 66 | for (int count = 0; count < table->capacity; count++) 67 | { 68 | int index = (hash_value + count * count) % table->capacity; 69 | hash_element_T *elm = table->elements[index]; 70 | 71 | if (!elm || (elm && elm->deleted && elm->key == element->key)) 72 | { 73 | table->elements[index] = element; 74 | value = element->value; 75 | break; 76 | } 77 | } 78 | 79 | return value; 80 | } 81 | 82 | hash_element_T *_hash_get_element(hash_table_T *table, char *key) 83 | { 84 | size_t hash_value = _hash_function(table, key); 85 | 86 | hash_element_T *element = NULL; 87 | 88 | for (int count = 0; count < table->capacity; count++) 89 | { 90 | int index = (hash_value + count * count) % table->capacity; 91 | hash_element_T *elm = table->elements[index]; 92 | 93 | if (elm && !elm->deleted && elm->key == key) 94 | { 95 | element = elm; 96 | break; 97 | } 98 | } 99 | 100 | return element; 101 | } 102 | 103 | size_t _hash_function(hash_table_T *table, char *key) 104 | { 105 | size_t number = 1; 106 | 107 | for (size_t i = 0; i < strlen(key); i++) 108 | { 109 | number += key[i]; 110 | } 111 | 112 | return number % table->capacity; 113 | } 114 | 115 | void _resize_hash_table(hash_table_T *table) 116 | { 117 | hash_table_T *new_hash_table = init_hash(table->capacity * 2); 118 | 119 | for (int index = 0; index < table->capacity; index++) 120 | { 121 | hash_element_T *element = table->elements[index]; 122 | if (element) 123 | { 124 | _hash_insert_element(new_hash_table, element); 125 | } 126 | } 127 | 128 | free(table->elements); 129 | 130 | table->elements = new_hash_table->elements; 131 | table->capacity = new_hash_table->capacity; 132 | 133 | free(new_hash_table); 134 | } 135 | -------------------------------------------------------------------------------- /src/lexer.c: -------------------------------------------------------------------------------- 1 | #include "lexer.h" 2 | 3 | static inline char read_next_character(lexer_T *); 4 | static inline char peek_character(lexer_T *); 5 | static char *read_ident(lexer_T *); 6 | static int64_t read_integer(lexer_T *); 7 | static char *read_string(lexer_T *); 8 | static void skip_whitespace(lexer_T *); 9 | 10 | lexer_T *new_lexer(char *input) 11 | { 12 | lexer_T *lex = (lexer_T *)malloc(sizeof(lexer_T)); 13 | lex->input = input; 14 | lex->position = 0; 15 | lex->read_position = 1; 16 | lex->character = lex->input[lex->position]; 17 | return lex; 18 | } 19 | 20 | static inline char read_next_character(lexer_T *lex) 21 | { 22 | lex->position = lex->read_position; 23 | lex->read_position++; 24 | lex->character = lex->input[lex->position]; 25 | return lex->character; 26 | } 27 | 28 | static inline char peek_character(lexer_T *lex) 29 | { 30 | size_t peek_position = lex->read_position; 31 | return lex->input[peek_position]; 32 | } 33 | 34 | static char *read_ident(lexer_T *lex) 35 | { 36 | char *literal = ""; 37 | while (true) 38 | { 39 | literal = append(literal, &lex->character); 40 | if (!isalpha(peek_character(lex))) 41 | { 42 | break; 43 | } 44 | read_next_character(lex); 45 | } 46 | return literal; 47 | } 48 | 49 | static int64_t read_integer(lexer_T *lex) 50 | { 51 | int64_t literal = 0; 52 | while (true) 53 | { 54 | literal *= 10; 55 | literal += lex->character - '0'; 56 | if (!isdigit(peek_character(lex))) 57 | { 58 | break; 59 | } 60 | read_next_character(lex); 61 | } 62 | return literal; 63 | } 64 | 65 | static char *read_string(lexer_T *lex) 66 | { 67 | char *literal = ""; 68 | while (true) 69 | { 70 | read_next_character(lex); 71 | literal = append(literal, &lex->character); 72 | if (!peek_character(lex) || peek_character(lex) == '"') 73 | { 74 | read_next_character(lex); 75 | break; 76 | } 77 | } 78 | return literal; 79 | } 80 | 81 | static void skip_whitespace(lexer_T *lex) 82 | { 83 | while (lex->character == ' ' || lex->character == '\t' || lex->character == '\n' || lex->character == '\r') 84 | { 85 | read_next_character(lex); 86 | } 87 | } 88 | 89 | token_T next_token(lexer_T *lex) 90 | { 91 | token_T token; 92 | 93 | skip_whitespace(lex); 94 | 95 | switch (lex->character) 96 | { 97 | case '=': 98 | { 99 | switch (peek_character(lex)) 100 | { 101 | case '=': 102 | { 103 | token = new_token(TOKEN_EQUAL, "=="); 104 | read_next_character(lex); 105 | break; 106 | } 107 | default: 108 | { 109 | token = new_token(TOKEN_ASSIGN, "="); 110 | } 111 | } 112 | break; 113 | } 114 | case '&': 115 | { 116 | switch (peek_character(lex)) 117 | { 118 | case '&': 119 | { 120 | token = new_token(TOKEN_AND, "&&"); 121 | read_next_character(lex); 122 | break; 123 | } 124 | default: 125 | { 126 | // Single & character is illegal for now 127 | token = new_token(TOKEN_ILLEGAL, append("", &lex->character)); 128 | } 129 | } 130 | break; 131 | } 132 | case '|': 133 | { 134 | switch (peek_character(lex)) 135 | { 136 | case '|': 137 | { 138 | token = new_token(TOKEN_OR, "||"); 139 | read_next_character(lex); 140 | break; 141 | } 142 | default: 143 | { 144 | // Single | character is illegal for now 145 | token = new_token(TOKEN_ILLEGAL, append("", &lex->character)); 146 | } 147 | } 148 | break; 149 | } 150 | case '+': 151 | { 152 | switch (peek_character(lex)) 153 | { 154 | case '+': 155 | { 156 | token = new_token(TOKEN_INCREMENT, "++"); 157 | read_next_character(lex); 158 | break; 159 | } 160 | case '=': 161 | { 162 | token = new_token(TOKEN_PLUS_ASSIGN, "+="); 163 | read_next_character(lex); 164 | break; 165 | } 166 | default: 167 | { 168 | token = new_token(TOKEN_PLUS, "+"); 169 | } 170 | } 171 | break; 172 | } 173 | case '-': 174 | { 175 | switch (peek_character(lex)) 176 | { 177 | case '-': 178 | { 179 | token = new_token(TOKEN_DECREMENT, "--"); 180 | read_next_character(lex); 181 | break; 182 | } 183 | case '=': 184 | { 185 | token = new_token(TOKEN_MINUS_ASSIGN, "-="); 186 | read_next_character(lex); 187 | break; 188 | } 189 | default: 190 | { 191 | token = new_token(TOKEN_MINUS, "-"); 192 | } 193 | } 194 | break; 195 | } 196 | case '!': 197 | { 198 | switch (peek_character(lex)) 199 | { 200 | case '=': 201 | { 202 | token = new_token(TOKEN_NOT_EQUAL, "!="); 203 | read_next_character(lex); 204 | break; 205 | } 206 | default: 207 | { 208 | token = new_token(TOKEN_EXCLAMATION, "!"); 209 | } 210 | } 211 | break; 212 | } 213 | case '*': 214 | { 215 | switch (peek_character(lex)) 216 | { 217 | case '=': 218 | { 219 | token = new_token(TOKEN_ASTERISK_ASSIGN, "*="); 220 | read_next_character(lex); 221 | break; 222 | } 223 | default: 224 | { 225 | token = new_token(TOKEN_ASTERISK, "*"); 226 | } 227 | } 228 | break; 229 | } 230 | case '/': 231 | { 232 | switch (peek_character(lex)) 233 | { 234 | case '=': 235 | { 236 | token = new_token(TOKEN_SLASH_ASSIGN, "/="); 237 | read_next_character(lex); 238 | break; 239 | } 240 | default: 241 | { 242 | token = new_token(TOKEN_SLASH, "/"); 243 | } 244 | } 245 | break; 246 | } 247 | case ',': 248 | { 249 | token = new_token(TOKEN_COMMA, ","); 250 | break; 251 | } 252 | case ';': 253 | { 254 | token = new_token(TOKEN_SEMICOLON, ";"); 255 | break; 256 | } 257 | case ':': 258 | { 259 | switch (peek_character(lex)) 260 | { 261 | case '=': 262 | { 263 | token = new_token(TOKEN_COLON_ASSIGN, ":="); 264 | read_next_character(lex); 265 | break; 266 | } 267 | default: 268 | { 269 | token = new_token(TOKEN_COLON, ":"); 270 | } 271 | } 272 | break; 273 | } 274 | case '<': 275 | { 276 | switch (peek_character(lex)) 277 | { 278 | case '=': 279 | { 280 | token = new_token(TOKEN_LESS_THEN_EQUAL, "<="); 281 | read_next_character(lex); 282 | break; 283 | } 284 | default: 285 | { 286 | token = new_token(TOKEN_LESS_THEN, "<"); 287 | } 288 | } 289 | break; 290 | } 291 | case '>': 292 | { 293 | switch (peek_character(lex)) 294 | { 295 | case '=': 296 | { 297 | token = new_token(TOKEN_GREATER_THEN_EQUAL, ">="); 298 | read_next_character(lex); 299 | break; 300 | } 301 | default: 302 | { 303 | token = new_token(TOKEN_GREATER_THEN, ">"); 304 | } 305 | } 306 | break; 307 | } 308 | case '(': 309 | { 310 | token = new_token(TOKEN_LPAREN, "("); 311 | break; 312 | } 313 | case ')': 314 | { 315 | token = new_token(TOKEN_RPAREN, ")"); 316 | break; 317 | } 318 | case '{': 319 | { 320 | token = new_token(TOKEN_LBRACE, "{"); 321 | break; 322 | } 323 | case '}': 324 | { 325 | token = new_token(TOKEN_RBRACE, "}"); 326 | break; 327 | } 328 | case '"': 329 | { 330 | char *literal = read_string(lex); 331 | token = string_token(TOKEN_STRING, literal); 332 | break; 333 | } 334 | case 0: 335 | { 336 | token = new_token(TOKEN_EOF, "EOF"); 337 | break; 338 | } 339 | default: 340 | { 341 | if (isalpha(lex->character)) 342 | { 343 | char *literal = read_ident(lex); 344 | 345 | // TODO: use string interning for comparing string 346 | if (!strncmp(literal, "let", 3)) 347 | { 348 | token = new_token(TOKEN_LET, literal); 349 | break; 350 | } 351 | 352 | if (!strncmp(literal, "int", 3)) 353 | { 354 | token = new_token(TOKEN_INT, literal); 355 | break; 356 | } 357 | 358 | if (!strncmp(literal, "return", 6)) 359 | { 360 | token = new_token(TOKEN_RETURN, literal); 361 | break; 362 | } 363 | 364 | if (!strncmp(literal, "void", 4)) 365 | { 366 | token = new_token(TOKEN_VOID, literal); 367 | break; 368 | } 369 | 370 | if (!strncmp(literal, "for", 3)) 371 | { 372 | token = new_token(TOKEN_FOR, literal); 373 | break; 374 | } 375 | 376 | if (!strncmp(literal, "true", 4)) 377 | { 378 | token = new_token(TOKEN_BOOL, literal); 379 | break; 380 | } 381 | 382 | if (!strncmp(literal, "false", 4)) 383 | { 384 | token = new_token(TOKEN_BOOL, literal); 385 | break; 386 | } 387 | 388 | if (!strncmp(literal, "if", 2)) 389 | { 390 | token = new_token(TOKEN_IF, literal); 391 | break; 392 | } 393 | 394 | if (!strncmp(literal, "else", 4)) 395 | { 396 | token = new_token(TOKEN_ELSE, literal); 397 | break; 398 | } 399 | 400 | if (!strncmp(literal, "range", 5)) 401 | { 402 | token = new_token(TOKEN_RANGE, literal); 403 | break; 404 | } 405 | 406 | if (!strncmp(literal, "str", 3)) 407 | { 408 | token = new_token(TOKEN_STR, literal); 409 | break; 410 | } 411 | 412 | token = new_token(TOKEN_IDENT, literal); 413 | break; 414 | } 415 | 416 | if (isdigit(lex->character)) 417 | { 418 | int64_t integer = read_integer(lex); 419 | token = integer_token(TOKEN_INTEGER, integer); 420 | break; 421 | } 422 | 423 | token = new_token(TOKEN_ILLEGAL, append("", &lex->character)); 424 | break; 425 | } 426 | } 427 | read_next_character(lex); 428 | return token; 429 | } 430 | --------------------------------------------------------------------------------