├── .gitattributes
├── .gitignore
├── grammer
    └── grammer.ebnf
├── example
    ├── focus.sk
    └── main.sk
├── include
    ├── error_codes.h
    ├── utils.h
    ├── token.h
    ├── lexer.h
    ├── stretchy_buffer.h
    └── token_type.h
├── Makefile
├── playground
    └── hashtable
    │   ├── Makefile
    │   ├── main.c
    │   ├── hashtable.h
    │   └── hashtable.c
├── src
    ├── main.c
    ├── token.c
    ├── utils.c
    └── lexer.c
└── README.md


/.gitattributes:
--------------------------------------------------------------------------------
1 | *.h linguist-language=C
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | **/**.o
2 | sk
3 | .DS_Store
4 | 


--------------------------------------------------------------------------------
/grammer/grammer.ebnf:
--------------------------------------------------------------------------------
1 | (* We will define grammar rules here *)
2 | 


--------------------------------------------------------------------------------
/example/focus.sk:
--------------------------------------------------------------------------------
1 | fn main(argc: int, argv: []str): int {
2 |   return 0;
3 | }
4 | 


--------------------------------------------------------------------------------
/include/error_codes.h:
--------------------------------------------------------------------------------
 1 | #ifndef SK_ERROR_CODES_H
 2 | #define SK_ERROR_CODES_H
 3 | 
 4 | typedef enum
 5 | {
 6 |   NO_SOURCE_FILE,
 7 | 
 8 |   FILE_READ,
 9 |   FILE_CLOSE,
10 | 
11 |   INVALID_CHARACTER
12 | } error_code_T;
13 | 
14 | #endif
15 | 


--------------------------------------------------------------------------------
/include/utils.h:
--------------------------------------------------------------------------------
 1 | #ifndef SK_UTILS_H
 2 | #define SK_UTILS_H
 3 | 
 4 | #include <stdio.h>
 5 | #include <stdlib.h>
 6 | #include <stdarg.h>
 7 | 
 8 | #include "token.h"
 9 | #include "error_codes.h"
10 | 
11 | char *append(const char *, const char *);
12 | 
13 | char *read_from_file(const char *);
14 | 
15 | void fatalf(const int, const char *, ...);
16 | 
17 | void printToken(token_T);
18 | 
19 | #endif
20 | 


--------------------------------------------------------------------------------
/example/main.sk:
--------------------------------------------------------------------------------
 1 | fn add(a: int, b: int): int {
 2 |   return a + b;
 3 | }
 4 | 
 5 | fn main(): void {
 6 |   let a: int = 10;
 7 |   b := 20;
 8 |   b += 10;
 9 |   b -= 10;
10 |   b *= 2;
11 |   b /= 5;
12 |   add(a, b); // This is an example of comment
13 | 
14 |   for {
15 | 
16 |   }
17 | 
18 |   if true {
19 | 
20 |   } else if a >= 10 {
21 | 
22 |   } else {
23 |    
24 |   }
25 | 
26 |   let c: str = "hello world";
27 | }
28 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | TARGET = sk
 2 | 
 3 | CC = clang
 4 | INCLUDES = -I ./include
 5 | CFLAGS = -Wall -Werror -std=c99 $(INCLUDES)
 6 | 
 7 | HEADERS=$(shell find ./include -type f -name *.h)
 8 | SOURCE=$(shell find ./src -type f -name *.c)
 9 | OBJS=$(SOURCE:.c=.o)
10 | 
11 | %.o: %.c
12 | 	@$(CC) $(CFLAGS) -o $@ -c $<
13 | 
14 | run: $(OBJS) $(HEADERS)
15 | 	@$(CC) -o $(TARGET) $(OBJS)
16 | 
17 | .PHONY: clean
18 | clean:
19 | 	@rm $(OBJS)
20 | 	@rm ./$(TARGET)
21 | 
22 | 


--------------------------------------------------------------------------------
/include/token.h:
--------------------------------------------------------------------------------
 1 | #ifndef SK_TOKEN_H
 2 | #define SK_TOKEN_H
 3 | 
 4 | #include <stdint.h>
 5 | 
 6 | #include "token_type.h"
 7 | 
 8 | typedef struct
 9 | {
10 |   token_type_T type;
11 |   union
12 |   {
13 |     int64_t integer_value;
14 |     char *string_value;
15 |     char *token_value;
16 |   };
17 | } token_T;
18 | 
19 | token_T integer_token(token_type_T, int64_t);
20 | 
21 | token_T string_token(token_type_T, char *);
22 | 
23 | token_T new_token(token_type_T, char *);
24 | 
25 | #endif
26 | 


--------------------------------------------------------------------------------
/include/lexer.h:
--------------------------------------------------------------------------------
 1 | #ifndef SK_LEXER_H
 2 | #define SK_LEXER_H
 3 | 
 4 | #include <stdlib.h>
 5 | #include <ctype.h>
 6 | #include <stdbool.h>
 7 | #include <string.h>
 8 | #include <stdint.h>
 9 | 
10 | #include "utils.h"
11 | #include "token.h"
12 | #include "error_codes.h"
13 | 
14 | typedef struct
15 | {
16 |   char *input;
17 |   size_t position;
18 |   size_t read_position;
19 |   char character;
20 | } lexer_T;
21 | 
22 | lexer_T *new_lexer(char *);
23 | 
24 | token_T next_token(lexer_T *);
25 | 
26 | #endif
27 | 


--------------------------------------------------------------------------------
/playground/hashtable/Makefile:
--------------------------------------------------------------------------------
 1 | TARGET = main
 2 | 
 3 | CC = clang
 4 | INCLUDES = -I .
 5 | CFLAGS = -Wall -Werror -std=c99 $(INCLUDES)
 6 | 
 7 | HEADERS=$(shell find . -type f -name "*.h")
 8 | SOURCE=$(shell find . -type f -name "*.c")
 9 | OBJS=$(SOURCE:.c=.o)
10 | 
11 | %.o: %.c
12 | 	@$(CC) $(CFLAGS) -o $@ -c $<
13 | 
14 | run: $(OBJS) $(HEADERS)
15 | 	@$(CC) -o $(TARGET) $(OBJS)
16 | 
17 | debug: $(SOURCE) $(HEADERS)
18 | 	@$(CC) -g $(CFLAGS) -o $(TARGET) $(SOURCE)
19 | 
20 | .PHONY: clean
21 | clean:
22 | 	@rm -f ./$(TARGET)
23 | 	@rm -f $(OBJS)
24 | 	@rm -rf ./*.dSYM
25 | 


--------------------------------------------------------------------------------
/src/main.c:
--------------------------------------------------------------------------------
 1 | #include "token.h"
 2 | #include "lexer.h"
 3 | #include "utils.h"
 4 | 
 5 | int main(int argc, char **argv)
 6 | {
 7 |   char *fileName = argv[1];
 8 | 
 9 |   if (argc < 2 && fileName == NULL)
10 |   {
11 |     fatalf(NO_SOURCE_FILE, "no sanskrit file listed\n");
12 |   }
13 | 
14 |   char *input = read_from_file(fileName);
15 | 
16 |   lexer_T *lex = new_lexer(input);
17 | 
18 |   while (true)
19 |   {
20 |     token_T token = next_token(lex);
21 |     printToken(token);
22 |     if (token.type == TOKEN_EOF)
23 |     {
24 |       break;
25 |     }
26 |   }
27 | 
28 |   return 0;
29 | }
30 | 


--------------------------------------------------------------------------------
/src/token.c:
--------------------------------------------------------------------------------
 1 | #include "token.h"
 2 | 
 3 | token_T integer_token(token_type_T type, int64_t value)
 4 | {
 5 |   token_T token;
 6 |   token.type = type;
 7 |   token.integer_value = value;
 8 |   return token;
 9 | }
10 | 
11 | token_T string_token(token_type_T type, char *value)
12 | {
13 |   token_T token;
14 |   token.type = type;
15 |   token.string_value = value;
16 |   return token;
17 | }
18 | 
19 | // TODO: give more meaningful name
20 | token_T new_token(token_type_T type, char *value)
21 | {
22 |   token_T token;
23 |   token.type = type;
24 |   token.token_value = value;
25 |   return token;
26 | }
27 | 


--------------------------------------------------------------------------------
/playground/hashtable/main.c:
--------------------------------------------------------------------------------
 1 | #include "hashtable.h"
 2 | 
 3 | #include <stdio.h>
 4 | 
 5 | int main(int argc, char *argv[])
 6 | {
 7 |   hash_table_T *hash_table = init_hash(2);
 8 | 
 9 |   printf("%d\n", (int)hash_insert(hash_table, "rat", 10));
10 |   printf("%d\n", (int)hash_insert(hash_table, "cat", 20));
11 |   if (hash_insert(hash_table, "Cat", 30) == NULL)
12 |   {
13 |     printf("Can't insert\n");
14 |   };
15 | 
16 |   printf("%d\n", (int)hash_get(hash_table, "rat"));
17 |   printf("%d\n", (int)hash_get(hash_table, "cat"));
18 | 
19 |   printf("%d\n", (int)hash_delete(hash_table, "cat"));
20 |   if (hash_get(hash_table, "cat") == NULL)
21 |   {
22 |     printf("Value is null\n");
23 |   }
24 | 
25 |   return 0;
26 | }
27 | 


--------------------------------------------------------------------------------
/playground/hashtable/hashtable.h:
--------------------------------------------------------------------------------
 1 | #ifndef HASH_TABLE_H
 2 | #define HASH_TABLE_H
 3 | 
 4 | #include <stdlib.h>
 5 | #include <string.h>
 6 | 
 7 | typedef struct HASH_ELEMENT
 8 | {
 9 |   char *key;
10 |   void *value;
11 |   int deleted;
12 | } hash_element_T;
13 | 
14 | typedef struct HASH_TABLE
15 | {
16 |   struct HASH_ELEMENT **elements;
17 |   size_t capacity;
18 | } hash_table_T;
19 | 
20 | // Public functions
21 | 
22 | hash_table_T *init_hash(size_t capacity);
23 | 
24 | #define hash_insert(table, key, value) \
25 |   _hash_insert((table), (key), (void *)(value))
26 | 
27 | void *hash_get(hash_table_T *table, char *key);
28 | 
29 | void *hash_delete(hash_table_T *table, char *key);
30 | 
31 | // Private functions
32 | void *_hash_insert(hash_table_T *table, char *key, void *value);
33 | 
34 | void *_hash_insert_element(hash_table_T *table, hash_element_T *element);
35 | 
36 | hash_element_T *_hash_get_element(hash_table_T *table, char *key);
37 | 
38 | size_t _hash_function(hash_table_T *table, char *key);
39 | 
40 | void _resize_hash_table(hash_table_T *table);
41 | 
42 | #endif
43 | 


--------------------------------------------------------------------------------
/include/stretchy_buffer.h:
--------------------------------------------------------------------------------
 1 | #ifndef SK_STRETCHY_BUFFER_H
 2 | #define SK_STRETCHY_BUFFER_H
 3 | 
 4 | #include <stdio.h>
 5 | #include <stdlib.h>
 6 | 
 7 | #define max(x, y) ((x) >= (y) ? (x) : (y))
 8 | 
 9 | // Public API
10 | #define buf_len(b) ((b) ? buf__raw(b)[0] : 0)
11 | #define buf_cap(b) ((b) ? buf__raw(b)[1] : 0)
12 | #define buf_push(b, x) (buf__fit(b, 1), b[buf_len(b)] = x, buf__raw(b)[0]++)
13 | #define buf_free(b) ((b) ? free(buf__raw(b)) : 0)
14 | 
15 | // Private API
16 | #define buf__raw(b) ((b) ? ((int *)(void *)(b)) - 2 : 0)
17 | #define buf__fits(b, n) (buf_len(b) + (n) <= buf_cap(b))
18 | #define buf__fit(b, n) (buf__fits(b, n) ? 0 : ((b) = buf__grow((b), buf_len(b) + (n), sizeof(*(b)))))
19 | 
20 | void *buf__grow(const void *buf, int new_len, int elem_size)
21 | {
22 |   int new_cap = max(1 + 2 * buf_cap(buf), new_len);
23 |   int new_size = sizeof(int) * 2 + new_cap * elem_size;
24 |   int *new_buf;
25 |   if (buf)
26 |   {
27 |     new_buf = (int *)realloc(buf__raw(buf), new_size);
28 |   }
29 |   else
30 |   {
31 |     new_buf = (int *)malloc(new_size);
32 |     new_buf[0] = 0;
33 |   }
34 |   new_buf[1] = new_cap;
35 |   return new_buf + 2;
36 | }
37 | 
38 | #endif
39 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Sanskrit
 2 | 
 3 | Sanskrit is a low-level, general-purpose programming language.
 4 | 
 5 | _Note: This programming language is still in development; the parser is currently being written._
 6 | 
 7 | # Language Pipeline (for now)
 8 | 
 9 | Source Code -> Lexer -> Parser(recursive descent) -> LLVM -> Executable
10 | 
11 | .sk file -> Tokens -> AST -> .o file -> executable
12 | 
13 | # Parser implementation
14 | 
15 | The parser will be a handwritten recursive descent parser. To make the parser rules easier to understand, I will also write and add EBNF grammar rules.
16 | 
17 | # Language Syntax
18 | 
19 | Variable Declaration
20 | 
21 | ```
22 | let a: int = 10;
23 | let b = 20;
24 | c := 20 // this can only be done inside a function
25 | ```
26 | 
27 | Conditional
28 | 
29 | ```
30 | if true {
31 |   // do something
32 | }
33 | ```
34 | 
35 | ```
36 | let a = 10;
37 | if a == 10 {
38 |   // do something
39 | } else if a == 20 {
40 |   // do something
41 | } else {
42 |   // do something
43 | }
44 | ```
45 | 
46 | Loops
47 | 
48 | ```
49 | for {
50 |   // this is an infinite for loop
51 | }
52 | ```
53 | 
54 | ```
55 | for value <= 10 {
56 |   // this is a for loop
57 |   value++;
58 | }
59 | ```
60 | 
61 | ```
62 | for i := 0; i < 10; i++ {
63 |   // this is a for loop
64 | }
65 | ```
66 | 
67 | ```
68 | for index, number := range 10 {
69 |   // this is a loop
70 | }
71 | ```
72 | 
73 | Function Definition
74 | 
75 | ```
76 | 
77 | fn add(a:int, b:int): int {
78 |   return a + b;
79 | }
80 | 
81 | ```
82 | 


--------------------------------------------------------------------------------
/src/utils.c:
--------------------------------------------------------------------------------
 1 | #include "utils.h"
 2 | 
 3 | char *append(const char *dest, const char *source)
 4 | {
 5 |   char *result = NULL;
 6 |   asprintf(&result, "%s%s", dest, source);
 7 |   return result;
 8 | }
 9 | 
10 | char *read_from_file(const char *file_path)
11 | {
12 |   FILE *f = fopen(file_path, "r");
13 |   if (f == NULL)
14 |   {
15 |     fatalf(FILE_READ, "Can't read file: %s\n", file_path);
16 |   }
17 | 
18 |   if (fseek(f, 0, SEEK_END) != 0)
19 |   {
20 |     fatalf(FILE_READ, "Can't read file: %s\n", file_path);
21 |   };
22 | 
23 |   long fsize = ftell(f);
24 |   rewind(f);
25 | 
26 |   char *string = malloc(fsize + 1);
27 |   fread(string, 1, fsize, f);
28 |   if (ferror(f) != 0)
29 |   {
30 |     fatalf(FILE_READ, "Can't read file: %s\n", file_path);
31 |   }
32 | 
33 |   if (fclose(f) != 0)
34 |   {
35 |     fatalf(FILE_CLOSE, "Can't close file: %s\n", file_path);
36 |   };
37 | 
38 |   return string;
39 | }
40 | 
// Prints a printf-style diagnostic and terminates the process with
// `exit_code`. This function does not return.
//
// The message is written to stderr so that it cannot get mixed into regular
// program output on stdout.
void fatalf(const int exit_code, const char *fmt, ...)
{
  va_list args;
  va_start(args, fmt);
  vfprintf(stderr, fmt, args);
  // va_end has to run before exit(); placed after it, it is never reached.
  va_end(args);
  exit(exit_code);
}
49 | 
50 | void printToken(token_T token)
51 | {
52 |   switch (token.type)
53 |   {
54 |   case TOKEN_INTEGER:
55 |   {
56 |     printf("Token type: %-25s Token literal: %lld\n", token_type_string[token.type], token.integer_value);
57 |     break;
58 |   }
59 |   case TOKEN_STRING:
60 |   {
61 |     printf("Token type: %-25s Token literal: %s\n", token_type_string[token.type], token.string_value);
62 |     break;
63 |   }
64 |   default:
65 |   {
66 |     printf("Token type: %-25s Token literal: %s\n", token_type_string[token.type], token.token_value);
67 |     break;
68 |   }
69 |   }
70 | }
71 | 


--------------------------------------------------------------------------------
/include/token_type.h:
--------------------------------------------------------------------------------
  1 | #ifndef SK_TOKEN_TYPE_H
  2 | #define SK_TOKEN_TYPE_H
  3 | 
  4 | typedef enum
  5 | {
  6 |   TOKEN_ASSIGN,
  7 |   TOKEN_COLON_ASSIGN,
  8 |   TOKEN_EQUAL,
  9 |   TOKEN_NOT_EQUAL,
 10 |   TOKEN_AND,
 11 |   TOKEN_OR,
 12 | 
 13 |   TOKEN_PLUS,
 14 |   TOKEN_PLUS_ASSIGN,
 15 |   TOKEN_MINUS,
 16 |   TOKEN_MINUS_ASSIGN,
 17 |   TOKEN_EXCLAMATION,
 18 |   TOKEN_ASTERISK,
 19 |   TOKEN_ASTERISK_ASSIGN,
 20 |   TOKEN_SLASH,
 21 |   TOKEN_SLASH_ASSIGN,
 22 |   TOKEN_INCREMENT,
 23 |   TOKEN_DECREMENT,
 24 | 
 25 |   TOKEN_COMMA,
 26 |   TOKEN_SEMICOLON,
 27 |   TOKEN_COLON,
 28 |   TOKEN_LESS_THEN,
 29 |   TOKEN_LESS_THEN_EQUAL,
 30 |   TOKEN_GREATER_THEN,
 31 |   TOKEN_GREATER_THEN_EQUAL,
 32 | 
 33 |   TOKEN_LPAREN,
 34 |   TOKEN_RPAREN,
 35 |   TOKEN_LBRACE,
 36 |   TOKEN_RBRACE,
 37 | 
 38 |   TOKEN_IDENT,
 39 |   TOKEN_INTEGER,
 40 |   TOKEN_STRING,
 41 | 
 42 |   TOKEN_FUNCTION,
 43 |   TOKEN_LET,
 44 |   TOKEN_INT,
 45 |   TOKEN_RETURN,
 46 |   TOKEN_VOID,
 47 |   TOKEN_FOR,
 48 |   TOKEN_BOOL,
 49 |   TOKEN_IF,
 50 |   TOKEN_ELSE,
 51 |   TOKEN_RANGE,
 52 |   TOKEN_STR,
 53 | 
 54 |   TOKEN_ILLEGAL,
 55 |   TOKEN_EOF,
 56 | } token_type_T;
 57 | 
 58 | static char *token_type_string[] = {
 59 |     "TOKEN_ASSIGN",
 60 |     "TOKEN_COLON_ASSIGN",
 61 |     "TOKEN_EQUAL",
 62 |     "TOKEN_NOT_EQUAL",
 63 |     "TOKEN_AND",
 64 |     "TOKEN_OR",
 65 | 
 66 |     "TOKEN_PLUS",
 67 |     "TOKEN_PLUS_ASSIGN",
 68 |     "TOKEN_MINUS",
 69 |     "TOKEN_MINUS_ASSIGN",
 70 |     "TOKEN_EXCLAMATION",
 71 |     "TOKEN_ASTERISK",
 72 |     "TOKEN_ASTERISK_ASSIGN",
 73 |     "TOKEN_SLASH",
 74 |     "TOKEN_SLASH_ASSIGN",
 75 |     "TOKEN_INCREMENT",
 76 |     "TOKEN_DECREMENT",
 77 | 
 78 |     "TOKEN_COMMA",
 79 |     "TOKEN_SEMICOLON",
 80 |     "TOKEN_COLON",
 81 |     "TOKEN_LESS_THEN",
 82 |     "TOKEN_LESS_THEN_EQUAL",
 83 |     "TOKEN_GREATER_THEN",
 84 |     "TOKEN_GREATER_THEN_EQUAL",
 85 | 
 86 |     "TOKEN_LPAREN",
 87 |     "TOKEN_RPAREN",
 88 |     "TOKEN_LBRACE",
 89 |     "TOKEN_RBRACE",
 90 | 
 91 |     "TOKEN_IDENT",
 92 |     "TOKEN_INTEGER",
 93 |     "TOKEN_STRING",
 94 | 
 95 |     "TOKEN_FUNCTION",
 96 |     "TOKEN_LET",
 97 |     "TOKEN_INT",
 98 |     "TOKEN_RETURN",
 99 |     "TOKEN_VOID",
100 |     "TOKEN_FOR",
101 |     "TOKEN_BOOL",
102 |     "TOKEN_IF",
103 |     "TOKEN_ELSE",
104 |     "TOKEN_RANGE",
105 |     "TOKEN_STR",
106 | 
107 |     "TOKEN_ILLEGAL",
108 |     "TOKEN_EOF"};
109 | 
110 | #endif
111 | 


--------------------------------------------------------------------------------
/playground/hashtable/hashtable.c:
--------------------------------------------------------------------------------
  1 | // TODO: ADD ability to set int as key
  2 | 
  3 | #include "hashtable.h"
  4 | 
  5 | hash_table_T *init_hash(size_t capacity)
  6 | {
  7 |   hash_table_T *hash_table = calloc(1, sizeof(struct HASH_TABLE));
  8 |   hash_table->elements = calloc(capacity, sizeof(struct HASH_ELEMENT));
  9 |   hash_table->capacity = capacity;
 10 | 
 11 |   return hash_table;
 12 | }
 13 | 
 14 | void *hash_get(hash_table_T *table, char *key)
 15 | {
 16 |   void *value = NULL;
 17 | 
 18 |   hash_element_T *element = _hash_get_element(table, key);
 19 |   if (element)
 20 |   {
 21 |     value = element->value;
 22 |   }
 23 | 
 24 |   return value;
 25 | }
 26 | 
 27 | void *hash_delete(hash_table_T *table, char *key)
 28 | {
 29 |   void *value = NULL;
 30 | 
 31 |   hash_element_T *element = _hash_get_element(table, key);
 32 |   if (element)
 33 |   {
 34 |     element->deleted = 1;
 35 |     value = element->value;
 36 |   }
 37 | 
 38 |   return value;
 39 | }
 40 | 
 41 | // Private function
 42 | 
 43 | void *_hash_insert(hash_table_T *table, char *key, void *value)
 44 | {
 45 |   hash_element_T *element = calloc(1, sizeof(struct HASH_ELEMENT));
 46 |   element->key = key;
 47 |   element->value = value;
 48 |   element->deleted = 0;
 49 | 
 50 |   void *inserted = _hash_insert_element(table, element);
 51 |   if (inserted == NULL)
 52 |   {
 53 |     _resize_hash_table(table);
 54 |     inserted = _hash_insert_element(table, element);
 55 |   }
 56 | 
 57 |   return inserted;
 58 | }
 59 | 
 60 | void *_hash_insert_element(hash_table_T *table, hash_element_T *element)
 61 | {
 62 |   void *value = NULL;
 63 | 
 64 |   size_t hash_value = _hash_function(table, element->key);
 65 | 
 66 |   for (int count = 0; count < table->capacity; count++)
 67 |   {
 68 |     int index = (hash_value + count * count) % table->capacity;
 69 |     hash_element_T *elm = table->elements[index];
 70 | 
 71 |     if (!elm || (elm && elm->deleted && elm->key == element->key))
 72 |     {
 73 |       table->elements[index] = element;
 74 |       value = element->value;
 75 |       break;
 76 |     }
 77 |   }
 78 | 
 79 |   return value;
 80 | }
 81 | 
 82 | hash_element_T *_hash_get_element(hash_table_T *table, char *key)
 83 | {
 84 |   size_t hash_value = _hash_function(table, key);
 85 | 
 86 |   hash_element_T *element = NULL;
 87 | 
 88 |   for (int count = 0; count < table->capacity; count++)
 89 |   {
 90 |     int index = (hash_value + count * count) % table->capacity;
 91 |     hash_element_T *elm = table->elements[index];
 92 | 
 93 |     if (elm && !elm->deleted && elm->key == key)
 94 |     {
 95 |       element = elm;
 96 |       break;
 97 |     }
 98 |   }
 99 | 
100 |   return element;
101 | }
102 | 
103 | size_t _hash_function(hash_table_T *table, char *key)
104 | {
105 |   size_t number = 1;
106 | 
107 |   for (size_t i = 0; i < strlen(key); i++)
108 |   {
109 |     number += key[i];
110 |   }
111 | 
112 |   return number % table->capacity;
113 | }
114 | 
115 | void _resize_hash_table(hash_table_T *table)
116 | {
117 |   hash_table_T *new_hash_table = init_hash(table->capacity * 2);
118 | 
119 |   for (int index = 0; index < table->capacity; index++)
120 |   {
121 |     hash_element_T *element = table->elements[index];
122 |     if (element)
123 |     {
124 |       _hash_insert_element(new_hash_table, element);
125 |     }
126 |   }
127 | 
128 |   free(table->elements);
129 | 
130 |   table->elements = new_hash_table->elements;
131 |   table->capacity = new_hash_table->capacity;
132 | 
133 |   free(new_hash_table);
134 | }
135 | 


--------------------------------------------------------------------------------
/src/lexer.c:
--------------------------------------------------------------------------------
  1 | #include "lexer.h"
  2 | 
  3 | static inline char read_next_character(lexer_T *);
  4 | static inline char peek_character(lexer_T *);
  5 | static char *read_ident(lexer_T *);
  6 | static int64_t read_integer(lexer_T *);
  7 | static char *read_string(lexer_T *);
  8 | static void skip_whitespace(lexer_T *);
  9 | 
 10 | lexer_T *new_lexer(char *input)
 11 | {
 12 |   lexer_T *lex = (lexer_T *)malloc(sizeof(lexer_T));
 13 |   lex->input = input;
 14 |   lex->position = 0;
 15 |   lex->read_position = 1;
 16 |   lex->character = lex->input[lex->position];
 17 |   return lex;
 18 | }
 19 | 
 20 | static inline char read_next_character(lexer_T *lex)
 21 | {
 22 |   lex->position = lex->read_position;
 23 |   lex->read_position++;
 24 |   lex->character = lex->input[lex->position];
 25 |   return lex->character;
 26 | }
 27 | 
 28 | static inline char peek_character(lexer_T *lex)
 29 | {
 30 |   size_t peek_position = lex->read_position;
 31 |   return lex->input[peek_position];
 32 | }
 33 | 
 34 | static char *read_ident(lexer_T *lex)
 35 | {
 36 |   char *literal = "";
 37 |   while (true)
 38 |   {
 39 |     literal = append(literal, &lex->character);
 40 |     if (!isalpha(peek_character(lex)))
 41 |     {
 42 |       break;
 43 |     }
 44 |     read_next_character(lex);
 45 |   }
 46 |   return literal;
 47 | }
 48 | 
 49 | static int64_t read_integer(lexer_T *lex)
 50 | {
 51 |   int64_t literal = 0;
 52 |   while (true)
 53 |   {
 54 |     literal *= 10;
 55 |     literal += lex->character - '0';
 56 |     if (!isdigit(peek_character(lex)))
 57 |     {
 58 |       break;
 59 |     }
 60 |     read_next_character(lex);
 61 |   }
 62 |   return literal;
 63 | }
 64 | 
 65 | static char *read_string(lexer_T *lex)
 66 | {
 67 |   char *literal = "";
 68 |   while (true)
 69 |   {
 70 |     read_next_character(lex);
 71 |     literal = append(literal, &lex->character);
 72 |     if (!peek_character(lex) || peek_character(lex) == '"')
 73 |     {
 74 |       read_next_character(lex);
 75 |       break;
 76 |     }
 77 |   }
 78 |   return literal;
 79 | }
 80 | 
 81 | static void skip_whitespace(lexer_T *lex)
 82 | {
 83 |   while (lex->character == ' ' || lex->character == '\t' || lex->character == '\n' || lex->character == '\r')
 84 |   {
 85 |     read_next_character(lex);
 86 |   }
 87 | }
 88 | 
 89 | token_T next_token(lexer_T *lex)
 90 | {
 91 |   token_T token;
 92 | 
 93 |   skip_whitespace(lex);
 94 | 
 95 |   switch (lex->character)
 96 |   {
 97 |   case '=':
 98 |   {
 99 |     switch (peek_character(lex))
100 |     {
101 |     case '=':
102 |     {
103 |       token = new_token(TOKEN_EQUAL, "==");
104 |       read_next_character(lex);
105 |       break;
106 |     }
107 |     default:
108 |     {
109 |       token = new_token(TOKEN_ASSIGN, "=");
110 |     }
111 |     }
112 |     break;
113 |   }
114 |   case '&':
115 |   {
116 |     switch (peek_character(lex))
117 |     {
118 |     case '&':
119 |     {
120 |       token = new_token(TOKEN_AND, "&&");
121 |       read_next_character(lex);
122 |       break;
123 |     }
124 |     default:
125 |     {
126 |       // Single & character is illegal for now
127 |       token = new_token(TOKEN_ILLEGAL, append("", &lex->character));
128 |     }
129 |     }
130 |     break;
131 |   }
132 |   case '|':
133 |   {
134 |     switch (peek_character(lex))
135 |     {
136 |     case '|':
137 |     {
138 |       token = new_token(TOKEN_OR, "||");
139 |       read_next_character(lex);
140 |       break;
141 |     }
142 |     default:
143 |     {
144 |       // Single | character is illegal for now
145 |       token = new_token(TOKEN_ILLEGAL, append("", &lex->character));
146 |     }
147 |     }
148 |     break;
149 |   }
150 |   case '+':
151 |   {
152 |     switch (peek_character(lex))
153 |     {
154 |     case '+':
155 |     {
156 |       token = new_token(TOKEN_INCREMENT, "++");
157 |       read_next_character(lex);
158 |       break;
159 |     }
160 |     case '=':
161 |     {
162 |       token = new_token(TOKEN_PLUS_ASSIGN, "+=");
163 |       read_next_character(lex);
164 |       break;
165 |     }
166 |     default:
167 |     {
168 |       token = new_token(TOKEN_PLUS, "+");
169 |     }
170 |     }
171 |     break;
172 |   }
173 |   case '-':
174 |   {
175 |     switch (peek_character(lex))
176 |     {
177 |     case '-':
178 |     {
179 |       token = new_token(TOKEN_DECREMENT, "--");
180 |       read_next_character(lex);
181 |       break;
182 |     }
183 |     case '=':
184 |     {
185 |       token = new_token(TOKEN_MINUS_ASSIGN, "-=");
186 |       read_next_character(lex);
187 |       break;
188 |     }
189 |     default:
190 |     {
191 |       token = new_token(TOKEN_MINUS, "-");
192 |     }
193 |     }
194 |     break;
195 |   }
196 |   case '!':
197 |   {
198 |     switch (peek_character(lex))
199 |     {
200 |     case '=':
201 |     {
202 |       token = new_token(TOKEN_NOT_EQUAL, "!=");
203 |       read_next_character(lex);
204 |       break;
205 |     }
206 |     default:
207 |     {
208 |       token = new_token(TOKEN_EXCLAMATION, "!");
209 |     }
210 |     }
211 |     break;
212 |   }
213 |   case '*':
214 |   {
215 |     switch (peek_character(lex))
216 |     {
217 |     case '=':
218 |     {
219 |       token = new_token(TOKEN_ASTERISK_ASSIGN, "*=");
220 |       read_next_character(lex);
221 |       break;
222 |     }
223 |     default:
224 |     {
225 |       token = new_token(TOKEN_ASTERISK, "*");
226 |     }
227 |     }
228 |     break;
229 |   }
230 |   case '/':
231 |   {
232 |     switch (peek_character(lex))
233 |     {
234 |     case '=':
235 |     {
236 |       token = new_token(TOKEN_SLASH_ASSIGN, "/=");
237 |       read_next_character(lex);
238 |       break;
239 |     }
240 |     default:
241 |     {
242 |       token = new_token(TOKEN_SLASH, "/");
243 |     }
244 |     }
245 |     break;
246 |   }
247 |   case ',':
248 |   {
249 |     token = new_token(TOKEN_COMMA, ",");
250 |     break;
251 |   }
252 |   case ';':
253 |   {
254 |     token = new_token(TOKEN_SEMICOLON, ";");
255 |     break;
256 |   }
257 |   case ':':
258 |   {
259 |     switch (peek_character(lex))
260 |     {
261 |     case '=':
262 |     {
263 |       token = new_token(TOKEN_COLON_ASSIGN, ":=");
264 |       read_next_character(lex);
265 |       break;
266 |     }
267 |     default:
268 |     {
269 |       token = new_token(TOKEN_COLON, ":");
270 |     }
271 |     }
272 |     break;
273 |   }
274 |   case '<':
275 |   {
276 |     switch (peek_character(lex))
277 |     {
278 |     case '=':
279 |     {
280 |       token = new_token(TOKEN_LESS_THEN_EQUAL, "<=");
281 |       read_next_character(lex);
282 |       break;
283 |     }
284 |     default:
285 |     {
286 |       token = new_token(TOKEN_LESS_THEN, "<");
287 |     }
288 |     }
289 |     break;
290 |   }
291 |   case '>':
292 |   {
293 |     switch (peek_character(lex))
294 |     {
295 |     case '=':
296 |     {
297 |       token = new_token(TOKEN_GREATER_THEN_EQUAL, ">=");
298 |       read_next_character(lex);
299 |       break;
300 |     }
301 |     default:
302 |     {
303 |       token = new_token(TOKEN_GREATER_THEN, ">");
304 |     }
305 |     }
306 |     break;
307 |   }
308 |   case '(':
309 |   {
310 |     token = new_token(TOKEN_LPAREN, "(");
311 |     break;
312 |   }
313 |   case ')':
314 |   {
315 |     token = new_token(TOKEN_RPAREN, ")");
316 |     break;
317 |   }
318 |   case '{':
319 |   {
320 |     token = new_token(TOKEN_LBRACE, "{");
321 |     break;
322 |   }
323 |   case '}':
324 |   {
325 |     token = new_token(TOKEN_RBRACE, "}");
326 |     break;
327 |   }
328 |   case '"':
329 |   {
330 |     char *literal = read_string(lex);
331 |     token = string_token(TOKEN_STRING, literal);
332 |     break;
333 |   }
334 |   case 0:
335 |   {
336 |     token = new_token(TOKEN_EOF, "EOF");
337 |     break;
338 |   }
339 |   default:
340 |   {
341 |     if (isalpha(lex->character))
342 |     {
343 |       char *literal = read_ident(lex);
344 | 
345 |       // TODO: use string interning for comparing string
346 |       if (!strncmp(literal, "let", 3))
347 |       {
348 |         token = new_token(TOKEN_LET, literal);
349 |         break;
350 |       }
351 | 
352 |       if (!strncmp(literal, "int", 3))
353 |       {
354 |         token = new_token(TOKEN_INT, literal);
355 |         break;
356 |       }
357 | 
358 |       if (!strncmp(literal, "return", 6))
359 |       {
360 |         token = new_token(TOKEN_RETURN, literal);
361 |         break;
362 |       }
363 | 
364 |       if (!strncmp(literal, "void", 4))
365 |       {
366 |         token = new_token(TOKEN_VOID, literal);
367 |         break;
368 |       }
369 | 
370 |       if (!strncmp(literal, "for", 3))
371 |       {
372 |         token = new_token(TOKEN_FOR, literal);
373 |         break;
374 |       }
375 | 
376 |       if (!strncmp(literal, "true", 4))
377 |       {
378 |         token = new_token(TOKEN_BOOL, literal);
379 |         break;
380 |       }
381 | 
382 |       if (!strncmp(literal, "false", 4))
383 |       {
384 |         token = new_token(TOKEN_BOOL, literal);
385 |         break;
386 |       }
387 | 
388 |       if (!strncmp(literal, "if", 2))
389 |       {
390 |         token = new_token(TOKEN_IF, literal);
391 |         break;
392 |       }
393 | 
394 |       if (!strncmp(literal, "else", 4))
395 |       {
396 |         token = new_token(TOKEN_ELSE, literal);
397 |         break;
398 |       }
399 | 
400 |       if (!strncmp(literal, "range", 5))
401 |       {
402 |         token = new_token(TOKEN_RANGE, literal);
403 |         break;
404 |       }
405 | 
406 |       if (!strncmp(literal, "str", 3))
407 |       {
408 |         token = new_token(TOKEN_STR, literal);
409 |         break;
410 |       }
411 | 
412 |       token = new_token(TOKEN_IDENT, literal);
413 |       break;
414 |     }
415 | 
416 |     if (isdigit(lex->character))
417 |     {
418 |       int64_t integer = read_integer(lex);
419 |       token = integer_token(TOKEN_INTEGER, integer);
420 |       break;
421 |     }
422 | 
423 |     token = new_token(TOKEN_ILLEGAL, append("", &lex->character));
424 |     break;
425 |   }
426 |   }
427 |   read_next_character(lex);
428 |   return token;
429 | }
430 | 


--------------------------------------------------------------------------------