├── README.md ├── src ├── windows │ └── os-interface-win32.cpp ├── os-interface.h ├── iolib.h ├── utils.h ├── utils.cpp ├── interface │ ├── cell-lang.h │ ├── lib-cpp.cpp │ ├── conversion.cpp │ ├── lib-cpp.h │ └── value.cpp ├── os-interface-linux.cpp ├── iolib.cpp ├── hashing.cpp ├── sorting.cpp ├── debug.cpp ├── mem-core.cpp ├── table-utils.h ├── mem-alloc.cpp ├── inter-utils.cpp ├── tern-rel-obj.cpp ├── mem-copying.cpp ├── bin-rel-obj.cpp ├── basic-ops.cpp ├── binary-table.cpp ├── unary-table.cpp ├── mem.cpp ├── value-store.cpp ├── instrs.cpp ├── algs.cpp ├── ternary-table.cpp ├── printing.cpp └── parsing.cpp └── LICENSE /README.md: -------------------------------------------------------------------------------- 1 | # runtime -------------------------------------------------------------------------------- /src/windows/os-interface-win32.cpp: -------------------------------------------------------------------------------- 1 | #include "os_interface.h" 2 | 3 | #include "windows.h" 4 | 5 | uint64 get_tick_count() { 6 | return GetTickCount(); 7 | } 8 | -------------------------------------------------------------------------------- /src/os-interface.h: -------------------------------------------------------------------------------- 1 | // uint64 get_tick_count(); // Moved to lib.h 2 | 3 | char *file_read(const char *fname, int &size); 4 | bool file_write(const char *fname, const char *buffer, int size, bool append); 5 | -------------------------------------------------------------------------------- /src/iolib.h: -------------------------------------------------------------------------------- 1 | namespace generated { 2 | struct ENV; 3 | } 4 | 5 | OBJ FileRead_P(OBJ, generated::ENV &); 6 | OBJ FileWrite_P(OBJ, OBJ, generated::ENV &); 7 | OBJ Print_P(OBJ, generated::ENV &env); 8 | OBJ GetChar_P(generated::ENV &env); 9 | -------------------------------------------------------------------------------- /src/utils.h: 
// Splits `value` into an integer mantissa and a decimal exponent so that
// value ~= mantissa * 10^dec_exp, based on the "%f" (six decimals) rendering.
//
//   value     number to decompose
//   mantissa  receives the digits of the rendering with the decimal point
//             removed (sign included); set to 0 when the rendering is not
//             numeric (e.g. "inf" or "nan")
//   dec_exp   receives the power of ten `mantissa` has to be scaled by
//
// Note: values whose rendering has more digits than a long long can hold are
// still not handled; the result of sscanf is unreliable in that case.
void mantissa_and_dec_exp(double value, long long &mantissa, int &dec_exp) {
  char buffer[1024];
  snprintf(buffer, sizeof(buffer), "%f", value);

  int len = strlen(buffer);
  char *dot_ptr = strchr(buffer, '.');
  dec_exp = 0;
  if (dot_ptr == NULL) {
    // No decimal point: fold trailing zeros into the exponent, always
    // keeping at least one character in the buffer
    while (len > 1 && buffer[len-1] == '0') {
      len--;
      dec_exp++;
    }
    buffer[len] = '\0';
  }
  else {
    // Dropping the dot (the terminator is moved along with the digits) and
    // compensating with a negative exponent equal to the number of decimals
    int dot_idx = dot_ptr - buffer;
    memmove(dot_ptr, dot_ptr+1, len-dot_idx);
    dec_exp = dot_idx - len + 1;
    len--;
  }

  // Never leave the output indeterminate if the text cannot be parsed
  if (sscanf(buffer, "%lld", &mantissa) != 1)
    mantissa = 0;
}
namespace cell {
  // Abstract interface for inspecting a value. Every member function is pure
  // virtual, so all behavior is supplied by implementations defined elsewhere.
  class Value {
  public:
    // Polymorphic base class: a virtual destructor makes destroying an
    // implementation through a Value pointer well defined.
    virtual ~Value() {}

    // Kind predicates
    virtual bool is_symb() = 0;
    virtual bool is_int() = 0;
    virtual bool is_float() = 0;
    virtual bool is_seq() = 0;
    virtual bool is_set() = 0;
    virtual bool is_bin_rel() = 0;
    virtual bool is_tern_rel() = 0;
    virtual bool is_tagged() = 0;

    // Scalar accessors
    virtual const char *as_symb() = 0;
    virtual long long as_int() = 0;
    virtual double as_float() = 0;

    // Positional access. The two entry() overloads fill two and three
    // out-parameters respectively (presumably the columns of binary and
    // ternary relations -- confirm against the implementation).
    virtual unsigned int size() = 0;
    virtual Value *item(unsigned int) = 0;
    virtual void entry(unsigned int, Value *&, Value *&) = 0;
    virtual void entry(unsigned int, Value *&, Value *&, Value *&) = 0;

    // Tagged values
    virtual const char *tag() = 0;
    virtual Value *untagged() = 0;

    // Higher-level kinds
    virtual bool is_string() = 0;
    virtual bool is_record() = 0;

    virtual std::string as_str() = 0;
    virtual Value *lookup(const char *) = 0;

    // Textual rendering
    virtual std::string printed() = 0;
    virtual void print(std::ostream &os) = 0;
  };
}
// Writes `size` bytes taken from `buffer` to the file named `fname`.
//
//   fname   path of the file to write
//   buffer  data to write
//   size    number of bytes to write; negative values are rejected
//   append  if true the data is appended ("a" mode), otherwise the file is
//           opened in "w" mode
//
// Returns true only if all bytes were written and the file was closed
// without errors.
bool file_write(const char *fname, const char *buffer, int size, bool append) {
  // A negative size would be silently converted to a huge size_t by fwrite
  if (size < 0)
    return false;
  FILE *fp = fopen(fname, append ? "a" : "w");
  if (fp == NULL)
    return false;
  size_t written = fwrite(buffer, 1, size, fp);
  // Buffered data is only flushed here, so a failing fclose means data loss
  bool closed = fclose(fp) == 0;
  return closed && written == (size_t) size;
}
40 | -------------------------------------------------------------------------------- /src/iolib.cpp: -------------------------------------------------------------------------------- 1 | #include "lib.h" 2 | #include "os-interface.h" 3 | 4 | 5 | namespace generated { 6 | struct ENV; 7 | } 8 | 9 | 10 | OBJ FileRead_P(OBJ filename, generated::ENV &) { 11 | char *fname = obj_to_str(filename); 12 | int size; 13 | char *data = file_read(fname, size); 14 | delete_byte_array(fname, strlen(fname)+1); 15 | 16 | if (size == -1) 17 | return make_symb(symb_idx_nothing); 18 | 19 | OBJ seq_obj = make_empty_seq(); 20 | if (size > 0) { 21 | SEQ_OBJ *seq = new_seq(size); 22 | for (uint32 i=0 ; i < size ; i++) 23 | seq->buffer[i] = make_int((uint8) data[i]); 24 | delete_byte_array(data, size); 25 | seq_obj = make_seq(seq, size); 26 | } 27 | 28 | return make_tag_obj(symb_idx_just, seq_obj); 29 | } 30 | 31 | 32 | OBJ FileWrite_P(OBJ filename, OBJ data, generated::ENV &) { 33 | char *fname = obj_to_str(filename); 34 | uint32 size; 35 | char *buffer = obj_to_byte_array(data, size); 36 | bool res; 37 | if (size > 0) { 38 | res = file_write(fname, buffer, size, false); 39 | delete_byte_array(buffer, size); 40 | } 41 | else { 42 | char empty_buff[1]; 43 | res = file_write(fname, empty_buff, 0, false); 44 | } 45 | delete_byte_array(fname, strlen(fname)+1); 46 | return make_bool(res); 47 | } 48 | 49 | 50 | OBJ Print_P(OBJ str_obj, generated::ENV &env) { 51 | char *str = obj_to_str(str_obj); 52 | fputs(str, stdout); 53 | fflush(stdout); 54 | delete_byte_array(str, strlen(str)+1); 55 | return make_blank_obj(); 56 | } 57 | 58 | 59 | OBJ GetChar_P(generated::ENV &env) { 60 | int ch = getchar(); 61 | if (ch == EOF) 62 | return make_symb(symb_idx_nothing); 63 | return make_tag_obj(symb_idx_just, make_int(ch)); 64 | } 65 | -------------------------------------------------------------------------------- /src/hashing.cpp: 
-------------------------------------------------------------------------------- 1 | #include "lib.h" 2 | 3 | 4 | const uint32 BASE_VALUE = 17; 5 | const uint32 MULTIPLIER = 37; 6 | 7 | const uint32 MULT_BASE_VALUE = BASE_VALUE * MULTIPLIER; 8 | 9 | //////////////////////////////////////////////////////////////////////////////// 10 | 11 | uint32 combined_hash_code(uint32 start_value, OBJ *array, uint32 count) { 12 | uint32 hash_code = start_value; 13 | for (uint32 i=0 ; i < count ; i++) 14 | hash_code = MULTIPLIER * hash_code + compute_hash_code(array[i]); 15 | return hash_code; 16 | } 17 | 18 | uint32 compute_hash_code(OBJ obj) { 19 | if (is_tag_obj(obj)) 20 | return MULTIPLIER * (MULT_BASE_VALUE + get_tag_idx(obj)) + compute_hash_code(get_inner_obj(obj)); 21 | 22 | switch (get_physical_type(obj)) { 23 | case TYPE_BLANK_OBJ: 24 | case TYPE_NULL_OBJ: 25 | fail(); 26 | 27 | case TYPE_SYMBOL: 28 | return MULT_BASE_VALUE + get_symb_idx(obj); 29 | 30 | case TYPE_INTEGER: 31 | case TYPE_FLOAT: { 32 | uint64 core_data = obj.core_data.int_; 33 | return MULT_BASE_VALUE + (uint32) (core_data ^ (core_data >> 32)); 34 | } 35 | 36 | case TYPE_SEQUENCE: { 37 | uint32 size = get_seq_length(obj); 38 | return combined_hash_code(MULT_BASE_VALUE + size, size > 0 ? 
get_seq_buffer_ptr(obj) : NULL, size); 39 | } 40 | 41 | case TYPE_SET: { 42 | if (is_empty_rel(obj)) 43 | return MULT_BASE_VALUE; 44 | SET_OBJ *ptr = get_set_ptr(obj); 45 | uint32 size = ptr->size; 46 | return combined_hash_code(MULT_BASE_VALUE + size, ptr->buffer, size); 47 | } 48 | 49 | case TYPE_BIN_REL: { 50 | BIN_REL_OBJ *ptr = get_bin_rel_ptr(obj); 51 | uint32 size = ptr->size; 52 | return combined_hash_code(MULT_BASE_VALUE + size, ptr->buffer, 2 * size); 53 | } 54 | 55 | case TYPE_TERN_REL: { 56 | TERN_REL_OBJ *ptr = get_tern_rel_ptr(obj); 57 | uint32 size = ptr->size; 58 | return combined_hash_code(MULT_BASE_VALUE + size, ptr->buffer, 3 * size); 59 | } 60 | 61 | case TYPE_TAG_OBJ: 62 | fail(); 63 | 64 | case TYPE_SLICE: { 65 | uint32 size = get_seq_length(obj); 66 | return combined_hash_code(MULT_BASE_VALUE + size, get_seq_buffer_ptr(obj), size); 67 | } 68 | 69 | case TYPE_MAP: 70 | case TYPE_LOG_MAP: { 71 | BIN_REL_OBJ *ptr = get_bin_rel_ptr(obj); 72 | uint32 size = ptr->size; 73 | return combined_hash_code(MULT_BASE_VALUE + size, ptr->buffer, 2 * size); 74 | } 75 | } 76 | fail(); 77 | } 78 | -------------------------------------------------------------------------------- /src/sorting.cpp: -------------------------------------------------------------------------------- 1 | #include "lib.h" 2 | 3 | 4 | struct obj_idx_less { 5 | OBJ *objs; 6 | obj_idx_less(OBJ *objs) : objs(objs) {} 7 | 8 | bool operator () (uint32 idx1, uint32 idx2) { 9 | return comp_objs(objs[idx1], objs[idx2]) > 0; 10 | } 11 | }; 12 | 13 | struct obj_idx_less_no_eq { 14 | OBJ *values; 15 | obj_idx_less_no_eq(OBJ *values) : values(values) {} 16 | 17 | bool operator () (uint32 idx1, uint32 idx2) { 18 | int cr = comp_objs(values[idx1], values[idx2]); 19 | return cr != 0 ? 
cr > 0 : idx1 < idx2; 20 | } 21 | }; 22 | 23 | //////////////////////////////////////////////////////////////////////////////// 24 | 25 | struct obj_pair_idx_less { 26 | OBJ *major_sort, *minor_sort; 27 | obj_pair_idx_less(OBJ *major_sort, OBJ *minor_sort) : major_sort(major_sort), minor_sort(minor_sort) {} 28 | 29 | bool operator () (uint32 idx1, uint32 idx2) { 30 | int cr = comp_objs(major_sort[idx1], major_sort[idx2]); 31 | if (cr != 0) 32 | return cr > 0; 33 | cr = comp_objs(minor_sort[idx1], minor_sort[idx2]); 34 | if (cr != 0) 35 | return cr > 0; 36 | return idx1 < idx2; 37 | } 38 | }; 39 | 40 | //////////////////////////////////////////////////////////////////////////////// 41 | 42 | struct obj_triple_idx_less { 43 | OBJ *col1, *col2, *col3; 44 | obj_triple_idx_less(OBJ *col1, OBJ *col2, OBJ *col3) : col1(col1), col2(col2), col3(col3) {} 45 | 46 | bool operator () (uint32 idx1, uint32 idx2) { 47 | int cr = comp_objs(col1[idx1], col1[idx2]); 48 | if (cr != 0) 49 | return cr > 0; 50 | cr = comp_objs(col2[idx1], col2[idx2]); 51 | if (cr != 0) 52 | return cr > 0; 53 | cr = comp_objs(col3[idx1], col3[idx2]); 54 | if (cr != 0) 55 | return cr > 0; 56 | return idx1 < idx2; 57 | } 58 | }; 59 | 60 | //////////////////////////////////////////////////////////////////////////////// 61 | //////////////////////////////////////////////////////////////////////////////// 62 | 63 | void stable_index_sort(uint32 *index, OBJ *values, uint32 count) { 64 | for (uint32 i=0 ; i < count ; i++) 65 | index[i] = i; 66 | std::sort(index, index+count, obj_idx_less_no_eq(values)); 67 | } 68 | 69 | void stable_index_sort(uint32 *index, OBJ *major_sort, OBJ *minor_sort, uint32 count) { 70 | for (uint32 i=0 ; i < count ; i++) 71 | index[i] = i; 72 | std::sort(index, index+count, obj_pair_idx_less(major_sort, minor_sort)); 73 | } 74 | 75 | void stable_index_sort(uint32 *index, OBJ *major_sort, OBJ *middle_sort, OBJ *minor_sort, uint32 count) { 76 | for (uint32 i=0 ; i < count ; i++) 77 | 
index[i] = i; 78 | std::sort(index, index+count, obj_triple_idx_less(major_sort, middle_sort, minor_sort)); 79 | } 80 | 81 | //////////////////////////////////////////////////////////////////////////////// 82 | 83 | void index_sort(uint32 *index, OBJ *values, uint32 count) { 84 | for (uint32 i=0 ; i < count ; i++) 85 | index[i] = i; 86 | std::sort(index, index+count, obj_idx_less(values)); 87 | } 88 | 89 | void index_sort(uint32 *index, OBJ *major_sort, OBJ *minor_sort, uint32 count) { 90 | stable_index_sort(index, major_sort, minor_sort, count); 91 | } 92 | 93 | void index_sort(uint32 *index, OBJ *major_sort, OBJ *middle_sort, OBJ *minor_sort, uint32 count) { 94 | stable_index_sort(index, major_sort, middle_sort, minor_sort, count); 95 | } 96 | -------------------------------------------------------------------------------- /src/interface/conversion.cpp: -------------------------------------------------------------------------------- 1 | #include "lib.h" 2 | #include 3 | 4 | using std::string; 5 | 6 | 7 | OBJ convert_bool_seq(const bool *array, uint32 size) { 8 | if (size == 0) 9 | return make_empty_seq(); 10 | //## CHECK THAT THE INPUT ARRAY DOES NOT EXCEED THE MAXIMUM SEQUENCE SIZE 11 | SEQ_OBJ *seq = new_seq(size); 12 | OBJ *buffer = seq->buffer; 13 | for (uint32 i=0 ; i < size ; i++) 14 | buffer[i] = make_bool(array[i]); 15 | return make_seq(seq, size); 16 | } 17 | 18 | OBJ convert_int32_seq(const int32 *array, uint32 size) { 19 | if (size == 0) 20 | return make_empty_seq(); 21 | //## CHECK THAT THE INPUT ARRAY DOES NOT EXCEED THE MAXIMUM SEQUENCE SIZE 22 | SEQ_OBJ *seq = new_seq(size); 23 | OBJ *buffer = seq->buffer; 24 | for (unsigned int i=0 ; i < size ; i++) 25 | buffer[i] = make_int(array[i]); 26 | return make_seq(seq, size); 27 | } 28 | 29 | OBJ convert_int_seq(const int64 *array, uint32 size) { 30 | if (size == 0) 31 | return make_empty_seq(); 32 | //## CHECK THAT THE INPUT ARRAY DOES NOT EXCEED THE MAXIMUM SEQUENCE SIZE 33 | SEQ_OBJ *seq = 
new_seq(size); 34 | OBJ *buffer = seq->buffer; 35 | for (unsigned int i=0 ; i < size ; i++) 36 | buffer[i] = make_int(array[i]); 37 | return make_seq(seq, size); 38 | } 39 | 40 | OBJ convert_float_seq(const double *array, uint32 size) { 41 | if (size == 0) 42 | return make_empty_seq(); 43 | //## CHECK THAT THE INPUT ARRAY DOES NOT EXCEED THE MAXIMUM SEQUENCE SIZE 44 | SEQ_OBJ *seq = new_seq(size); 45 | OBJ *buffer = seq->buffer; 46 | for (uint32 i=0 ; i < size ; i++) 47 | buffer[i] = make_float(array[i]); 48 | return make_seq(seq, size); 49 | } 50 | 51 | OBJ convert_text(const char *buffer) { 52 | OBJ obj; 53 | uint32 error_offset; 54 | bool ok = parse(buffer, strlen(buffer), &obj, &error_offset); 55 | if (!ok) 56 | throw (long long) error_offset; 57 | return obj; 58 | } 59 | 60 | //////////////////////////////////////////////////////////////////////////////// 61 | 62 | // void export_as_c_string(OBJ obj, char *buffer, uint32 capacity) { 63 | // obj_to_str(obj, buffer, capacity); 64 | // } 65 | 66 | uint32 export_as_bool_array(OBJ obj, bool *array, uint32 capacity) { 67 | uint32 len = get_seq_length(obj); 68 | if (len >= capacity) 69 | throw (long long) len; 70 | OBJ *buffer = get_seq_buffer_ptr(obj); 71 | for (uint32 i=0 ; i < len ; i++) 72 | array[i] = get_bool(buffer[i]); 73 | return len; 74 | } 75 | 76 | uint32 export_as_long_long_array(OBJ obj, int64 *array, uint32 capacity) { 77 | uint32 len = get_seq_length(obj); 78 | if (len >= capacity) 79 | throw (long long) len; 80 | OBJ *buffer = get_seq_buffer_ptr(obj); 81 | for (uint32 i=0 ; i < len ; i++) 82 | array[i] = get_int(buffer[i]); 83 | return len; 84 | } 85 | 86 | uint32 export_as_float_array(OBJ obj, double *array, uint32 capacity) { 87 | uint32 len = get_seq_length(obj); 88 | if (len >= capacity) 89 | throw (long long) len; 90 | OBJ *buffer = get_seq_buffer_ptr(obj); 91 | for (uint32 i=0 ; i < len ; i++) 92 | array[i] = get_float(buffer[i]); 93 | return len; 94 | } 95 | 96 | void 
export_literal_as_c_string(OBJ obj, char *buffer, uint32 capacity) { 97 | uint32 min_capacity = printed_obj(obj, buffer, capacity); 98 | if (capacity < min_capacity) 99 | throw (long long) min_capacity; 100 | } 101 | 102 | //////////////////////////////////////////////////////////////////////////////// 103 | 104 | string export_as_std_string(OBJ obj) { 105 | string result; 106 | OBJ raw_str_obj = get_inner_obj(obj); 107 | if (!is_empty_seq(raw_str_obj)) { 108 | OBJ *seq_buffer = get_seq_buffer_ptr(raw_str_obj); 109 | uint32 len = get_seq_length(raw_str_obj); 110 | int64 size = to_utf8(seq_buffer, len, NULL); 111 | char *utf8_data = new_byte_array(size+1); 112 | to_utf8(seq_buffer, len, utf8_data); 113 | result = utf8_data; 114 | } 115 | return result; 116 | } 117 | 118 | // #include 119 | // 120 | // using std::ostringstream; 121 | // 122 | // void emit_stream(void *ptr, const void *data, EMIT_ACTION action) { 123 | // ostringstream &stream = *(ostringstream *) ptr; 124 | // if (action == TEXT) 125 | // stream << (char *) data; 126 | // } 127 | // 128 | // string export_literal_as_std_string(OBJ obj) { 129 | // ostringstream stream; 130 | // print_obj(obj, emit_stream, &stream); 131 | // return stream.str(); 132 | // } 133 | -------------------------------------------------------------------------------- /src/debug.cpp: -------------------------------------------------------------------------------- 1 | #include "lib.h" 2 | 3 | 4 | std::vector function_names; 5 | std::vector arities; 6 | std::vector param_lists; 7 | 8 | void push_call_info(const char *fn_name, uint32 arity, OBJ *params) { 9 | #ifndef NDEBUG 10 | function_names.push_back(fn_name); 11 | arities.push_back(arity); 12 | param_lists.push_back(params); 13 | #endif 14 | } 15 | 16 | void pop_call_info() { 17 | #ifndef NDEBUG 18 | uint32 arity = arities.back(); 19 | if (arity > 0) 20 | delete_obj_array(param_lists.back(), arity); 21 | 22 | function_names.pop_back(); 23 | arities.pop_back(); 24 | 
param_lists.pop_back(); 25 | #endif 26 | } 27 | 28 | void pop_try_mode_call_info(int depth) { 29 | while (function_names.size() > depth) 30 | pop_call_info(); 31 | } 32 | 33 | int get_call_stack_depth() { 34 | return function_names.size(); 35 | } 36 | 37 | //////////////////////////////////////////////////////////////////////////////// 38 | 39 | void printed_obj_or_filename(OBJ obj, bool add_path, char *buffer, uint32 buff_size) { 40 | const uint32 MAX_OBJ_COUNT = 1024; 41 | static uint32 filed_objs_count = 0; 42 | // Deliberate bug: storing objects without reference counting them. 43 | static OBJ filed_objs[MAX_OBJ_COUNT]; 44 | 45 | assert(buff_size >= 64); 46 | 47 | const char *file_template = add_path ? "" : ""; 48 | 49 | for (uint32 i=0 ; i < filed_objs_count ; i++) 50 | if (are_eq(filed_objs[i], obj)) { 51 | sprintf(buffer, file_template, i); 52 | return; 53 | } 54 | 55 | char fname[64]; 56 | sprintf(fname, "debug/obj_%02d.txt", filed_objs_count); 57 | 58 | print_to_buffer_or_file(obj, buffer, buff_size, fname); 59 | 60 | if (buffer[0] == '\0') { 61 | // The object was written to a file 62 | sprintf(buffer, file_template, filed_objs_count); 63 | if (filed_objs_count < MAX_OBJ_COUNT) 64 | filed_objs[filed_objs_count++] = obj; 65 | } 66 | } 67 | 68 | //////////////////////////////////////////////////////////////////////////////// 69 | 70 | void print_indented_param(FILE *fp, OBJ param, bool is_last) { 71 | const uint32 BUFF_SIZE = 512; 72 | char buffer[BUFF_SIZE]; 73 | 74 | if (!is_blank_obj(param)) 75 | printed_obj_or_filename(param, false, buffer, BUFF_SIZE); 76 | else 77 | strcpy(buffer, ""); 78 | 79 | for (uint32 i=0 ; buffer[i] != '\0' ; i++) { 80 | if (i == 0 || buffer[i-1] == '\n') 81 | fputs(" ", fp); 82 | fputc(buffer[i], fp); 83 | } 84 | 85 | if (!is_last) 86 | fputs(",", fp); 87 | fputs("\n", fp); 88 | fflush(fp); 89 | } 90 | 91 | 92 | void print_stack_frame(FILE *fp, uint32 frame_idx) { 93 | const char *fn_name = function_names[frame_idx]; 94 | 
uint32 arity = arities[frame_idx]; 95 | OBJ *params = param_lists[frame_idx]; 96 | 97 | fputs(fn_name, fp); 98 | fputs("(", fp); 99 | if (arity > 0) 100 | fputs("\n", fp); 101 | for (uint32 i=0 ; i < arity ; i++) 102 | print_indented_param(fp, params[i], i == arity-1); 103 | fputs(")\n\n", fp); 104 | } 105 | 106 | 107 | void print_stack_frame(uint32 frame_idx) { 108 | const char *fn_name = function_names[frame_idx]; 109 | fprintf(stderr, "%s\n", fn_name); 110 | } 111 | 112 | 113 | void print_call_stack() { 114 | #ifndef NDEBUG 115 | uint32 size = function_names.size(); 116 | for (uint32 i=0 ; i < size ; i++) 117 | print_stack_frame(i); 118 | fputs("\nNow trying to write a full dump of the stack to the file debug/stack_trace.txt.\nPlease be patient. This may take a while...", stderr); 119 | fflush(stderr); 120 | FILE *fp = fopen("debug/stack_trace.txt", "w"); 121 | if (fp == NULL) { 122 | fputs("\nFailed to open file debug/stack_trace.txt\n", stderr); 123 | return; 124 | } 125 | for (uint32 i=0 ; i < size ; i++) 126 | print_stack_frame(fp, i); 127 | fputs(" done.\n\n", stderr); 128 | fclose(fp); 129 | #endif 130 | } 131 | 132 | 133 | void dump_var(const char *name, OBJ value) { 134 | const uint32 BUFF_SIZE = 512; 135 | char buffer[BUFF_SIZE]; 136 | printed_obj_or_filename(value, true, buffer, BUFF_SIZE); 137 | fprintf(stderr, "%s = %s\n\n", name, buffer); 138 | } 139 | 140 | //////////////////////////////////////////////////////////////////////////////// 141 | 142 | void print_assertion_failed_msg(const char *file, uint32 line, const char *text) { 143 | if (text == NULL) 144 | fprintf(stderr, "\nAssertion failed. 
File: %s, line: %d\n\n", file, line); 145 | else 146 | fprintf(stderr, "\nAssertion failed: %s\nFile: %s, line: %d\n\n", text, file, line); 147 | } 148 | 149 | //////////////////////////////////////////////////////////////////////////////// 150 | 151 | void soft_fail(const char *msg) { 152 | #ifndef CELL_LANG_NO_TRANSACTIONS 153 | if (is_in_try_state()) 154 | throw 0LL; 155 | #endif 156 | 157 | if (msg != NULL) 158 | fprintf(stderr, "%s\n\n", msg); 159 | print_call_stack(); 160 | *(char *)0 = 0; 161 | } 162 | 163 | void impl_fail(const char *msg) { 164 | if (msg != NULL) 165 | fprintf(stderr, "%s\n\n", msg); 166 | print_call_stack(); 167 | *(char *)0 = 0; 168 | } 169 | 170 | void internal_fail() { 171 | fputs("Internal error!\n", stderr); 172 | fflush(stderr); 173 | print_call_stack(); 174 | *(char *)0 = 0; 175 | } 176 | -------------------------------------------------------------------------------- /src/mem-core.cpp: -------------------------------------------------------------------------------- 1 | #include "lib.h" 2 | 3 | 4 | unsigned int size_code_size(int size_code); 5 | void *alloc_mem_block(int byte_size); 6 | void release_mem_block(void *ptr, int byte_size); 7 | 8 | //////////////////////////////////////////////////////////////////////////////// 9 | 10 | int min_size_code_ref(uint32 byte_size) { 11 | if (byte_size <= 64) 12 | return 0; 13 | if (byte_size <= 128) 14 | return 1; 15 | if (byte_size <= 256) 16 | return 2; 17 | if (byte_size <= 512) 18 | return 3; 19 | if (byte_size <= 1024) 20 | return 4; 21 | if (byte_size <= 2048) 22 | return 5; 23 | 24 | for (int i=0 ; i < 20 ; i++) 25 | if (byte_size <= 4096 << i) 26 | return -(1 << i); 27 | 28 | return -(1 << 20); 29 | } 30 | 31 | int min_size_code_fast(uint32 byte_size) { 32 | const uint64 SLOT_MASK = 33 | (0ULL << 1 * 4) | // 64 34 | (1ULL << 2 * 4) | // 128 35 | (2ULL << 3 * 4) | // 196 36 | (2ULL << 4 * 4) | // 256 37 | (3ULL << 5 * 4) | // 320 38 | (3ULL << 6 * 4) | // 384 39 | (3ULL << 7 * 4) | // 
448 40 | (3ULL << 8 * 4) | // 512 41 | (4ULL << 9 * 4) | // 576 42 | (4ULL << 10 * 4) | // 640 43 | (4ULL << 11 * 4) | // 704 44 | (4ULL << 12 * 4) | // 768 45 | (4ULL << 13 * 4) | // 832 46 | (4ULL << 14 * 4) | // 896 47 | (4ULL << 15 * 4); // 960 48 | 49 | assert(byte_size > 0); 50 | 51 | if (byte_size <= 960) { 52 | int blocks_64_count = (byte_size + 63) / 64; 53 | assert(blocks_64_count < 16); 54 | return (SLOT_MASK >> (4 * blocks_64_count)) & 7; 55 | } 56 | 57 | if (byte_size <= 1024) 58 | return 4; 59 | 60 | if (byte_size <= 2048) 61 | return 5; 62 | 63 | for (int i=0 ; i < 20 ; i++) 64 | if (byte_size <= 4096 << i) 65 | return -(1 << i); 66 | 67 | return -(1 << 20); 68 | } 69 | 70 | int min_size_code(uint32 byte_size) { 71 | int code = min_size_code_fast(byte_size); 72 | // int ref_code = min_size_code_ref(byte_size); 73 | assert(code == min_size_code_ref(byte_size)); 74 | assert(byte_size <= size_code_size(code)); 75 | return code; 76 | } 77 | 78 | //////////////////////////////////////////////////////////////////////////////// 79 | //////////////////////////////////////////////////////////////////////////////// 80 | 81 | #ifndef NDEBUG 82 | 83 | uint32 num_of_live_objs; 84 | uint32 max_num_of_live_objs; 85 | uint32 total_num_of_objs; 86 | 87 | uint32 live_mem_usage; 88 | uint32 max_live_mem_usage; 89 | uint32 total_mem_requested; 90 | 91 | std::set live_objs; 92 | 93 | void inc_live_obj_count(uint32 byte_size) { 94 | num_of_live_objs++; 95 | total_num_of_objs++; 96 | if (num_of_live_objs > max_num_of_live_objs) 97 | max_num_of_live_objs = num_of_live_objs; 98 | 99 | live_mem_usage += byte_size; 100 | total_mem_requested += byte_size; 101 | if (live_mem_usage > max_live_mem_usage) 102 | max_live_mem_usage = live_mem_usage; 103 | } 104 | 105 | void dec_live_obj_count(uint32 byte_size) { 106 | num_of_live_objs--; 107 | live_mem_usage -= byte_size; 108 | } 109 | 110 | uint32 get_live_objs_count() { 111 | return num_of_live_objs; 112 | } 113 | 114 | uint32 
get_max_live_objs_count() { 115 | return max_num_of_live_objs; 116 | } 117 | 118 | uint32 get_total_objs_count() { 119 | return total_num_of_objs; 120 | } 121 | 122 | uint32 get_live_mem_usage() { 123 | return live_mem_usage; 124 | } 125 | 126 | uint32 get_max_live_mem_usage() { 127 | return max_live_mem_usage; 128 | } 129 | 130 | uint32 get_total_mem_requested() { 131 | return total_mem_requested; 132 | } 133 | 134 | void print_all_live_objs() { 135 | if (!live_objs.empty()) { 136 | fprintf(stderr, "Live objects:\n"); 137 | for (std::set::iterator it = live_objs.begin() ; it != live_objs.end() ; it++) { 138 | void *ptr = *it; 139 | printf(" %8llx\n", (unsigned long long)ptr); 140 | } 141 | fflush(stdout); 142 | } 143 | } 144 | 145 | bool is_alive(void *obj) { 146 | return live_objs.find(obj) != live_objs.end(); 147 | } 148 | 149 | #endif 150 | 151 | //////////////////////////////////////////////////////////////////////////////// 152 | //////////////////////////////////////////////////////////////////////////////// 153 | 154 | void *new_obj(uint32 byte_size) { 155 | void *mem_block = alloc_mem_block(min_size_code(byte_size)); 156 | 157 | #ifndef NDEBUG 158 | if (!is_in_try_state()) { 159 | inc_live_obj_count(byte_size); //## THE SIZE IS THE WRONG ONE, BUT IT IS THE SAME THAT IS REPORTED BACK TO free_obj 160 | live_objs.insert(mem_block); 161 | } 162 | #endif 163 | 164 | return mem_block; 165 | } 166 | 167 | void *new_obj(uint32 byte_size_requested, uint32 &byte_size_returned) { 168 | int size_code = min_size_code(byte_size_requested); 169 | byte_size_returned = size_code_size(size_code); 170 | void *mem_block = alloc_mem_block(size_code); 171 | 172 | #ifndef NDEBUG 173 | if (!is_in_try_state()) { 174 | inc_live_obj_count(byte_size_returned); 175 | live_objs.insert(mem_block); 176 | } 177 | #endif 178 | 179 | return mem_block; 180 | } 181 | 182 | void free_obj(void *ptr, uint32 byte_size) { 183 | #ifndef NDEBUG 184 | if (!is_in_try_state()) { 185 | 
// Sorts `xs` in ascending order and removes all duplicate elements, in place.
// T must be comparable with operator < and operator ==.
template <typename T> void sort_unique(std::vector<T> &xs) {
  std::sort(xs.begin(), xs.end());
  // std::unique only moves the duplicates to the tail; erase() drops them
  xs.erase(std::unique(xs.begin(), xs.end()), xs.end());
}
//////////////////////////////////////////////////////////////////////////////// 52 | 53 | inline void build(tuple3 &tuple, uint32 val0, uint32 val1, uint32 val2) { 54 | tuple.fields01 = pack(val0, val1); 55 | tuple.field2 = val2; 56 | } 57 | 58 | inline void shift(tuple3 &tuple) { 59 | uint32 new_field2 = left(tuple.fields01); 60 | tuple.fields01 = pack(tuple.fields01, tuple.field2); 61 | tuple.field2 = new_field2; 62 | } 63 | 64 | inline tuple3 lower_bound(uint64 key) { 65 | tuple3 lb; 66 | lb.fields01 = key; 67 | lb.field2 = 0; 68 | return lb; 69 | } 70 | 71 | struct cols_01 { 72 | typedef uint64 key_type; 73 | static uint64 key(const tuple3 &tuple) { 74 | return tuple.fields01; 75 | } 76 | static uint64 key_shifted(const tuple3 &unshifted_tuple) { 77 | return unshifted_tuple.fields01; 78 | } 79 | static tuple3 lower_bound(uint64 key) { 80 | return ::lower_bound(key); 81 | } 82 | static const bool SORTED = true; 83 | }; 84 | 85 | struct cols_12 { 86 | typedef uint64 key_type; 87 | static uint64 key(const tuple3 &tuple) { 88 | return pack(right(tuple.fields01), tuple.field2); 89 | } 90 | static uint64 key_shifted(const tuple3 &shifted_once_tuple) { 91 | return shifted_once_tuple.fields01; 92 | } 93 | static tuple3 lower_bound(uint64 key) { 94 | return ::lower_bound(key); 95 | } 96 | static const bool SORTED = false; 97 | }; 98 | 99 | struct cols_20 { 100 | typedef uint64 key_type; 101 | static uint64 key(const tuple3 &tuple) { 102 | return pack(tuple.field2, left(tuple.fields01)); 103 | } 104 | static uint64 key_shifted(const tuple3 &shifted_twice_tuple) { 105 | return shifted_twice_tuple.fields01; 106 | } 107 | static tuple3 lower_bound(uint64 key) { 108 | return ::lower_bound(key); 109 | } 110 | static const bool SORTED = false; 111 | }; 112 | 113 | struct col_2 { 114 | typedef uint32 key_type; 115 | static uint32 key(const tuple3 &tuple) { 116 | return tuple.field2; 117 | } 118 | static uint32 key_shifted(const tuple3 &shifted_twice_tuple) { 119 | return 
// Returns true if xs contains at least two equal elements.
// The vector must already be sorted in ascending order, so that equal
// elements are adjacent: only neighbours are compared. In debug builds the
// ordering is verified along the way.
// Elements are held in their own type T (they used to be copied into a
// uint64 whatever T was) and the index is unsigned like the element count
// (it used to be a signed int compared against an unsigned bound).
template <typename T> bool sorted_vector_has_duplicates(std::vector<T> &xs) {
  size_t count = xs.size();
  if (count == 0)
    return false;

  T last_x = xs[0];
  for (size_t i=1 ; i < count ; i++) {
    T x = xs[i];
    if (x == last_x)
      return true;
    // Equal neighbours have just been ruled out, so a sorted input has to be strictly increasing here
    assert(x > last_x);
    last_x = x;
  }
  return false;
}
// Allocates a block of 4096 * page_count bytes with malloc().
//   page_count  number of 4096-byte pages requested, must be greater than zero
// Returns whatever malloc() returns: the result is not checked here, so the
// caller gets NULL if the allocation fails.
void *alloc_pages(unsigned int page_count) {
  assert(page_count > 0);
  // The byte count is computed in size_t. The product of an int and an
  // unsigned int is an unsigned int, which wraps around once page_count
  // reaches 2^20: malloc() would then be asked for a block that is too small
  size_t byte_count = (size_t) 4096 * page_count;
  void *ptr = malloc(byte_count);
  // printf("+ %8llx - %4d\n", (unsigned long long) ptr, page_count);
  return ptr;
}
// Releases all the memory still recorded in try_mem_alloc and leaves the
// try-state allocator empty. It is called when the try state is left, either
// by return_to_normal_state() or by abort_try_state().
//## NOTE(review): THE TEMPLATE ARGUMENTS OF THE TWO CONTAINERS ARE NOT VISIBLE IN THIS VIEW OF THE FILE, SO THE ITERATOR TYPES BELOW ARE REPRODUCED AS FOUND
void release_all_try_state_memory() {
  // Blocks that were carved up to feed the small-object pools. Each one is
  // released as a 16-page block, the same page count alloc_mem_block()
  // passes to alloc_pages() when it refills an empty pool
  std::vector::iterator pit = try_mem_alloc.pooled_blocks.begin();
  std::vector::iterator pend = try_mem_alloc.pooled_blocks.end();
  for ( ; pit != pend ; pit++)
    release_pages(*pit, 16);
  try_mem_alloc.pooled_blocks.clear();

  // Large blocks that are still outstanding. The map goes from the address
  // of the block to the number of pages it was allocated with
  std::map::iterator lit = try_mem_alloc.large_blocks.begin();
  std::map::iterator lend = try_mem_alloc.large_blocks.end();
  for ( ; lit != lend ; lit++)
    release_pages(lit->first, lit->second);
  try_mem_alloc.large_blocks.clear();

  // The free lists were threaded through the blocks that have just been
  // released, so the list heads are reset rather than left dangling
  for (int i=0 ; i < SLOT_COUNT ; i++)
    try_mem_alloc.mem_blocks_pool[i] = NULL;
}
ptr) + block_size; 164 | return ptr; 165 | } 166 | else { 167 | void *next = * (void **) head; 168 | *pool_head = next; 169 | return head; 170 | } 171 | } 172 | else { 173 | void *ptr = alloc_pages(-size_code); 174 | if (curr_mem_alloc_state == TRY) 175 | try_mem_alloc.large_blocks[ptr] = -size_code; 176 | #ifndef NDEBUG 177 | memset(ptr, 0xFF, -size_code * 4096); 178 | #endif 179 | return ptr; 180 | } 181 | } 182 | 183 | void release_mem_block(void *ptr, int size_code) { 184 | assert(size_code <= 5); 185 | 186 | if (size_code >= 0) { 187 | #ifndef NDEBUG 188 | unsigned int block_size = size_code_size(size_code); 189 | memset(ptr, 0xFF, block_size); 190 | #endif 191 | void **pool_head = curr_mem_blocks_pool + size_code; 192 | void *tail = *pool_head; 193 | * (void **) ptr = tail; 194 | *pool_head = ptr; 195 | } 196 | else { 197 | release_pages(ptr, -size_code); 198 | if (curr_mem_alloc_state == TRY) 199 | try_mem_alloc.large_blocks.erase(ptr); 200 | } 201 | } 202 | -------------------------------------------------------------------------------- /src/inter-utils.cpp: -------------------------------------------------------------------------------- 1 | #include "lib.h" 2 | 3 | 4 | int64 from_utf8(const char *input, OBJ *output) { 5 | uint32 idx = 0; 6 | for (uint32 count=0 ; ; count++) { 7 | unsigned char ch = input[idx++]; 8 | if (ch == 0) 9 | return count; 10 | uint32 val; 11 | int size; 12 | if (ch >> 7 == 0) { // 0xxxxxxx 13 | size = 0; 14 | val = ch; 15 | } 16 | else if (ch >> 5 == 6) { // 110xxxxx 10xxxxxx 17 | val = (ch & 0x1F) << 6; 18 | size = 1; 19 | } 20 | else if (ch >> 4 == 0xE) { // 1110xxxx 10xxxxxx 10xxxxxx 21 | val = (ch & 0xF) << 12; 22 | size = 2; 23 | } 24 | else if (ch >> 3 == 0x1E) { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 25 | val = (ch & 0xF) << 18; 26 | size = 3; 27 | } 28 | else 29 | return -idx; 30 | 31 | for (int i=0 ; i < size ; i++) { 32 | ch = input[idx++]; 33 | if (ch >> 6 != 2) 34 | return -idx; 35 | val |= (ch & 0x3F) << (6 * (size - i 
- 1)); 36 | } 37 | 38 | if (output != NULL) 39 | output[count] = make_int(val); 40 | } 41 | } 42 | 43 | OBJ str_to_obj(const char *c_str) { 44 | OBJ raw_str_obj; 45 | 46 | if (c_str[0] == 0) { 47 | raw_str_obj = make_empty_seq(); 48 | } 49 | else { 50 | int64 size = from_utf8(c_str, NULL); 51 | SEQ_OBJ *raw_str = new_seq(size); 52 | from_utf8(c_str, raw_str->buffer); 53 | raw_str_obj = make_seq(raw_str, size); 54 | } 55 | 56 | return make_tag_obj(symb_idx_string, raw_str_obj); 57 | } 58 | 59 | //////////////////////////////////////////////////////////////////////////////// 60 | 61 | int64 to_utf8(const OBJ *chars, uint32 len, char *output) { 62 | int offset = 0; 63 | for (uint32 i=0 ; i < len ; i++) { 64 | int64 cp = get_int_val(chars[i]); 65 | if (cp < 0x80) { 66 | if (output != NULL) 67 | output[offset] = cp; 68 | offset++; 69 | } 70 | else if (cp < 0x800) { 71 | if (output != NULL) { 72 | output[offset] = 0xC0 | (cp >> 6); 73 | output[offset+1] = 0x80 | (cp & 0x3F); 74 | } 75 | offset += 2; 76 | } 77 | else if (cp < 0x10000) { 78 | if (output != NULL) { 79 | output[offset] = 0xE0 | (cp >> 12); 80 | output[offset+1] = 0x80 | ((cp >> 6) & 0x3F); 81 | output[offset+2] = 0x80 | (cp & 0x3F); 82 | } 83 | offset += 3; 84 | } 85 | else { 86 | if (output != NULL) { 87 | output[offset] = 0xF0 | (cp >> 18); 88 | output[offset+1] = 0x80 | ((cp >> 12) & 0x3F); 89 | output[offset+2] = 0x80 | ((cp >> 6) & 0x3F); 90 | output[offset+3] = 0x80 | (cp & 0x3F); 91 | } 92 | offset += 4; 93 | } 94 | } 95 | if (output != NULL) 96 | output[offset] = 0; 97 | return offset + 1; 98 | } 99 | 100 | void obj_to_str(OBJ str_obj, char *buffer, uint32 size) { 101 | OBJ raw_str_obj = get_inner_obj(str_obj); 102 | 103 | if (!is_empty_seq(raw_str_obj)) { 104 | OBJ *seq_buffer = get_seq_buffer_ptr(raw_str_obj); 105 | uint32 len = get_seq_length(raw_str_obj); 106 | int64 min_size = to_utf8(seq_buffer, len, NULL); 107 | if (size < min_size) 108 | internal_fail(); 109 | to_utf8(seq_buffer, len, 
buffer); 110 | } 111 | else 112 | buffer[0] = '\0'; 113 | } 114 | 115 | char *obj_to_byte_array(OBJ byte_seq_obj, uint32 &size) { 116 | if (is_empty_seq(byte_seq_obj)) { 117 | size = 0; 118 | return NULL; 119 | } 120 | 121 | uint32 len = get_seq_length(byte_seq_obj); 122 | OBJ *elems = get_seq_buffer_ptr(byte_seq_obj); 123 | char *buffer = new_byte_array(len); 124 | for (uint32 i=0 ; i < len ; i++) { 125 | long long val = get_int_val(elems[i]); 126 | assert(val >= 0 && val <= 255); 127 | buffer[i] = (char) val; 128 | } 129 | size = len; 130 | return buffer; 131 | } 132 | 133 | char *obj_to_str(OBJ str_obj) { 134 | uint32 size = get_seq_length(get_inner_obj(str_obj)) + 1; 135 | char *buffer = new_byte_array(size); 136 | obj_to_str(str_obj, buffer, size); 137 | return buffer; 138 | } 139 | 140 | //////////////////////////////////////////////////////////////////////////////// 141 | 142 | static std::vector cached_objs; 143 | 144 | void add_obj_to_cache(OBJ obj) { 145 | if (is_ref_obj(obj)) 146 | cached_objs.push_back(obj); 147 | } 148 | 149 | void release_all_cached_objs() { 150 | uint32 count = cached_objs.size(); 151 | for (uint32 i=0 ; i < count ; i++) 152 | release(cached_objs[i]); 153 | cached_objs.clear(); 154 | } 155 | 156 | //////////////////////////////////////////////////////////////////////////////// 157 | 158 | bool str_ord(const char *str1, const char *str2) { 159 | return strcmp(str1, str2) > 0; 160 | } 161 | 162 | typedef std::map str_idx_map_type; 163 | 164 | str_idx_map_type str_to_symb_map(str_ord); 165 | //## THESE STRINGS ARE NEVER CLEANED UP. NOT MUCH OF A PROBLEM IN PRACTICE, BUT STILL A BUG... 
166 | std::vector dynamic_symbs_strs; 167 | 168 | const char *symb_repr(uint16); 169 | uint32 embedded_symbs_count(); 170 | 171 | const char *symb_to_raw_str(OBJ obj) { 172 | assert(is_symb(obj)); 173 | uint16 idx = get_symb_idx(obj); 174 | uint32 count = embedded_symbs_count(); 175 | if (idx < count) 176 | return symb_repr(idx); 177 | else 178 | return dynamic_symbs_strs[idx - count]; 179 | } 180 | 181 | OBJ to_str(OBJ obj) { 182 | return str_to_obj(symb_to_raw_str(obj)); 183 | } 184 | 185 | uint16 lookup_symb_idx(const char *str_, uint32 len) { 186 | uint32 count = embedded_symbs_count(); 187 | 188 | if (str_to_symb_map.size() == 0) 189 | for (uint32 i=0 ; i < count ; i++) 190 | str_to_symb_map[symb_repr(i)] = i; 191 | 192 | char *str = strndup(str_, len); 193 | 194 | str_idx_map_type::iterator it = str_to_symb_map.find(str); 195 | if (it != str_to_symb_map.end()) { 196 | free(str); 197 | return it->second; 198 | } 199 | 200 | uint32 next_symb_id = count + dynamic_symbs_strs.size(); 201 | if (next_symb_id > 0xFFFF) 202 | impl_fail("Exceeded maximum permitted number of symbols (= 2^16)"); 203 | dynamic_symbs_strs.push_back(str); 204 | str_to_symb_map[str] = next_symb_id; 205 | return next_symb_id; 206 | } 207 | 208 | OBJ to_symb(OBJ obj) { 209 | char *str = obj_to_str(obj); 210 | uint32 len = strlen(str); 211 | uint16 symb_idx = lookup_symb_idx(str, len); 212 | delete_byte_array(str, len+1); 213 | return make_symb(symb_idx); 214 | } 215 | 216 | OBJ extern_str_to_symb(const char *str) { 217 | //## CHECK THAT IT'S A VALID SYMBOL, AND THAT IT'S AMONG THE "STATIC" ONES 218 | return make_symb(lookup_symb_idx(str, strlen(str))); 219 | } 220 | -------------------------------------------------------------------------------- /src/tern-rel-obj.cpp: -------------------------------------------------------------------------------- 1 | #include "lib.h" 2 | 3 | 4 | OBJ build_tern_rel(OBJ *vals1, OBJ *vals2, OBJ *vals3, uint32 size) { 5 | if (size == 0) 6 | return 
make_empty_rel(); 7 | 8 | // Creating the array of indexes sorted by column 1, column 2, column 3, index 9 | uint32 *index = new_uint32_array(size); 10 | index_sort(index, vals1, vals2, vals3, size); 11 | 12 | // Counting the number of unique tuples and releasing unnecessary objects 13 | uint32 unique_tuples = 1; 14 | uint32 prev_idx = index[0]; 15 | for (uint32 i=1 ; i < size ; i++) { 16 | uint32 idx = index[i]; 17 | bool neq = comp_objs(vals1[idx], vals1[prev_idx]) != 0 || 18 | comp_objs(vals2[idx], vals2[prev_idx]) != 0 || 19 | comp_objs(vals3[idx], vals3[prev_idx]) != 0; 20 | if (neq) { 21 | unique_tuples++; 22 | prev_idx = idx; 23 | } 24 | else { 25 | // Duplicate tuple, marking the entry as duplicate and releasing the objects 26 | index[i] = INVALID_INDEX; 27 | release(vals1[idx]); 28 | release(vals2[idx]); 29 | release(vals3[idx]); 30 | } 31 | } 32 | 33 | // Creating the new binary relation object 34 | TERN_REL_OBJ *rel = new_tern_rel(unique_tuples); 35 | 36 | OBJ *col1 = get_col_array_ptr(rel, 0); 37 | OBJ *col2 = get_col_array_ptr(rel, 1); 38 | OBJ *col3 = get_col_array_ptr(rel, 2); 39 | 40 | // Copying the sorted, non-duplicate tuples into their final destination 41 | uint32 count = 0; 42 | for (uint32 i=0 ; i < size ; i++) { 43 | uint32 idx = index[i]; 44 | if (idx != INVALID_INDEX) { 45 | col1[count] = vals1[idx]; 46 | col2[count] = vals2[idx]; 47 | col3[count] = vals3[idx]; 48 | count++; 49 | } 50 | } 51 | assert(count == unique_tuples); 52 | 53 | // Creating the two indexes 54 | uint32 *index_1 = get_rotated_index(rel, 1); 55 | uint32 *index_2 = get_rotated_index(rel, 2); 56 | stable_index_sort(index_1, col2, col3, count); 57 | stable_index_sort(index_2, col3, count); 58 | 59 | #ifndef NDEBUG 60 | for (uint32 i=1 ; i < count ; i++) { 61 | int cr_1 = comp_objs(col1[i-1], col1[i]); 62 | int cr_2 = comp_objs(col2[i-1], col2[i]); 63 | int cr_3 = comp_objs(col3[i-1], col3[i]); 64 | assert(cr_1 > 0 | (cr_1 == 0 & (cr_2 > 0 | (cr_2 == 0 & cr_3 > 0)))); 65 | 
} 66 | 67 | for (uint32 i=1 ; i < count ; i++) { 68 | uint32 curr_idx = index_1[i]; 69 | uint32 prev_idx = index_1[i-1]; 70 | int cr_1 = comp_objs(col1[prev_idx], col1[curr_idx]); 71 | int cr_2 = comp_objs(col2[prev_idx], col2[curr_idx]); 72 | int cr_3 = comp_objs(col3[prev_idx], col3[curr_idx]); 73 | assert(cr_2 > 0 | (cr_2 == 0 & (cr_3 > 0 | (cr_3 == 0 & cr_1 > 0)))); 74 | } 75 | 76 | for (uint32 i=1 ; i < count ; i++) { 77 | uint32 curr_idx = index_2[i]; 78 | uint32 prev_idx = index_2[i-1]; 79 | int cr_1 = comp_objs(col1[prev_idx], col1[curr_idx]); 80 | int cr_2 = comp_objs(col2[prev_idx], col2[curr_idx]); 81 | int cr_3 = comp_objs(col3[prev_idx], col3[curr_idx]); 82 | assert(cr_3 > 0 | (cr_3 == 0 & (cr_1 > 0 | (cr_1 == 0 & cr_2 > 0)))); 83 | } 84 | #endif 85 | 86 | delete_uint32_array(index, size); 87 | 88 | return make_tern_rel(rel); 89 | } 90 | 91 | //////////////////////////////////////////////////////////////////////////////// 92 | 93 | OBJ build_tern_rel(STREAM &stream1, STREAM &stream2, STREAM &stream3) { 94 | assert(stream1.count == stream2.count & stream2.count == stream3.count); 95 | 96 | if (stream1.count == 0) 97 | return make_empty_rel(); 98 | 99 | OBJ rel = build_tern_rel(stream1.buffer, stream2.buffer, stream3.buffer, stream1.count); 100 | 101 | delete_obj_array(stream1.buffer, stream1.capacity); 102 | delete_obj_array(stream2.buffer, stream2.capacity); 103 | delete_obj_array(stream3.buffer, stream3.capacity); 104 | 105 | return rel; 106 | } 107 | 108 | //////////////////////////////////////////////////////////////////////////////// 109 | //////////////////////////////////////////////////////////////////////////////// 110 | 111 | void get_tern_rel_null_iter(TERN_REL_ITER &it) { 112 | it.col1 = NULL; // Not strictly necessary 113 | it.col2 = NULL; // Not strictly necessary 114 | it.col3 = NULL; // Not strictly necessary 115 | it.ordered_idxs = NULL; 116 | it.idx = 0; 117 | it.end = 0; 118 | } 119 | 120 | void get_tern_rel_iter(TERN_REL_ITER &it, 
OBJ rel) { 121 | assert(is_tern_rel(rel)); 122 | 123 | if (is_ne_tern_rel(rel)) { 124 | TERN_REL_OBJ *ptr = get_tern_rel_ptr(rel); 125 | it.col1 = get_col_array_ptr(ptr, 0); 126 | it.col2 = get_col_array_ptr(ptr, 1); 127 | it.col3 = get_col_array_ptr(ptr, 2); 128 | it.ordered_idxs = NULL; 129 | it.idx = 0; 130 | it.end = ptr->size; 131 | } 132 | else 133 | get_tern_rel_null_iter(it); 134 | } 135 | 136 | void get_tern_rel_iter_by(TERN_REL_ITER &it, OBJ rel, int col_idx, OBJ arg) { 137 | assert(is_tern_rel(rel)); 138 | assert(col_idx >= 0 & col_idx <= 2); 139 | 140 | if (is_ne_tern_rel(rel)) { 141 | TERN_REL_OBJ *ptr = get_tern_rel_ptr(rel); 142 | uint32 size = ptr->size; 143 | OBJ *col = get_col_array_ptr(ptr, col_idx); 144 | 145 | uint32 *index; 146 | uint32 count, first; 147 | if (col_idx == 0) { 148 | index = NULL; 149 | first = find_objs_range(col, size, arg, count); 150 | } 151 | else { 152 | index = get_rotated_index(ptr, col_idx); 153 | first = find_idxs_range(index, col, size, arg, count); 154 | } 155 | 156 | if (count > 0) { 157 | it.col1 = get_col_array_ptr(ptr, 0); 158 | it.col2 = get_col_array_ptr(ptr, 1); 159 | it.col3 = get_col_array_ptr(ptr, 2); 160 | it.ordered_idxs = index; 161 | it.idx = first; 162 | it.end = first + count; 163 | return; 164 | } 165 | } 166 | 167 | get_tern_rel_null_iter(it); 168 | } 169 | 170 | void get_tern_rel_iter_by(TERN_REL_ITER &it, OBJ rel, int major_col_idx, OBJ major_arg, OBJ minor_arg) { 171 | assert(is_tern_rel(rel)); 172 | assert(major_col_idx >= 0 & major_col_idx <= 2); 173 | 174 | if (is_ne_tern_rel(rel)) { 175 | TERN_REL_OBJ *ptr = get_tern_rel_ptr(rel); 176 | uint32 size = ptr->size; 177 | OBJ *major_col = get_col_array_ptr(ptr, major_col_idx); 178 | OBJ *minor_col = get_col_array_ptr(ptr, (major_col_idx + 1) % 3); 179 | 180 | uint32 *index; 181 | uint32 count, first; 182 | if (major_col_idx == 0) { 183 | index = NULL; 184 | first = find_objs_range(major_col, minor_col, size, major_arg, minor_arg, count); 185 | } 
186 | else { 187 | index = get_rotated_index(ptr, major_col_idx); 188 | first = find_idxs_range(index, major_col, minor_col, size, major_arg, minor_arg, count); 189 | } 190 | 191 | if (count > 0) { 192 | it.col1 = get_col_array_ptr(ptr, 0); 193 | it.col2 = get_col_array_ptr(ptr, 1); 194 | it.col3 = get_col_array_ptr(ptr, 2); 195 | it.ordered_idxs = index; 196 | it.idx = first; 197 | it.end = first + count; 198 | return; 199 | } 200 | } 201 | 202 | get_tern_rel_null_iter(it); 203 | } 204 | -------------------------------------------------------------------------------- /src/mem-copying.cpp: -------------------------------------------------------------------------------- 1 | #include "lib.h" 2 | 3 | 4 | SEQ_OBJ *make_or_get_seq_obj_copy(SEQ_OBJ *seq) { 5 | if (seq->capacity > 0) { 6 | // The object has not been copied yet. Making a new object large enough 7 | // to accomodate all the elements of the original sequence. We are using 8 | // and not for the new sequence because it's best to 9 | // leave the decision of how much extra memory to allocate to the memory 10 | // allocator, which is aware of the context. 11 | //## IF THIS IS THE ONLY REFERENCE TO THE SEQUENCE, AND IF IS 12 | //## LOWER THAN SIZE, WE COULD COPY ONLY THE FIRST LENGTH ELEMENTS... 13 | uint32 size = seq->size; 14 | SEQ_OBJ *seq_copy = new_seq(size); 15 | // Now we copy all the elements of the sequence 16 | OBJ *buff = seq->buffer; 17 | OBJ *buff_copy = seq_copy->buffer; 18 | for (int i=0 ; i < size ; i++) 19 | buff_copy[i] = copy_obj(buff[i]); 20 | // We mark the old sequence as "copied", and we store a pointer to the copy 21 | // into it. The fields of the original object are never going to be used again, 22 | // even by the memory manager, so we can safely overwrite them. 23 | seq->capacity = 0; 24 | * (SEQ_OBJ **) buff = seq_copy; 25 | // Returning the new object 26 | return seq_copy; 27 | } 28 | else { 29 | // The object has already been copied. 
We just return a (reference-counted) pointer to the copy 30 | SEQ_OBJ *seq_copy = * (SEQ_OBJ **) seq->buffer; 31 | add_ref((REF_OBJ *) seq_copy); 32 | return seq_copy; 33 | } 34 | } 35 | 36 | SET_OBJ *make_or_get_set_obj_copy(SET_OBJ *set) { 37 | uint32 size = set->size; 38 | if (size > 0) { 39 | // The object has not been copied yet, so we do it now. 40 | SET_OBJ *set_copy = new_set(size); 41 | // Now we copy all the elements of the sequence 42 | OBJ *buff = set->buffer; 43 | OBJ *buff_copy = set_copy->buffer; 44 | for (int i=0 ; i < size ; i++) 45 | buff_copy[i] = copy_obj(buff[i]); 46 | // We mark the old sequence as "copied", and we store a pointer to the copy 47 | // into it. The fields of the original object are never going to be used again, 48 | // even by the memory manager, so we can safely overwrite them. 49 | set->size = 0; 50 | * (SET_OBJ **) buff = set_copy; 51 | // Returning the new object 52 | return set_copy; 53 | } 54 | else { 55 | // The object has already been copied. We just return a (reference-counted) pointer to the copy 56 | SET_OBJ *set_copy = * (SET_OBJ **) set->buffer; 57 | add_ref((REF_OBJ *) set_copy); 58 | return set_copy; 59 | } 60 | } 61 | 62 | BIN_REL_OBJ *make_or_get_bin_rel_obj_copy(BIN_REL_OBJ *rel) { 63 | uint32 size = rel->size; 64 | if (size > 0) { 65 | // The object has not been copied yet, so we do it now. 66 | BIN_REL_OBJ *rel_copy = new_bin_rel(size); 67 | // Now we copy all the elements of the collection 68 | OBJ *buff = rel->buffer; 69 | OBJ *buff_copy = rel_copy->buffer; 70 | for (int i=0 ; i < 2 * size ; i++) 71 | buff_copy[i] = copy_obj(buff[i]); 72 | // Now we copy the extra data at the end 73 | uint32 *rev_idxs = get_right_to_left_indexes(rel); 74 | uint32 *rev_idxs_copy = get_right_to_left_indexes(rel_copy); 75 | memcpy(rev_idxs_copy, rev_idxs, size * sizeof(uint32)); 76 | // We mark the old object as "copied", and we store a pointer to the copy 77 | // into it. 
The fields of the original object are never going to be used again, 78 | // even by the memory manager, so we can safely overwrite them. 79 | rel->size = 0; 80 | * (BIN_REL_OBJ **) buff = rel_copy; 81 | // Returning the new object 82 | return rel_copy; 83 | } 84 | else { 85 | // The object has already been copied. We just return a (reference-counted) pointer to the copy 86 | BIN_REL_OBJ *rel_copy = * (BIN_REL_OBJ **) rel->buffer; 87 | add_ref((REF_OBJ *) rel_copy); 88 | return rel_copy; 89 | } 90 | 91 | } 92 | 93 | BIN_REL_OBJ *make_or_get_map_obj_copy(BIN_REL_OBJ *map) { 94 | uint32 size = map->size; 95 | if (size > 0) { 96 | // The object has not been copied yet, so we do it now. 97 | BIN_REL_OBJ *map_copy = new_map(size); 98 | // Now we copy all the elements of the sequence 99 | OBJ *buff = map->buffer; 100 | OBJ *buff_copy = map_copy->buffer; 101 | for (int i=0 ; i < 2 * size ; i++) 102 | buff_copy[i] = copy_obj(buff[i]); 103 | // We mark the old sequence as "copied", and we store a pointer to the copy 104 | // into it. The fields of the original object are never going to be used again, 105 | // even by the memory manager, so we can safely overwrite them. 106 | map->size = 0; 107 | * (BIN_REL_OBJ **) buff = map_copy; 108 | // Returning the new object 109 | return map_copy; 110 | } 111 | else { 112 | // The object has already been copied. We just return a (reference-counted) pointer to the copy 113 | BIN_REL_OBJ *map_copy = * (BIN_REL_OBJ **) map->buffer; 114 | add_ref((REF_OBJ *) map_copy); 115 | return map_copy; 116 | } 117 | } 118 | 119 | TAG_OBJ *make_or_get_tag_obj_copy(TAG_OBJ *tag_obj) { 120 | if (tag_obj->unused_field == 0) { 121 | // The object has not been copied yet, so we do it now 122 | TAG_OBJ *tag_obj_copy = new_tag_obj(); 123 | tag_obj_copy->tag_idx = tag_obj->tag_idx; 124 | tag_obj_copy->obj = copy_obj(tag_obj->obj); 125 | // We mark the old object as "copied", and we store a pointer to the copy 126 | // into it. 
The fields of the original object are never going to be used again, 127 | // even by the memory manager, so we can safely overwrite them. 128 | tag_obj->unused_field = 0xFFFF; 129 | * (TAG_OBJ **) &tag_obj->obj = tag_obj_copy; 130 | // Returning the new object 131 | return tag_obj_copy; 132 | } 133 | else { 134 | // The object has already been copied. We just return a (reference-counted) pointer to the copy 135 | TAG_OBJ *tag_obj_copy = * (TAG_OBJ **) &tag_obj->obj; 136 | add_ref((REF_OBJ *) tag_obj_copy); 137 | return tag_obj_copy; 138 | } 139 | } 140 | 141 | //////////////////////////////////////////////////////////////////////////////// 142 | 143 | OBJ copy_obj(OBJ obj) { 144 | if (is_inline_obj(obj)) 145 | return obj; 146 | 147 | if (!uses_try_mem(obj)) { 148 | add_ref(obj); 149 | return obj; 150 | } 151 | 152 | assert(is_in_copying_state()); 153 | 154 | switch (get_physical_type(obj)) { 155 | case TYPE_SEQUENCE: { 156 | SEQ_OBJ *seq_copy = make_or_get_seq_obj_copy(get_seq_ptr(obj)); 157 | return repoint_to_std_mem_copy(obj, seq_copy->buffer); 158 | } 159 | 160 | case TYPE_SLICE: { 161 | SEQ_OBJ *seq_copy = make_or_get_seq_obj_copy(get_seq_ptr(obj)); 162 | OBJ *seq_copy_buffer = seq_copy->buffer; 163 | return repoint_to_std_mem_copy(obj, seq_copy_buffer + get_seq_offset(obj)); 164 | } 165 | 166 | case TYPE_SET: { 167 | SET_OBJ *set_copy = make_or_get_set_obj_copy(get_set_ptr(obj)); 168 | return repoint_to_std_mem_copy(obj, set_copy); 169 | } 170 | 171 | case TYPE_BIN_REL: case TYPE_LOG_MAP: { 172 | BIN_REL_OBJ *rel_copy = make_or_get_bin_rel_obj_copy(get_bin_rel_ptr(obj)); 173 | return repoint_to_std_mem_copy(obj, rel_copy); 174 | } 175 | 176 | case TYPE_MAP: { 177 | BIN_REL_OBJ *map_copy = make_or_get_map_obj_copy(get_bin_rel_ptr(obj)); 178 | return repoint_to_std_mem_copy(obj, map_copy); 179 | } 180 | 181 | case TYPE_TAG_OBJ: { 182 | TAG_OBJ *tag_obj_copy = make_or_get_tag_obj_copy(get_tag_obj_ptr(obj)); 183 | return repoint_to_std_mem_copy(obj, 
tag_obj_copy); 184 | } 185 | 186 | default: 187 | internal_fail(); 188 | } 189 | } 190 | -------------------------------------------------------------------------------- /src/bin-rel-obj.cpp: -------------------------------------------------------------------------------- 1 | #include "lib.h" 2 | 3 | 4 | void build_map_right_to_left_sorted_idx_array(OBJ map) { 5 | assert(get_physical_type(map) == TYPE_MAP); 6 | 7 | BIN_REL_OBJ *ptr = get_bin_rel_ptr(map); 8 | uint32 *rev_idxs = get_right_to_left_indexes(ptr); 9 | if (rev_idxs[0] != INVALID_INDEX) 10 | return; 11 | stable_index_sort(rev_idxs, get_right_col_array_ptr(ptr), ptr->size); 12 | 13 | #ifndef NDEBUG 14 | uint32 size = ptr->size; 15 | OBJ *left_col = get_left_col_array_ptr(ptr); 16 | OBJ *right_col = get_right_col_array_ptr(ptr); 17 | 18 | for (uint32 i=1 ; i < size ; i++) { 19 | int cr_M = comp_objs(left_col[i-1], left_col[i]); 20 | int cr_m = comp_objs(right_col[i-1], right_col[i]); 21 | assert(cr_M > 0 | (cr_M == 0 & cr_m > 0)); 22 | } 23 | 24 | for (uint32 i=1 ; i < size ; i++) { 25 | uint32 curr_idx = rev_idxs[i]; 26 | uint32 prev_idx = rev_idxs[i-1]; 27 | int cr_M = comp_objs(right_col[prev_idx], right_col[curr_idx]); 28 | int cr_m = comp_objs(left_col[prev_idx], left_col[curr_idx]); 29 | assert(cr_M > 0 | (cr_M == 0 & cr_m > 0)); 30 | } 31 | #endif 32 | } 33 | 34 | //////////////////////////////////////////////////////////////////////////////// 35 | 36 | OBJ build_bin_rel(OBJ *vals1, OBJ *vals2, uint32 size) { 37 | if (size == 0) 38 | return make_empty_rel(); 39 | 40 | // Creating the array of indexes sorted by column 1, column 2, index 41 | uint32 *index = new_uint32_array(size); 42 | index_sort(index, vals1, vals2, size); 43 | 44 | // Counting the number of unique tuples and unique values in the left column, 45 | // and releasing unnecessary objects 46 | uint32 unique_tuples = 1; 47 | uint32 prev_idx = index[0]; 48 | bool left_col_is_unique = true; 49 | for (uint32 i=1 ; i < size ; i++) { 50 | 
uint32 idx = index[i]; 51 | if (comp_objs(vals1[idx], vals1[prev_idx]) != 0) { 52 | // The current left column value is new, so the tuple is new too. 53 | unique_tuples++; 54 | prev_idx = idx; 55 | } 56 | else if (comp_objs(vals2[idx], vals2[prev_idx]) != 0) { 57 | // The current left column value is unchanged, but the value in the right column is new, so the tuple is new too 58 | unique_tuples++; 59 | prev_idx = idx; 60 | left_col_is_unique = false; 61 | } 62 | else { 63 | // Duplicate tuple, marking the entry as duplicate and releasing the objects 64 | index[i] = INVALID_INDEX; 65 | release(vals1[idx]); 66 | release(vals2[idx]); 67 | } 68 | } 69 | 70 | // Creating the new binary relation object 71 | BIN_REL_OBJ *rel = new_bin_rel(unique_tuples); 72 | 73 | OBJ *left_col = get_left_col_array_ptr(rel); 74 | OBJ *right_col = get_right_col_array_ptr(rel); 75 | 76 | // Copying the sorted, non-duplicate tuples into their final destination 77 | uint32 count = 0; 78 | for (uint32 i=0 ; i < size ; i++) { 79 | uint32 idx = index[i]; 80 | if (idx != INVALID_INDEX) { 81 | left_col[count] = vals1[idx]; 82 | right_col[count] = vals2[idx]; 83 | count++; 84 | } 85 | } 86 | assert(count == unique_tuples); 87 | 88 | // Creating the reverse index 89 | uint32 *rev_index = get_right_to_left_indexes(rel); 90 | stable_index_sort(rev_index, right_col, count); 91 | 92 | #ifndef NDEBUG 93 | for (uint32 i=1 ; i < count ; i++) { 94 | int cr_M = comp_objs(left_col[i-1], left_col[i]); 95 | int cr_m = comp_objs(right_col[i-1], right_col[i]); 96 | assert(cr_M > 0 | (cr_M == 0 & cr_m > 0)); 97 | } 98 | 99 | for (uint32 i=1 ; i < count ; i++) { 100 | uint32 curr_idx = rev_index[i]; 101 | uint32 prev_idx = rev_index[i-1]; 102 | int cr_M = comp_objs(right_col[prev_idx], right_col[curr_idx]); 103 | int cr_m = comp_objs(left_col[prev_idx], left_col[curr_idx]); 104 | assert(cr_M > 0 | (cr_M == 0 & cr_m > 0)); 105 | } 106 | #endif 107 | 108 | delete_uint32_array(index, size); 109 | 110 | return 
left_col_is_unique ? make_log_map(rel) : make_bin_rel(rel); 111 | } 112 | 113 | //////////////////////////////////////////////////////////////////////////////// 114 | 115 | OBJ build_bin_rel(STREAM &stream1, STREAM &stream2) { 116 | assert(stream1.count == stream2.count); 117 | 118 | if (stream1.count == 0) 119 | return make_empty_rel(); 120 | 121 | OBJ rel = build_bin_rel(stream1.buffer, stream2.buffer, stream1.count); 122 | 123 | delete_obj_array(stream1.buffer, stream1.capacity); 124 | delete_obj_array(stream2.buffer, stream2.capacity); 125 | 126 | return rel; 127 | } 128 | 129 | //////////////////////////////////////////////////////////////////////////////// 130 | //////////////////////////////////////////////////////////////////////////////// 131 | 132 | OBJ build_map(OBJ *keys, OBJ *values, uint32 size) { 133 | if (size == 0) 134 | return make_empty_rel(); 135 | 136 | uint32 actual_size = sort_and_check_no_dups(keys, values, size); 137 | 138 | BIN_REL_OBJ *map = new_map(actual_size); 139 | OBJ *ks = map->buffer; 140 | OBJ *vs = ks + map->size; 141 | 142 | for (uint32 i=0 ; i < actual_size ; i++) { 143 | ks[i] = keys[i]; 144 | vs[i] = values[i]; 145 | } 146 | 147 | return make_map(map); 148 | } 149 | 150 | OBJ build_map(STREAM &key_stream, STREAM &value_stream) { 151 | assert(key_stream.count == value_stream.count); 152 | 153 | if (key_stream.count == 0) 154 | return make_empty_rel(); 155 | 156 | OBJ map = build_map(key_stream.buffer, value_stream.buffer, key_stream.count); 157 | 158 | delete_obj_array(key_stream.buffer, key_stream.capacity); 159 | delete_obj_array(value_stream.buffer, value_stream.capacity); 160 | 161 | return map; 162 | } 163 | 164 | //////////////////////////////////////////////////////////////////////////////// 165 | //////////////////////////////////////////////////////////////////////////////// 166 | 167 | void get_bin_rel_null_iter(BIN_REL_ITER &it) { 168 | it.left_col = NULL; // Not strictly necessary 169 | it.right_col = NULL; // Not 
strictly necessary 170 | it.rev_idxs = NULL; 171 | it.idx = 0; 172 | it.end = 0; 173 | } 174 | 175 | void get_bin_rel_iter(BIN_REL_ITER &it, OBJ rel) { 176 | assert(is_bin_rel(rel)); 177 | 178 | if (!is_empty_rel(rel)) { 179 | BIN_REL_OBJ *ptr = get_bin_rel_ptr(rel); 180 | it.left_col = get_left_col_array_ptr(ptr); 181 | it.right_col = get_right_col_array_ptr(ptr); 182 | it.rev_idxs = NULL; 183 | it.idx = 0; 184 | it.end = ptr->size; 185 | } 186 | else 187 | get_bin_rel_null_iter(it); 188 | } 189 | 190 | void get_bin_rel_iter_0(BIN_REL_ITER &it, OBJ rel, OBJ arg0) { 191 | assert(is_bin_rel(rel)); 192 | 193 | if (is_ne_bin_rel(rel)) { 194 | BIN_REL_OBJ *ptr = get_bin_rel_ptr(rel); 195 | uint32 size = ptr->size; 196 | OBJ *left_col = get_left_col_array_ptr(ptr); 197 | 198 | uint32 count; 199 | uint32 first = find_objs_range(left_col, size, arg0, count); 200 | 201 | if (count > 0) { 202 | it.left_col = left_col; 203 | it.right_col = get_right_col_array_ptr(ptr); 204 | it.rev_idxs = NULL; 205 | it.idx = first; 206 | it.end = first + count; 207 | return; 208 | } 209 | } 210 | 211 | get_bin_rel_null_iter(it); 212 | } 213 | 214 | void get_bin_rel_iter_1(BIN_REL_ITER &it, OBJ rel, OBJ arg1) { 215 | assert(is_bin_rel(rel)); 216 | 217 | if (is_ne_bin_rel(rel)) { 218 | if (get_physical_type(rel) == TYPE_MAP) 219 | build_map_right_to_left_sorted_idx_array(rel); 220 | 221 | BIN_REL_OBJ *ptr = get_bin_rel_ptr(rel); 222 | uint32 size = ptr->size; 223 | OBJ *right_col = get_right_col_array_ptr(ptr); 224 | uint32 *rev_idxs = get_right_to_left_indexes(ptr); 225 | 226 | uint32 count; 227 | uint32 first = find_idxs_range(rev_idxs, right_col, size, arg1, count); 228 | 229 | if (count > 0) { 230 | it.left_col = get_left_col_array_ptr(ptr); 231 | it.right_col = right_col; 232 | it.rev_idxs = rev_idxs; 233 | it.idx = first; 234 | it.end = first + count; 235 | return; 236 | } 237 | } 238 | 239 | get_bin_rel_null_iter(it); 240 | } 241 | 
-------------------------------------------------------------------------------- /src/basic-ops.cpp: -------------------------------------------------------------------------------- 1 | #include "lib.h" 2 | 3 | 4 | bool inline_eq(OBJ obj1, OBJ obj2) { 5 | // assert(is_inline_obj(obj2) & !is_float(obj2)); 6 | assert(is_inline_obj(obj2)); 7 | return are_shallow_eq(obj1, obj2); 8 | } 9 | 10 | bool are_eq(OBJ obj1, OBJ obj2) { 11 | return comp_objs(obj1, obj2) == 0; 12 | } 13 | 14 | bool is_out_of_range(SET_ITER &it) { 15 | return it.idx >= it.size; 16 | } 17 | 18 | bool is_out_of_range(SEQ_ITER &it) { 19 | return it.idx >= it.len; 20 | } 21 | 22 | bool is_out_of_range(BIN_REL_ITER &it) { 23 | return it.idx >= it.end; 24 | } 25 | 26 | bool is_out_of_range(TERN_REL_ITER &it) { 27 | return it.idx >= it.end; 28 | } 29 | 30 | bool has_elem(OBJ set, OBJ elem) { 31 | if (is_empty_rel(set)) 32 | return false; 33 | SET_OBJ *s = get_set_ptr(set); 34 | bool found; 35 | find_obj(s->buffer, s->size, elem, found); 36 | return found; 37 | } 38 | 39 | bool has_pair(OBJ rel, OBJ arg0, OBJ arg1) { 40 | if (is_empty_rel(rel)) 41 | return false; 42 | 43 | BIN_REL_OBJ *ptr = get_bin_rel_ptr(rel); 44 | uint32 size = ptr->size; 45 | OBJ *left_col = get_left_col_array_ptr(ptr); 46 | OBJ *right_col = get_right_col_array_ptr(ptr); 47 | 48 | if (is_ne_map(rel)) { 49 | bool found; 50 | uint32 idx = find_obj(left_col, size, arg0, found); 51 | if (!found) 52 | return false; 53 | return comp_objs(right_col[idx], arg1) == 0; 54 | } 55 | 56 | uint32 count; 57 | uint32 idx = find_objs_range(left_col, size, arg0, count); 58 | if (count == 0) 59 | return false; 60 | bool found; 61 | find_obj(right_col+idx, count, arg1, found); 62 | return found; 63 | } 64 | 65 | bool has_key(OBJ rel, OBJ arg1) { 66 | if (is_empty_rel(rel)) 67 | return false; 68 | 69 | BIN_REL_OBJ *ptr = get_bin_rel_ptr(rel); 70 | uint32 size = ptr->size; 71 | OBJ *left_col = get_left_col_array_ptr(ptr); 72 | 73 | if (is_ne_map(rel)) { 
74 | bool found; 75 | uint32 idx = find_obj(left_col, size, arg1, found); 76 | return found; 77 | } 78 | 79 | uint32 count; 80 | uint32 idx = find_objs_range(left_col, size, arg1, count); 81 | return count > 0; 82 | } 83 | 84 | bool has_field(OBJ rec_or_tag_rec, uint16 field_symb_idx) { 85 | OBJ rec = is_tag_obj(rec_or_tag_rec) ? get_inner_obj(rec_or_tag_rec) : rec_or_tag_rec; 86 | 87 | if (!is_empty_rel(rec)) { 88 | BIN_REL_OBJ *ptr = get_bin_rel_ptr(rec); 89 | uint32 size = ptr->size; 90 | OBJ *keys = ptr->buffer; 91 | for (uint32 i=0 ; i < size ; i++) 92 | if (is_symb(keys[i], field_symb_idx)) 93 | return true; 94 | } 95 | 96 | return false; 97 | } 98 | 99 | bool has_triple(OBJ rel, OBJ arg1, OBJ arg2, OBJ arg3) { 100 | assert(is_tern_rel(rel)); 101 | 102 | if (is_empty_rel(rel)) 103 | return false; 104 | 105 | TERN_REL_OBJ *ptr = get_tern_rel_ptr(rel); 106 | uint32 size = ptr->size; 107 | OBJ *col1 = get_col_array_ptr(ptr, 0); 108 | 109 | uint32 count; 110 | uint32 first = find_objs_range(col1, size, arg1, count); 111 | if (count == 0) 112 | return false; 113 | 114 | OBJ *col2 = get_col_array_ptr(ptr, 1); 115 | 116 | first = first + find_objs_range(col2+first, count, arg2, count); 117 | if (count == 0) 118 | return false; 119 | 120 | OBJ *col3 = get_col_array_ptr(ptr, 2); 121 | 122 | bool found; 123 | find_obj(col3+first, count, arg3, found); 124 | return found; 125 | } 126 | 127 | //////////////////////////////////////////////////////////////////////////////// 128 | 129 | int64 get_int_val(OBJ obj) { 130 | assert(is_int(obj)); 131 | 132 | return get_int(obj); 133 | } 134 | 135 | uint32 get_size(OBJ coll) { 136 | assert(is_seq(coll) | is_set(coll) | is_bin_rel(coll) | is_tern_rel(coll)); 137 | 138 | if (is_seq(coll)) 139 | return get_seq_length(coll); 140 | 141 | if (is_empty_rel(coll)) 142 | return 0; 143 | 144 | if (is_ne_set(coll)) 145 | return get_set_ptr(coll)->size; 146 | 147 | if (is_ne_bin_rel(coll)) 148 | return get_bin_rel_ptr(coll)->size; 149 | 150 | 
return get_tern_rel_ptr(coll)->size; 151 | } 152 | 153 | int64 float_bits(OBJ obj) { 154 | double x = get_float(obj); 155 | return *((int64 *) &x); 156 | } 157 | 158 | int64 mantissa(OBJ obj) { 159 | int64 mantissa; 160 | int32 dec_exp; 161 | mantissa_and_dec_exp(get_float(obj), mantissa, dec_exp); 162 | return mantissa; 163 | } 164 | 165 | int64 dec_exp(OBJ obj) { 166 | int64 mantissa; 167 | int32 dec_exp; 168 | mantissa_and_dec_exp(get_float(obj), mantissa, dec_exp); 169 | return dec_exp; 170 | } 171 | 172 | int64 rand_nat(int64 max) { 173 | assert(max > 0); 174 | return rand() % max; //## BUG: THE FUNCTION rand() ONLY GENERATES A LIMITED RANGE OF INTEGERS 175 | } 176 | 177 | int64 unique_nat() { 178 | static int64 next_val = 0; 179 | return next_val++; 180 | } 181 | 182 | //////////////////////////////////////////////////////////////////////////////// 183 | 184 | OBJ obj_neg(OBJ obj) { 185 | assert(is_bool(obj)); 186 | return make_bool(!get_bool(obj)); 187 | } 188 | 189 | OBJ at(OBJ seq, int64 idx) { 190 | assert(is_seq(seq)); 191 | if (((uint64) idx) >= get_seq_length(seq)) 192 | soft_fail("Invalid sequence index"); 193 | return get_seq_buffer_ptr(seq)[idx]; 194 | } 195 | 196 | OBJ get_tag(OBJ obj) { 197 | return make_symb(get_tag_idx(obj)); 198 | } 199 | 200 | OBJ get_curr_obj(SEQ_ITER &it) { 201 | assert(!is_out_of_range(it)); 202 | return it.buffer[it.idx]; 203 | } 204 | 205 | OBJ get_curr_obj(SET_ITER &it) { 206 | assert(!is_out_of_range(it)); 207 | return it.buffer[it.idx]; 208 | } 209 | 210 | OBJ get_curr_left_arg(BIN_REL_ITER &it) { 211 | assert(!is_out_of_range(it)); 212 | uint32 idx = it.rev_idxs != NULL ? it.rev_idxs[it.idx] : it.idx; 213 | return it.left_col[idx]; 214 | } 215 | 216 | OBJ get_curr_right_arg(BIN_REL_ITER &it) { 217 | assert(!is_out_of_range(it)); 218 | uint32 idx = it.rev_idxs != NULL ? 
it.rev_idxs[it.idx] : it.idx; 219 | return it.right_col[idx]; 220 | } 221 | 222 | OBJ tern_rel_it_get_left_arg(TERN_REL_ITER &it) { 223 | assert(!is_out_of_range(it)); 224 | uint32 idx = it.ordered_idxs != NULL ? it.ordered_idxs[it.idx] : it.idx; 225 | return it.col1[idx]; 226 | } 227 | 228 | OBJ tern_rel_it_get_mid_arg(TERN_REL_ITER &it) { 229 | assert(!is_out_of_range(it)); 230 | uint32 idx = it.ordered_idxs != NULL ? it.ordered_idxs[it.idx] : it.idx; 231 | return it.col2[idx]; 232 | } 233 | 234 | OBJ tern_rel_it_get_right_arg(TERN_REL_ITER &it) { 235 | assert(!is_out_of_range(it)); 236 | uint32 idx = it.ordered_idxs != NULL ? it.ordered_idxs[it.idx] : it.idx; 237 | return it.col3[idx]; 238 | } 239 | 240 | OBJ rand_set_elem(OBJ set) { 241 | SET_OBJ *set_ptr = get_set_ptr(set); 242 | uint32 idx = rand() % set_ptr->size; 243 | return set_ptr->buffer[idx]; 244 | } 245 | 246 | OBJ lookup(OBJ rel, OBJ key) { 247 | if (!is_empty_rel(rel)) { 248 | BIN_REL_OBJ *ptr = get_bin_rel_ptr(rel); 249 | uint32 size = ptr->size; 250 | OBJ *keys = ptr->buffer; 251 | OBJ *values = keys + size; 252 | OBJ_TYPE rel_type = get_physical_type(rel); 253 | if (rel_type == TYPE_MAP | rel_type == TYPE_LOG_MAP) { 254 | bool found; 255 | uint32 idx = find_obj(keys, size, key, found); 256 | if (found) 257 | return values[idx]; 258 | } 259 | else { 260 | assert(rel_type == TYPE_BIN_REL); 261 | uint32 count; 262 | uint32 idx = find_objs_range(keys, size, key, count); 263 | if (count == 1) 264 | return values[idx]; 265 | if (count > 1) 266 | soft_fail("Key is not unique. Lookup failed"); 267 | } 268 | } 269 | 270 | if (is_empty_rel(rel)) 271 | soft_fail("Map is empty. 
Lookup failed"); 272 | 273 | if (is_symb(key)) { 274 | char buff[1024]; 275 | strcpy(buff, "Map key not found: "); 276 | uint32 len = strlen(buff); 277 | printed_obj(key, buff+len, sizeof(buff)-len-1); 278 | soft_fail(buff); 279 | } 280 | 281 | soft_fail("Map key not found"); 282 | } 283 | 284 | OBJ lookup_field(OBJ rec_or_tag_rec, uint16 field_symb_idx) { 285 | OBJ rec = is_tag_obj(rec_or_tag_rec) ? get_inner_obj(rec_or_tag_rec) : rec_or_tag_rec; 286 | 287 | if (!is_empty_rel(rec)) { 288 | BIN_REL_OBJ *ptr = get_bin_rel_ptr(rec); 289 | uint32 size = ptr->size; 290 | OBJ *keys = ptr->buffer; 291 | OBJ *values = keys + size; 292 | for (uint32 i=0 ; i < size ; i++) 293 | if (is_symb(keys[i], field_symb_idx)) 294 | return values[i]; 295 | } 296 | 297 | internal_fail(); 298 | } 299 | -------------------------------------------------------------------------------- /src/binary-table.cpp: -------------------------------------------------------------------------------- 1 | #include "lib.h" 2 | #include "table-utils.h" 3 | 4 | 5 | void binary_table_init(BINARY_TABLE *table) { 6 | 7 | } 8 | 9 | void binary_table_cleanup(BINARY_TABLE *table) { 10 | 11 | } 12 | 13 | void binary_table_updates_init(BINARY_TABLE_UPDATES *updates) { 14 | 15 | } 16 | 17 | void binary_table_updates_cleanup(BINARY_TABLE_UPDATES *updates) { 18 | 19 | } 20 | 21 | //////////////////////////////////////////////////////////////////////////////// 22 | 23 | bool binary_table_contains(BINARY_TABLE *table, uint32 left_val, uint32 right_val) { 24 | std::set &left_to_right = table->left_to_right; 25 | std::set::iterator it = left_to_right.find(pack(left_val, right_val)); 26 | return it != left_to_right.end(); 27 | } 28 | 29 | //////////////////////////////////////////////////////////////////////////////// 30 | 31 | void binary_table_delete_range_(BINARY_TABLE_ITER *iter, BINARY_TABLE_UPDATES *updates) { 32 | bool reversed = iter->reversed; 33 | std::vector &deletes = updates->deletes; 34 | while 
(!binary_table_iter_is_out_of_range(iter)) { 35 | uint64 pair = *iter->iter; 36 | deletes.push_back(reversed ? swap(pair) : pair); 37 | binary_table_iter_next(iter); 38 | } 39 | } 40 | 41 | void binary_table_delete(BINARY_TABLE *table, BINARY_TABLE_UPDATES *updates, uint32 left_val, uint32 right_val) { 42 | if (binary_table_contains(table, left_val, right_val)) 43 | updates->deletes.push_back(pack(left_val, right_val)); 44 | } 45 | 46 | void binary_table_delete_by_col_0(BINARY_TABLE *table, BINARY_TABLE_UPDATES *updates, uint32 value) { 47 | BINARY_TABLE_ITER iter; 48 | binary_table_get_iter_by_col_0(table, &iter, value); 49 | binary_table_delete_range_(&iter, updates); 50 | } 51 | 52 | void binary_table_delete_by_col_1(BINARY_TABLE *table, BINARY_TABLE_UPDATES *updates, uint32 value) { 53 | BINARY_TABLE_ITER iter; 54 | binary_table_get_iter_by_col_1(table, &iter, value); 55 | binary_table_delete_range_(&iter, updates); 56 | } 57 | 58 | void binary_table_clear(BINARY_TABLE *table, BINARY_TABLE_UPDATES *updates) { 59 | BINARY_TABLE_ITER iter; 60 | binary_table_get_iter(table, &iter); 61 | binary_table_delete_range_(&iter, updates); 62 | } 63 | 64 | void binary_table_insert(BINARY_TABLE_UPDATES *updates, uint32 left_val, uint32 right_val) { 65 | updates->inserts.push_back(pack(left_val, right_val)); 66 | } 67 | 68 | void binary_table_updates_apply(BINARY_TABLE *table, BINARY_TABLE_UPDATES *updates, VALUE_STORE *vs0, VALUE_STORE *vs1) { 69 | std::set &left_to_right = table->left_to_right; 70 | std::set &right_to_left = table->right_to_left; 71 | 72 | if (!updates->deletes.empty()) { 73 | uint32 count = updates->deletes.size(); 74 | uint64 *deletes = &updates->deletes.front(); 75 | for (uint32 i=0 ; i < count ; i++) { 76 | uint64 pair = deletes[i]; 77 | if (left_to_right.erase(pair) > 0) 78 | right_to_left.erase(swap(pair)); 79 | else 80 | deletes[i] = 0xFFFFFFFFFFFFFFFFULL; 81 | } 82 | } 83 | 84 | if (!updates->inserts.empty()) { 85 | uint32 count = 
updates->inserts.size(); 86 | uint64 *inserts = &updates->inserts.front(); 87 | for (uint32 i=0 ; i < count ; i++) { 88 | uint64 pair = inserts[i]; 89 | if (left_to_right.insert(pair).second) { 90 | right_to_left.insert(swap(pair)); 91 | value_store_add_ref(vs0, left(pair)); 92 | value_store_add_ref(vs1, right(pair)); 93 | } 94 | } 95 | } 96 | } 97 | 98 | void binary_table_updates_finish(BINARY_TABLE_UPDATES *updates, VALUE_STORE *vs0, VALUE_STORE *vs1) { 99 | if (!updates->deletes.empty()) { 100 | uint32 count = updates->deletes.size(); 101 | uint64 *deletes = &updates->deletes.front(); 102 | for (uint32 i=0 ; i < count ; i++) { 103 | uint64 pair = deletes[i]; 104 | if (pair != 0xFFFFFFFFFFFFFFFFULL) { 105 | value_store_release(vs0, left(pair)); 106 | value_store_release(vs1, right(pair)); 107 | } 108 | } 109 | } 110 | } 111 | 112 | //////////////////////////////////////////////////////////////////////////////// 113 | 114 | void binary_table_get_iter_by_col_0(BINARY_TABLE *table, BINARY_TABLE_ITER *iter, uint32 value) { 115 | std::set &left_to_right = table->left_to_right; 116 | iter->iter = left_to_right.lower_bound(pack(value, 0)); 117 | iter->end = left_to_right.end(); 118 | iter->value = value; 119 | iter->reversed = false; 120 | } 121 | 122 | void binary_table_get_iter_by_col_1(BINARY_TABLE *table, BINARY_TABLE_ITER *iter, uint32 value) { 123 | std::set &right_to_left = table->right_to_left; 124 | iter->iter = right_to_left.lower_bound(pack(value, 0)); 125 | iter->end = right_to_left.end(); 126 | iter->value = value; 127 | iter->reversed = true; 128 | } 129 | 130 | void binary_table_get_iter(BINARY_TABLE *table, BINARY_TABLE_ITER *iter) { 131 | std::set &left_to_right = table->left_to_right; 132 | iter->iter = left_to_right.begin(); 133 | iter->end = left_to_right.end(); 134 | iter->value = 0xFFFFFFFFU; 135 | iter->reversed = false; 136 | } 137 | 138 | //////////////////////////////////////////////////////////////////////////////// 139 | 140 | bool 
binary_table_iter_is_out_of_range(BINARY_TABLE_ITER *iter) { 141 | std::set::iterator it = iter->iter; 142 | return it == iter->end || (*it >> 32) > iter->value; 143 | } 144 | 145 | uint32 binary_table_iter_get_left_field(BINARY_TABLE_ITER *iter) { 146 | return *iter->iter >> (iter->reversed ? 0 : 32); 147 | } 148 | 149 | uint32 binary_table_iter_get_right_field(BINARY_TABLE_ITER *iter) { 150 | return *iter->iter >> (iter->reversed ? 32 : 0); 151 | } 152 | 153 | void binary_table_iter_next(BINARY_TABLE_ITER *iter) { 154 | assert(!binary_table_iter_is_out_of_range(iter)); 155 | iter->iter++; 156 | } 157 | 158 | //////////////////////////////////////////////////////////////////////////////// 159 | 160 | bool binary_table_updates_check_0(BINARY_TABLE *table, BINARY_TABLE_UPDATES *updates) { 161 | sort_unique(updates->inserts); 162 | return table_updates_check_key(updates->inserts, updates->deletes, table->left_to_right); 163 | } 164 | 165 | bool binary_table_updates_check_1(BINARY_TABLE *table, BINARY_TABLE_UPDATES *updates) { 166 | sort_unique(updates->inserts); 167 | return table_updates_check_key(updates->inserts, updates->deletes, table->right_to_left); 168 | } 169 | 170 | bool binary_table_updates_check_0_1(BINARY_TABLE *table, BINARY_TABLE_UPDATES *updates) { 171 | return binary_table_updates_check_0(table, updates) && 172 | table_updates_check_key(updates->inserts, updates->deletes, table->right_to_left); 173 | } 174 | 175 | //////////////////////////////////////////////////////////////////////////////// 176 | 177 | OBJ copy_binary_table(BINARY_TABLE *table, VALUE_STORE *vs1, VALUE_STORE *vs2, bool flip_cols) { 178 | OBJ *slots1 = value_store_slot_array(vs1); 179 | OBJ *slots2 = value_store_slot_array(vs2); 180 | 181 | std::set &rows = table->left_to_right; 182 | uint32 size = rows.size(); 183 | 184 | if (size == 0) 185 | return make_empty_rel(); 186 | 187 | OBJ *col1 = new_obj_array(2 * size); 188 | OBJ *col2 = col1 + size; 189 | 190 | uint32 idx = 0; 191 | 
for (std::set::iterator it=rows.begin(); it != rows.end() ; it++) { 192 | uint64 row = *it; 193 | col1[idx] = slots1[left(row)]; 194 | col2[idx++] = slots2[right(row)]; 195 | } 196 | assert(idx == size); 197 | 198 | for (int64 i=0 ; i < 2 * size ; i++) 199 | add_ref(col1[i]); 200 | 201 | OBJ rel = build_bin_rel(flip_cols ? col2 : col1, flip_cols ? col1 : col2, size); 202 | 203 | delete_obj_array(col1, 2 * size); 204 | 205 | return rel; 206 | } 207 | 208 | //////////////////////////////////////////////////////////////////////////////// 209 | 210 | void set_binary_table(BINARY_TABLE *table, BINARY_TABLE_UPDATES *updates, VALUE_STORE *vs1, VALUE_STORE *vs2, 211 | VALUE_STORE_UPDATES *vsu1, VALUE_STORE_UPDATES *vsu2, OBJ rel, bool flip_cols) { 212 | binary_table_clear(table, updates); 213 | 214 | if (is_empty_rel(rel)) 215 | return; 216 | 217 | BIN_REL_OBJ *ptr = get_bin_rel_ptr(rel); 218 | uint32 size = ptr->size; 219 | OBJ *col1 = flip_cols ? get_right_col_array_ptr(ptr) : get_left_col_array_ptr(ptr); 220 | OBJ *col2 = flip_cols ? 
get_left_col_array_ptr(ptr) : get_right_col_array_ptr(ptr); 221 | 222 | for (uint32 i=0 ; i < size ; i++) { 223 | OBJ obj = col1[i]; 224 | uint32 ref1 = lookup_value_ex(vs1, vsu1, obj); 225 | if (ref1 == -1) { 226 | add_ref(obj); 227 | ref1 = value_store_insert(vs1, vsu1, obj); 228 | } 229 | 230 | obj = col2[i]; 231 | uint32 ref2 = lookup_value_ex(vs2, vsu2, obj); 232 | if (ref2 == -1) { 233 | add_ref(obj); 234 | ref2 = value_store_insert(vs2, vsu2, obj); 235 | } 236 | 237 | binary_table_insert(updates, ref1, ref2); 238 | } 239 | } 240 | -------------------------------------------------------------------------------- /src/interface/lib-cpp.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "cell-lang.h" 8 | 9 | 10 | using std::vector; 11 | using std::string; 12 | using std::tuple; 13 | using std::unique_ptr; 14 | using std::make_tuple; 15 | 16 | /////////////////////////////////// value.cpp ////////////////////////////////// 17 | 18 | unique_ptr export_as_value(OBJ); 19 | 20 | //////////////////////////////// conversion.cpp //////////////////////////////// 21 | 22 | string export_as_std_string(OBJ); 23 | 24 | ////////////////////////////////// lib-cpp.cpp ///////////////////////////////// 25 | 26 | bool table_contains(UNARY_TABLE &, VALUE_STORE &, OBJ); 27 | bool table_contains(BINARY_TABLE &, VALUE_STORE &, VALUE_STORE &, OBJ, OBJ); 28 | bool table_contains(TERNARY_TABLE &, VALUE_STORE &, VALUE_STORE &, VALUE_STORE &, OBJ, OBJ, OBJ); 29 | 30 | //////////////////////////////////////////////////////////////////////////////// 31 | 32 | template vector export_as_vector(OBJ obj) { 33 | assert(is_seq(obj) | is_set(obj)); 34 | 35 | vector result; 36 | 37 | if (is_ne_seq(obj)) { 38 | uint32 len = get_seq_length(obj); 39 | OBJ *buffer = get_seq_buffer_ptr(obj); 40 | result.resize(len); 41 | for (uint32 i=0 ; i < len ; i++) 42 | result[i] = 
T::get_value(buffer[i]); 43 | return result; 44 | } 45 | else if (is_ne_set(obj)) { 46 | SET_OBJ *ptr = get_set_ptr(obj); 47 | uint32 size = ptr->size; 48 | OBJ *buffer = ptr->buffer; 49 | result.resize(size); 50 | for (uint32 i=0 ; i < size ; i++) 51 | result[i] = T::get_value(buffer[i]); 52 | } 53 | return result; 54 | } 55 | 56 | //////////////////////////////////////////////////////////////////////////////// 57 | 58 | struct bool_conv { 59 | typedef bool type; 60 | static bool get_value(OBJ obj) { 61 | return get_bool(obj); 62 | } 63 | }; 64 | 65 | struct int_conv { 66 | typedef long long type; 67 | static long long get_value(OBJ obj) { 68 | return get_int(obj); 69 | } 70 | }; 71 | 72 | struct float_conv { 73 | typedef double type; 74 | static double get_value(OBJ obj) { 75 | return get_float(obj); 76 | } 77 | }; 78 | 79 | struct symb_conv { 80 | typedef const char *type; 81 | static const char *get_value(OBJ obj) { 82 | return symb_to_raw_str(obj); 83 | } 84 | }; 85 | 86 | struct string_conv { 87 | typedef string type; 88 | static string get_value(OBJ obj) { 89 | return export_as_std_string(obj); 90 | } 91 | }; 92 | 93 | template struct vector_conv { 94 | typedef vector type; 95 | static type get_value(OBJ obj) { 96 | return export_as_vector(obj); 97 | } 98 | }; 99 | 100 | template struct tagged_conv { 101 | typedef typename T::type type; 102 | static type get_value(OBJ obj) { 103 | return T::get_value(get_inner_obj(obj)); 104 | } 105 | }; 106 | 107 | // template struct tuple_conv { 108 | // typedef tuple type; 109 | // static type get_value(OBJ obj) { 110 | // } 111 | // }; 112 | 113 | struct generic_conv { 114 | typedef unique_ptr type; 115 | static type get_value(OBJ obj) { 116 | return export_as_value(obj); 117 | } 118 | }; 119 | 120 | //////////////////////////////////////////////////////////////////////////////// 121 | 122 | template struct tuple_2_conv { 123 | typedef tuple type; 124 | static type get_value(OBJ obj) { 125 | return make_tuple( 126 | 
T0::get_value(at(obj, 0)), 127 | T1::get_value(at(obj, 1)) 128 | ); 129 | } 130 | }; 131 | 132 | template struct tuple_3_conv { 133 | typedef tuple type; 134 | static type get_value(OBJ obj) { 135 | return make_tuple( 136 | T0::get_value(at(obj, 0)), 137 | T1::get_value(at(obj, 1)), 138 | T2::get_value(at(obj, 2)) 139 | ); 140 | } 141 | }; 142 | 143 | template struct tuple_4_conv { 144 | typedef tuple type; 145 | static type get_value(OBJ obj) { 146 | return make_tuple( 147 | T0::get_value(at(obj, 0)), 148 | T1::get_value(at(obj, 1)), 149 | T2::get_value(at(obj, 2)), 150 | T3::get_value(at(obj, 3)) 151 | ); 152 | } 153 | }; 154 | 155 | template struct tuple_5_conv { 156 | typedef tuple type; 157 | static type get_value(OBJ obj) { 158 | return make_tuple( 159 | T0::get_value(at(obj, 0)), 160 | T1::get_value(at(obj, 1)), 161 | T2::get_value(at(obj, 2)), 162 | T3::get_value(at(obj, 3)), 163 | T4::get_value(at(obj, 4)) 164 | ); 165 | } 166 | }; 167 | 168 | template struct tuple_6_conv { 169 | typedef tuple< 170 | typename T0::type, typename T1::type, typename T2::type, 171 | typename T3::type, typename T4::type, typename T5::type 172 | > type; 173 | static type get_value(OBJ obj) { 174 | return make_tuple( 175 | T0::get_value(at(obj, 0)), 176 | T1::get_value(at(obj, 1)), 177 | T2::get_value(at(obj, 2)), 178 | T3::get_value(at(obj, 3)), 179 | T4::get_value(at(obj, 4)), 180 | T5::get_value(at(obj, 5)) 181 | ); 182 | } 183 | }; 184 | 185 | //////////////////////////////////////////////////////////////////////////////// 186 | 187 | template vector get_unary_rel(UNARY_TABLE &table, VALUE_STORE &store) { 188 | uint32 size = table.count; 189 | vector result(size); 190 | UNARY_TABLE_ITER iter; 191 | unary_table_get_iter(&table, &iter); 192 | for (uint32 i=0 ; i < size ; i++) { 193 | assert(!unary_table_iter_is_out_of_range(&iter)); 194 | OBJ obj = lookup_surrogate(&store, unary_table_iter_get_field(&iter)); 195 | result[i] = T::get_value(obj); 196 | release(obj); 197 | 
unary_table_iter_next(&iter); 198 | } 199 | assert(unary_table_iter_is_out_of_range(&iter)); 200 | return result; 201 | } 202 | 203 | template vector > 204 | get_binary_rel(BINARY_TABLE &table, VALUE_STORE &store0, VALUE_STORE &store1, bool flipped) { 205 | uint32 size = table.left_to_right.size(); 206 | vector > result(size); 207 | BINARY_TABLE_ITER iter; 208 | binary_table_get_iter(&table, &iter); 209 | for (uint32 i=0 ; i < size ; i++) { 210 | assert(!binary_table_iter_is_out_of_range(&iter)); 211 | OBJ obj0 = lookup_surrogate(&store0, binary_table_iter_get_left_field(&iter)); 212 | OBJ obj1 = lookup_surrogate(&store1, binary_table_iter_get_right_field(&iter)); 213 | result[i] = make_tuple(T0::get_value(flipped ? obj1 : obj0), T1::get_value(flipped ? obj0 : obj1)); 214 | release(obj0); 215 | release(obj1); 216 | binary_table_iter_next(&iter); 217 | } 218 | assert(binary_table_iter_is_out_of_range(&iter)); 219 | return result; 220 | } 221 | 222 | template vector > 223 | get_ternary_rel(TERNARY_TABLE &table, VALUE_STORE &store0, VALUE_STORE &store1, VALUE_STORE &store2, 224 | int idx0, int idx1, int idx2) { 225 | uint32 size = table.unshifted.size(); 226 | vector > result(size); 227 | TERNARY_TABLE_ITER iter; 228 | ternary_table_get_iter(&table, &iter); 229 | for (uint32 i=0 ; i < size ; i++) { 230 | assert(!ternary_table_iter_is_out_of_range(&iter)); 231 | OBJ objs[3]; 232 | objs[0] = lookup_surrogate(&store0, ternary_table_iter_get_left_field(&iter)); 233 | objs[1] = lookup_surrogate(&store1, ternary_table_iter_get_middle_field(&iter)); 234 | objs[2] = lookup_surrogate(&store2, ternary_table_iter_get_right_field(&iter)); 235 | result[i] = make_tuple(T0::get_value(objs[idx0]), T1::get_value(objs[idx1]), T2::get_value(objs[idx2])); 236 | for (int i=0 ; i < 3 ; i++) 237 | release(objs[i]); 238 | ternary_table_iter_next(&iter); 239 | } 240 | assert(ternary_table_iter_is_out_of_range(&iter)); 241 | return result; 242 | } 243 | 244 | 
//////////////////////////////////////////////////////////////////////////////// 245 | 246 | template bool lookup_by_left_col(BINARY_TABLE &table, VALUE_STORE &store0, VALUE_STORE &store1, OBJ key, typename T::type &value) { 247 | int64 surr = lookup_value(&store0, key); 248 | release(key); 249 | if (surr == -1) 250 | return false; 251 | BINARY_TABLE_ITER iter; 252 | binary_table_get_iter_by_col_0(&table, &iter, surr); 253 | if (binary_table_iter_is_out_of_range(&iter)) 254 | return false; 255 | OBJ obj = lookup_surrogate(&store1, binary_table_iter_get_right_field(&iter)); 256 | value = T::get_value(obj); 257 | release(obj); 258 | #ifndef NDEBUG 259 | binary_table_iter_next(&iter); 260 | assert(binary_table_iter_is_out_of_range(&iter)); 261 | #endif 262 | return true; 263 | } 264 | -------------------------------------------------------------------------------- /src/unary-table.cpp: -------------------------------------------------------------------------------- 1 | #include "lib.h" 2 | 3 | 4 | void unary_table_init(UNARY_TABLE *table) { 5 | const uint32 INIT_SIZE = 1024; 6 | uint64 *bitmap = (uint64 *) malloc(INIT_SIZE/8); 7 | memset(bitmap, 0, INIT_SIZE/8); 8 | table->bitmap = bitmap; 9 | table->size = INIT_SIZE; 10 | table->count = 0; 11 | } 12 | 13 | void unary_table_cleanup(UNARY_TABLE *table) { 14 | free(table->bitmap); 15 | } 16 | 17 | void unary_table_updates_init(UNARY_TABLE_UPDATES *table) { 18 | table->capacity = 0; 19 | table->deletes_count = 0; 20 | table->inserts_count = 0; 21 | table->buffer = NULL; 22 | } 23 | 24 | // Inserts and deletes are stored in the same buffer, 25 | // deletes at the front and inserts at the back 26 | void unary_table_updates_cleanup(UNARY_TABLE_UPDATES *table) { 27 | 28 | } 29 | 30 | //////////////////////////////////////////////////////////////////////////////// 31 | 32 | bool unary_table_contains(UNARY_TABLE *table, uint32 value) { 33 | assert(value < table->size); 34 | 35 | uint64 *bitmap = table->bitmap; 36 | uint32 idx 
= value >> 6; 37 | uint64 mask = 1ULL << (value % 64); 38 | return bitmap[idx] & mask; 39 | } 40 | 41 | //////////////////////////////////////////////////////////////////////////////// 42 | 43 | // Returns new capacity 44 | uint32 unary_table_updates_resize(UNARY_TABLE_UPDATES *updates) { 45 | uint32 capacity = updates->capacity; 46 | uint32 new_capacity = capacity > 0 ? 2 * capacity : 32; 47 | uint32 *new_buffer = (uint32 *) malloc(new_capacity * sizeof(uint32)); 48 | // uint32 *new_buffer = new_uint32_array(new_capacity); 49 | 50 | if (capacity > 0) { 51 | uint32 deletes_count = updates->deletes_count; 52 | uint32 inserts_count = updates->inserts_count; 53 | uint32 *buffer = updates->buffer; 54 | 55 | if (deletes_count > 0) 56 | memcpy(new_buffer, buffer, deletes_count * sizeof(uint32)); 57 | if (inserts_count > 0) { 58 | uint32 *new_inserts = new_buffer + new_capacity - inserts_count; 59 | uint32 *inserts = buffer + capacity - inserts_count; 60 | memcpy(new_inserts, inserts, inserts_count * sizeof(uint32)); 61 | } 62 | free(buffer); 63 | // delete_uint32_array(buffer, capacity); 64 | } 65 | 66 | updates->capacity = new_capacity; 67 | updates->buffer = new_buffer; 68 | return new_capacity; 69 | } 70 | 71 | void unary_table_insert(UNARY_TABLE_UPDATES *updates, uint32 value) { 72 | uint32 capacity = updates->capacity; 73 | uint32 deletes_count = updates->deletes_count; 74 | uint32 inserts_count = updates->inserts_count; 75 | 76 | if (deletes_count + inserts_count >= capacity) 77 | capacity = unary_table_updates_resize(updates); 78 | 79 | uint32 *next_slot = updates->buffer + capacity - 1 - inserts_count; 80 | *next_slot = value; 81 | updates->inserts_count = inserts_count + 1; 82 | } 83 | 84 | void unary_table_delete(UNARY_TABLE *, UNARY_TABLE_UPDATES *updates, uint32 value) { 85 | uint32 capacity = updates->capacity; 86 | uint32 deletes_count = updates->deletes_count; 87 | uint32 inserts_count = updates->inserts_count; 88 | 89 | if (deletes_count + inserts_count 
>= capacity) 90 | capacity = unary_table_updates_resize(updates); 91 | 92 | uint32 *next_slot = updates->buffer + deletes_count; 93 | *next_slot = value; 94 | updates->deletes_count = deletes_count + 1; 95 | } 96 | 97 | void unary_table_clear(UNARY_TABLE *table, UNARY_TABLE_UPDATES *updates) { 98 | uint64 *bitmap = table->bitmap; 99 | uint32 cell_count = table->size / 64; 100 | for (int i=0 ; i < cell_count ; i++) { 101 | uint64 cell = bitmap[i]; 102 | for (int j=0 ; j < 64 ; j++) 103 | if ((cell >> j) & 1) 104 | unary_table_delete(table, updates, 64 * i + j); 105 | } 106 | } 107 | 108 | bool unary_table_updates_check(UNARY_TABLE *table, UNARY_TABLE_UPDATES *updates) { 109 | return true; 110 | } 111 | 112 | void unary_table_updates_apply(UNARY_TABLE *table, UNARY_TABLE_UPDATES *updates, VALUE_STORE *vs) { 113 | uint32 inserts_count = updates->inserts_count; 114 | uint32 deletes_count = updates->deletes_count; 115 | 116 | if (deletes_count > 0) { 117 | uint32 *deletes = updates->buffer; 118 | uint64 *bitmap = table->bitmap; 119 | for (uint32 i=0 ; i < deletes_count ; i++) { 120 | uint32 value = deletes[i]; 121 | uint32 idx = value >> 6; 122 | uint64 mask = 1ULL << (value % 64); 123 | uint64 cell = bitmap[idx]; 124 | if (cell & mask) { 125 | cell &= ~mask; 126 | bitmap[idx] = cell; 127 | table->count--; 128 | } 129 | else 130 | deletes[i] = 0xFFFFFFFFU; 131 | } 132 | } 133 | 134 | if (inserts_count > 0) { 135 | uint32 *inserts = updates->buffer + updates->capacity - inserts_count; 136 | uint32 max_val = *std::max_element(inserts, inserts + inserts_count); 137 | uint32 size = table->size; 138 | uint64 *bitmap = table->bitmap; 139 | if (max_val >= size) { 140 | // Reallocating the table 141 | uint32 new_size = 2 * size; 142 | while (max_val >= new_size) 143 | new_size *= 2; 144 | uint64 *bitmap = (uint64 *) realloc(bitmap, new_size / 8); 145 | memset(bitmap + (size / 64), 0, (new_size - size) / 8); 146 | size = new_size; 147 | table->size = size; 148 | table->bitmap = 
bitmap; 149 | } 150 | 151 | for (uint32 i=0 ; i < inserts_count ; i++) { 152 | uint32 value = inserts[i]; 153 | uint32 idx = value >> 6; 154 | uint64 mask = 1ULL << (value % 64); 155 | uint64 cell = bitmap[idx]; 156 | if (!(cell & mask)) { 157 | cell |= mask; 158 | bitmap[idx] = cell; 159 | table->count++; 160 | value_store_add_ref(vs, value); 161 | } 162 | } 163 | } 164 | } 165 | 166 | void unary_table_updates_finish(UNARY_TABLE_UPDATES *updates, VALUE_STORE *vs) { 167 | uint32 count = updates->deletes_count; 168 | uint32 *buffer = updates->buffer; 169 | if (count > 0) { 170 | for (uint32 i=0 ; i < count ; i++) { 171 | uint32 value = buffer[i]; 172 | if (value != 0xFFFFFFFFU) 173 | value_store_release(vs, value); 174 | } 175 | } 176 | if (buffer != NULL) 177 | free(buffer); 178 | // // No need to delete anything for now, this memory is allocated in "temporary" memory, it is 179 | // // cleaned up automatically. An attempt to free it at this stage would actually cause a crash. 180 | // //## BUT WHY IS IT COUNTED AMONG THE LEAKED BLOCKS OF MEMORY? 
181 | // delete_uint32_array(buffer, updates->capacity); 182 | } 183 | 184 | //////////////////////////////////////////////////////////////////////////////// 185 | 186 | void unary_table_get_iter(UNARY_TABLE *table, UNARY_TABLE_ITER *iter) { 187 | if (table->count != 0) { 188 | uint64 *bitmap = table->bitmap; 189 | uint32 size = table->size; 190 | 191 | iter->bitmap = bitmap; 192 | iter->size = size; 193 | 194 | uint32 cell_count = size / 64; 195 | for (uint32 i=0 ; i < cell_count ; i++) { 196 | uint64 cell = bitmap[i]; 197 | if (cell != 0) 198 | for (int j=0 ; j < 64 ; j++) 199 | if (((cell >> j) & 1) != 0) { 200 | iter->curr_value = 64 * i + j; 201 | return; 202 | } 203 | } 204 | internal_fail(); 205 | } 206 | else { 207 | iter->bitmap = NULL; 208 | iter->size = 0; 209 | iter->curr_value = 0; 210 | } 211 | } 212 | 213 | uint32 unary_table_iter_get_field(UNARY_TABLE_ITER *iter) { 214 | assert(!unary_table_iter_is_out_of_range(iter)); 215 | 216 | return iter->curr_value; 217 | } 218 | 219 | void unary_table_iter_next(UNARY_TABLE_ITER *iter) { 220 | assert(!unary_table_iter_is_out_of_range(iter)); 221 | 222 | uint64 *bitmap = iter->bitmap; 223 | uint32 size = iter->size; 224 | uint32 curr_value = iter->curr_value; 225 | 226 | uint32 cell_count = size / 64; 227 | 228 | uint32 idx = curr_value / 64; 229 | uint32 offset = curr_value % 64 + 1; 230 | 231 | uint64 cell = bitmap[idx]; 232 | if (cell >> offset != 0) { 233 | for (int i=offset ; i < 64 ; i++) 234 | if (((cell >> i) & 1) != 0) { 235 | iter->curr_value = 64 * idx + i; 236 | return; 237 | } 238 | internal_fail(); 239 | } 240 | 241 | for (uint32 i=idx+1 ; i < cell_count ; i++) { 242 | cell = bitmap[i]; 243 | if (cell != 0) 244 | for (int j=0 ; j < 64 ; j++) 245 | if (((cell >> j) & 1) != 0) { 246 | iter->curr_value = 64 * i + j; 247 | return; 248 | } 249 | } 250 | 251 | iter->bitmap = NULL; 252 | iter->size = 0; 253 | iter->curr_value = 0; 254 | } 255 | 256 | bool unary_table_iter_is_out_of_range(UNARY_TABLE_ITER 
*iter) { 257 | return iter->bitmap == NULL; 258 | } 259 | 260 | //////////////////////////////////////////////////////////////////////////////// 261 | 262 | OBJ copy_unary_table(UNARY_TABLE *table, VALUE_STORE *vs) { 263 | assert(table->size % 64 == 0); 264 | 265 | OBJ *slots = value_store_slot_array(vs); 266 | uint64 *bitmap = table->bitmap; 267 | uint32 size = table->size; 268 | uint32 count = table->count; 269 | 270 | if (count == 0) 271 | return make_empty_rel(); 272 | 273 | SET_OBJ *set = new_set(count); 274 | OBJ *buffer = set->buffer; 275 | 276 | uint32 idx = 0; 277 | for (uint32 i=0 ; i < size/64 ; i++) { 278 | uint64 word = bitmap[i]; 279 | for (int j=0 ; j < 64 ; j++) 280 | if ((word >> j) & 1) { 281 | OBJ obj = slots[64 * i + j]; 282 | add_ref(obj); 283 | buffer[idx++] = obj; 284 | } 285 | } 286 | assert(idx == count); 287 | 288 | sort_obj_array(buffer, count); 289 | return make_set(set); 290 | } 291 | 292 | //////////////////////////////////////////////////////////////////////////////// 293 | 294 | void set_unary_table(UNARY_TABLE *table, UNARY_TABLE_UPDATES *updates, VALUE_STORE *vs, VALUE_STORE_UPDATES *vsu, OBJ set) { 295 | unary_table_clear(table, updates); 296 | 297 | if (is_empty_rel(set)) 298 | return; 299 | 300 | SET_OBJ *ptr = get_set_ptr(set); 301 | uint32 size = ptr->size; 302 | OBJ *buffer = ptr->buffer; 303 | 304 | for (uint32 i=0 ; i < size ; i++) { 305 | OBJ obj = buffer[i]; 306 | uint32 ref = lookup_value_ex(vs, vsu, obj); 307 | if (ref == -1) { 308 | add_ref(obj); 309 | ref = value_store_insert(vs, vsu, obj); 310 | } 311 | unary_table_insert(updates, ref); 312 | } 313 | } 314 | -------------------------------------------------------------------------------- /src/mem.cpp: -------------------------------------------------------------------------------- 1 | #include "lib.h" 2 | 3 | 4 | uint64 set_obj_mem_size(uint64 size) { 5 | assert(size > 0); 6 | return sizeof(SET_OBJ) + (size - 1) * sizeof(OBJ); 7 | } 8 | 9 | uint64 
seq_obj_mem_size(uint64 capacity) { 10 | assert(capacity > 0); 11 | return sizeof(SEQ_OBJ) + (capacity - 1) * sizeof(OBJ); 12 | } 13 | 14 | uint64 bin_rel_obj_mem_size(uint64 size) { 15 | assert(size > 0); 16 | return sizeof(BIN_REL_OBJ) + (2 * size - 1) * sizeof(OBJ) + size * sizeof(uint32); 17 | } 18 | 19 | uint32 tern_rel_obj_mem_size(uint64 size) { 20 | assert(size > 0); 21 | return sizeof(TERN_REL_OBJ) + (3 * size - 1) * sizeof(OBJ) + 2 * size * sizeof(uint32); 22 | } 23 | 24 | uint64 map_obj_mem_size(uint64 size) { 25 | assert(size > 0); 26 | return bin_rel_obj_mem_size(size); 27 | } 28 | 29 | uint64 tag_obj_mem_size() { 30 | return sizeof(TAG_OBJ); 31 | } 32 | 33 | //////////////////////////////////////////////////////////////////////////////// 34 | 35 | OBJ *get_left_col_array_ptr(BIN_REL_OBJ *rel) { 36 | return rel->buffer; 37 | } 38 | 39 | OBJ *get_right_col_array_ptr(BIN_REL_OBJ *rel) { 40 | return rel->buffer + rel->size; 41 | } 42 | 43 | uint32 *get_right_to_left_indexes(BIN_REL_OBJ *rel) { 44 | return (uint32 *) (rel->buffer + 2 * rel->size); 45 | } 46 | 47 | //////////////////////////////////////////////////////////////////////////////// 48 | 49 | OBJ *get_col_array_ptr(TERN_REL_OBJ *rel, int idx) { 50 | assert(idx >= 0 & idx <= 2); 51 | return rel->buffer + idx * rel->size; 52 | } 53 | 54 | uint32 *get_rotated_index(TERN_REL_OBJ *rel, int amount) { 55 | assert(amount == 1 | amount == 2); 56 | uint32 size = rel->size; 57 | uint32 *base_ptr = (uint32 *) (rel->buffer + 3 * size); 58 | return base_ptr + (amount-1) * size; 59 | } 60 | 61 | //////////////////////////////////////////////////////////////////////////////// 62 | 63 | uint32 seq_capacity(uint64 byte_size) { 64 | return (byte_size - sizeof(SEQ_OBJ)) / sizeof(OBJ) + 1; 65 | } 66 | 67 | //////////////////////////////////////////////////////////////////////////////// 68 | 69 | SEQ_OBJ *new_seq(uint32 length) { 70 | assert(length > 0); 71 | 72 | if (length > 0xFFFFFFF) 73 | impl_fail("Maximum 
permitted sequence length (2^28-1) exceeded"); 74 | 75 | uint32 actual_byte_size; 76 | SEQ_OBJ *seq = (SEQ_OBJ *) new_obj(seq_obj_mem_size(length), actual_byte_size); 77 | seq->ref_obj.ref_count = 1; 78 | seq->capacity = seq_capacity(actual_byte_size); 79 | seq->size = length; 80 | return seq; 81 | } 82 | 83 | SET_OBJ *new_set(uint32 size) { 84 | SET_OBJ *set = (SET_OBJ *) new_obj(set_obj_mem_size(size)); 85 | set->ref_obj.ref_count = 1; 86 | set->size = size; 87 | return set; 88 | } 89 | 90 | BIN_REL_OBJ *new_map(uint32 size) { 91 | assert(size > 0); 92 | 93 | BIN_REL_OBJ *map = (BIN_REL_OBJ *) new_obj(map_obj_mem_size(size)); 94 | map->ref_obj.ref_count = 1; 95 | map->size = size; 96 | uint32 *rev_idxs = get_right_to_left_indexes(map); 97 | rev_idxs[0] = INVALID_INDEX; 98 | return map; 99 | } 100 | 101 | BIN_REL_OBJ *new_bin_rel(uint32 size) { 102 | assert(size > 0); 103 | 104 | BIN_REL_OBJ *rel = (BIN_REL_OBJ *) new_obj(bin_rel_obj_mem_size(size)); 105 | rel->ref_obj.ref_count = 1; 106 | rel->size = size; 107 | return rel; 108 | } 109 | 110 | TERN_REL_OBJ *new_tern_rel(uint32 size) { 111 | assert(size > 0); 112 | 113 | TERN_REL_OBJ *rel = (TERN_REL_OBJ *) new_obj(tern_rel_obj_mem_size(size)); 114 | rel->ref_obj.ref_count = 1; 115 | rel->size = size; 116 | return rel; 117 | } 118 | 119 | TAG_OBJ *new_tag_obj() { 120 | TAG_OBJ *tag_obj = (TAG_OBJ *) new_obj(tag_obj_mem_size()); 121 | tag_obj->ref_obj.ref_count = 1; 122 | tag_obj->unused_field = 0; 123 | return tag_obj; 124 | } 125 | 126 | //////////////////////////////////////////////////////////////////////////////// 127 | 128 | //## WHY ISN'T THERE A shrink_map()? 
129 | 130 | SET_OBJ *shrink_set(SET_OBJ *set, uint32 new_size) { 131 | assert(new_size < set->size); 132 | assert(set->ref_obj.ref_count == 1); 133 | 134 | uint32 size = set->size; 135 | uint32 mem_size = set_obj_mem_size(size); 136 | uint32 new_mem_size = set_obj_mem_size(new_size); 137 | 138 | if (mem_size == new_mem_size) { 139 | // If the memory footprint is exactly the same, I can safely reuse 140 | // the same object without causing problems in the memory allocator. 141 | set->size = new_size; 142 | return set; 143 | } 144 | 145 | SET_OBJ *new_set = ::new_set(new_size); 146 | memcpy(new_set->buffer, set->buffer, new_size * sizeof(OBJ)); 147 | free_obj(set, mem_size); 148 | 149 | return new_set; 150 | } 151 | 152 | //////////////////////////////////////////////////////////////////////////////// 153 | 154 | OBJ *new_obj_array(uint32 size) { 155 | return (OBJ *) new_obj(size * sizeof(OBJ)); 156 | } 157 | 158 | void delete_obj_array(OBJ *buffer, uint32 size) { 159 | free_obj(buffer, size * sizeof(OBJ)); 160 | } 161 | 162 | OBJ* resize_obj_array(OBJ* buffer, uint32 size, uint32 new_size) { 163 | return (OBJ *) resize_obj(buffer, size * sizeof(OBJ), new_size * sizeof(OBJ)); 164 | } 165 | 166 | uint32 *new_uint32_array(uint32 size) { 167 | return (uint32 *) new_obj(size * sizeof(uint32)); 168 | } 169 | 170 | void delete_uint32_array(uint32 *buffer, uint32 size) { 171 | free_obj(buffer, size * sizeof(uint32)); 172 | } 173 | 174 | int32 *new_int32_array(uint32 size) { 175 | return (int32 *) new_obj(size * sizeof(int32)); 176 | } 177 | 178 | void delete_int32_array(int32 *buffer, uint32 size) { 179 | free_obj(buffer, size * sizeof(int32)); 180 | } 181 | 182 | char *new_byte_array(uint32 size) { 183 | return (char *) new_obj(size); 184 | } 185 | 186 | void delete_byte_array(char *buffer, uint32 size) { 187 | free_obj(buffer, size); 188 | } 189 | 190 | void **new_ptr_array(uint32 size) { 191 | return (void **) new_obj(size * sizeof(void *)); 192 | } 193 | 194 | void 
// Drops one reference from each of the `count` objects in `objs`.
//
// Objects that are not garbage collected (is_gc_obj() false) are skipped.
// An object with other references left just has its count decremented.
// An object whose only reference is the one being dropped is not freed here:
// it is appended, with its count still at 1, to the circular deletion queue
// (queue / queue_start / queue_size, MAX_QUEUE_SIZE slots) for the caller to
// process.
static void release(OBJ *objs, uint32 count, OBJ *queue, uint32 &queue_start, uint32 &queue_size) {
  for (uint32 i=0 ; i < count ; i++) {
    OBJ obj = objs[i];
    if (is_gc_obj(obj)) {
      REF_OBJ *ptr = get_ref_obj_ptr(obj);

      uint32 ref_count = ptr->ref_count;
      assert(ref_count > 0);

      if (ref_count == 1) {
        assert(queue_size <= MAX_QUEUE_SIZE);

        if (queue_size == MAX_QUEUE_SIZE) {
          // Queue full: the oldest entry is taken out and its slot, which
          // becomes the tail once queue_start moves forward, is given to obj.
          // queue_size does not change.
          uint32 idx = queue_start % MAX_QUEUE_SIZE;
          OBJ first_obj = queue[idx];
          queue[idx] = obj;
          queue_start++;
          // The evicted object is deleted right away through the
          // single-argument delete_obj(), which works on a queue of its own.
          delete_obj(first_obj);
        }
        else {
          // Room left: append at the tail of the circular queue
          uint32 idx = (queue_start + queue_size) % MAX_QUEUE_SIZE;
          queue[idx] = obj;
          queue_size++;
        }
      }
      else {
        ptr->ref_count = ref_count - 1;
      }
    }
  }
}
263 | break; 264 | } 265 | 266 | case TYPE_BIN_REL: case TYPE_LOG_MAP: case TYPE_MAP: { 267 | BIN_REL_OBJ *rel = (BIN_REL_OBJ *) ref_obj; 268 | uint32 size = rel->size; 269 | release(rel->buffer, 2*size, queue, queue_start, queue_size); 270 | free_obj(rel, obj_type == TYPE_MAP ? map_obj_mem_size(size) : bin_rel_obj_mem_size(size)); 271 | break; 272 | } 273 | 274 | case TYPE_TERN_REL: { 275 | TERN_REL_OBJ *rel = (TERN_REL_OBJ *) ref_obj; 276 | uint32 size = rel->size; 277 | release(rel->buffer, 3*size, queue, queue_start, queue_size); 278 | free_obj(rel, tern_rel_obj_mem_size(size)); 279 | break; 280 | } 281 | 282 | case TYPE_TAG_OBJ: { 283 | TAG_OBJ *tag_obj = (TAG_OBJ *) ref_obj; 284 | release(&tag_obj->obj, 1, queue, queue_start, queue_size); 285 | free_obj(tag_obj, tag_obj_mem_size()); 286 | break; 287 | } 288 | 289 | default: 290 | internal_fail(); 291 | } 292 | } 293 | 294 | static void delete_obj(OBJ obj) { 295 | assert(is_gc_obj(obj)); 296 | 297 | uint32 queue_start = 0; 298 | uint32 queue_size = 1; 299 | OBJ queue[MAX_QUEUE_SIZE]; 300 | queue[0] = obj; 301 | 302 | while (queue_size > 0) { 303 | OBJ next_obj = queue[queue_start % MAX_QUEUE_SIZE]; 304 | queue_size--; 305 | queue_start++; 306 | 307 | delete_obj(next_obj, queue, queue_start, queue_size); 308 | } 309 | } 310 | 311 | //////////////////////////////////////////////////////////////////////////////// 312 | 313 | void add_ref(REF_OBJ *ptr) { 314 | #ifndef NOGC 315 | assert(ptr->ref_count > 0); 316 | ptr->ref_count++; 317 | #endif 318 | } 319 | 320 | void add_ref(OBJ obj) { 321 | #ifndef NOGC 322 | if (is_gc_obj(obj)) 323 | add_ref(get_ref_obj_ptr(obj)); 324 | #endif 325 | } 326 | 327 | void release(OBJ obj) { 328 | #ifndef NOGC 329 | if (is_gc_obj(obj)) { 330 | REF_OBJ *ptr = get_ref_obj_ptr(obj); 331 | uint32 ref_count = ptr->ref_count; 332 | assert(ref_count > 0); 333 | if (ref_count == 1) 334 | delete_obj(obj); 335 | else 336 | ptr->ref_count = ref_count - 1; 337 | } 338 | #endif 339 | } 340 | 341 
| //////////////////////////////////////////////////////////////////////////////// 342 | 343 | void vec_add_ref(OBJ *objs, uint32 len) { 344 | for (uint32 i=0 ; i < len ; i++) 345 | add_ref(objs[i]); 346 | } 347 | 348 | void vec_release(OBJ *objs, uint32 len) { 349 | for (uint32 i=0 ; i < len ; i++) 350 | release(objs[i]); 351 | } 352 | -------------------------------------------------------------------------------- /src/value-store.cpp: -------------------------------------------------------------------------------- 1 | #include "lib.h" 2 | 3 | 4 | struct NODE { 5 | uint32 next; 6 | uint32 hash_code; 7 | }; 8 | 9 | 10 | NODE node(uint32 next_node, uint32 hash_code) { 11 | NODE node; 12 | node.next = next_node; 13 | node.hash_code = hash_code; 14 | return node; 15 | } 16 | 17 | NODE empty_node() { 18 | return node(0xFFFFFFFFU, 0xFFFFFFFFU); 19 | } 20 | 21 | //////////////////////////////////////////////////////////////////////////////// 22 | 23 | const int BYTES_PER_ENTRY = sizeof(OBJ) + sizeof(NODE) + sizeof(uint32) + sizeof(uint32); 24 | const int UPDATE_BYTES_PER_ENTRY = sizeof(OBJ) + sizeof(NODE) + sizeof(uint32) + sizeof(uint32); 25 | 26 | 27 | OBJ *slot_array(void *ptr) { 28 | return (OBJ *) ptr; 29 | } 30 | 31 | NODE *node_array(void *ptr, uint32 capacity) { 32 | return (NODE *)(slot_array(ptr) + capacity); 33 | } 34 | 35 | uint32 *hashtable_ptr(void *ptr, uint32 capacity) { 36 | return (uint32 *)(node_array(ptr, capacity) + capacity); 37 | } 38 | 39 | uint32 *ref_count_array(void *ptr, uint32 capacity) { 40 | return (uint32 *)(hashtable_ptr(ptr, capacity) + capacity); 41 | } 42 | 43 | uint32 *surr_array(void *ptr, uint32 capacity) { 44 | return (uint32 *)(hashtable_ptr(ptr, capacity) + capacity); 45 | } 46 | 47 | //////////////////////////////////////////////////////////////////////////////// 48 | 49 | const uint32 EMPTY_SLOT_MARKER = 0xFFFFFFFFU; 50 | 51 | 52 | void reset_slot(OBJ *slot, uint32 first_free) { 53 | slot->core_data.int_ = first_free; 54 | 
slot->extra_data = 0; // Should not be necessary 55 | assert(get_physical_type(*slot) == TYPE_BLANK_OBJ); 56 | } 57 | 58 | static void hashtable_clear(void *ptr, uint32 capacity) { 59 | OBJ *slots = slot_array(ptr); 60 | for (uint32 i=0 ; i < capacity ; i++) 61 | reset_slot(slots+i, i+1); 62 | uint32 *hash_table = hashtable_ptr(ptr, capacity); 63 | memset(hash_table, 0xFF, capacity * sizeof(uint32)); 64 | // Initializing the array of node is not stricly necessary 65 | NODE *nodes = node_array(ptr, capacity); 66 | for (uint32 i=0 ; i < capacity ; i++) 67 | nodes[i] = empty_node(); 68 | } 69 | 70 | static void hashtable_insert(void *ptr, uint32 capacity, uint32 hash_code, uint32 value) { 71 | uint32 *hashtable = hashtable_ptr(ptr, capacity); 72 | NODE *nodes = node_array(ptr, capacity); 73 | uint32 index = hash_code % capacity; 74 | uint32 entry = hashtable[index]; 75 | hashtable[index] = value; 76 | nodes[value] = node(entry, hash_code); 77 | } 78 | 79 | static void hashtable_delete(void *ptr, uint32 capacity, uint32 value) { 80 | uint32 *hashtable = hashtable_ptr(ptr, capacity); 81 | NODE *nodes = node_array(ptr, capacity); 82 | NODE del_node = nodes[value]; 83 | nodes[value] = empty_node(); // Not strictly necessary 84 | uint32 index = del_node.hash_code % capacity; 85 | uint32 entry = hashtable[index]; 86 | assert(entry >= 0 & entry < capacity); 87 | // We first check to see if the deleted node is the first one in the list 88 | if (entry == value) { 89 | hashtable[index] = del_node.next; // Note that can be EMPTY_SLOT_MARKER 90 | return; 91 | } 92 | // Here is the index of the current cell 93 | // We already know that the current cell is not the one we're looking for. 
94 | NODE node = nodes[entry]; 95 | while (node.next != value) { 96 | entry = node.next; 97 | assert(entry >= 0 & entry < capacity); 98 | node = nodes[entry]; 99 | } 100 | // We've finally reached the cell before the one we are deleting 101 | // Again is the index of the current cell, and is its content 102 | node.next = del_node.next; 103 | nodes[entry] = node; 104 | } 105 | 106 | static int64 hashtable_lookup(void *ptr, uint32 capacity, OBJ value, uint32 hash_code) { 107 | OBJ *slots = slot_array(ptr); 108 | NODE *nodes = node_array(ptr, capacity); 109 | uint32 *hashtable = hashtable_ptr(ptr, capacity); 110 | uint32 index = hash_code % capacity; 111 | uint32 entry = hashtable[index]; 112 | while (entry != EMPTY_SLOT_MARKER) { 113 | NODE node = nodes[entry]; 114 | if (node.hash_code == hash_code && comp_objs(value, slots[entry]) == 0) 115 | return entry; 116 | entry = node.next; 117 | } 118 | return -1; 119 | } 120 | 121 | static void hashtable_copy(void *src_ptr, uint32 src_cpty, void *dest_ptr, uint32 dest_cpty) { 122 | assert(dest_cpty > src_cpty); 123 | 124 | OBJ *src_slots = slot_array(src_ptr); 125 | OBJ *dest_slots = slot_array(dest_ptr); 126 | memcpy(dest_slots, src_slots, src_cpty * sizeof(OBJ)); 127 | for (uint32 i=src_cpty ; i < dest_cpty ; i++) 128 | reset_slot(dest_slots+i, i+1); 129 | 130 | uint32 *dest_hashtable = hashtable_ptr(dest_ptr, dest_cpty); 131 | memset(dest_hashtable, 0xFF, dest_cpty * sizeof(uint32)); 132 | 133 | // Initializing the new array of node is not stricly necessary 134 | NODE *dest_nodes = node_array(dest_ptr, dest_cpty); 135 | for (uint32 i=0 ; i < dest_cpty ; i++) 136 | dest_nodes[i] = empty_node(); 137 | 138 | NODE *src_nodes = node_array(src_ptr, src_cpty); 139 | for (uint32 i=0 ; i < src_cpty ; i++) 140 | if (!is_blank_obj(src_slots[i])) 141 | hashtable_insert(dest_ptr, dest_cpty, src_nodes[i].hash_code, i); 142 | } 143 | 144 | // int64 ref_hashtable_lookup(void *ptr, uint32 capacity, OBJ value, uint32 hash_code) { 145 | // 
OBJ *slots = slot_array(ptr); 146 | // for (uint32 i=0 ; i < capacity ; i++) { 147 | // OBJ slot = slots[i]; 148 | // if (!is_blank_obj(slot) && comp_objs(value, slot) == 0) 149 | // return i; 150 | // } 151 | // return -1; 152 | // } 153 | // 154 | // int64 hashtable_lookup(void *ptr, uint32 capacity, OBJ value, uint32 hash_code) { 155 | // int64 ref_res = ref_hashtable_lookup(ptr, capacity, value, hash_code); 156 | // int64 res = hashtable_lookup_(ptr, capacity, value, hash_code); 157 | // assert(res == ref_res); 158 | // if (res != ref_res) 159 | // fail(); 160 | // return res; 161 | // } 162 | 163 | //////////////////////////////////////////////////////////////////////////////// 164 | //////////////////////////////////////////////////////////////////////////////// 165 | 166 | const uint32 INIT_SIZE = 4; 167 | 168 | uint32 calc_capacity(uint32 min_capacity) { 169 | uint32 capacity = INIT_SIZE; 170 | while (capacity < min_capacity) 171 | capacity *= 2; 172 | return capacity; 173 | } 174 | 175 | //////////////////////////////////////////////////////////////////////////////// 176 | 177 | void value_store_init(VALUE_STORE *store) { 178 | void *ptr = new_obj(INIT_SIZE * BYTES_PER_ENTRY); 179 | store->ptr = ptr; 180 | store->capacity = INIT_SIZE; 181 | store->usage = 0; 182 | store->first_free = 0; 183 | hashtable_clear(ptr, INIT_SIZE); 184 | memset(ref_count_array(ptr, INIT_SIZE), 0, INIT_SIZE * sizeof(uint32)); 185 | } 186 | 187 | void value_store_cleanup(VALUE_STORE *store) { 188 | uint32 capacity = store->capacity; 189 | OBJ *slots = slot_array(store->ptr); 190 | for (uint32 i=0 ; i < capacity ; i++) 191 | release(slots[i]); 192 | free_obj(slots, capacity * BYTES_PER_ENTRY); 193 | } 194 | 195 | //////////////////////////////////////////////////////////////////////////////// 196 | 197 | void value_store_updates_init(VALUE_STORE *store, VALUE_STORE_UPDATES *updates) { 198 | updates->capacity = 0; 199 | // Not strictly necessary 200 | updates->ptr = NULL; 201 | 
// Adds `value` to the pending updates of a value store and returns the
// surrogate reserved for it, that is, the index of the slot of `store` it is
// written to by value_store_apply().
//
// `value` is stored as is: no reference is added here. The store itself is
// only read (its free slot list and its capacity), never modified.
uint32 value_store_insert(VALUE_STORE *store, VALUE_STORE_UPDATES *updates, OBJ value) {
  uint32 hash_code = compute_hash_code(value);

  void *ptr = updates->ptr;
  uint32 capacity = updates->capacity;
  uint32 count = updates->count;
  assert(count <= capacity);

  // The update table is full (or has not been allocated yet): grow it
  if (count == capacity) {
    uint32 new_capacity = capacity != 0 ? 2 * capacity : 32;
    void *new_ptr = new_obj(new_capacity * UPDATE_BYTES_PER_ENTRY);
    if (capacity > 0) {
      // Values, nodes and buckets are migrated by hashtable_copy(), the
      // surrogates array is copied over here, with its new tail zeroed
      hashtable_copy(ptr, capacity, new_ptr, new_capacity);
      uint32 *surrs = surr_array(ptr, capacity);
      uint32 *new_surrs = surr_array(new_ptr, new_capacity);
      memcpy(new_surrs, surrs, capacity * sizeof(uint32));
      memset(new_surrs + capacity, 0, (new_capacity - capacity) * sizeof(uint32));
      free_obj(ptr, capacity * UPDATE_BYTES_PER_ENTRY);
    }
    else
      hashtable_clear(new_ptr, new_capacity);
    updates->capacity = capacity = new_capacity;
    updates->ptr = ptr = new_ptr;
  }

  // The new value goes in the first unused entry of the update table, and it
  // is indexed by hash code so that lookup_value_ex() can find it
  OBJ *values = slot_array(ptr);
  values[count] = value;
  hashtable_insert(ptr, capacity, hash_code, count);
  // The first insertion starts from the head of the store's list of free
  // slots, the following ones from where the previous insertion left off
  uint32 first_free = count == 0 ? store->first_free : updates->first_free;
  uint32 *surrs = surr_array(ptr, capacity);
  surrs[count] = first_free;
  updates->count = count + 1;
  // Next free slot: inside the store it is the one the blank slot links to,
  // past the end of the store slots are handed out sequentially
  if (first_free < store->capacity)
    updates->first_free = slot_array(store->ptr)[first_free].core_data.int_;
  else
    updates->first_free = first_free + 1;
  return first_free;
}
hashtable_insert(ptr, store_capacity, nodes[i].hash_code, surr); 291 | } 292 | store->usage = new_usage; 293 | store->first_free = updates->first_free; 294 | } 295 | 296 | void value_store_add_ref(VALUE_STORE *store, uint32 surr) { 297 | assert(surr < store->capacity); 298 | uint32 *ref_counts = ref_count_array(store->ptr, store->capacity); 299 | ref_counts[surr]++; 300 | } 301 | 302 | void value_store_release(VALUE_STORE *store, uint32 surr) { 303 | void *ptr = store->ptr; 304 | uint32 capacity = store->capacity; 305 | assert(surr < store->capacity); 306 | uint32 *ref_counts = ref_count_array(ptr, capacity); 307 | uint32 count = ref_counts[surr]; 308 | assert(count != 0); 309 | if (count == 1) { 310 | OBJ *slot = slot_array(ptr) + surr; 311 | release(*slot); 312 | reset_slot(slot, store->first_free); 313 | store->first_free = surr; 314 | store->usage--; 315 | hashtable_delete(ptr, capacity, surr); 316 | } 317 | else 318 | ref_counts[surr] = count - 1; 319 | } 320 | 321 | //////////////////////////////////////////////////////////////////////////////// 322 | 323 | OBJ lookup_surrogate(VALUE_STORE *store, int64 surr) { 324 | OBJ value = slot_array(store->ptr)[surr]; 325 | add_ref(value); 326 | return value; 327 | } 328 | 329 | int64 lookup_value(VALUE_STORE *store, OBJ value) { 330 | return hashtable_lookup(store->ptr, store->capacity, value, compute_hash_code(value)); 331 | } 332 | 333 | //////////////////////////////////////////////////////////////////////////////// 334 | 335 | int64 lookup_value_ex(VALUE_STORE *store, VALUE_STORE_UPDATES *updates, OBJ value) { 336 | uint32 hash_code = compute_hash_code(value); 337 | int64 surr = hashtable_lookup(store->ptr, store->capacity, value, hash_code); 338 | if (surr != -1) 339 | return surr; 340 | uint32 capacity = updates->capacity; 341 | if (capacity > 0) { 342 | void *ptr = updates->ptr; 343 | int64 index = hashtable_lookup(ptr, capacity, value, hash_code); 344 | if (index >= 0) 345 | return surr_array(ptr, 
capacity)[index]; 346 | } 347 | return -1; 348 | } 349 | 350 | //////////////////////////////////////////////////////////////////////////////// 351 | 352 | OBJ *value_store_slot_array(VALUE_STORE *store) { 353 | return slot_array(store->ptr); 354 | } 355 | -------------------------------------------------------------------------------- /src/instrs.cpp: -------------------------------------------------------------------------------- 1 | #include "lib.h" 2 | 3 | 4 | 5 | void init(STREAM &s) { 6 | s.buffer = 0; 7 | s.capacity = 0; 8 | s.count = 0; 9 | } 10 | 11 | void append(STREAM &s, OBJ obj) { // obj must be already reference-counted 12 | assert(s.count <= s.capacity); 13 | 14 | uint32 count = s.count; 15 | uint32 capacity = s.capacity; 16 | OBJ *buffer = s.buffer; 17 | 18 | if (count == capacity) { 19 | uint32 new_capacity = capacity == 0 ? 32 : 2 * capacity; 20 | OBJ *new_buffer = new_obj_array(new_capacity); 21 | for (uint32 i=0 ; i < count ; i++) 22 | new_buffer[i] = buffer[i]; 23 | if (capacity != 0) 24 | delete_obj_array(buffer, capacity); 25 | s.buffer = new_buffer; 26 | s.capacity = new_capacity; 27 | } 28 | 29 | s.buffer[count] = obj; 30 | s.count++; 31 | } 32 | 33 | OBJ build_seq(OBJ *elems, uint32 length) { // Objects in elems must be already reference-counted 34 | if (length == 0) 35 | return make_empty_seq(); 36 | 37 | SEQ_OBJ *seq = new_seq(length); 38 | 39 | for (uint32 i=0 ; i < length ; i++) 40 | seq->buffer[i] = elems[i]; 41 | 42 | return make_seq(seq, length); 43 | } 44 | 45 | OBJ build_seq(STREAM &s) { 46 | if (s.count == 0) 47 | return make_empty_seq(); 48 | 49 | //## COULD IT BE OPTIMIZED? 
50 | 51 | OBJ seq = build_seq(s.buffer, s.count); 52 | 53 | delete_obj_array(s.buffer, s.capacity); 54 | 55 | return seq; 56 | } 57 | 58 | OBJ build_set(OBJ *elems, uint32 size) { 59 | if (size == 0) 60 | return make_empty_rel(); 61 | 62 | size = sort_and_release_dups(elems, size); 63 | 64 | SET_OBJ *set = new_set(size); 65 | OBJ *es = set->buffer; 66 | for (uint32 i=0 ; i < size ; i++) 67 | es[i] = elems[i]; 68 | 69 | return make_set(set); 70 | } 71 | 72 | OBJ build_set(STREAM &s) { 73 | assert((s.count == 0 && s.capacity == 0 && s.buffer == NULL) || (s.count > 0 && s.capacity > 0 && s.buffer != NULL)); 74 | 75 | uint32 count = s.count; 76 | if (count == 0) 77 | return make_empty_rel(); 78 | 79 | OBJ *buffer = s.buffer; 80 | OBJ set = build_set(buffer, count); 81 | delete_obj_array(buffer, s.capacity); 82 | return set; 83 | } 84 | 85 | OBJ build_tagged_obj(OBJ tag, OBJ obj) { 86 | assert(is_symb(tag)); 87 | return make_tag_obj(get_symb_idx(tag), obj); 88 | } 89 | 90 | OBJ neg_float(OBJ obj) { 91 | return make_float(-get_float(obj)); 92 | } 93 | 94 | OBJ add_floats(OBJ obj1, OBJ obj2) { 95 | return make_float(get_float(obj1) + get_float(obj2)); 96 | } 97 | 98 | OBJ sub_floats(OBJ obj1, OBJ obj2) { 99 | return make_float(get_float(obj1) - get_float(obj2)); 100 | } 101 | 102 | OBJ mult_floats(OBJ obj1, OBJ obj2) { 103 | return make_float(get_float(obj1) * get_float(obj2)); 104 | } 105 | 106 | OBJ div_floats(OBJ obj1, OBJ obj2) { 107 | return make_float(get_float(obj1) / get_float(obj2)); 108 | } 109 | 110 | OBJ exp_floats(OBJ obj1, OBJ obj2) { 111 | return make_float(pow(get_float(obj1), get_float(obj2))); 112 | } 113 | 114 | OBJ square_root(OBJ obj) { 115 | return make_float(sqrt(get_float(obj))); 116 | } 117 | 118 | OBJ floor(OBJ obj) { 119 | impl_fail("_floor_() not implemented"); 120 | } 121 | 122 | OBJ ceiling(OBJ obj) { 123 | impl_fail("_ceiling_() not implemented"); 124 | } 125 | 126 | OBJ int_to_float(OBJ obj) { 127 | return make_float(get_int_val(obj)); 128 
| } 129 | 130 | OBJ blank_array(int64 size) { 131 | if (size > 0xFFFFFFFF) 132 | impl_fail("Maximum permitted array size exceeded"); 133 | 134 | if (size <= 0) //## I DON'T LIKE THIS 135 | return make_empty_seq(); 136 | 137 | SEQ_OBJ *seq = new_seq(size); 138 | OBJ *buffer = seq->buffer; 139 | OBJ blank_obj = make_blank_obj(); 140 | 141 | for (uint32 i=0 ; i < size ; i++) 142 | buffer[i] = blank_obj; 143 | 144 | return make_seq(seq, size); 145 | } 146 | 147 | OBJ get_seq_slice(OBJ seq, int64 idx_first, int64 len) { 148 | assert(is_seq(seq)); 149 | 150 | if (idx_first < 0 | len < 0 | idx_first + len > get_seq_length(seq)) 151 | soft_fail("_slice_(): Invalid start index and/or subsequence length"); 152 | 153 | if (len == 0) 154 | return make_empty_seq(); 155 | 156 | add_ref(seq); 157 | 158 | SEQ_OBJ *ptr = get_seq_ptr(seq); 159 | uint32 offset = get_seq_offset(seq); 160 | return make_slice(ptr, get_mem_layout(seq), offset+idx_first, len); 161 | } 162 | 163 | OBJ extend_sequence(OBJ seq, OBJ *new_elems, uint32 count) { 164 | assert(!is_empty_seq(seq)); 165 | assert(((uint64) get_seq_length(seq) + count <= 0xFFFFFFFF)); 166 | 167 | SEQ_OBJ *seq_ptr = get_seq_ptr(seq); 168 | uint32 offset = get_seq_offset(seq); 169 | uint32 length = get_seq_length(seq); 170 | 171 | uint32 new_length = length + count; 172 | 173 | uint32 size = seq_ptr->size; 174 | uint32 capacity = seq_ptr->capacity; 175 | 176 | bool ends_at_last_elem = offset + length == size; 177 | bool has_needed_spare_capacity = size + count <= capacity; 178 | bool can_be_extended = ends_at_last_elem & has_needed_spare_capacity; 179 | 180 | if (can_be_extended) { 181 | memcpy(seq_ptr->buffer+size, new_elems, sizeof(OBJ) * count); 182 | seq_ptr->size = size + count; 183 | vec_add_ref(new_elems, count); 184 | add_ref(seq); 185 | return make_slice(seq_ptr, get_mem_layout(seq), offset, new_length); 186 | } 187 | else { 188 | OBJ *buffer = get_seq_buffer_ptr(seq); 189 | 190 | SEQ_OBJ *new_seq_ptr = new_seq(new_length); 
191 | OBJ *new_buffer = new_seq_ptr->buffer; 192 | 193 | memcpy(new_buffer, buffer, sizeof(OBJ) * length); 194 | memcpy(new_buffer+length, new_elems, sizeof(OBJ) * count); 195 | 196 | vec_add_ref(new_buffer, new_length); 197 | 198 | return make_seq(new_seq_ptr, new_length); 199 | } 200 | } 201 | 202 | OBJ append_to_seq(OBJ seq, OBJ obj) { // Obj must be reference counted already 203 | if (is_empty_seq(seq)) 204 | return build_seq(&obj, 1); 205 | 206 | // Checking that the new sequence doesn't overflow 207 | if (!(get_seq_length(seq) < 0xFFFFFFFF)) 208 | impl_fail("Resulting sequence is too large"); 209 | 210 | OBJ res = extend_sequence(seq, &obj, 1); 211 | release(seq); 212 | release(obj); 213 | return res; 214 | } 215 | 216 | OBJ update_seq_at(OBJ seq, OBJ idx, OBJ value) { // Value must be already reference counted 217 | uint32 len = get_seq_length(seq); 218 | int64 int_idx = get_int_val(idx); 219 | 220 | if (int_idx < 0 | int_idx >= len) 221 | soft_fail("Invalid sequence index"); 222 | 223 | OBJ *src_ptr = get_seq_buffer_ptr(seq); 224 | SEQ_OBJ *new_seq_ptr = new_seq(len); 225 | 226 | new_seq_ptr->buffer[int_idx] = value; 227 | for (uint32 i=0 ; i < len ; i++) 228 | if (i != int_idx) { 229 | OBJ elt = src_ptr[i]; 230 | add_ref(elt); 231 | new_seq_ptr->buffer[i] = elt; 232 | } 233 | 234 | return make_seq(new_seq_ptr, len); 235 | } 236 | 237 | OBJ join_seqs(OBJ left, OBJ right) { 238 | // No need to check the parameters here 239 | 240 | uint64 right_len = get_seq_length(right); 241 | if (right_len == 0) { 242 | add_ref(left); 243 | return left; 244 | } 245 | 246 | uint64 left_len = get_seq_length(left); 247 | if (left_len == 0) { 248 | add_ref(right); 249 | return right; 250 | } 251 | 252 | if (left_len + right_len > 0xFFFFFFFF) 253 | impl_fail("_cat_(): Resulting sequence is too large"); 254 | 255 | return extend_sequence(left, get_seq_buffer_ptr(right), right_len); 256 | } 257 | 258 | OBJ rev_seq(OBJ seq) { 259 | // No need to check the parameters here 260 | 261 
| uint32 len = get_seq_length(seq); 262 | if (len <= 1) { 263 | if (len == 1) 264 | add_ref(seq); 265 | return seq; 266 | } 267 | 268 | OBJ *elems = get_seq_buffer_ptr(seq); 269 | vec_add_ref(elems, len); 270 | 271 | SEQ_OBJ *rs = new_seq(len); 272 | OBJ *rev_elems = rs->buffer; 273 | for (uint32 i=0 ; i < len ; i++) 274 | rev_elems[len-i-1] = elems[i]; 275 | 276 | return make_seq(rs, len); 277 | } 278 | 279 | void set_at(OBJ seq, uint32 idx, OBJ value) { // Value must be already reference counted 280 | // This is not called directly by the user, so asserts should be sufficient 281 | assert(idx < get_seq_length(seq)); 282 | 283 | OBJ *target = get_seq_buffer_ptr(seq) + idx; 284 | release(*target); 285 | *target = value; 286 | } 287 | 288 | OBJ internal_sort(OBJ set) { 289 | if (is_empty_rel(set)) 290 | return make_empty_seq(); 291 | 292 | SET_OBJ *s = get_set_ptr(set); 293 | uint32 size = s->size; 294 | OBJ *src = s->buffer; 295 | 296 | SEQ_OBJ *seq = new_seq(size); 297 | OBJ *dest = seq->buffer; 298 | for (uint32 i=0 ; i < size ; i++) 299 | dest[i] = src[i]; 300 | vec_add_ref(dest, size); 301 | 302 | return make_seq(seq, size); 303 | } 304 | 305 | OBJ parse_value(OBJ str_obj) { 306 | char *raw_str = obj_to_str(str_obj); 307 | uint32 len = strlen(raw_str); 308 | OBJ obj; 309 | uint32 error_offset; 310 | bool ok = parse(raw_str, len, &obj, &error_offset); 311 | delete_byte_array(raw_str, len+1); 312 | if (ok) 313 | return make_tag_obj(symb_idx_success, obj); 314 | else 315 | return make_tag_obj(symb_idx_failure, make_int(error_offset)); 316 | } 317 | 318 | char *print_value_alloc(void *ptr, uint32 size) { 319 | uint32 *size_ptr = (uint32 *) ptr; 320 | assert(*size_ptr == 0); 321 | *size_ptr = size; 322 | return new_byte_array(size); 323 | } 324 | 325 | OBJ print_value(OBJ obj) { 326 | uint32 size = 0; 327 | char *raw_str = printed_obj(obj, print_value_alloc, &size); 328 | OBJ str_obj = str_to_obj(raw_str); 329 | delete_byte_array(raw_str, size); 330 | return 
str_obj; 331 | } 332 | 333 | void get_set_iter(SET_ITER &it, OBJ set) { 334 | it.idx = 0; 335 | if (!is_empty_rel(set)) { 336 | SET_OBJ *ptr = get_set_ptr(set); 337 | it.buffer = ptr->buffer; 338 | it.size = ptr->size; 339 | } 340 | else { 341 | it.buffer = 0; //## NOT STRICTLY NECESSARY 342 | it.size = 0; 343 | } 344 | } 345 | 346 | void get_seq_iter(SEQ_ITER &it, OBJ seq) { 347 | it.idx = 0; 348 | if (!is_empty_seq(seq)) { 349 | it.buffer = get_seq_buffer_ptr(seq); 350 | it.len = get_seq_length(seq); 351 | } 352 | else { 353 | it.buffer = 0; //## NOT STRICTLY NECESSARY 354 | it.len = 0; 355 | } 356 | } 357 | 358 | void move_forward(SET_ITER &it) { 359 | assert(!is_out_of_range(it)); 360 | it.idx++; 361 | } 362 | 363 | void move_forward(SEQ_ITER &it) { 364 | assert(!is_out_of_range(it)); 365 | it.idx++; 366 | } 367 | 368 | void move_forward(BIN_REL_ITER &it) { 369 | assert(!is_out_of_range(it)); 370 | it.idx++; 371 | } 372 | 373 | void move_forward(TERN_REL_ITER &it) { 374 | assert(!is_out_of_range(it)); 375 | it.idx++; 376 | } 377 | 378 | void fail() { 379 | #ifndef NDEBUG 380 | const char *MSG = "\nFail statement reached. Call stack:\n\n"; 381 | #else 382 | const char *MSG = "\nFail statement reached\n"; 383 | #endif 384 | 385 | soft_fail(MSG); 386 | } 387 | 388 | void runtime_check(OBJ cond) { 389 | assert(is_bool(cond)); 390 | 391 | if (!get_bool(cond)) { 392 | #ifndef NDEBUG 393 | fputs("\nAssertion failed. 
Call stack:\n\n", stderr); 394 | #else 395 | fputs("\nAssertion failed\n", stderr); 396 | #endif 397 | fflush(stderr); 398 | print_call_stack(); 399 | *(char *)0 = 0; // Causing a runtime crash, useful for debugging 400 | } 401 | } 402 | 403 | //////////////////////////////////////////////////////////////////////////////// 404 | 405 | OBJ build_const_uint8_seq(const uint8* buffer, uint32 len) { 406 | if (len == 0) 407 | return make_empty_seq(); 408 | 409 | SEQ_OBJ *seq = new_seq(len); 410 | 411 | for (int i=0 ; i < len ; i++) 412 | seq->buffer[i] = make_int(buffer[i]); 413 | 414 | return make_seq(seq, len); 415 | } 416 | 417 | OBJ build_const_uint16_seq(const uint16* buffer, uint32 len) { 418 | if (len == 0) 419 | return make_empty_seq(); 420 | 421 | SEQ_OBJ *seq = new_seq(len); 422 | 423 | for (int i=0 ; i < len ; i++) 424 | seq->buffer[i] = make_int(buffer[i]); 425 | 426 | return make_seq(seq, len); 427 | } 428 | 429 | OBJ build_const_uint32_seq(const uint32* buffer, uint32 len) { 430 | if (len == 0) 431 | return make_empty_seq(); 432 | 433 | SEQ_OBJ *seq = new_seq(len); 434 | 435 | for (int i=0 ; i < len ; i++) 436 | seq->buffer[i] = make_int(buffer[i]); 437 | 438 | return make_seq(seq, len); 439 | } 440 | 441 | OBJ build_const_int8_seq(const int8* buffer, uint32 len) { 442 | if (len == 0) 443 | return make_empty_seq(); 444 | 445 | SEQ_OBJ *seq = new_seq(len); 446 | 447 | for (int i=0 ; i < len ; i++) 448 | seq->buffer[i] = make_int(buffer[i]); 449 | 450 | return make_seq(seq, len); 451 | } 452 | 453 | OBJ build_const_int16_seq(const int16* buffer, uint32 len) { 454 | if (len == 0) 455 | return make_empty_seq(); 456 | 457 | SEQ_OBJ *seq = new_seq(len); 458 | 459 | for (int i=0 ; i < len ; i++) 460 | seq->buffer[i] = make_int(buffer[i]); 461 | 462 | return make_seq(seq, len); 463 | } 464 | 465 | OBJ build_const_int32_seq(const int32* buffer, uint32 len) { 466 | if (len == 0) 467 | return make_empty_seq(); 468 | 469 | SEQ_OBJ *seq = new_seq(len); 470 | 471 | for 
(int i=0 ; i < len ; i++) 472 | seq->buffer[i] = make_int(buffer[i]); 473 | 474 | return make_seq(seq, len); 475 | } 476 | 477 | OBJ build_const_int64_seq(const int64* buffer, uint32 len) { 478 | if (len == 0) 479 | return make_empty_seq(); 480 | 481 | SEQ_OBJ *seq = new_seq(len); 482 | 483 | for (int i=0 ; i < len ; i++) 484 | seq->buffer[i] = make_int(buffer[i]); 485 | 486 | return make_seq(seq, len); 487 | } 488 | -------------------------------------------------------------------------------- /src/algs.cpp: -------------------------------------------------------------------------------- 1 | #include "lib.h" 2 | 3 | 4 | struct obj_less { 5 | bool operator () (OBJ obj1, OBJ obj2) { 6 | return comp_objs(obj1, obj2) > 0; 7 | } 8 | }; 9 | 10 | struct obj_inline_less { 11 | bool operator () (OBJ obj1, OBJ obj2) { 12 | return shallow_cmp(obj1, obj2) > 0; 13 | } 14 | }; 15 | 16 | //////////////////////////////////////////////////////////////////////////////// 17 | 18 | uint32 find_obj(OBJ *sorted_array, uint32 len, OBJ obj, bool &found) { // The array mustn't contain duplicates 19 | if (len > 0) { 20 | int64 low_idx = 0; 21 | int64 high_idx = len - 1; 22 | 23 | while (low_idx <= high_idx) { 24 | int64 middle_idx = (low_idx + high_idx) / 2; 25 | OBJ middle_obj = sorted_array[middle_idx]; 26 | 27 | int cr = comp_objs(obj, middle_obj); 28 | 29 | if (cr == 0) { 30 | found = true; 31 | return middle_idx; 32 | } 33 | 34 | if (cr > 0) 35 | high_idx = middle_idx - 1; 36 | else 37 | low_idx = middle_idx + 1; 38 | } 39 | } 40 | 41 | found = false; 42 | return -1; 43 | } 44 | 45 | //////////////////////////////////////////////////////////////////////////////// 46 | 47 | uint32 count_at_start(uint32 *sorted_idx_array, OBJ *values, uint32 len, OBJ obj) { 48 | //## IMPLEMENT FOR REAL... 
49 | int c = 0; 50 | while (c < len && comp_objs(obj, values[sorted_idx_array[c]]) == 0) 51 | c++; 52 | return c; 53 | } 54 | 55 | uint32 count_at_end(uint32 *sorted_idx_array, OBJ *values, uint32 len, OBJ obj) { 56 | //## IMPLEMENT FOR REAL... 57 | int c = 0; 58 | while (c < len && comp_objs(obj, values[sorted_idx_array[len-1-c]]) == 0) 59 | c++; 60 | return c; 61 | } 62 | 63 | uint32 find_idxs_range(uint32 *sorted_idx_array, OBJ *values, uint32 len, OBJ obj, uint32 &count) { 64 | int64 low_idx = 0; 65 | int64 high_idx = len - 1; 66 | 67 | while (low_idx <= high_idx) { 68 | int64 middle_idx = (low_idx + high_idx) / 2; 69 | OBJ middle_obj = values[sorted_idx_array[middle_idx]]; 70 | 71 | int cr = comp_objs(obj, middle_obj); 72 | 73 | if (cr == 0) { 74 | int count_up = count_at_start(sorted_idx_array + middle_idx + 1, values, len - middle_idx - 1, obj); 75 | int count_down = count_at_end(sorted_idx_array, values, middle_idx, obj); 76 | count = 1 + count_up + count_down; 77 | return middle_idx - count_down; 78 | } 79 | 80 | if (cr > 0) 81 | high_idx = middle_idx - 1; 82 | else 83 | low_idx = middle_idx + 1; 84 | } 85 | 86 | count = 0; 87 | return INVALID_INDEX; 88 | } 89 | 90 | //////////////////////////////////////////////////////////////////////////////// 91 | 92 | uint32 count_at_start(OBJ *sorted_array, uint32 len, OBJ obj) { 93 | //## IMPLEMENT FOR REAL... 94 | int c = 0; 95 | while (c < len && comp_objs(obj, sorted_array[c]) == 0) 96 | c++; 97 | return c; 98 | } 99 | 100 | uint32 count_at_end(OBJ *sorted_array, uint32 len, OBJ obj) { 101 | //## IMPLEMENT FOR REAL... 
102 | int c = 0; 103 | while (c < len && comp_objs(obj, sorted_array[len-1-c]) == 0) 104 | c++; 105 | return c; 106 | } 107 | 108 | uint32 find_objs_range(OBJ *sorted_array, uint32 len, OBJ obj, uint32 &count) { 109 | int64 low_idx = 0; 110 | int64 high_idx = len - 1; 111 | 112 | while (low_idx <= high_idx) { 113 | int64 middle_idx = (low_idx + high_idx) / 2; 114 | OBJ middle_obj = sorted_array[middle_idx]; 115 | 116 | int cr = comp_objs(obj, middle_obj); 117 | 118 | if (cr == 0) { 119 | int count_up = count_at_start(sorted_array + middle_idx + 1, len - middle_idx - 1, obj); 120 | int count_down = count_at_end(sorted_array, middle_idx, obj); 121 | count = 1 + count_up + count_down; 122 | return middle_idx - count_down; 123 | } 124 | 125 | if (cr > 0) 126 | high_idx = middle_idx - 1; 127 | else 128 | low_idx = middle_idx + 1; 129 | } 130 | 131 | count = 0; 132 | return INVALID_INDEX; 133 | } 134 | 135 | //////////////////////////////////////////////////////////////////////////////// 136 | 137 | uint32 count_at_start(OBJ *major_col, OBJ *minor_col, uint32 len, OBJ major_arg, OBJ minor_arg) { 138 | //## IMPLEMENT FOR REAL... 139 | int c = 0; 140 | while (c < len && comp_objs(major_arg, major_col[c]) == 0 && comp_objs(minor_arg, minor_col[c]) == 0) 141 | c++; 142 | return c; 143 | } 144 | 145 | uint32 count_at_end(OBJ *major_col, OBJ *minor_col, uint32 len, OBJ major_arg, OBJ minor_arg) { 146 | //## IMPLEMENT FOR REAL... 
147 | int c = 0; 148 | while (c < len && comp_objs(major_arg, major_col[len-1-c]) == 0 && comp_objs(minor_arg, minor_col[len-1-c]) == 0) 149 | c++; 150 | return c; 151 | } 152 | 153 | uint32 find_objs_range(OBJ *major_col, OBJ *minor_col, uint32 len, OBJ major_arg, OBJ minor_arg, uint32 &count) { 154 | int64 low_idx = 0; 155 | int64 high_idx = len - 1; 156 | 157 | while (low_idx <= high_idx) { 158 | int64 idx = (low_idx + high_idx) / 2; 159 | 160 | int cr = comp_objs(major_arg, major_col[idx]); 161 | if (cr == 0) 162 | cr = comp_objs(minor_arg, minor_col[idx]); 163 | 164 | if (cr == 0) { 165 | int count_up = count_at_start(major_col+idx+1, minor_col+idx+1, len-idx-1, major_arg, minor_arg); 166 | int count_down = count_at_end(major_col, minor_col, idx, major_arg, minor_arg); 167 | count = 1 + count_up + count_down; 168 | return idx - count_down; 169 | } 170 | 171 | if (cr > 0) 172 | high_idx = idx - 1; 173 | else 174 | low_idx = idx + 1; 175 | } 176 | 177 | count = 0; 178 | return INVALID_INDEX; 179 | } 180 | 181 | //////////////////////////////////////////////////////////////////////////////// 182 | 183 | uint32 count_at_start(uint32 *index, OBJ *major_col, OBJ *minor_col, uint32 len, OBJ major_arg, OBJ minor_arg) { 184 | //## IMPLEMENT FOR REAL... 185 | int c = 0; 186 | while (c < len) { 187 | uint32 idx = index[c]; 188 | if (comp_objs(major_arg, major_col[idx]) == 0 && comp_objs(minor_arg, minor_col[idx]) == 0) 189 | c++; 190 | else 191 | break; 192 | } 193 | return c; 194 | } 195 | 196 | uint32 count_at_end(uint32 *index, OBJ *major_col, OBJ *minor_col, uint32 len, OBJ major_arg, OBJ minor_arg) { 197 | //## IMPLEMENT FOR REAL... 
198 | int c = 0; 199 | while (c < len) { 200 | uint32 idx = index[len-c-1]; 201 | if (comp_objs(major_arg, major_col[idx]) == 0 and comp_objs(minor_arg, minor_col[idx]) == 0) 202 | c++; 203 | else 204 | break; 205 | } 206 | return c; 207 | } 208 | 209 | uint32 find_idxs_range(uint32 *index, OBJ *major_col, OBJ *minor_col, uint32 len, OBJ major_arg, OBJ minor_arg, uint32 &count) { 210 | int64 low_idx = 0; 211 | int64 high_idx = len - 1; 212 | 213 | while (low_idx <= high_idx) { 214 | int64 idx = (low_idx + high_idx) / 2; 215 | uint32 dr_idx = index[idx]; 216 | 217 | int cr = comp_objs(major_arg, major_col[dr_idx]); 218 | if (cr == 0) 219 | cr = comp_objs(minor_arg, minor_col[dr_idx]); 220 | 221 | if (cr == 0) { 222 | int count_up = count_at_start(index+idx+1, major_col, minor_col, len-idx-1, major_arg, minor_arg); 223 | int count_down = count_at_end(index, major_col, minor_col, idx, major_arg, minor_arg); 224 | count = 1 + count_up + count_down; 225 | return idx - count_down; 226 | } 227 | 228 | if (cr > 0) 229 | high_idx = idx - 1; 230 | else 231 | low_idx = idx + 1; 232 | } 233 | 234 | count = 0; 235 | return INVALID_INDEX; 236 | } 237 | 238 | //////////////////////////////////////////////////////////////////////////////// 239 | 240 | uint32 sort_and_release_dups(OBJ *objs, uint32 size) { 241 | if (size < 2) 242 | return size; 243 | 244 | uint32 low_idx = 0; 245 | uint32 high_idx = size - 1; // size is greater than 0 (actually 1) here, so this is always non-negative (actually positive) 246 | for ( ; ; ) { 247 | // Advancing the lower cursor to the next non-inline object 248 | while (low_idx < high_idx & is_inline_obj(objs[low_idx])) 249 | low_idx++; 250 | 251 | // Advancing the upper cursor to the next inline object 252 | while (high_idx > low_idx & not is_inline_obj(objs[high_idx])) 253 | high_idx--; 254 | 255 | if (low_idx == high_idx) 256 | break; 257 | 258 | OBJ tmp = objs[low_idx]; 259 | objs[low_idx] = objs[high_idx]; 260 | objs[high_idx] = tmp; 261 | } 262 
| 263 | uint32 inline_count = is_inline_obj(objs[low_idx]) ? low_idx + 1 : low_idx; 264 | 265 | uint32 idx = 0; 266 | if (inline_count > 0) { 267 | std::sort(objs, objs+inline_count, obj_inline_less()); 268 | 269 | OBJ last_obj = objs[0]; 270 | for (uint32 i=1 ; i < inline_count ; i++) { 271 | OBJ next_obj = objs[i]; 272 | if (!inline_eq(last_obj, next_obj)) { 273 | idx++; 274 | last_obj = next_obj; 275 | assert(idx <= i); 276 | if (idx != i) 277 | objs[idx] = next_obj; 278 | } 279 | } 280 | 281 | idx++; 282 | if (inline_count == size) 283 | return idx; 284 | } 285 | 286 | std::sort(objs+inline_count, objs+size, obj_less()); 287 | 288 | if (idx != inline_count) 289 | objs[idx] = objs[inline_count]; 290 | 291 | for (uint32 i=inline_count+1 ; i < size ; i++) 292 | // if (are_eq(objs[idx], objs[i])) 293 | if (comp_objs(objs[idx], objs[i]) == 0) 294 | release(objs[i]); 295 | else { 296 | idx++; 297 | assert(idx <= i); 298 | if (idx != i) 299 | objs[idx] = objs[i]; 300 | } 301 | 302 | return idx + 1; 303 | } 304 | 305 | uint32 adjust_map_with_duplicate_keys(OBJ *keys, OBJ *values, uint32 size) { 306 | assert(size >= 2); 307 | 308 | OBJ prev_key = keys[0]; 309 | OBJ prev_val = values[0]; 310 | 311 | uint32 next_slot_idx = 1; 312 | uint32 i = 1; 313 | do { 314 | OBJ curr_key = keys[i]; 315 | OBJ curr_val = values[i]; 316 | 317 | if (comp_objs(curr_key, prev_key) == 0) { 318 | if (comp_objs(curr_val, prev_val) == 0) { 319 | release(curr_key); 320 | release(curr_val); 321 | } 322 | else 323 | soft_fail("Map contains duplicate keys"); 324 | } 325 | else { 326 | keys[next_slot_idx] = curr_key; 327 | values[next_slot_idx] = curr_val; 328 | next_slot_idx++; 329 | prev_key = curr_key; 330 | prev_val = curr_val; 331 | } 332 | } while (++i < size); 333 | 334 | return next_slot_idx; 335 | } 336 | 337 | uint32 sort_and_check_no_dups(OBJ *keys, OBJ *values, uint32 size) { 338 | if (size < 2) 339 | return size; 340 | 341 | uint32 *idxs = new_uint32_array(size); 342 | index_sort(idxs, 
keys, size); 343 | 344 | for (uint32 i=0 ; i < size ; i++) 345 | if (idxs[i] != i) { 346 | OBJ key = keys[i]; 347 | OBJ value = values[i]; 348 | 349 | for (uint32 j = i ; ; ) { 350 | uint32 k = idxs[j]; 351 | idxs[j] = j; 352 | 353 | if (k == i) { 354 | keys[j] = key; 355 | values[j] = value; 356 | break; 357 | } 358 | else { 359 | keys[j] = keys[k]; 360 | values[j] = values[k]; 361 | j = k; 362 | } 363 | } 364 | } 365 | 366 | delete_uint32_array(idxs, size); 367 | 368 | OBJ prev_key = keys[0]; 369 | for (uint32 i=1 ; i < size ; i++) { 370 | OBJ curr_key = keys[i]; 371 | if (comp_objs(curr_key, prev_key) == 0) { 372 | uint32 offset = i - 1; 373 | return offset + adjust_map_with_duplicate_keys(keys+offset, values+offset, size-offset); 374 | } 375 | prev_key = curr_key; 376 | } 377 | 378 | return size; 379 | } 380 | 381 | 382 | void sort_obj_array(OBJ *objs, uint32 len) { 383 | std::sort(objs, objs+len, obj_less()); 384 | } 385 | 386 | //////////////////////////////////////////////////////////////////////////////// 387 | //////////////////////////////////////////////////////////////////////////////// 388 | 389 | // Returns: > 0 if obj1 < obj2 390 | // 0 if obj1 = obj2 391 | // < 0 if obj1 > obj2 392 | 393 | int comp_objs(OBJ obj1, OBJ obj2) { 394 | if (are_shallow_eq(obj1, obj2)) 395 | return 0; 396 | 397 | bool is_inline_1 = is_inline_obj(obj1); 398 | bool is_inline_2 = is_inline_obj(obj2); 399 | 400 | if (is_inline_1) 401 | if (is_inline_2) 402 | return shallow_cmp(obj1, obj2); 403 | else 404 | return 1; 405 | else if (is_inline_2) 406 | return -1; 407 | 408 | OBJ_TYPE type1 = get_logical_type(obj1); 409 | OBJ_TYPE type2 = get_logical_type(obj2); 410 | 411 | if (type1 != type2) 412 | return type2 - type1; 413 | 414 | uint32 count = 0; 415 | OBJ *elems1 = 0; 416 | OBJ *elems2 = 0; 417 | 418 | switch (type1) { 419 | case TYPE_SEQUENCE: { 420 | uint32 len1 = get_seq_length(obj1); 421 | uint32 len2 = get_seq_length(obj2); 422 | if (len1 != len2) 423 | return len2 - 
len1; //## BUG BUG BUG 424 | count = len1; 425 | elems1 = get_seq_buffer_ptr(obj1); 426 | elems2 = get_seq_buffer_ptr(obj2); 427 | break; 428 | } 429 | 430 | case TYPE_SET: { 431 | SET_OBJ *set1 = get_set_ptr(obj1); 432 | SET_OBJ *set2 = get_set_ptr(obj2); 433 | uint32 size1 = set1->size; 434 | uint32 size2 = set2->size; 435 | if (size1 != size2) 436 | return size2 - size1; //## BUG BUG BUG 437 | count = size1; 438 | elems1 = set1->buffer; 439 | elems2 = set2->buffer; 440 | break; 441 | } 442 | 443 | case TYPE_BIN_REL: { 444 | BIN_REL_OBJ *rel1 = get_bin_rel_ptr(obj1); 445 | BIN_REL_OBJ *rel2 = get_bin_rel_ptr(obj2); 446 | uint32 size1 = rel1->size; 447 | uint32 size2 = rel2->size; 448 | if (size1 != size2) 449 | return size2 - size1; //## BUG BUG BUG 450 | count = 2 * size1; 451 | elems1 = rel1->buffer; 452 | elems2 = rel2->buffer; 453 | break; 454 | } 455 | 456 | case TYPE_TERN_REL: { 457 | TERN_REL_OBJ *rel1 = get_tern_rel_ptr(obj1); 458 | TERN_REL_OBJ *rel2 = get_tern_rel_ptr(obj2); 459 | uint32 size1 = rel1->size; 460 | uint32 size2 = rel2->size; 461 | if (size1 != size2) 462 | return size2 - size1; //## BUG BUG BUG 463 | count = 3 * size1; 464 | elems1 = rel1->buffer; 465 | elems2 = rel2->buffer; 466 | break; 467 | } 468 | 469 | case TYPE_TAG_OBJ: { 470 | uint16 tag_idx_1 = get_tag_idx(obj1); 471 | uint16 tag_idx_2 = get_tag_idx(obj2); 472 | if (tag_idx_1 != tag_idx_2) 473 | return tag_idx_2 - tag_idx_1; 474 | return comp_objs(get_inner_obj(obj1), get_inner_obj(obj2)); 475 | } 476 | 477 | default: 478 | internal_fail(); 479 | } 480 | 481 | for (uint32 i=0 ; i < count ; i++) { 482 | int cr = comp_objs(elems1[i], elems2[i]); 483 | if (cr != 0) 484 | return cr; 485 | } 486 | 487 | return 0; 488 | } -------------------------------------------------------------------------------- /src/ternary-table.cpp: -------------------------------------------------------------------------------- 1 | #include "lib.h" 2 | #include "table-utils.h" 3 | 4 | 5 | void 
ternary_table_init(TERNARY_TABLE *table) { 6 | 7 | } 8 | 9 | void ternary_table_cleanup(TERNARY_TABLE *table) { 10 | 11 | } 12 | 13 | //////////////////////////////////////////////////////////////////////////////// 14 | 15 | void ternary_table_updates_init(TERNARY_TABLE_UPDATES *updates) { 16 | 17 | } 18 | 19 | 20 | void ternary_table_updates_cleanup(TERNARY_TABLE_UPDATES *updates) { 21 | 22 | } 23 | 24 | //////////////////////////////////////////////////////////////////////////////// 25 | 26 | bool ternary_table_contains(TERNARY_TABLE *table, uint32 left_val, uint32 middle_val, uint32 right_val) { 27 | tuple3 entry; 28 | build(entry, left_val, middle_val, right_val); 29 | 30 | std::set &unshifted = table->unshifted; 31 | std::set::iterator it = unshifted.find(entry); 32 | return it != unshifted.end(); 33 | } 34 | 35 | //////////////////////////////////////////////////////////////////////////////// 36 | 37 | void ternary_table_delete_range_(TERNARY_TABLE_ITER *iter, TERNARY_TABLE_UPDATES *updates) { 38 | int shift = iter->shift; 39 | std::vector &deletes = updates->deletes; 40 | while (!ternary_table_iter_is_out_of_range(iter)) { 41 | if (shift == 0) { 42 | deletes.push_back(*iter->iter); 43 | } 44 | else { 45 | assert(shift == 1 || shift == 2); 46 | 47 | uint64 fields01 = iter->iter->fields01; 48 | uint32 field2 = iter->iter->field2; 49 | 50 | tuple3 entry; 51 | 52 | if (shift == 1) 53 | build(entry, field2, left(fields01), right(fields01)); 54 | else 55 | build(entry, right(fields01), field2, left(fields01)); 56 | 57 | deletes.push_back(entry); 58 | } 59 | 60 | ternary_table_iter_next(iter); 61 | } 62 | } 63 | 64 | void ternary_table_delete(TERNARY_TABLE *table, TERNARY_TABLE_UPDATES *updates, uint32 left_val, uint32 middle_val, uint32 right_val) { 65 | if (ternary_table_contains(table, left_val, middle_val, right_val)) { 66 | tuple3 entry; 67 | build(entry, left_val, middle_val, right_val); 68 | updates->deletes.push_back(entry); 69 | } 70 | } 71 | 72 | void 
ternary_table_delete_by_cols_01(TERNARY_TABLE *table, TERNARY_TABLE_UPDATES *updates, uint32 value0, uint32 value1) { 73 | TERNARY_TABLE_ITER iter; 74 | ternary_table_get_iter_by_cols_01(table, &iter, value0, value1); 75 | ternary_table_delete_range_(&iter, updates); 76 | } 77 | 78 | void ternary_table_delete_by_cols_02(TERNARY_TABLE *table, TERNARY_TABLE_UPDATES *updates, uint32 value0, uint32 value2) { 79 | TERNARY_TABLE_ITER iter; 80 | ternary_table_get_iter_by_cols_02(table, &iter, value0, value2); 81 | ternary_table_delete_range_(&iter, updates); 82 | } 83 | 84 | void ternary_table_delete_by_cols_12(TERNARY_TABLE *table, TERNARY_TABLE_UPDATES *updates, uint32 value1, uint32 value2) { 85 | TERNARY_TABLE_ITER iter; 86 | ternary_table_get_iter_by_cols_12(table, &iter, value1, value2); 87 | ternary_table_delete_range_(&iter, updates); 88 | } 89 | 90 | void ternary_table_delete_by_col_0(TERNARY_TABLE *table, TERNARY_TABLE_UPDATES *updates, uint32 value) { 91 | TERNARY_TABLE_ITER iter; 92 | ternary_table_get_iter_by_col_0(table, &iter, value); 93 | ternary_table_delete_range_(&iter, updates); 94 | } 95 | 96 | void ternary_table_delete_by_col_1(TERNARY_TABLE *table, TERNARY_TABLE_UPDATES *updates, uint32 value) { 97 | TERNARY_TABLE_ITER iter; 98 | ternary_table_get_iter_by_col_1(table, &iter, value); 99 | ternary_table_delete_range_(&iter, updates); 100 | } 101 | 102 | void ternary_table_delete_by_col_2(TERNARY_TABLE *table, TERNARY_TABLE_UPDATES *updates, uint32 value) { 103 | TERNARY_TABLE_ITER iter; 104 | ternary_table_get_iter_by_col_2(table, &iter, value); 105 | ternary_table_delete_range_(&iter, updates); 106 | } 107 | 108 | void ternary_table_clear(TERNARY_TABLE *table, TERNARY_TABLE_UPDATES *updates) { 109 | TERNARY_TABLE_ITER iter; 110 | ternary_table_get_iter(table, &iter); 111 | ternary_table_delete_range_(&iter, updates); 112 | } 113 | 114 | //////////////////////////////////////////////////////////////////////////////// 115 | 116 | void 
// Applies the queued deletions and then the queued insertions to the table.
//
// The table keeps three copies of every tuple (unshifted, shifted_once and
// shifted_twice); the unshifted one decides whether a tuple is present, and
// the other two are updated only when it actually changes.
//
// vs0, vs1 and vs2 are the value stores of the three columns. Only insertions
// touch them here (one add_ref per column for each tuple actually inserted);
// the matching releases for deleted tuples are done later by
// ternary_table_updates_finish().
void ternary_table_updates_apply(TERNARY_TABLE *table, TERNARY_TABLE_UPDATES *updates, VALUE_STORE *vs0, VALUE_STORE *vs1, VALUE_STORE *vs2) {
  std::set &unshifted = table->unshifted;
  std::set &shifted_once = table->shifted_once;
  std::set &shifted_twice = table->shifted_twice;

  uint32 count = updates->deletes.size();
  if (count > 0) {
    tuple3 *deletes = &updates->deletes.front();
    for (uint32 i=0 ; i < count ; i++) {
      tuple3 entry = deletes[i];
      if (unshifted.erase(entry) > 0) {
        // The tuple was present: remove its two shifted copies as well
        shift(entry);
        shifted_once.erase(entry);
        shift(entry);
        shifted_twice.erase(entry);
      }
      else
        // Nothing was erased (e.g. the same tuple was queued twice): flag the
        // queued entry so that ternary_table_updates_finish() skips it
        deletes[i].fields01 = 0xFFFFFFFFFFFFFFFFULL;
    }
  }

  count = updates->inserts.size();
  if (count > 0) {
    tuple3 *inserts = &updates->inserts.front();
    for (uint32 i=0 ; i < count ; i++) {
      tuple3 entry = inserts[i];
      // .second is true only if the tuple was not already in the table
      if (unshifted.insert(entry).second) {
        // The references must be taken before entry is shifted, while its
        // fields still match the original column order
        value_store_add_ref(vs0, left(entry.fields01));
        value_store_add_ref(vs1, right(entry.fields01));
        value_store_add_ref(vs2, entry.field2);
        shift(entry);
        shifted_once.insert(entry);
        shift(entry);
        shifted_twice.insert(entry);
      }
    }
  }
}
value_store_release(vs0, left(entry.fields01)); 171 | value_store_release(vs1, right(entry.fields01)); 172 | value_store_release(vs2, entry.field2); 173 | } 174 | } 175 | } 176 | } 177 | 178 | //////////////////////////////////////////////////////////////////////////////// 179 | 180 | void ternary_table_get_iter_by_cols_01(TERNARY_TABLE *table, TERNARY_TABLE_ITER *iter, uint32 value0, uint32 value1) { 181 | std::set &target = table->unshifted; 182 | tuple3 lb; 183 | build(lb, value0, value1, 0); 184 | iter->iter = target.lower_bound(lb); 185 | iter->end = target.end(); 186 | iter->excl_upper_bound = pack(value0, value1+1); 187 | iter->shift = 0; 188 | } 189 | 190 | void ternary_table_get_iter_by_cols_02(TERNARY_TABLE *table, TERNARY_TABLE_ITER *iter, uint32 value0, uint32 value2) { 191 | std::set &target = table->shifted_twice; 192 | tuple3 lb; 193 | build(lb, value2, value0, 0); 194 | iter->iter = target.lower_bound(lb); 195 | iter->end = target.end(); 196 | iter->excl_upper_bound = pack(value2, value0+1); 197 | iter->shift = 2; 198 | } 199 | 200 | void ternary_table_get_iter_by_cols_12(TERNARY_TABLE *table, TERNARY_TABLE_ITER *iter, uint32 value1, uint32 value2) { 201 | std::set &target = table->shifted_once; 202 | tuple3 lb; 203 | build(lb, value1, value2, 0); 204 | iter->iter = target.lower_bound(lb); 205 | iter->end = target.end(); 206 | iter->excl_upper_bound = pack(value1, value2+1); 207 | iter->shift = 1; 208 | } 209 | 210 | void ternary_table_get_iter_by_col_0(TERNARY_TABLE *table, TERNARY_TABLE_ITER *iter, uint32 value) { 211 | std::set &target = table->unshifted; 212 | tuple3 lb; 213 | build(lb, value, 0, 0); 214 | iter->iter = target.lower_bound(lb); 215 | iter->end = target.end(); 216 | iter->excl_upper_bound = pack(value+1, 0); 217 | iter->shift = 0; 218 | } 219 | 220 | void ternary_table_get_iter_by_col_1(TERNARY_TABLE *table, TERNARY_TABLE_ITER *iter, uint32 value) { 221 | std::set &target = table->shifted_once; 222 | tuple3 lb; 223 | build(lb, 
value, 0, 0); 224 | iter->iter = target.lower_bound(lb); 225 | iter->end = target.end(); 226 | iter->excl_upper_bound = pack(value+1, 0); 227 | iter->shift = 1; 228 | } 229 | 230 | void ternary_table_get_iter_by_col_2(TERNARY_TABLE *table, TERNARY_TABLE_ITER *iter, uint32 value) { 231 | std::set &target = table->shifted_twice; 232 | tuple3 lb; 233 | build(lb, value, 0, 0); 234 | iter->iter = target.lower_bound(lb); 235 | iter->end = target.end(); 236 | iter->excl_upper_bound = pack(value+1, 0); 237 | iter->shift = 2; 238 | } 239 | 240 | void ternary_table_get_iter(TERNARY_TABLE *table, TERNARY_TABLE_ITER *iter) { 241 | std::set &target = table->unshifted; 242 | iter->iter = target.begin(); 243 | iter->end = target.end(); 244 | iter->excl_upper_bound = 0xFFFFFFFFFFFFFFFFULL; 245 | iter->shift = 0; 246 | } 247 | 248 | //////////////////////////////////////////////////////////////////////////////// 249 | 250 | bool ternary_table_iter_is_out_of_range(TERNARY_TABLE_ITER *iter) { 251 | std::set::iterator it = iter->iter; 252 | return it == iter->end || it->fields01 >= iter->excl_upper_bound; 253 | } 254 | 255 | //////////////////////////////////////////////////////////////////////////////// 256 | 257 | uint32 ternary_table_iter_get_left_field(TERNARY_TABLE_ITER *iter) { 258 | uint8 shift = iter->shift; 259 | assert(shift >= 0 && shift <= 2); 260 | if (shift == 0) 261 | return left(iter->iter->fields01); 262 | else if (shift == 1) 263 | return iter->iter->field2; 264 | else 265 | return right(iter->iter->fields01); 266 | } 267 | 268 | uint32 ternary_table_iter_get_middle_field(TERNARY_TABLE_ITER *iter) { 269 | uint8 shift = iter->shift; 270 | assert(shift >= 0 && shift <= 2); 271 | if (shift == 0) 272 | return right(iter->iter->fields01); 273 | else if (shift == 1) 274 | return left(iter->iter->fields01); 275 | else 276 | return iter->iter->field2; 277 | } 278 | 279 | uint32 ternary_table_iter_get_right_field(TERNARY_TABLE_ITER *iter) { 280 | uint8 shift = iter->shift; 
281 | assert(shift >= 0 && shift <= 2); 282 | if (shift == 0) 283 | return iter->iter->field2; 284 | else if (shift == 1) 285 | return right(iter->iter->fields01); 286 | else 287 | return left(iter->iter->fields01); 288 | } 289 | 290 | //////////////////////////////////////////////////////////////////////////////// 291 | 292 | void ternary_table_iter_next(TERNARY_TABLE_ITER *iter) { 293 | assert(!ternary_table_iter_is_out_of_range(iter)); 294 | iter->iter++; 295 | } 296 | 297 | //////////////////////////////////////////////////////////////////////////////// 298 | 299 | bool ternary_table_updates_check_01(TERNARY_TABLE *table, TERNARY_TABLE_UPDATES *updates) { 300 | sort_unique(updates->inserts); 301 | return table_updates_check_key(updates->inserts, updates->deletes, table->unshifted); 302 | } 303 | 304 | bool ternary_table_updates_check_01_2(TERNARY_TABLE *table, TERNARY_TABLE_UPDATES *updates) { 305 | return ternary_table_updates_check_01(table, updates) && 306 | table_updates_check_key(updates->inserts, updates->deletes, table->shifted_twice); 307 | } 308 | 309 | bool ternary_table_updates_check_01_12(TERNARY_TABLE *table, TERNARY_TABLE_UPDATES *updates) { 310 | return ternary_table_updates_check_01(table, updates) && 311 | table_updates_check_key(updates->inserts, updates->deletes, table->shifted_once); 312 | } 313 | 314 | bool ternary_table_updates_check_01_12_20(TERNARY_TABLE *table, TERNARY_TABLE_UPDATES *updates) { 315 | return ternary_table_updates_check_01_12(table, updates) && 316 | table_updates_check_key(updates->inserts, updates->deletes, table->shifted_twice); 317 | } 318 | 319 | //////////////////////////////////////////////////////////////////////////////// 320 | 321 | OBJ copy_ternary_table(TERNARY_TABLE *table, VALUE_STORE *vs1, VALUE_STORE *vs2, VALUE_STORE *vs3, int idx1, int idx2, int idx3) { 322 | OBJ *slots1 = value_store_slot_array(vs1); 323 | OBJ *slots2 = value_store_slot_array(vs2); 324 | OBJ *slots3 = value_store_slot_array(vs3); 325 | 
326 | std::set &rows = table->unshifted; 327 | uint32 size = rows.size(); 328 | 329 | if (size == 0) 330 | return make_empty_rel(); 331 | 332 | OBJ *col1 = new_obj_array(3 * size); 333 | OBJ *col2 = col1 + size; 334 | OBJ *col3 = col2 + size; 335 | 336 | uint32 idx = 0; 337 | for (std::set::iterator it=rows.begin() ; it != rows.end() ; it++) { 338 | tuple3 row = *it; 339 | col1[idx] = slots1[left(row.fields01)]; 340 | col2[idx] = slots2[right(row.fields01)]; 341 | col3[idx++] = slots3[row.field2]; 342 | } 343 | assert(idx == size); 344 | 345 | for (int64 i=0 ; i < 3 * size ; i++) 346 | add_ref(col1[i]); 347 | 348 | OBJ *rec_cols[3]; 349 | rec_cols[idx1] = col1; 350 | rec_cols[idx2] = col2; 351 | rec_cols[idx3] = col3; 352 | OBJ rel = build_tern_rel(rec_cols[0], rec_cols[1], rec_cols[2], size); 353 | 354 | delete_obj_array(col1, 3 * size); 355 | 356 | return rel; 357 | } 358 | 359 | //////////////////////////////////////////////////////////////////////////////// 360 | 361 | void set_ternary_table( 362 | TERNARY_TABLE *table, TERNARY_TABLE_UPDATES *updates, 363 | VALUE_STORE *vs1, VALUE_STORE *vs2, VALUE_STORE *vs3, 364 | VALUE_STORE_UPDATES *vsu1, VALUE_STORE_UPDATES *vsu2, VALUE_STORE_UPDATES *vsu3, 365 | OBJ rel, int idx1, int idx2, int idx3 366 | ) { 367 | ternary_table_clear(table, updates); 368 | 369 | if (is_empty_rel(rel)) 370 | return; 371 | 372 | TERN_REL_OBJ *ptr = get_tern_rel_ptr(rel); 373 | uint32 size = ptr->size; 374 | OBJ *col1 = get_col_array_ptr(ptr, idx1); 375 | OBJ *col2 = get_col_array_ptr(ptr, idx2); 376 | OBJ *col3 = get_col_array_ptr(ptr, idx3); 377 | 378 | for (uint32 i=0 ; i < size ; i++) { 379 | OBJ obj = col1[i]; 380 | uint32 ref1 = lookup_value_ex(vs1, vsu1, obj); 381 | if (ref1 == -1) { 382 | add_ref(obj); 383 | ref1 = value_store_insert(vs1, vsu1, obj); 384 | } 385 | 386 | obj = col2[i]; 387 | uint32 ref2 = lookup_value_ex(vs2, vsu2, obj); 388 | if (ref2 == -1) { 389 | add_ref(obj); 390 | ref2 = value_store_insert(vs2, vsu2, obj); 391 
| } 392 | 393 | obj = col3[i]; 394 | uint32 ref3 = lookup_value_ex(vs3, vsu3, obj); 395 | if (ref3 == -1) { 396 | add_ref(obj); 397 | ref3 = value_store_insert(vs3, vsu3, obj); 398 | } 399 | 400 | ternary_table_insert(updates, ref1, ref2, ref3); 401 | } 402 | } 403 | -------------------------------------------------------------------------------- /src/interface/value.cpp: -------------------------------------------------------------------------------- 1 | #include "lib.h" 2 | #include "lib-cpp.h" 3 | 4 | using std::string; 5 | using std::to_string; 6 | using std::ostream; 7 | using std::ostringstream; 8 | 9 | using cell::Value; 10 | 11 | 12 | class ValueBase : public Value { 13 | public: 14 | bool is_symb(); 15 | bool is_int(); 16 | bool is_float(); 17 | bool is_seq(); 18 | bool is_set(); 19 | bool is_bin_rel(); 20 | bool is_tern_rel(); 21 | bool is_tagged(); 22 | 23 | const char *as_symb(); 24 | long long as_int(); 25 | double as_float(); 26 | 27 | unsigned int size(); 28 | Value *item(unsigned int); 29 | void entry(unsigned int, Value *&, Value *&); 30 | void entry(unsigned int, Value *&, Value *&, Value *&); 31 | 32 | const char *tag(); 33 | Value *untagged(); 34 | 35 | bool is_string(); 36 | bool is_record(); 37 | 38 | string as_str(); 39 | Value *lookup(const char *); 40 | 41 | string printed(); 42 | 43 | protected: 44 | ValueBase(); 45 | }; 46 | 47 | 48 | class SymbValue : public ValueBase { 49 | public: 50 | SymbValue(const char *); 51 | 52 | bool is_symb(); 53 | const char *as_symb(); 54 | void print(ostream &); 55 | 56 | private: 57 | const char *ptr; 58 | }; 59 | 60 | 61 | class IntValue : public ValueBase { 62 | public: 63 | IntValue(long long); 64 | 65 | bool is_int(); 66 | long long as_int(); 67 | void print(ostream &); 68 | 69 | private: 70 | long long value; 71 | }; 72 | 73 | 74 | class FloatValue : public ValueBase { 75 | public: 76 | FloatValue(double); 77 | 78 | bool is_float(); 79 | double as_float(); 80 | void print(ostream &); 81 | 82 | 
private: 83 | double value; 84 | }; 85 | 86 | 87 | class SeqSetValue : public ValueBase { 88 | public: 89 | SeqSetValue(Value **, unsigned int, bool); 90 | ~SeqSetValue(); 91 | 92 | bool is_seq(); 93 | bool is_set(); 94 | bool is_bin_rel(); 95 | bool is_tern_rel(); 96 | unsigned int size(); 97 | Value *item(unsigned int); 98 | void print(ostream &); 99 | 100 | private: 101 | Value **items; 102 | unsigned int count; 103 | bool ordered; 104 | }; 105 | 106 | 107 | class BinRelValue : public ValueBase { 108 | public: 109 | BinRelValue(Value *(*)[2], unsigned int, bool); 110 | ~BinRelValue(); 111 | 112 | bool is_bin_rel(); 113 | unsigned int size(); 114 | void entry(unsigned int, Value *&, Value *&); 115 | bool is_record(); 116 | Value *lookup(const char *); 117 | void print(ostream &); 118 | 119 | private: 120 | Value *(*entries)[2]; 121 | unsigned int count; 122 | bool is_map; 123 | bool is_rec; 124 | }; 125 | 126 | 127 | class TernRelValue : public ValueBase { 128 | public: 129 | TernRelValue(Value *(*)[3], unsigned int); 130 | ~TernRelValue(); 131 | 132 | bool is_tern_rel(); 133 | unsigned int size(); 134 | void entry(unsigned int, Value *&, Value *&, Value *&); 135 | void print(ostream &); 136 | 137 | private: 138 | Value *(*entries)[3]; 139 | unsigned int count; 140 | }; 141 | 142 | 143 | class TaggedValue : public ValueBase { 144 | public: 145 | TaggedValue(const char *, Value *); 146 | ~TaggedValue(); 147 | 148 | bool is_tagged(); 149 | const char *tag(); 150 | Value *untagged(); 151 | bool is_string(); 152 | string as_str(); 153 | Value *lookup(const char *); 154 | void print(ostream &); 155 | 156 | private: 157 | const char *tag_ptr; 158 | Value *value; 159 | }; 160 | 161 | //////////////////////////////////////////////////////////////////////////////// 162 | //////////////////////////////////////////////////////////////////////////////// 163 | 164 | ValueBase::ValueBase() { 165 | 166 | } 167 | 168 | bool ValueBase::is_symb() { 169 | return false; 170 | } 171 
| 172 | bool ValueBase::is_int() { 173 | return false; 174 | } 175 | 176 | bool ValueBase::is_float() { 177 | return false; 178 | } 179 | 180 | bool ValueBase::is_seq() { 181 | return false; 182 | } 183 | 184 | bool ValueBase::is_set() { 185 | return false; 186 | } 187 | 188 | bool ValueBase::is_bin_rel() { 189 | return false; 190 | } 191 | 192 | bool ValueBase::is_tern_rel() { 193 | return false; 194 | } 195 | 196 | bool ValueBase::is_tagged() { 197 | return false; 198 | } 199 | 200 | const char *ValueBase::as_symb() { 201 | throw 0LL; 202 | } 203 | 204 | long long ValueBase::as_int() { 205 | throw 0LL; 206 | } 207 | 208 | double ValueBase::as_float() { 209 | throw 0LL; 210 | } 211 | 212 | unsigned int ValueBase::size() { 213 | throw 0LL; 214 | } 215 | 216 | Value *ValueBase::item(unsigned int) { 217 | throw 0LL; 218 | } 219 | 220 | void ValueBase::entry(unsigned int, Value *&, Value *&) { 221 | throw 0LL; 222 | } 223 | 224 | void ValueBase::entry(unsigned int, Value *&, Value *&, Value *&) { 225 | throw 0LL; 226 | } 227 | 228 | const char *ValueBase::tag() { 229 | throw 0LL; 230 | } 231 | 232 | Value *ValueBase::untagged() { 233 | throw 0LL; 234 | } 235 | 236 | bool ValueBase::is_string() { 237 | return false; 238 | } 239 | 240 | bool ValueBase::is_record() { 241 | return false; 242 | } 243 | 244 | string ValueBase::as_str() { 245 | throw 0LL; 246 | } 247 | 248 | Value *ValueBase::lookup(const char *) { 249 | throw 0LL; 250 | } 251 | 252 | string ValueBase::printed() { 253 | ostringstream os; 254 | print(os); 255 | return os.str(); 256 | } 257 | 258 | //////////////////////////////////////////////////////////////////////////////// 259 | 260 | SymbValue::SymbValue(const char *ptr) : ptr(ptr) { 261 | 262 | } 263 | 264 | bool SymbValue::is_symb() { 265 | return true; 266 | } 267 | 268 | const char *SymbValue::as_symb() { 269 | return ptr; 270 | } 271 | 272 | void SymbValue::print(ostream &os) { 273 | os << ptr; 274 | } 275 | 276 | 
//////////////////////////////////////////////////////////////////////////////// 277 | 278 | IntValue::IntValue(long long value) : value(value) { 279 | 280 | } 281 | 282 | bool IntValue::is_int() { 283 | return true; 284 | } 285 | 286 | long long IntValue::as_int() { 287 | return value; 288 | } 289 | 290 | void IntValue::print(ostream &os) { 291 | os << value; 292 | } 293 | 294 | //////////////////////////////////////////////////////////////////////////////// 295 | 296 | FloatValue::FloatValue(double value) : value(value) { 297 | 298 | } 299 | 300 | bool FloatValue::is_float() { 301 | return true; 302 | } 303 | 304 | double FloatValue::as_float() { 305 | return value; 306 | } 307 | 308 | void FloatValue::print(ostream &os) { 309 | os << value; 310 | } 311 | 312 | //////////////////////////////////////////////////////////////////////////////// 313 | 314 | SeqSetValue::SeqSetValue(Value **items, unsigned int count, bool ordered) 315 | : items(items), count(count), ordered(ordered) { 316 | 317 | } 318 | 319 | SeqSetValue::~SeqSetValue() { 320 | delete [] items; 321 | } 322 | 323 | 324 | bool SeqSetValue::is_seq() { 325 | return ordered; 326 | } 327 | 328 | bool SeqSetValue::is_set() { 329 | return !ordered; 330 | } 331 | 332 | bool SeqSetValue::is_bin_rel() { 333 | return !ordered & count == 0; 334 | } 335 | 336 | bool SeqSetValue::is_tern_rel() { 337 | return !ordered & count == 0; 338 | } 339 | 340 | unsigned int SeqSetValue::size() { 341 | return count; 342 | } 343 | 344 | Value *SeqSetValue::item(unsigned int idx) { 345 | if (idx < count) 346 | return items[idx]; 347 | else 348 | throw 0LL; 349 | } 350 | 351 | void SeqSetValue::print(ostream &os) { 352 | os << (ordered ? "(" : "["); 353 | for (uint32 i=0 ; i < count ; i++) { 354 | if (i > 0) 355 | os << ", "; 356 | items[i]->print(os); 357 | } 358 | os << (ordered ? 
")" : "]"); 359 | } 360 | 361 | //////////////////////////////////////////////////////////////////////////////// 362 | 363 | BinRelValue::BinRelValue(Value *(*entries)[2], unsigned int count, bool is_map) 364 | : entries(entries), count(count), is_map(is_map) { 365 | is_rec = false; 366 | for (uint32 i=0 ; i < count ; i++) 367 | if (!entries[i][0]->is_symb()) 368 | return; 369 | is_rec = is_map & count > 0; 370 | } 371 | 372 | BinRelValue::~BinRelValue() { 373 | delete [] entries; 374 | } 375 | 376 | bool BinRelValue::is_bin_rel() { 377 | return true; 378 | } 379 | 380 | unsigned int BinRelValue::size() { 381 | return count; 382 | } 383 | 384 | void BinRelValue::entry(unsigned int idx, Value *&arg0, Value *&arg1) { 385 | if (idx < count) { 386 | arg0 = entries[idx][0]; 387 | arg1 = entries[idx][1]; 388 | } 389 | else 390 | throw 0LL; 391 | } 392 | 393 | bool BinRelValue::is_record() { 394 | return is_rec; 395 | } 396 | 397 | Value *BinRelValue::lookup(const char *field) { 398 | for (uint32 i=0 ; i < count ; i++) { 399 | Value *key = entries[i][0]; 400 | if (key->is_symb() && strcmp(field, key->as_symb()) == 0) 401 | return entries[i][1]; 402 | } 403 | throw 0LL; 404 | } 405 | 406 | void BinRelValue::print(ostream &os) { 407 | if (count > 0) { 408 | os << (is_rec ? "(" : "["); 409 | for (uint32 i=0 ; i < count ; i++) { 410 | os << (i == 0 ? "" : is_map ? ", " : " "); 411 | entries[i][0]->print(os); 412 | os << (is_rec ? ": " : is_map ? " -> " : ", "); 413 | entries[i][1]->print(os); 414 | os << (is_map ? "" : ";"); 415 | } 416 | os << (is_rec ? 
")" : "]"); 417 | } 418 | else 419 | os << "[:]"; 420 | } 421 | 422 | //////////////////////////////////////////////////////////////////////////////// 423 | 424 | TernRelValue::TernRelValue(Value *(*entries)[3], unsigned int count) : entries(entries), count(count) { 425 | 426 | } 427 | 428 | TernRelValue::~TernRelValue() { 429 | delete [] entries; 430 | } 431 | 432 | bool TernRelValue::is_tern_rel() { 433 | return true; 434 | } 435 | 436 | unsigned int TernRelValue::size() { 437 | return count; 438 | } 439 | 440 | void TernRelValue::entry(unsigned int idx, Value *&arg0, Value *&arg1, Value *&arg2) { 441 | if (idx < count) { 442 | Value **entry = entries[count]; 443 | arg0 = entry[0]; 444 | arg1 = entry[1]; 445 | arg2 = entry[2]; 446 | } 447 | else 448 | throw 0LL; 449 | } 450 | 451 | void TernRelValue::print(ostream &os) { 452 | os << "["; 453 | for (uint32 i=0 ; i < count ; i++) { 454 | if (i > 0); 455 | os << " "; 456 | entries[i][0]->print(os); 457 | os << ", "; 458 | entries[i][1]->print(os); 459 | os << ", "; 460 | entries[i][2]->print(os); 461 | os << ";"; 462 | } 463 | os << "]"; 464 | } 465 | 466 | //////////////////////////////////////////////////////////////////////////////// 467 | 468 | TaggedValue::TaggedValue(const char *tag_ptr, Value *value) : tag_ptr(tag_ptr), value(value) { 469 | 470 | } 471 | 472 | TaggedValue::~TaggedValue() { 473 | delete value; 474 | } 475 | 476 | bool TaggedValue::is_tagged() { 477 | return true; 478 | } 479 | 480 | const char *TaggedValue::tag() { 481 | return tag_ptr; 482 | } 483 | 484 | Value *TaggedValue::untagged() { 485 | return value; 486 | } 487 | 488 | bool TaggedValue::is_string() { 489 | if (strcmp(tag_ptr, "string") != 0 || !value->is_seq()) 490 | return false; 491 | uint32 len = value->size(); 492 | for (uint32 i=0 ; i < len ; i++) { 493 | Value *item = value->item(i); 494 | if (!item->is_int()) 495 | return false; 496 | long long int_val = item->as_int(); 497 | if (int_val < 0 | int_val > 1114111) 498 | return 
false; 499 | } 500 | return true; 501 | } 502 | 503 | string TaggedValue::as_str() { 504 | if (strcmp(tag_ptr, "string") != 0) 505 | throw 0LL; 506 | string result; 507 | uint32 len = value->size(); 508 | for (uint32 i=0 ; i < len ; i++) 509 | result.push_back(value->item(i)->as_int()); 510 | return result; 511 | } 512 | 513 | Value *TaggedValue::lookup(const char *field) { 514 | return value->lookup(field); 515 | } 516 | 517 | void TaggedValue::print(ostream &os) { 518 | bool skip_pars = value->is_record() | (value->is_seq() && value->size() > 0); 519 | os << tag_ptr; 520 | if (!skip_pars) 521 | os << "("; 522 | value->print(os); 523 | if (!skip_pars) 524 | os << ")"; 525 | } 526 | 527 | //////////////////////////////////////////////////////////////////////////////// 528 | //////////////////////////////////////////////////////////////////////////////// 529 | 530 | Value *export_as_value_ptr(OBJ obj) { 531 | if (is_tag_obj(obj)) 532 | return new TaggedValue(symb_to_raw_str(get_tag(obj)), export_as_value_ptr(get_inner_obj(obj))); 533 | 534 | OBJ_TYPE physical_type = get_physical_type(obj); 535 | switch (physical_type) { 536 | case TYPE_SYMBOL: 537 | return new SymbValue(symb_to_raw_str(obj)); 538 | 539 | case TYPE_INTEGER: 540 | return new IntValue(get_int(obj)); 541 | 542 | case TYPE_FLOAT: 543 | return new FloatValue(get_float(obj)); 544 | 545 | case TYPE_SEQUENCE: 546 | case TYPE_SLICE: 547 | if (!is_empty_seq(obj)) { 548 | uint32 size = get_seq_length(obj); 549 | OBJ *objs = get_seq_buffer_ptr(obj); 550 | Value **items = new Value *[size]; 551 | for (uint32 i=0 ; i < size ; i++) 552 | items[i] = export_as_value_ptr(objs[i]); 553 | return new SeqSetValue(items, size, true); 554 | } 555 | else 556 | return new SeqSetValue(NULL, 0, true); 557 | 558 | case TYPE_SET: 559 | if (!is_empty_rel(obj)) { 560 | SET_OBJ *ptr = get_set_ptr(obj); 561 | uint32 size = ptr->size; 562 | OBJ *objs = ptr->buffer; 563 | Value **items = new Value*[size]; 564 | for (uint32 i=0 ; i < 
size ; i++) 565 | items[i] = export_as_value_ptr(objs[i]); 566 | return new SeqSetValue(items, size, false); 567 | } 568 | else 569 | return new SeqSetValue(NULL, 0, false); 570 | 571 | case TYPE_BIN_REL: 572 | case TYPE_MAP: 573 | case TYPE_LOG_MAP: { 574 | assert(!is_empty_rel(obj)); 575 | BIN_REL_OBJ *ptr = get_bin_rel_ptr(obj); 576 | uint32 size = ptr->size; 577 | OBJ *buffer = ptr->buffer; 578 | Value *(*entries)[2] = new Value*[size][2]; 579 | for (uint32 i=0 ; i < size ; i++) { 580 | entries[i][0] = export_as_value_ptr(buffer[i]); 581 | entries[i][1] = export_as_value_ptr(buffer[i+size]); 582 | } 583 | return new BinRelValue(entries, size, physical_type != TYPE_BIN_REL); 584 | } 585 | 586 | case TYPE_TERN_REL: { 587 | assert(!is_empty_rel(obj)); 588 | TERN_REL_OBJ *ptr = get_tern_rel_ptr(obj); 589 | uint32 size = ptr->size; 590 | OBJ *buffer = ptr->buffer; 591 | Value *(*entries)[3] = new Value*[size][3]; 592 | for (uint32 i=0 ; i < size ; i++) { 593 | entries[i][0] = export_as_value_ptr(buffer[3*i]); 594 | entries[i][1] = export_as_value_ptr(buffer[3*i+1]); 595 | entries[i][2] = export_as_value_ptr(buffer[3*i+2]); 596 | } 597 | return new TernRelValue(entries, size); 598 | } 599 | 600 | default: // case TYPE_BLANK_OBJ: case TYPE_NULL_OBJ: case TYPE_TAG_OBJ: 601 | fail(); 602 | } 603 | } 604 | 605 | unique_ptr export_as_value(OBJ obj) { 606 | return unique_ptr(export_as_value_ptr(obj)); 607 | } 608 | -------------------------------------------------------------------------------- /src/printing.cpp: -------------------------------------------------------------------------------- 1 | #include "lib.h" 2 | 3 | 4 | bool is_str(uint16 tag_idx, OBJ obj) { 5 | if (tag_idx != symb_idx_string) 6 | return false; 7 | 8 | if (is_empty_seq(obj)) 9 | return true; 10 | 11 | if (!is_ne_seq(obj)) 12 | return false; 13 | 14 | uint32 len = get_seq_length(obj); 15 | OBJ *elems = get_seq_buffer_ptr(obj); 16 | 17 | for (uint32 i=0 ; i < len ; i++) { 18 | OBJ elem = elems[i]; 19 | 
20 | if (!is_int(elem)) 21 | return false; 22 | 23 | int64 value = get_int_val(elem); 24 | if (value < 0 | value >= 65536) 25 | return false; 26 | } 27 | 28 | return true; 29 | } 30 | 31 | 32 | bool is_record(OBJ obj) { 33 | if (!is_ne_map(obj)) 34 | return false; 35 | 36 | BIN_REL_OBJ *map = get_bin_rel_ptr(obj); 37 | uint32 size = map->size; 38 | OBJ *keys = get_left_col_array_ptr(map); 39 | 40 | for (uint32 i=0 ; i < size ; i++) 41 | if (!is_symb(keys[i])) 42 | return false; 43 | 44 | return true; 45 | } 46 | 47 | 48 | void print_bare_str(OBJ str, void (*emit)(void *, const void *, EMIT_ACTION), void *data) { 49 | char buffer[64]; 50 | 51 | assert(is_str(get_tag_idx(str), get_inner_obj(str))); 52 | 53 | OBJ char_seq = get_inner_obj(str); 54 | if (is_empty_seq(char_seq)) 55 | return; 56 | 57 | uint32 len = get_seq_length(char_seq); 58 | OBJ *chars = get_seq_buffer_ptr(char_seq); 59 | 60 | for (uint32 i=0 ; i < len ; i++) { 61 | int64 ch = get_int_val(chars[i]); 62 | assert(ch >= 0 & ch < 65536); 63 | if (ch >= ' ' & ch <= '~') { 64 | buffer[0] = '\\'; 65 | buffer[1] = ch; 66 | buffer[2] = '\0'; 67 | emit(data, buffer + (ch == '"' | ch == '\\' ? 
0 : 1), TEXT); 68 | } 69 | else if (ch == '\n') { 70 | emit(data, "\\n", TEXT); 71 | } 72 | else if (ch == '\t') { 73 | emit(data, "\\t", TEXT); 74 | } 75 | else { 76 | sprintf(buffer, "\\%04llx", ch); 77 | emit(data, buffer, TEXT); 78 | } 79 | } 80 | } 81 | 82 | 83 | void print_int(OBJ obj, void (*emit)(void *, const void *, EMIT_ACTION), void *data) { 84 | int64 n = get_int(obj); 85 | char buffer[1024]; 86 | sprintf(buffer, "%lld", n); 87 | emit(data, buffer, TEXT); 88 | } 89 | 90 | 91 | void print_float(OBJ obj, void (*emit)(void *, const void *, EMIT_ACTION), void *data) { 92 | double x = get_float(obj); 93 | char buffer[1024]; 94 | sprintf(buffer, "%g", x); 95 | bool is_integer = true; 96 | for (int i=0 ; buffer[i] != '\0' ; i++) 97 | if (!isdigit(buffer[i])) { 98 | is_integer = false; 99 | break; 100 | } 101 | if (is_integer) 102 | strcat(buffer, ".0"); 103 | emit(data, buffer, TEXT); 104 | } 105 | 106 | 107 | void print_symb(OBJ obj, void (*emit)(void *, const void *, EMIT_ACTION), void *data) { 108 | OBJ str = to_str(obj); 109 | print_bare_str(str, emit, data); 110 | release(str); 111 | } 112 | 113 | 114 | void print_seq(OBJ obj, bool print_parentheses, void (*emit)(void *, const void *, EMIT_ACTION), void *data) { 115 | if (print_parentheses) 116 | emit(data, "(", TEXT); 117 | if (!is_empty_seq(obj)) { 118 | uint32 len = get_seq_length(obj); 119 | OBJ *elems = get_seq_buffer_ptr(obj); 120 | for (uint32 i=0 ; i < len ; i++) { 121 | if (i > 0) 122 | emit(data, ", ", TEXT); 123 | print_obj(elems[i], emit, data); 124 | } 125 | } 126 | if (print_parentheses) 127 | emit(data, ")", TEXT); 128 | } 129 | 130 | 131 | void print_set(OBJ obj, void (*emit)(void *, const void *, EMIT_ACTION), void *data) { 132 | emit(data, "[", TEXT); 133 | if (!is_empty_rel(obj)) { 134 | SET_OBJ *set = get_set_ptr(obj); 135 | uint32 size = set->size; 136 | OBJ *elems = set->buffer; 137 | for (uint32 i=0 ; i < size ; i++) { 138 | if (i > 0) 139 | emit(data, ", ", TEXT); 140 | 
print_obj(elems[i], emit, data); 141 | } 142 | } 143 | emit(data, "]", TEXT); 144 | } 145 | 146 | 147 | void print_ne_bin_rel(OBJ obj, void (*emit)(void *, const void *, EMIT_ACTION), void *data) { 148 | emit(data, "[", TEXT); 149 | 150 | BIN_REL_OBJ *rel = get_bin_rel_ptr(obj); 151 | uint32 size = rel->size; 152 | OBJ *left_col = get_left_col_array_ptr(rel); 153 | OBJ *right_col = get_right_col_array_ptr(rel); 154 | 155 | for (uint32 i=0 ; i < size ; i++) { 156 | if (i > 0) 157 | emit(data, "; ", TEXT); 158 | emit(data, NULL, SUB_START); 159 | print_obj(left_col[i], emit, data); 160 | emit(data, ", ", TEXT); 161 | print_obj(right_col[i], emit, data); 162 | emit(data, NULL, SUB_END); 163 | } 164 | 165 | if (size == 1) 166 | emit(data, ";", TEXT); 167 | 168 | emit(data, "]", TEXT); 169 | } 170 | 171 | 172 | void print_ne_map(OBJ obj, void (*emit)(void *, const void *, EMIT_ACTION), void *data) { 173 | BIN_REL_OBJ *map = get_bin_rel_ptr(obj); 174 | uint32 size = map->size; 175 | OBJ *keys = get_left_col_array_ptr(map); 176 | OBJ *values = get_right_col_array_ptr(map); 177 | 178 | emit(data, "[", TEXT); 179 | 180 | for (uint32 i=0 ; i < size ; i++) { 181 | if (i > 0) 182 | emit(data, ", ", TEXT); 183 | emit(data, NULL, SUB_START); 184 | print_obj(keys[i], emit, data); 185 | emit(data, " -> ", TEXT); 186 | print_obj(values[i], emit, data); 187 | emit(data, NULL, SUB_END); 188 | } 189 | 190 | emit(data, "]", TEXT); 191 | } 192 | 193 | 194 | void print_record(OBJ obj, bool print_parentheses, void (*emit)(void *, const void *, EMIT_ACTION), void *data) { 195 | if (print_parentheses) 196 | emit(data, "(", TEXT); 197 | 198 | BIN_REL_OBJ *map = get_bin_rel_ptr(obj); 199 | uint32 size = map->size; 200 | OBJ *keys = get_left_col_array_ptr(map); 201 | OBJ *values = get_right_col_array_ptr(map); 202 | 203 | for (uint32 i=0 ; i < size ; i++) { 204 | if (i > 0) 205 | emit(data, ", ", TEXT); 206 | emit(data, NULL, SUB_START); 207 | print_symb(keys[i], emit, data); 208 | emit(data, 
": ", TEXT); 209 | print_obj(values[i], emit, data); 210 | emit(data, NULL, SUB_END); 211 | } 212 | 213 | if (print_parentheses) 214 | emit(data, ")", TEXT); 215 | } 216 | 217 | 218 | void print_ne_tern_rel(OBJ obj, void (*emit)(void *, const void *, EMIT_ACTION), void *data) { 219 | emit(data, "[", TEXT); 220 | 221 | TERN_REL_OBJ *rel = get_tern_rel_ptr(obj); 222 | uint32 size = rel->size; 223 | OBJ *col1 = get_col_array_ptr(rel, 0); 224 | OBJ *col2 = get_col_array_ptr(rel, 1); 225 | OBJ *col3 = get_col_array_ptr(rel, 2); 226 | 227 | for (uint32 i=0 ; i < size ; i++) { 228 | if (i > 0) 229 | emit(data, "; ", TEXT); 230 | emit(data, NULL, SUB_START); 231 | print_obj(col1[i], emit, data); 232 | emit(data, ", ", TEXT); 233 | print_obj(col2[i], emit, data); 234 | emit(data, ", ", TEXT); 235 | print_obj(col3[i], emit, data); 236 | emit(data, NULL, SUB_END); 237 | } 238 | 239 | if (size == 1) 240 | emit(data, ";", TEXT); 241 | 242 | emit(data, "]", TEXT); 243 | } 244 | 245 | 246 | void print_tag_obj(OBJ obj, void (*emit)(void *, const void *, EMIT_ACTION), void *data) { 247 | uint16 tag_idx = get_tag_idx(obj); 248 | OBJ inner_obj = get_inner_obj(obj); 249 | if (is_str(tag_idx, inner_obj)) { 250 | emit(data, "\"", TEXT); 251 | print_bare_str(obj, emit, data); 252 | emit(data, "\"", TEXT); 253 | } 254 | else { 255 | print_symb(make_symb(tag_idx), emit, data); 256 | emit(data, "(", TEXT); 257 | 258 | if (is_record(inner_obj)) 259 | print_record(inner_obj, false, emit, data); 260 | else if (is_ne_seq(inner_obj) && get_seq_length(inner_obj) > 1) 261 | print_seq(inner_obj, false, emit, data); 262 | else 263 | print_obj(inner_obj, emit, data); 264 | 265 | emit(data, ")", TEXT); 266 | } 267 | } 268 | 269 | 270 | void print_obj(OBJ obj, void (*emit)(void *, const void *, EMIT_ACTION), void *data) { 271 | emit(data, NULL, SUB_START); 272 | 273 | if (is_blank_obj(obj)) 274 | emit(data, "BLANK", TEXT); 275 | 276 | else if (is_null_obj(obj)) 277 | emit(data, "NULL", TEXT); 278 | 279 
| else if (is_int(obj)) 280 | print_int(obj, emit, data); 281 | 282 | else if (is_float(obj)) 283 | print_float(obj, emit, data); 284 | 285 | else if (is_symb(obj)) 286 | print_symb(obj, emit, data); 287 | 288 | else if (is_seq(obj)) 289 | print_seq(obj, true, emit, data); 290 | 291 | else if (is_set(obj)) 292 | print_set(obj, emit, data); 293 | 294 | else if (is_record(obj)) 295 | print_record(obj, true, emit, data); 296 | 297 | else if (is_ne_map(obj)) //## SHOULD I PRINT IT AS A MAP ONLY WHEN IT'S A PHYSICAL ONE? 298 | print_ne_map(obj, emit, data); 299 | 300 | else if (is_ne_bin_rel(obj)) 301 | print_ne_bin_rel(obj, emit, data); 302 | 303 | else if (is_ne_tern_rel(obj)) 304 | print_ne_tern_rel(obj, emit, data); 305 | 306 | else // is_tag_obj(obj) 307 | print_tag_obj(obj, emit, data); 308 | 309 | emit(data, NULL, SUB_END); 310 | } 311 | 312 | 313 | struct TEXT_FRAG { 314 | int depth; 315 | uint32 start; 316 | uint32 length; 317 | }; 318 | 319 | 320 | struct PRINT_BUFFER { 321 | uint32 str_len; 322 | char *buffer; 323 | uint32 buff_size; 324 | 325 | uint32 frags_count; 326 | TEXT_FRAG *fragments; 327 | uint32 frags_buff_size; 328 | 329 | int curr_depth; 330 | }; 331 | 332 | 333 | void init(PRINT_BUFFER *pb) { 334 | pb->str_len = 0; 335 | pb->buffer = new_byte_array(4096); 336 | pb->buff_size = 4096; 337 | pb->buffer[0] = '\0'; 338 | 339 | pb->frags_count = 0; 340 | pb->fragments = (TEXT_FRAG *) new_void_array(4096); 341 | pb->frags_buff_size = 4096; 342 | 343 | pb->curr_depth = -1; 344 | } 345 | 346 | 347 | void cleanup(PRINT_BUFFER *pb) { 348 | delete_byte_array(pb->buffer, pb->buff_size); 349 | delete_void_array(pb->fragments, pb->frags_buff_size); 350 | } 351 | 352 | 353 | void adjust_buff_capacity(PRINT_BUFFER *pb, uint32 extra_capacity) { 354 | uint32 buff_size = pb->buff_size; 355 | uint32 min_capacity = pb->str_len + extra_capacity + 1; 356 | if (buff_size < min_capacity) { 357 | uint32 new_capacity = 2 * buff_size; 358 | while (new_capacity < 
min_capacity) 359 | new_capacity *= 2; 360 | char *new_buff = new_byte_array(new_capacity); 361 | memcpy(new_buff, pb->buffer, pb->str_len+1); 362 | delete_byte_array(pb->buffer, buff_size); 363 | pb->buffer = new_buff; 364 | pb->buff_size = new_capacity; 365 | } 366 | } 367 | 368 | 369 | TEXT_FRAG *insert_new_fragment(PRINT_BUFFER *pb) { 370 | uint32 curr_capacity = pb->frags_buff_size; 371 | uint32 frags_count = pb->frags_count; 372 | uint32 min_capacity = sizeof(TEXT_FRAG) * (frags_count + 1); 373 | if (curr_capacity < min_capacity) { 374 | TEXT_FRAG *new_frags = (TEXT_FRAG *) new_void_array(2 * curr_capacity); 375 | memcpy(new_frags, pb->fragments, sizeof(TEXT_FRAG) * frags_count); 376 | delete_void_array(pb->fragments, curr_capacity); 377 | pb->fragments = new_frags; 378 | pb->frags_buff_size = 2 * curr_capacity; 379 | } 380 | pb->frags_count = frags_count + 1; 381 | return pb->fragments + frags_count; 382 | } 383 | 384 | 385 | uint32 printable_frags_count(PRINT_BUFFER *pb) { 386 | uint32 fc = pb->frags_count; 387 | TEXT_FRAG *fs = pb->fragments; 388 | 389 | TEXT_FRAG *lf = fs + fc - 1; 390 | assert(pb->curr_depth == lf->depth); 391 | 392 | if (lf->depth == -1) 393 | return fc - 1; 394 | 395 | uint32 curr_length = lf->length; 396 | 397 | for (uint32 i=fc-2 ; i >= 0 ; i--) { 398 | TEXT_FRAG *f = fs + i; 399 | curr_length += f->length; 400 | if (curr_length > 100) 401 | return i + 1; 402 | } 403 | 404 | return 0; 405 | } 406 | 407 | 408 | void calculate_subobjects_lengths(PRINT_BUFFER *pb, int32 *ls) { 409 | int fc = pb->frags_count; 410 | TEXT_FRAG *fs = pb->fragments; 411 | 412 | for (int i=0 ; i < fc ; i++) { 413 | TEXT_FRAG *f = fs + i; 414 | 415 | int pd; 416 | if (i == 0) 417 | pd = -1; 418 | else 419 | pd = fs[i-1].depth; 420 | 421 | if (f->depth > pd) { 422 | int len = 0; 423 | for (int j=i ; j < fc ; j++) { 424 | TEXT_FRAG *f2 = fs + j; 425 | if (f2->depth < f->depth) 426 | break; 427 | len += f2->length; 428 | } 429 | ls[i] = len; 430 | } 431 | else 
432 | ls[i] = -1; 433 | } 434 | } 435 | 436 | 437 | void emit_known(PRINT_BUFFER *pb, void (*emit)(void *, const char *, uint32), void *data) { 438 | int pfc = printable_frags_count(pb); 439 | 440 | int32 *ls = new_int32_array(pb->frags_count); 441 | calculate_subobjects_lengths(pb, ls); 442 | 443 | char *buff = pb->buffer; 444 | TEXT_FRAG *fs = pb->fragments; 445 | 446 | int split_depth = ls[0] > 100 ? 0 : -1; 447 | 448 | for (int i=0 ; i < pfc ; i++) { 449 | TEXT_FRAG *f = fs + i; 450 | TEXT_FRAG *nf = f + 1; 451 | 452 | int len = f->length; 453 | 454 | int d = f->depth; 455 | int nd = nf->depth; 456 | 457 | assert(d == nd - 1 || d == nd + 1); 458 | assert(split_depth <= d); 459 | 460 | if (nd > d) { 461 | emit(data, buff + f->start, len); 462 | 463 | if (d <= split_depth) { 464 | if (len >= 2) { 465 | emit(data, "\n", 1); 466 | for (int j=0 ; j < nd ; j++) 467 | emit(data, " ", 2); 468 | } 469 | else if (len == 1) { 470 | emit(data, " ", 1); 471 | } 472 | } 473 | 474 | if (ls[i+1] > 100) { 475 | assert(split_depth == d); 476 | split_depth = nd; 477 | } 478 | } 479 | else { 480 | assert(nd < d); 481 | if (nd < split_depth) { 482 | assert(split_depth == d); 483 | split_depth = nd; 484 | if (len > 0) { 485 | emit(data, "\n", 1); 486 | for (int j=0 ; j <= nd ; j++) 487 | emit(data, " ", 2); 488 | } 489 | } 490 | emit(data, buff + f->start, len); 491 | } 492 | } 493 | delete_int32_array(ls, pb->frags_count); 494 | } 495 | 496 | 497 | void process_text(PRINT_BUFFER *pb, const char *text) { 498 | int len = strlen(text); 499 | adjust_buff_capacity(pb, len); 500 | memcpy(pb->buffer + pb->str_len, text, len+1); 501 | pb->str_len += len; 502 | TEXT_FRAG *curr_frag = pb->fragments + pb->frags_count - 1; 503 | assert(curr_frag->depth == pb->curr_depth); 504 | curr_frag->length += len; 505 | } 506 | 507 | 508 | void subobj_start(PRINT_BUFFER *pb) { 509 | int new_depth = pb->curr_depth + 1; 510 | pb->curr_depth = new_depth; 511 | 512 | TEXT_FRAG *new_frag = 
insert_new_fragment(pb); 513 | new_frag->depth = new_depth; 514 | new_frag->start = pb->str_len; 515 | new_frag->length = 0; 516 | } 517 | 518 | 519 | void subobj_end(PRINT_BUFFER *pb) { 520 | int new_depth = pb->curr_depth - 1; 521 | pb->curr_depth = new_depth; 522 | 523 | TEXT_FRAG *new_frag = insert_new_fragment(pb); 524 | new_frag->depth = new_depth; 525 | new_frag->start = pb->str_len; 526 | new_frag->length = 0; 527 | } 528 | 529 | 530 | //////////////////////////////////////////////////////////////////////////////// 531 | 532 | void emit_store(void *pb_, const void *data, EMIT_ACTION action) { 533 | PRINT_BUFFER *pb = (PRINT_BUFFER *) pb_; 534 | 535 | switch (action) { 536 | case TEXT: 537 | process_text(pb, (char *) data); 538 | break; 539 | case SUB_START: 540 | subobj_start(pb); 541 | break; 542 | case SUB_END: 543 | subobj_end(pb); 544 | break; 545 | } 546 | } 547 | 548 | 549 | void stdout_print(void *, const char *text, uint32 len) { 550 | fwrite(text, 1, len, stdout); 551 | fflush(stdout); 552 | } 553 | 554 | 555 | void print(OBJ obj) { 556 | PRINT_BUFFER pb; 557 | 558 | init(&pb); 559 | print_obj(obj, emit_store, &pb); 560 | fputs("\n", stdout); 561 | emit_known(&pb, stdout_print, NULL); 562 | fputs("\n", stdout); 563 | cleanup(&pb); 564 | } 565 | 566 | 567 | void write_to_file(void *fp, const char *text, uint32 len) { 568 | fwrite(text, 1, len, (FILE *) fp); 569 | } 570 | 571 | 572 | void append_to_string(void *ptr, const char *text, uint32 len) { 573 | char *str = (char *) ptr; 574 | int curr_len = strlen(str); 575 | memcpy(str + curr_len, text, len); 576 | str[curr_len + len] = '\0'; 577 | } 578 | 579 | 580 | void calc_length(void *ptr, const char *text, uint32 len) { 581 | uint32 *total_len = (uint32 *) ptr; 582 | *total_len += len; 583 | } 584 | 585 | 586 | void print_to_buffer_or_file(OBJ obj, char *buffer, uint32 max_size, const char *fname) { 587 | PRINT_BUFFER pb; 588 | 589 | init(&pb); 590 | print_obj(obj, emit_store, &pb); 591 | 592 | 
uint32 len = 0; 593 | emit_known(&pb, calc_length, &len); 594 | 595 | buffer[0] = '\0'; 596 | if (len < max_size) { 597 | emit_known(&pb, append_to_string, buffer); 598 | } 599 | else { 600 | FILE *fp = fopen(fname, "w"); 601 | emit_known(&pb, write_to_file, fp); 602 | fclose(fp); 603 | } 604 | 605 | cleanup(&pb); 606 | } 607 | 608 | 609 | uint32 printed_obj(OBJ obj, char *buffer, uint32 max_size) { 610 | PRINT_BUFFER pb; 611 | 612 | init(&pb); 613 | print_obj(obj, emit_store, &pb); 614 | 615 | uint32 len = 0; 616 | emit_known(&pb, calc_length, &len); 617 | 618 | if (len + 1 < max_size) { 619 | memcpy(buffer, pb.buffer, len + 1); 620 | } 621 | 622 | cleanup(&pb); 623 | return len + 1; 624 | } 625 | 626 | 627 | char *printed_obj(OBJ obj, char *alloc_buffer(void *, uint32), void *data) { 628 | PRINT_BUFFER pb; 629 | 630 | init(&pb); 631 | print_obj(obj, emit_store, &pb); 632 | 633 | uint32 len = 0; 634 | emit_known(&pb, calc_length, &len); 635 | 636 | char *buffer = alloc_buffer(data, len+1); 637 | memcpy(buffer, pb.buffer, len + 1); 638 | 639 | cleanup(&pb); 640 | return buffer; 641 | } 642 | -------------------------------------------------------------------------------- /src/parsing.cpp: -------------------------------------------------------------------------------- 1 | #include "lib.h" 2 | 3 | 4 | enum TOKEN_TYPE { 5 | COMMA, 6 | COLON, 7 | SEMICOLON, 8 | ARROW, 9 | OPEN_PAR, 10 | CLOSE_PAR, 11 | OPEN_BRACKET, 12 | CLOSE_BRACKET, 13 | INT, 14 | FLOAT, 15 | SYMBOL, 16 | STRING, 17 | WHATEVER 18 | }; 19 | 20 | union VALUE { 21 | int64 integer; 22 | double floating; 23 | uint16 symb_idx; 24 | struct { 25 | const char *ptr; 26 | uint32 length; 27 | } string; 28 | }; 29 | 30 | struct TOKEN { 31 | uint32 offset; 32 | uint32 length; 33 | TOKEN_TYPE type; 34 | VALUE value; 35 | }; 36 | 37 | //////////////////////////////////////////////////////////////////////////////// 38 | 39 | inline int64 read_nat(const char *text, uint32 length, int64 *offset_ptr) { 40 | int64 
start_offset = *offset_ptr; 41 | int64 end_offset = start_offset; 42 | int64 value = 0; 43 | char ch; 44 | while (end_offset < length && isdigit(ch = text[end_offset])) { 45 | value = 10 * value + (ch - '0'); 46 | end_offset++; 47 | } 48 | assert(end_offset > start_offset); 49 | int64 count = end_offset - start_offset; 50 | if (count > 19) { 51 | *offset_ptr = -start_offset - 1; 52 | return -1; 53 | } 54 | else if (count == 19) { 55 | static const char *MAX = "9223372036854775807"; 56 | for (int i=0 ; i < 19 ; i++) { 57 | ch = text[start_offset + i]; 58 | char max_ch = MAX[i]; 59 | if (ch > max_ch) { 60 | *offset_ptr = -start_offset - 1; 61 | return -1; 62 | } 63 | else if (ch < max_ch) 64 | break; 65 | } 66 | } 67 | *offset_ptr = end_offset; 68 | return value; 69 | } 70 | 71 | 72 | inline int64 read_number(const char *text, uint32 length, int64 offset, TOKEN *token, bool negate) { 73 | char ch; 74 | 75 | int64 i = offset; 76 | 77 | int64 int_value = read_nat(text, length, &i); 78 | if (i < 0) 79 | return i; 80 | 81 | bool is_int; 82 | if (i == length) 83 | is_int = true; 84 | else { 85 | ch = text[i]; 86 | is_int = ch != '.' & !islower(ch); 87 | assert(!isdigit(ch)); 88 | } 89 | 90 | if (is_int) { 91 | if (token != NULL) { 92 | token->offset = offset; 93 | token->length = i - offset; 94 | token->type = INT; 95 | token->value.integer = negate ? 
-int_value : int_value; 96 | } 97 | return i; 98 | } 99 | 100 | double float_value = int_value; 101 | if (ch == '.') { 102 | uint32 start = ++i; 103 | int64 dec_int_value = read_nat(text, length, &i); 104 | if (i < 0) 105 | return i; 106 | float_value += ((double) dec_int_value) / pow(10, i - start); 107 | } 108 | 109 | if (i < length) { 110 | ch = text[i]; 111 | if (ch == 'e') { 112 | if (++i == length) 113 | return -i - 1; 114 | ch = text[i]; 115 | 116 | bool neg_exp = false; 117 | if (ch == '-') { 118 | if (++i == length) 119 | return -i - 1; 120 | ch = text[i]; 121 | neg_exp = true; 122 | } 123 | 124 | if (!isdigit(ch)) 125 | return -i - 1; 126 | 127 | int64 exp_value = read_nat(text, length, &i); 128 | if (i < 0) 129 | return i; 130 | 131 | float_value *= pow(10, neg_exp ? -exp_value : exp_value); 132 | } 133 | 134 | if (islower(ch)) 135 | return -i - 1; 136 | } 137 | 138 | if (token != NULL) { 139 | token->offset = offset; 140 | token->length = i - offset; 141 | token->type = FLOAT; 142 | token->value.floating = negate ? 
-float_value : float_value; 143 | } 144 | return i; 145 | } 146 | 147 | 148 | inline int64 read_symbol(const char *text, uint32 length, int64 offset, TOKEN *token) { 149 | int64 i = offset; 150 | while (++i < length) { 151 | char ch = text[i]; 152 | if (ch == '_') { 153 | if (++i == length) 154 | return -i - 1; 155 | ch = text[i]; 156 | if (!islower(ch) & !isdigit(ch)) 157 | return -i - 1; 158 | } 159 | else if (!islower(ch) & !isdigit(ch)) 160 | break; 161 | } 162 | 163 | if (token != NULL) { 164 | token->offset = offset; 165 | token->length = i - offset; 166 | token->type = SYMBOL; 167 | token->value.symb_idx = lookup_symb_idx(text+offset, i-offset); 168 | } 169 | return i; 170 | } 171 | 172 | 173 | inline int64 read_string(const char *text, uint32 length, int64 offset, TOKEN *token) { 174 | uint32 str_len = 0; 175 | for (int64 i=offset+1 ; i < length ; i++) { 176 | char ch = text[i]; 177 | 178 | if (ch < ' ' | ch > '~') 179 | return -offset - 1; 180 | 181 | if (ch == '"') { 182 | if (token != NULL) { 183 | token->offset = offset; 184 | token->length = i + 1 - offset; 185 | token->type = STRING; 186 | token->value.string.ptr = text + offset + 1; 187 | token->value.string.length = str_len; 188 | } 189 | return i + 1; 190 | } 191 | 192 | str_len++; 193 | 194 | if (ch == '\\') { 195 | if (++i == length) 196 | return -i - 1; 197 | ch = text[i]; 198 | if (isxdigit(ch)) { 199 | if (i + 3 >= length || !(isxdigit(text[i+1]) & isxdigit(text[i+2]) & isxdigit(text[i+3]))) 200 | return -i; 201 | i += 3; 202 | } 203 | else if (ch != '\\' & ch != '"' & ch != 'n' & ch != 't') 204 | return -i; 205 | } 206 | } 207 | return -(length + 1); 208 | } 209 | 210 | 211 | int64 tokenize(const char *text, uint32 length, TOKEN *tokens) { 212 | bool ok; 213 | 214 | uint32 index = 0; 215 | int64 offset = 0; 216 | 217 | while (offset < length) { 218 | char ch = text[offset]; 219 | 220 | if (isspace(ch)) { 221 | offset++; 222 | continue; 223 | } 224 | 225 | TOKEN *token = tokens != NULL ? 
tokens + index : NULL; 226 | index++; 227 | 228 | bool negate = false; 229 | if (ch == '-') { 230 | if (offset + 1 == length) 231 | return -offset - 1; 232 | 233 | offset++; 234 | ch = text[offset]; 235 | 236 | // Arrow 237 | if (ch == '>') { 238 | if (token != NULL) { 239 | token->offset = offset - 1; 240 | token->length = 2; 241 | token->type = ARROW; 242 | } 243 | offset++; 244 | continue; 245 | } 246 | 247 | if (!isdigit(ch)) 248 | return -offset - 2; 249 | 250 | negate = true; 251 | } 252 | 253 | // Integer and floating point numbers 254 | if (ch >= '0' && ch <= '9') { 255 | offset = read_number(text, length, offset, token, negate); 256 | if (offset < 0) 257 | return offset; 258 | else 259 | continue; 260 | } 261 | 262 | // Symbols 263 | if (ch >= 'a' && ch <= 'z') { 264 | offset = read_symbol(text, length, offset, token); 265 | if (offset < 0) 266 | return offset; 267 | else 268 | continue; 269 | } 270 | 271 | // Strings 272 | if (ch == '"') { 273 | offset = read_string(text, length, offset, token); 274 | if (offset < 0) 275 | return offset; 276 | else 277 | continue; 278 | } 279 | 280 | // Single character tokens 281 | TOKEN_TYPE type; 282 | switch (ch) { 283 | case ',': 284 | type = COMMA; 285 | break; 286 | 287 | case ':': 288 | type = COLON; 289 | break; 290 | 291 | case ';': 292 | type = SEMICOLON; 293 | break; 294 | 295 | case '(': 296 | type = OPEN_PAR; 297 | break; 298 | 299 | case ')': 300 | type = CLOSE_PAR; 301 | break; 302 | 303 | case '[': 304 | type = OPEN_BRACKET; 305 | break; 306 | 307 | case ']': 308 | type = CLOSE_BRACKET; 309 | break; 310 | 311 | default: 312 | return -offset - 1; 313 | } 314 | 315 | if (token != NULL) { 316 | token->offset = offset; 317 | token->length = 1; 318 | token->type = type; 319 | } 320 | 321 | offset++; 322 | } 323 | 324 | return index; 325 | } 326 | 327 | //////////////////////////////////////////////////////////////////////////////// 328 | 
//////////////////////////////////////////////////////////////////////////////// 329 | 330 | struct STATE { 331 | OBJ *cols[3]; 332 | uint32 count; 333 | uint32 capacity; 334 | }; 335 | 336 | static void init(STATE *state, int arity) { 337 | const int MIN_CAPACITY = 128; 338 | OBJ **cols = state->cols; 339 | for (int i=0 ; i < 3 ; i++) 340 | cols[i] = i < arity ? new_obj_array(MIN_CAPACITY) : NULL; 341 | state->count = 0; 342 | state->capacity = MIN_CAPACITY; 343 | } 344 | 345 | void cleanup(STATE *state, bool release) { 346 | uint32 count = state->count; 347 | for (int c=0 ; c < 3 ; c++) { 348 | OBJ *col = state->cols[c]; 349 | if (col == NULL) 350 | break; 351 | if (release) 352 | vec_release(col, count); 353 | delete_obj_array(col, state->capacity); 354 | } 355 | } 356 | 357 | void store(STATE *state, OBJ *objs, int size) { 358 | uint32 count = state->count; 359 | uint32 capacity = state->capacity; 360 | OBJ **cols = state->cols; 361 | if (count >= capacity) { 362 | //## ARE WE SURE HERE THAT THIS NEW CAPACITY IS ALWAYS ENOUGH? 
363 | uint32 new_capacity = 2 * capacity; 364 | for (int i=0 ; i < 3 && cols[i] != NULL ; i++) 365 | cols[i] = resize_obj_array(cols[i], capacity, new_capacity); 366 | state->capacity = new_capacity; 367 | } 368 | for (int i=0 ; i < size ; i++) 369 | cols[i][count] = objs[i]; 370 | state->count = count + 1; 371 | } 372 | 373 | //////////////////////////////////////////////////////////////////////////////// 374 | 375 | typedef int64 (*parser)(TOKEN*, uint32, int64, STATE*); 376 | 377 | int64 read_list(TOKEN *tokens, uint32 length, int64 offset, TOKEN_TYPE sep, TOKEN_TYPE term, parser parse_elem, STATE *state) { 378 | // Empty list 379 | if (offset < length && tokens[offset].type == term) 380 | return offset + 1; 381 | 382 | for ( ; ; ) { 383 | offset = parse_elem(tokens, length, offset, state); 384 | 385 | // Unexpected EOF 386 | if (offset >= length) 387 | offset = -offset - 1; 388 | 389 | // Parsing failed 390 | if (offset < 0) 391 | return offset; 392 | 393 | TOKEN_TYPE type = tokens[offset++].type; 394 | 395 | // One more item 396 | if (type == sep) 397 | continue; 398 | 399 | // Done 400 | if (type == term) 401 | return offset; 402 | 403 | // Done 404 | if (term == WHATEVER) 405 | return offset - 1; 406 | 407 | // Unexpected separator/terminator 408 | return -offset; 409 | } 410 | } 411 | 412 | //////////////////////////////////////////////////////////////////////////////// 413 | 414 | int64 parse_obj(TOKEN *tokens, uint32 length, int64 offset, OBJ *var); 415 | 416 | //////////////////////////////////////////////////////////////////////////////// 417 | 418 | int64 parse_entry(TOKEN *tokens, uint32 length, int64 offset, uint32 count, TOKEN_TYPE sep, OBJ *vars) { 419 | uint32 read = 0; 420 | 421 | for (read = 0 ; read < count ; read++) { 422 | if (read > 0) 423 | if (offset < length && tokens[offset].type == sep) 424 | offset++; 425 | else 426 | break; 427 | offset = parse_obj(tokens, length, offset, vars+read); 428 | if (offset < 0) 429 | break; 430 | } 431 | 
432 | if (read == count & offset < length) 433 | return offset; 434 | 435 | for (uint32 i=0 ; i < read ; i++) 436 | release(vars[i]); 437 | 438 | return offset < 0 ? offset : -offset - 1; 439 | } 440 | 441 | //////////////////////////////////////////////////////////////////////////////// 442 | 443 | int64 read_obj(TOKEN *tokens, uint32 length, int64 offset, STATE *state) { 444 | OBJ obj; 445 | offset = parse_obj(tokens, length, offset, &obj); 446 | if (offset >= 0) 447 | store(state, &obj, 1); 448 | return offset; 449 | } 450 | 451 | int64 read_entry(TOKEN *tokens, uint32 length, int64 offset, STATE *state, int size, TOKEN_TYPE sep) { 452 | OBJ entry[3]; 453 | offset = parse_entry(tokens, length, offset, size, sep, entry); 454 | if (offset >= 0) 455 | store(state, entry, size); 456 | return offset; 457 | } 458 | 459 | int64 read_map_entry(TOKEN *tokens, uint32 length, int64 offset, STATE *state) { 460 | return read_entry(tokens, length, offset, state, 2, ARROW); 461 | } 462 | 463 | int64 read_bin_rel_entry(TOKEN *tokens, uint32 length, int64 offset, STATE *state) { 464 | return read_entry(tokens, length, offset, state, 2, COMMA); 465 | } 466 | 467 | int64 read_tern_rel_entry(TOKEN *tokens, uint32 length, int64 offset, STATE *state) { 468 | return read_entry(tokens, length, offset, state, 3, COMMA); 469 | } 470 | 471 | int64 read_rec_entry(TOKEN *tokens, uint32 length, int64 offset, STATE *state) { 472 | if (offset >= length || tokens[offset].type != SYMBOL) 473 | return -offset - 1; 474 | uint16 symb_idx = tokens[offset++].value.symb_idx; 475 | if (offset >= length || tokens[offset].type != COLON) 476 | return -offset - 1; 477 | OBJ entry[2]; 478 | offset = parse_obj(tokens, length, offset+1, entry+1); 479 | if (offset < 0) 480 | return offset; 481 | entry[0] = make_symb(symb_idx); 482 | store(state, entry, 2); 483 | return offset; 484 | } 485 | 486 | //////////////////////////////////////////////////////////////////////////////// 487 | 488 | int64 parse_seq(TOKEN 
*tokens, uint32 length, int64 offset, OBJ *var) { 489 | STATE state; 490 | init(&state, 1); 491 | 492 | offset = read_list(tokens, length, offset+1, COMMA, CLOSE_PAR, read_obj, &state); 493 | 494 | if (offset >= 0) 495 | *var = build_seq(state.cols[0], state.count); 496 | 497 | cleanup(&state, offset < 0); 498 | return offset; 499 | } 500 | 501 | //////////////////////////////////////////////////////////////////////////////// 502 | 503 | bool is_record(TOKEN *tokens, uint32 length, int64 offset) { 504 | return offset + 2 < length && tokens[offset+1].type == SYMBOL && tokens[offset+2].type == COLON; 505 | } 506 | 507 | int64 parse_rec(TOKEN *tokens, uint32 length, int64 offset, OBJ *var) { 508 | STATE state; 509 | init(&state, 2); 510 | 511 | offset = read_list(tokens, length, offset+1, COMMA, CLOSE_PAR, read_rec_entry, &state); 512 | 513 | if (offset >= 0) 514 | *var = build_bin_rel(state.cols[0], state.cols[1], state.count); 515 | 516 | cleanup(&state, offset < 0); 517 | return offset; 518 | } 519 | 520 | //////////////////////////////////////////////////////////////////////////////// 521 | 522 | int64 parse_inner_obj_or_tuple(TOKEN *tokens, uint32 length, int64 offset, OBJ *var) { 523 | STATE state; 524 | init(&state, 1); 525 | 526 | offset = read_list(tokens, length, offset+1, COMMA, CLOSE_PAR, read_obj, &state); 527 | bool ok = offset >= 0 & state.count > 0; 528 | if (ok) 529 | *var = state.count == 1 ? 
state.cols[0][0] : build_seq(state.cols[0], state.count); 530 | 531 | cleanup(&state, !ok); 532 | return offset; 533 | } 534 | 535 | int64 parse_symb_or_tagged_obj(TOKEN *tokens, uint32 length, int64 offset, OBJ *var) { 536 | uint16 symb_idx = tokens[offset].value.symb_idx; 537 | if (++offset < length) { 538 | if (tokens[offset].type == OPEN_PAR) { 539 | OBJ inner_obj; 540 | if (is_record(tokens, length, offset)) 541 | offset = parse_rec(tokens, length, offset, &inner_obj); 542 | else 543 | offset = parse_inner_obj_or_tuple(tokens, length, offset, &inner_obj); 544 | if (offset >= 0) 545 | *var = make_tag_obj(symb_idx, inner_obj); 546 | return offset; 547 | } 548 | } 549 | *var = make_symb(symb_idx); 550 | return offset; 551 | } 552 | 553 | //////////////////////////////////////////////////////////////////////////////// 554 | 555 | int64 parse_rel_tail(TOKEN *tokens, uint32 length, int64 offset, int size, OBJ *first_entry, bool is_map, OBJ *var) { 556 | STATE state; 557 | init(&state, size); 558 | store(&state, first_entry, size); 559 | 560 | parser entry_parser = size == 2 ? 561 | (is_map ? read_map_entry : read_bin_rel_entry) : 562 | read_tern_rel_entry; 563 | 564 | offset = read_list(tokens, length, offset, is_map ? 
COMMA : SEMICOLON, CLOSE_BRACKET, entry_parser, &state); 565 | 566 | if (offset >= 0) 567 | if (size == 2) 568 | *var = build_bin_rel(state.cols[0], state.cols[1], state.count); 569 | else 570 | *var = build_tern_rel(state.cols[0], state.cols[1], state.cols[2], state.count); 571 | 572 | cleanup(&state, offset < 0); 573 | return offset; 574 | } 575 | 576 | //////////////////////////////////////////////////////////////////////////////// 577 | 578 | int64 parse_unord_coll(TOKEN *tokens, uint32 length, int64 offset, OBJ *var) { 579 | if (++offset >= length) 580 | return -offset - 1; 581 | if (tokens[offset].type == CLOSE_BRACKET) { 582 | *var = make_empty_rel(); 583 | return offset + 1; 584 | } 585 | 586 | STATE state; 587 | init(&state, 1); 588 | 589 | offset = read_list(tokens, length, offset, COMMA, WHATEVER, read_obj, &state); 590 | if (offset < 0) { 591 | cleanup(&state, true); 592 | return offset; 593 | } 594 | 595 | TOKEN_TYPE type = tokens[offset++].type; 596 | 597 | bool is_map = type == ARROW & state.count == 1; 598 | bool is_rel = type == SEMICOLON & (state.count == 2 | state.count == 3); 599 | 600 | if (is_map) { 601 | offset = read_obj(tokens, length, offset, &state); 602 | if (offset >= length) 603 | offset = -offset - 1; 604 | if (offset < 0) { 605 | cleanup(&state, true); 606 | return offset; 607 | } 608 | type = tokens[offset++].type; 609 | } 610 | else if (is_rel && offset < length && tokens[offset].type == CLOSE_BRACKET) { 611 | type = CLOSE_BRACKET; 612 | offset++; 613 | } 614 | 615 | if (type == CLOSE_BRACKET) { 616 | if (is_map | (is_rel & state.count == 2)) 617 | *var = build_bin_rel(state.cols[0], state.cols[0] + 1, 1); 618 | else if (is_rel) 619 | *var = build_tern_rel(state.cols[0], state.cols[0] + 1, state.cols[0] + 2, 1); 620 | else 621 | *var = build_set(state.cols[0], state.count); 622 | cleanup(&state, false); 623 | return offset; 624 | } 625 | 626 | if (is_map | is_rel) { 627 | OBJ entry[3]; 628 | for (int i=0 ; i < state.count ; i++) 
629 | entry[i] = state.cols[0][i]; 630 | cleanup(&state, false); 631 | return parse_rel_tail(tokens, length, offset, state.count, entry, is_map, var); 632 | } 633 | 634 | cleanup(&state, true); 635 | return -offset; 636 | } 637 | 638 | //////////////////////////////////////////////////////////////////////////////// 639 | 640 | inline char hex_digit(char ch) { 641 | assert(isxdigit(ch)); 642 | return isdigit(ch) ? (ch - '0') : (tolower(ch) - 'a' + 10); 643 | } 644 | 645 | void parse_string(TOKEN *token, OBJ *var) { 646 | const char *text = token->value.string.ptr; 647 | uint32 length = token->value.string.length; 648 | 649 | if (length == 0) { 650 | *var = make_tag_obj(symb_idx_string, make_empty_seq()); 651 | return; 652 | } 653 | 654 | SEQ_OBJ *raw_str = new_seq(length); 655 | *var = make_tag_obj(symb_idx_string, make_seq(raw_str, length)); 656 | 657 | OBJ *buffer = raw_str->buffer; 658 | 659 | for (uint32 i=0 ; i < length ; i++) { 660 | char ch = *(text++); 661 | uint16 parsed_char; 662 | if (ch == '\\') { 663 | ch = *(text++); 664 | if (ch == '"' | ch == '\\') 665 | parsed_char = ch; 666 | else if (ch == 'n') 667 | parsed_char = '\n'; 668 | else if (ch == 't') 669 | parsed_char = '\t'; 670 | else { 671 | char hex3 = hex_digit(ch); 672 | char hex2 = hex_digit(*(text++)); 673 | char hex1 = hex_digit(*(text++)); 674 | char hex0 = hex_digit(*(text++)); 675 | parsed_char = 16 * (16 * (16 * hex3 + hex2) + hex1) + hex0; 676 | } 677 | } 678 | else 679 | parsed_char = ch; 680 | buffer[i] = make_int(parsed_char); 681 | } 682 | } 683 | 684 | //////////////////////////////////////////////////////////////////////////////// 685 | 686 | // If the function is successfull, it returns the index of the next token to consume 687 | // If it fails, it returns the location/index of the error, negated and decremented by one 688 | int64 parse_obj(TOKEN *tokens, uint32 length, int64 offset, OBJ *var) { 689 | if (offset >= length) 690 | return -offset - 1; 691 | 692 | TOKEN *token = 
tokens + offset; 693 | 694 | switch (token->type) { 695 | case COMMA: 696 | case COLON: 697 | case SEMICOLON: 698 | case ARROW: 699 | case CLOSE_PAR: 700 | case CLOSE_BRACKET: 701 | return -offset - 1; 702 | 703 | case INT: 704 | *var = make_int(token->value.integer); 705 | return offset + 1; 706 | 707 | case FLOAT: 708 | *var = make_float(token->value.floating); 709 | return offset + 1; 710 | 711 | case SYMBOL: 712 | return parse_symb_or_tagged_obj(tokens, length, offset, var); 713 | 714 | case OPEN_PAR: 715 | if (is_record(tokens, length, offset)) 716 | return parse_rec(tokens, length, offset, var); 717 | else 718 | return parse_seq(tokens, length, offset, var); 719 | 720 | case OPEN_BRACKET: 721 | return parse_unord_coll(tokens, length, offset, var); 722 | 723 | case STRING: 724 | parse_string(token, var); 725 | return offset + 1; 726 | 727 | default: 728 | internal_fail(); 729 | } 730 | } 731 | 732 | //////////////////////////////////////////////////////////////////////////////// 733 | //////////////////////////////////////////////////////////////////////////////// 734 | 735 | bool parse(const char *text, uint32 size, OBJ *var, uint32 *error_offset) { 736 | int64 count = tokenize(text, size, NULL); 737 | if (count <= 0) { 738 | *error_offset = count < 0 ? -count - 1 : size; 739 | return false; 740 | } 741 | 742 | TOKEN *tokens = (TOKEN *) new_void_array(count * sizeof(TOKEN)); 743 | tokenize(text, size, tokens); 744 | 745 | memset(var, 0, sizeof(OBJ)); 746 | int64 res = parse_obj(tokens, count, 0, var); 747 | if (res < 0 | res < count) { 748 | *error_offset = res < 0 ? tokens[-res-1].offset : size; 749 | delete_void_array(tokens, count * sizeof(TOKEN)); 750 | return false; 751 | } 752 | 753 | delete_void_array(tokens, count * sizeof(TOKEN)); 754 | return true; 755 | } 756 | --------------------------------------------------------------------------------