├── .gitignore ├── Makefile ├── README.md ├── dump-bin-arm.sh ├── dump-bin-x64.sh ├── front ├── bjit.cpp ├── front-ast.h ├── front-lexer.cpp ├── front-lexer.h ├── front-parse.cpp └── front-parse.h ├── run-tests.sh ├── src ├── arch-arm64-asm.h ├── arch-arm64-emit.cpp ├── arch-arm64-ops.cpp ├── arch-arm64.h ├── arch-x64-asm.h ├── arch-x64-emit.cpp ├── arch-x64-ops.cpp ├── arch-x64.h ├── bjit-impl.h ├── bjit.h ├── debug.cpp ├── hash.h ├── ir-ops.cpp ├── ir-ops.h ├── module.cpp ├── opt-cse.cpp ├── opt-dce.cpp ├── opt-dom.cpp ├── opt-fold.cpp ├── opt-jump.cpp ├── opt-ra.cpp ├── opt-reassoc.cpp ├── opt-sink.cpp └── sanity.cpp ├── tests ├── test_add_ff.cpp ├── test_add_ii.cpp ├── test_call_stub.cpp ├── test_calln.cpp ├── test_callp.cpp ├── test_ci2f_cf2i.cpp ├── test_divmod.cpp ├── test_fib.cpp ├── test_fuzzfold.cpp ├── test_load_store.cpp ├── test_loop.cpp ├── test_mem_opt.cpp ├── test_shift.cpp ├── test_sieve.cpp ├── test_sub_ii.cpp └── test_sx_zx.cpp └── win ├── README.md ├── mkdir-p.bat └── rm-rf.bat /.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | !Makefile 4 | !*.md 5 | !/src 6 | !/tests 7 | !/front 8 | !**/*.h 9 | !**/*.c 10 | !**/*.cpp 11 | 12 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | # No default rules 3 | .SUFFIXES: 4 | 5 | # Generic compilation flags, both C and C++ 6 | CFLAGS := -Isrc -g -ferror-limit=5 #-fsanitize=address 7 | CFLAGS += -Ofast -fomit-frame-pointer 8 | CFLAGS += -Wall -Werror -Wfloat-conversion -Wno-unused-function 9 | 10 | # C++ specific flags 11 | CXXFLAGS := -std=c++11 -fno-exceptions 12 | 13 | # this is just so local.make can add flags 14 | LINKFLAGS := #-fsanitize=address 15 | 16 | # if local.make exists, then include it for local configuration 17 | -include local.make 18 | 19 | BJIT_BINDIR ?= bin 20 | BJIT_BUILDDIR ?= build 21 | 
22 | # We assume clang on all platforms 23 | BJIT_USE_CC ?= clang 24 | CC := $(BJIT_USE_CC) 25 | 26 | TARGET := bjit 27 | 28 | # FIXME: Windows 29 | BINEXT := 30 | 31 | # Windows specific 32 | ifeq ($(OS),Windows_NT) 33 | LIBRARY := $(BJIT_BUILDDIR)/$(TARGET).lib 34 | 35 | MAKEDIR := win\mkdir-p.bat 36 | BJIT_LINKLIB ?= llvm-lib $(LINKFLAGS) /out:$(LIBRARY) 37 | CLEANALL := win\rm-rf.bat $(BJIT_BUILDDIR) && win\rm-rf.bat $(BJIT_BINDIR) 38 | 39 | # Link flags 40 | BJIT_LINKFLAGS ?= $(LINKFLAGS) $(LIBRARY) 41 | 42 | CFLAGS += -D_CRT_SECURE_NO_WARNINGS 43 | 44 | BINEXT := .exe 45 | 46 | else 47 | LIBRARY := $(BJIT_BUILDDIR)/$(TARGET).a 48 | 49 | BJIT_LINKFLAGS ?= $(LINKFLAGS) $(LIBRARY) -lc++ 50 | 51 | MAKEDIR := mkdir -p 52 | CLEANALL := rm -rf $(BJIT_BUILDDIR) $(BJIT_BINDIR) 53 | BJIT_LINKLIB ?= libtool -static -o $(LIBRARY) 54 | 55 | ifeq ($(shell uname),Darwin) 56 | BJIT_LINKLIB += -no_warning_for_no_symbols 57 | endif 58 | 59 | endif 60 | 61 | # this works with clang on Windows too 62 | BJIT_LINKBIN ?= $(CC) 63 | 64 | # Automatically figure out source files 65 | LIB_SOURCES := $(wildcard src/*.cpp) 66 | 67 | LIB_OBJECTS := $(patsubst %,$(BJIT_BUILDDIR)/%.o,$(LIB_SOURCES)) 68 | DEPENDS := $(LIB_OBJECTS:.o=.d) 69 | 70 | # Front-end 71 | FRONTEND := $(BJIT_BINDIR)/$(TARGET)$(BINEXT) 72 | FRONT_OBJECTS := $(patsubst %,$(BJIT_BUILDDIR)/%.o,$(wildcard front/*.cpp)) 73 | DEPENDS += $(FRONT_OBJECTS:.o=.d) 74 | 75 | # automatic target generation for any .cpp files in tests/ 76 | define TestTarget 77 | DEPENDS += $(patsubst %,$(BJIT_BUILDDIR)/%.d,$1) 78 | $(BJIT_BINDIR)/$(patsubst tests/%.cpp,%,$1)$(BINEXT): $(LIBRARY) \ 79 | $(patsubst %,$(BJIT_BUILDDIR)/%.o,$1) 80 | @echo LINK $$@ 81 | @$(MAKEDIR) "$(BJIT_BINDIR)" 82 | @$(BJIT_LINKBIN) -o $$@ $(patsubst %,$(BJIT_BUILDDIR)/%.o,$1) $(BJIT_LINKFLAGS) 83 | endef 84 | 85 | TESTS_CPP := $(wildcard tests/*.cpp) 86 | TESTS := $(patsubst tests/%.cpp,$(BJIT_BINDIR)/%$(BINEXT),$(TESTS_CPP)) 87 | 88 | .PHONY: all test clean 89 | 90 
| all: $(LIBRARY) $(FRONTEND) 91 | @echo DONE 92 | 93 | test: all $(TESTS) 94 | @echo Running tests with output to 'test.out' 95 | @/bin/bash -e ./run-tests.sh > test.out 2>&1 || /bin/bash -e ./run-tests.sh 96 | @echo Tests done. 97 | 98 | clean: 99 | @$(CLEANALL) 100 | @echo Removed '$(BJIT_BUILDDIR)' and '$(BJIT_BINDIR)' 101 | 102 | $(foreach i,$(TESTS_CPP),$(eval $(call TestTarget,$(i)))) 103 | 104 | $(FRONTEND): $(FRONT_OBJECTS) $(LIBRARY) 105 | @echo LINK $@ 106 | @$(MAKEDIR) "$(BJIT_BINDIR)" 107 | @$(BJIT_LINKBIN) -o $@ $(FRONT_OBJECTS) $(BJIT_LINKFLAGS) 108 | 109 | $(LIBRARY): $(LIB_OBJECTS) 110 | @echo LIB $@ 111 | @$(MAKEDIR) "$(dir $@)" 112 | @$(BJIT_LINKLIB) $(LIB_OBJECTS) 113 | 114 | $(BJIT_BUILDDIR)/%.c.o: %.c 115 | @echo CC $< 116 | @$(MAKEDIR) "$(dir $@)" 117 | @$(CC) -MMD -MP $(CFLAGS) -c $< -o $@ 118 | 119 | $(BJIT_BUILDDIR)/%.cpp.o: %.cpp 120 | @echo CC $< 121 | @$(MAKEDIR) "$(dir $@)" 122 | @$(CC) -MMD -MP $(CFLAGS) $(CXXFLAGS) -c $< -o $@ 123 | 124 | -include $(DEPENDS) 125 | -------------------------------------------------------------------------------- /dump-bin-arm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | gobjdump -maarch64 -d -D -b binary out.bin 3 | -------------------------------------------------------------------------------- /dump-bin-x64.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | gobjdump --insn-width=16 -mi386:x86-64:intel -d -D -b binary out.bin 3 | -------------------------------------------------------------------------------- /front/bjit.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "bjit.h" 3 | 4 | #include "front-parse.h" 5 | 6 | int main() 7 | { 8 | std::vector codeOut; 9 | 10 | bjit::parse(codeOut); 11 | 12 | if(codeOut.size()) 13 | { 14 | FILE * f = fopen("out.bin", "wb"); 15 | fwrite(codeOut.data(), 1, codeOut.size(), f); 16 | fclose(f); 17 | 18 | 
BJIT_LOG(" - Wrote out.bin\n"); 19 | return 0; 20 | } 21 | else 22 | { 23 | return 1; // probably syntax errors 24 | } 25 | } -------------------------------------------------------------------------------- /front/front-lexer.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "front-parse.h" 3 | #include "front-lexer.h" 4 | 5 | #include 6 | 7 | using namespace bjit; 8 | 9 | static bool isDigit(int ch) 10 | { 11 | return ch >= '0' && ch <= '9'; 12 | } 13 | 14 | static bool isSymbolChar(int ch) 15 | { 16 | return (ch >= 'a' && ch <= 'z') 17 | || (ch >= 'A' && ch <= 'Z') 18 | || (ch >= '0' && ch <= '9') 19 | || (ch == '_') 20 | ; 21 | } 22 | 23 | static void eatSpace(Parser & ps) 24 | { 25 | while(true) 26 | { 27 | int ch = ps.peek(); 28 | 29 | // anything from # to end of line is a comment 30 | if(ch == '#') 31 | { 32 | while(ps.peek() != '\n') ps.consume(); 33 | continue; 34 | } 35 | 36 | // break if this is not one of the whitespace characters 37 | if(ch != ' ' 38 | && ch != '\n' 39 | && ch != '\r' 40 | && ch != '\t') break; 41 | 42 | ps.consume(); 43 | } 44 | } 45 | 46 | static void lexNumber(Parser & ps, bool leadingDot) 47 | { 48 | bool valid = false; // do we have at least one digit 49 | 50 | // if there are too many digits, then we want 51 | // different behavior depending on int/float 52 | // so collect first, until we know what to do 53 | std::vector mantissaDigits; 54 | 55 | // this is only relevant for integers 56 | int base = 10; 57 | 58 | // this checks for hex contants 59 | if(!leadingDot && ps.peek() == '0') 60 | { 61 | // integers are octals 62 | ps.consume(); 63 | 64 | base = 8; 65 | 66 | if(ps.peek() == 'x' || ps.peek() == 'X') 67 | { 68 | base = 16; 69 | 70 | while(true) 71 | { 72 | ps.consume(); 73 | int ch = ps.peek(); 74 | 75 | if(isDigit(ch)) 76 | { 77 | mantissaDigits.push_back(ch - '0'); 78 | continue; 79 | } 80 | 81 | if(ch >= 'a' && ch <= 'f') 82 | { 83 | mantissaDigits.push_back(ch + 10 - 
'a'); 84 | continue; 85 | } 86 | 87 | if(ch >= 'A' && ch <= 'F') 88 | { 89 | mantissaDigits.push_back(ch + 10 - 'A'); 90 | continue; 91 | } 92 | 93 | break; 94 | } 95 | } 96 | else 97 | { 98 | mantissaDigits.push_back(0); 99 | } 100 | } 101 | 102 | // if this is not hex, parse digits 103 | if(!leadingDot && base < 16) 104 | { 105 | while(true) 106 | { 107 | int ch = ps.peek(); 108 | if(!isDigit(ch)) break; 109 | 110 | mantissaDigits.push_back(ch - '0'); 111 | ps.consume(); 112 | } 113 | } 114 | 115 | if(mantissaDigits.size()) valid = true; 116 | 117 | // do we have a decimal point or exponent? 118 | if((base < 16) 119 | && (leadingDot || ps.peek() == '.' || ps.peek() == 'e' || ps.peek() == 'E')) 120 | { 121 | double m = 0, d = 1; 122 | // collect the integer part of mantissa 123 | for(auto & d : mantissaDigits) { m = 10*m + d; } 124 | 125 | // handle digits after decimal point if any 126 | if(leadingDot || ps.peek() == '.') 127 | { 128 | if(!leadingDot) ps.consume(); 129 | while(true) 130 | { 131 | int ch = ps.peek(); 132 | if(!isDigit(ch)) break; 133 | 134 | m = 10*m + (ch - '0'); 135 | d = 10*d; 136 | 137 | valid = true; 138 | 139 | ps.consume(); 140 | } 141 | 142 | // if there are no digits around, 143 | // then a dot is a binary operator 144 | if(!valid) 145 | { 146 | ps.token.type = Token::Tdot; 147 | return; 148 | } 149 | 150 | // fix the decimal point 151 | m /= d; 152 | } 153 | 154 | double e = 0; 155 | if(ps.peek() == 'e' || ps.peek() == 'E') 156 | { 157 | ps.consume(); 158 | 159 | // get sign of exponent 160 | bool negate = false; 161 | if(ps.peek() == '-') negate = true; 162 | if(negate || ps.peek() == '+') ps.consume(); 163 | 164 | // consume digits 165 | while(true) 166 | { 167 | int ch = ps.peek(); 168 | if(!isDigit(ch)) break; 169 | 170 | e = 10*e + (ch - '0'); 171 | ps.consume(); 172 | } 173 | 174 | // apply exponent to mantissa 175 | m *= pow(10., negate ? 
-e : e); 176 | } 177 | 178 | ps.token.type = Token::Tfloat; 179 | ps.token.vFloat = m; 180 | } 181 | else 182 | { 183 | 184 | // plain old integer 185 | int64_t i = 0; 186 | 187 | // here we use variable base 188 | for(auto & d : mantissaDigits) 189 | { 190 | if(d >= base) valid = false; 191 | i = base*i + d; 192 | } 193 | 194 | ps.token.type = Token::Tint; 195 | ps.token.vInt = i; 196 | 197 | if(ps.peek() == 'U' || ps.peek() == 'u') 198 | { 199 | ps.token.type = Token::Tuint; 200 | ps.consume(); 201 | } 202 | 203 | // this can happen with 0x without digits 204 | // or when octal has digits 8 or 9 205 | if(!valid) 206 | { 207 | ps.errorAt(ps.token, "invalid numeric literal"); 208 | 209 | ps.token.type = Token::Terror; 210 | } 211 | } 212 | 213 | // check that there isn't trailing garbage 214 | if(isSymbolChar(ps.peek())) 215 | { 216 | ps.errorAt(ps.token, "invalid character in numeric literal"); 217 | 218 | // eat it all, so we can try to continue 219 | while(isSymbolChar(ps.peek())) ps.consume(); 220 | } 221 | } 222 | 223 | static struct { 224 | const char * str; 225 | Token::Type ttype; 226 | } keywords[] = { 227 | { "if", Token::Tif }, 228 | { "else", Token::Telse }, 229 | 230 | { "while", Token::Twhile }, 231 | { "break", Token::Tbreak }, 232 | { "continue", Token::Tcontinue }, 233 | 234 | { "return", Token::Treturn }, 235 | 236 | { 0, Token::Terror } // end of list marker 237 | }; 238 | 239 | static void lexSymbol(Parser & ps) 240 | { 241 | std::vector symbol; 242 | 243 | while(true) 244 | { 245 | int ch = ps.peek(); 246 | if(!isSymbolChar(ch)) break; 247 | 248 | symbol.push_back(ch); 249 | ps.consume(); 250 | } 251 | 252 | if(!symbol.size()) 253 | { 254 | ps.errorAt(ps.token, "invalid syntax"); 255 | ps.consume(); 256 | return; 257 | } 258 | 259 | symbol.push_back(0); // null-termination 260 | 261 | ps.token.type = Token::Tsymbol; 262 | 263 | for(int i = 0; keywords[i].str; ++i) 264 | { 265 | if(!strcmp(symbol.data(), keywords[i].str)) 266 | { 267 | 
ps.token.type = keywords[i].ttype; 268 | return; 269 | } 270 | } 271 | 272 | // if we didn't match a keyboard, intern symbol 273 | SymbolPtr * sptr = ps.symbols.find(symbol); 274 | if(sptr) ps.token.symbol = sptr->ptr.get(); 275 | else 276 | { 277 | ps.token.symbol = new Symbol; 278 | std::swap(ps.token.symbol->string, symbol); 279 | 280 | SymbolPtr newSym; 281 | newSym.ptr.reset(ps.token.symbol); 282 | ps.symbols.insert(newSym); 283 | } 284 | } 285 | 286 | void bjit::lexToken(Parser & ps) 287 | { 288 | eatSpace(ps); 289 | 290 | ps.token.posChar = ps.posChar; 291 | ps.token.posLine = ps.posLine; 292 | 293 | ps.token.type = Token::Terror; 294 | 295 | switch(int ch = ps.peek()) 296 | { 297 | case EOF: ps.token.type = Token::Teof; return; 298 | 299 | case '!': 300 | ps.consume(); 301 | switch(ps.peek()) 302 | { 303 | case '=': ps.token.type = Token::TnotEq; ps.consume(); return; 304 | default: ps.token.type = Token::TlogNot; return; 305 | } 306 | case '<': 307 | ps.consume(); 308 | switch(ps.peek()) 309 | { 310 | case '=': ps.token.type = Token::TlessEq; ps.consume(); return; 311 | case '<': ps.token.type = Token::TshiftL; ps.consume(); return; 312 | default: ps.token.type = Token::Tless; return; 313 | } 314 | case '>': 315 | ps.consume(); 316 | switch(ps.peek()) 317 | { 318 | case '=': ps.token.type = Token::TgreaterEq; ps.consume(); return; 319 | case '>': ps.token.type = Token::TshiftR; ps.consume(); return; 320 | default: ps.token.type = Token::Tgreater; return; 321 | } 322 | case '=': 323 | ps.consume(); 324 | switch(ps.peek()) 325 | { 326 | case '=': ps.token.type = Token::Teq; ps.consume(); return; 327 | default: ps.token.type = Token::Tassign; return; 328 | } 329 | 330 | case '{': ps.token.type = Token::ToBlock; ps.consume(); return; 331 | case '}': ps.token.type = Token::TcBlock; ps.consume(); return; 332 | 333 | case '[': ps.token.type = Token::ToIndex; ps.consume(); return; 334 | case ']': ps.token.type = Token::TcIndex; ps.consume(); return; 335 | 336 | 
case '(': ps.token.type = Token::ToParen; ps.consume(); return; 337 | case ')': ps.token.type = Token::TcParen; ps.consume(); return; 338 | 339 | case '+': ps.token.type = Token::Tadd; ps.consume(); return; 340 | case '-': ps.token.type = Token::Tsub; ps.consume(); return; 341 | 342 | case '*': ps.token.type = Token::Tmul; ps.consume(); return; 343 | case '/': ps.token.type = Token::Tdiv; ps.consume(); return; 344 | case '%': ps.token.type = Token::Tmod; ps.consume(); return; 345 | 346 | case '~': ps.token.type = Token::TbitNot; ps.consume(); return; 347 | case '^': ps.token.type = Token::TbitXor; ps.consume(); return; 348 | 349 | case '&': 350 | ps.consume(); 351 | switch(ps.peek()) 352 | { 353 | case '&': ps.token.type = Token::TlogAnd; ps.consume(); return; 354 | default: ps.token.type = Token::TbitAnd; return; 355 | } 356 | case '|': 357 | ps.consume(); 358 | switch(ps.peek()) 359 | { 360 | case '|': ps.token.type = Token::TlogOr; ps.consume(); return; 361 | default: ps.token.type = Token::TbitOr; return; 362 | } 363 | 364 | case '.': 365 | ps.consume(); 366 | if(isDigit(ps.peek())) lexNumber(ps, true); 367 | else ps.token.type = Token::Tdot; 368 | return; 369 | case ':': 370 | ps.consume(); 371 | switch(ps.peek()) 372 | { 373 | case '=': ps.token.type = Token::Tdefine; ps.consume(); return; 374 | default: ps.token.type = Token::Tcolon; return; 375 | } 376 | 377 | case ',': ps.token.type = Token::Tcomma; ps.consume(); return; 378 | case ';': ps.token.type = Token::Tsemicolon; ps.consume(); return; 379 | default: 380 | if(isDigit(ps.peek())) lexNumber(ps, false); 381 | else lexSymbol(ps); 382 | return; 383 | } 384 | 385 | } -------------------------------------------------------------------------------- /front/front-lexer.h: -------------------------------------------------------------------------------- 1 | 2 | #pragma once 3 | 4 | #include 5 | #include 6 | 7 | namespace bjit 8 | { 9 | 10 | struct Symbol 11 | { 12 | std::vector string; 13 | }; 14 | 15 | // 
// lexer tokens
//
// A Token is produced by lexToken() and is also what the parser pushes
// onto its defer stack, where the parser may rewrite `type` to one of the
// pseudo-token types listed at the end of the enum.
struct Token
{
    enum Type
    {
        Teof,       // end of file

        // literals
        Tint,       // integer literal, value in vInt
        Tuint,      // integer literal with a 'u'/'U' suffix, value in vInt
        Tfloat,     // floating point literal, value in vFloat

        Tsymbol,    // identifier, interned name in symbol

        // keywords
        Tif,
        Telse,

        Twhile,
        Tbreak,
        Tcontinue,

        Treturn,

        // operators - only one use-case is defined here
        // typically this should be the binary operator

        ToParen,    // (
        TcParen,    // )

        ToIndex,    // [
        TcIndex,    // ]

        ToBlock,    // {
        TcBlock,    // }

        Tdot,       // .
        Tcolon,     // :

        Tcomma,     // ,
        Tsemicolon, // ;

        Tadd,       // +
        Tsub,       // -

        Tmul,       // *
        Tdiv,       // /
        Tmod,       // %

        TshiftL,    // <<
        TshiftR,    // >>

        TbitOr,     // |
        TbitAnd,    // &
        TbitXor,    // ^
        TbitNot,    // ~

        TlogNot,    // !
        TlogAnd,    // &&
        TlogOr,     // ||

        Tassign,    // =
        Tdefine,    // :=

        Teq,        // ==
        TnotEq,     // !=
        Tless,      // <
        TlessEq,    // <=
        Tgreater,   // >
        TgreaterEq, // >=

        // pseudo-token types for the parser
        // these are mostly alternatives to the above
        Tpos,       // unary +
        Tneg,       // unary -

        Tfuncall,   // opening paren for function calls

        TifBody,    // Tif after condition is done
        TwhileBody, // Twhile after condition is done

        Terror      // invalid token

    } type;

    // source position, copied from the parser when the token starts
    int posChar;
    int posLine;

    // payload; which member is meaningful depends on `type`
    union
    {
        int64_t vInt;       // Tint / Tuint
        double  vFloat;     // Tfloat

        int32_t nArgs;      // for funcalls (the parser also counts block statements here)

        Symbol * symbol;    // Tsymbol
    };
};
class 10 | // but we treat them as the lowest internally, so that we can 11 | // reduce everything up to the opening paren before checking match 12 | enum Precedence 13 | { 14 | P_unary, // unary operators 15 | 16 | P_product, // mul / div / mod 17 | P_sum, // add / sub 18 | P_shift, // bitshifts 19 | 20 | P_compare, // relative comparisons 21 | P_equal, // equality comparisons 22 | 23 | P_bitAnd, // bitwise and 24 | P_bitXor, // bitwise xor 25 | P_bitOr, // bitwise or 26 | 27 | P_logAnd, // logical and 28 | P_logOr, // logical or 29 | 30 | P_assign, // assignments 31 | 32 | P_comma, // comma 33 | 34 | P_flow // parens, control flow constructs 35 | }; 36 | 37 | static int getPrecede(Token const & t) 38 | { 39 | switch(t.type) 40 | { 41 | case Token::Tpos: case Token::Tneg: 42 | return P_unary; 43 | 44 | case Token::Tmul: case Token::Tdiv: case Token::Tmod: 45 | return P_product; 46 | 47 | case Token::Tadd: case Token::Tsub: 48 | return P_sum; 49 | 50 | case Token::TshiftL: case Token::TshiftR: 51 | return P_shift; 52 | 53 | case Token::Tless: case Token::TlessEq: 54 | case Token::Tgreater: case Token::TgreaterEq: 55 | return P_compare; 56 | 57 | case Token::Teq: case Token::TnotEq: 58 | return P_equal; 59 | 60 | case Token::TbitAnd: return P_bitAnd; 61 | case Token::TbitXor: return P_bitXor; 62 | case Token::TbitOr: return P_bitOr; 63 | case Token::TlogAnd: return P_logAnd; 64 | case Token::TlogOr: return P_logOr; 65 | case Token::Tassign: case Token::Tdefine: return P_assign; 66 | case Token::Tcomma: return P_comma; 67 | 68 | case Token::ToParen: case Token::ToBlock: case Token::ToIndex: 69 | case Token::Tif: case Token::TifBody: case Token::Telse: 70 | case Token::Twhile: case Token::TwhileBody: 71 | case Token::Tfuncall: case Token::Treturn: 72 | return P_flow; 73 | 74 | default: BJIT_LOG("TT: %d\n", t.type); assert(false); return 0; 75 | } 76 | 77 | } 78 | 79 | static void defer(Parser & ps) 80 | { 81 | ps.defer.push_back(ps.token); 82 | } 83 | 84 | static void 
deferAs(Parser & ps, Token::Type t) 85 | { 86 | ps.token.type = t; 87 | ps.defer.push_back(ps.token); 88 | } 89 | 90 | static void fragment(Parser & ps, Token const & t) 91 | { 92 | switch(t.type) 93 | { 94 | case Token::Tint: ps.frags.emplace_back(new EConst(t)); break; 95 | case Token::Tuint: ps.frags.emplace_back(new EConst(t)); break; 96 | case Token::Tfloat: ps.frags.emplace_back(new EConst(t)); break; 97 | case Token::Tsymbol: ps.frags.emplace_back(new ESymbol(t)); break; 98 | 99 | case Token::Tadd: ps.frags.emplace_back(new EBinary(t, ps.frags)); break; 100 | case Token::Tsub: ps.frags.emplace_back(new EBinary(t, ps.frags)); break; 101 | case Token::Tmul: ps.frags.emplace_back(new EBinary(t, ps.frags)); break; 102 | case Token::Tdiv: ps.frags.emplace_back(new EBinary(t, ps.frags)); break; 103 | case Token::Tmod: ps.frags.emplace_back(new EBinary(t, ps.frags)); break; 104 | 105 | case Token::TshiftL: ps.frags.emplace_back(new EBinary(t, ps.frags)); break; 106 | case Token::TshiftR: ps.frags.emplace_back(new EBinary(t, ps.frags)); break; 107 | 108 | case Token::TbitOr: ps.frags.emplace_back(new EBinary(t, ps.frags)); break; 109 | case Token::TbitAnd: ps.frags.emplace_back(new EBinary(t, ps.frags)); break; 110 | case Token::TbitXor: ps.frags.emplace_back(new EBinary(t, ps.frags)); break; 111 | 112 | case Token::TbitNot: ps.frags.emplace_back(new EUnary(t, ps.frags)); break; 113 | case Token::TlogNot: ps.frags.emplace_back(new EUnary(t, ps.frags)); break; 114 | 115 | case Token::TlogAnd: ps.frags.emplace_back(new EBinary(t, ps.frags)); break; 116 | case Token::TlogOr: ps.frags.emplace_back(new EBinary(t, ps.frags)); break; 117 | 118 | case Token::Tassign: ps.frags.emplace_back(new EBinary(t, ps.frags)); break; 119 | case Token::Tdefine: ps.frags.emplace_back(new EDefine(t, ps.frags)); break; 120 | 121 | case Token::Teq: ps.frags.emplace_back(new EBinary(t, ps.frags)); break; 122 | case Token::TnotEq: ps.frags.emplace_back(new EBinary(t, ps.frags)); break; 123 | 
124 | case Token::Tless: ps.frags.emplace_back(new EBinary(t, ps.frags)); break; 125 | case Token::TlessEq: ps.frags.emplace_back(new EBinary(t, ps.frags)); break; 126 | case Token::Tgreater: ps.frags.emplace_back(new EBinary(t, ps.frags)); break; 127 | case Token::TgreaterEq: ps.frags.emplace_back(new EBinary(t, ps.frags)); break; 128 | 129 | case Token::Tpos: ps.frags.emplace_back(new EUnary(t, ps.frags)); break; 130 | case Token::Tneg: ps.frags.emplace_back(new EUnary(t, ps.frags)); break; 131 | 132 | case Token::Treturn: ps.frags.emplace_back(new EReturn(t, ps.frags)); break; 133 | case Token::Tfuncall: ps.frags.emplace_back(new ECall(t, ps.frags)); break; 134 | 135 | case Token::ToIndex: ps.frags.emplace_back(new EBinary(t, ps.frags)); break; 136 | 137 | case Token::TifBody: ps.frags.emplace_back(new EIf(t, ps.frags, false)); break; 138 | case Token::Telse: ps.frags.emplace_back(new EIf(t, ps.frags, true)); break; 139 | 140 | case Token::TwhileBody: ps.frags.emplace_back(new EWhile(t, ps.frags)); break; 141 | case Token::Tbreak: ps.frags.emplace_back(new EBreak(t)); break; 142 | case Token::Tcontinue: ps.frags.emplace_back(new EContinue(t)); break; 143 | case Token::ToBlock: ps.frags.emplace_back(new EBlock(t, ps.frags)); break; 144 | 145 | default: BJIT_LOG("TT: %d\n", t.type); assert(false); 146 | } 147 | 148 | } 149 | 150 | void reduce(Parser & ps, int precede) 151 | { 152 | // then reduce everything at current or higher priority 153 | while(ps.defer.size()) 154 | { 155 | auto & t = ps.defer.back(); 156 | if(getPrecede(t) > precede) return; 157 | 158 | fragment(ps, ps.defer.back()); 159 | ps.defer.pop_back(); 160 | } 161 | } 162 | 163 | // forward declare all states 164 | static void psStatement(Parser & ps); // function top-level statement 165 | static void psStatementEnd(Parser & ps); // only ending ; accepted 166 | static void psMaybeAssign(Parser & ps); // expect variable value or comma 167 | static void psInfix(Parser & ps); // infix operators 168 | 
static void psExpr(Parser & ps); // expression 169 | static void psCondition(Parser & ps); // if/while condition context 170 | static void psMaybeElse(Parser & ps); // statement or else for an if 171 | 172 | void bjit::parse(std::vector & codeOut) 173 | { 174 | Parser ps; 175 | 176 | ps.state = psStatement; 177 | 178 | while(true) 179 | { 180 | lexToken(ps); 181 | 182 | if(ps.token.type == Token::Teof) 183 | { 184 | // FIXME: this should be removed once statements go into functions 185 | while(ps.defer.size() && ps.defer.back().type == Token::TifBody) 186 | { 187 | fragment(ps, ps.defer.back()); ps.defer.pop_back(); 188 | } 189 | break; 190 | } 191 | if(ps.token.type == Token::Terror) continue; 192 | 193 | ps.state(ps); 194 | } 195 | 196 | // create Proc for the initial env-size 197 | Proc p(0, ""); 198 | Env env(p.env.size()); 199 | 200 | // wrap all expressions into a block 201 | ps.token.type = Token::ToBlock; 202 | ps.token.nArgs = ps.frags.size(); 203 | fragment(ps, ps.token); 204 | 205 | assert(ps.frags.size() == 1); 206 | auto & ast = ps.frags.back(); 207 | ast->typecheck(ps, env); 208 | ast->debug(0); 209 | BJIT_LOG("\n"); 210 | 211 | if(ps.nErrors) return; 212 | 213 | CodeGen cg(p); 214 | 215 | ast->codeGen(cg); 216 | 217 | // always force return 218 | p.iret(p.lci(0)); 219 | p.debug(); 220 | 221 | BJIT_LOG("-- Compiling:\n"); 222 | p.compile(codeOut, 2); 223 | } 224 | 225 | // helper to figure out what to do with statements 226 | // we'll need this also from psStatement once we add blocks 227 | static void reduceStatement(Parser & ps) 228 | { 229 | ps.state = psStatement; // default to statement 230 | 231 | while(ps.defer.size()) 232 | { 233 | switch(ps.defer.back().type) 234 | { 235 | // for these we've completed the control-flow construct 236 | case Token::Telse: 237 | case Token::TwhileBody: 238 | case Token::Treturn: 239 | fragment(ps, ps.defer.back()); 240 | ps.defer.pop_back(); 241 | // only break, don't return, we might need to reduce more 242 | 
break; 243 | 244 | case Token::TifBody: ps.state = psMaybeElse; return; 245 | case Token::ToBlock: ++ps.defer.back().nArgs; return; 246 | 247 | default: 248 | ps.errorAt(ps.token, "unexpected ';'"); 249 | ps.errorAt(ps.defer.back(), "incomplete expression here"); 250 | 251 | // reset state to avoid error cascade 252 | while(getPrecede(ps.defer.back()) < P_flow) 253 | ps.defer.pop_back(); 254 | return; 255 | } 256 | } 257 | } 258 | 259 | static void psStatement(Parser & ps) 260 | { 261 | switch(ps.token.type) 262 | { 263 | case Token::TcBlock: 264 | if(!ps.defer.size() || ps.defer.back().type != Token::ToBlock) 265 | { 266 | ps.errorAt(ps.token, "unexpected '}'"); return; 267 | } 268 | fragment(ps, ps.defer.back()); ps.defer.pop_back(); 269 | reduceStatement(ps); 270 | break; 271 | 272 | case Token::ToBlock: ps.token.nArgs = 0; defer(ps); break; 273 | case Token::Tif: defer(ps); ps.state = psCondition; break; 274 | case Token::Twhile: defer(ps); ps.state = psCondition; break; 275 | case Token::Treturn: defer(ps); ps.state = psExpr; break; 276 | 277 | case Token::Tbreak: 278 | { 279 | // sanity check that we are in a valid context 280 | bool good = false; 281 | for(auto & t : ps.defer) 282 | { 283 | if(t.type == Token::TwhileBody) good = true; 284 | if(good) break; 285 | } 286 | 287 | if(!good) ps.errorAt(ps.token, "'break' not within a loop"); 288 | else fragment(ps, ps.token); 289 | } 290 | ps.state = psStatementEnd; 291 | break; 292 | 293 | case Token::Tcontinue: // separate in case we add other breakable constructs 294 | { 295 | // sanity check that we are in a valid context 296 | bool good = false; 297 | for(auto & t : ps.defer) 298 | { 299 | if(t.type == Token::TwhileBody) good = true; 300 | if(good) break; 301 | } 302 | 303 | if(!good) ps.errorAt(ps.token, "'continue' not within a loop"); 304 | else fragment(ps, ps.token); 305 | } 306 | ps.state = psStatementEnd; 307 | break; 308 | 309 | case Token::Tsymbol: fragment(ps, ps.token); ps.state = psMaybeAssign; 
break; 310 | 311 | default: ps.state = psExpr; psExpr(ps); break; 312 | } 313 | } 314 | 315 | static void psStatementEnd(Parser & ps) 316 | { 317 | reduceStatement(ps); 318 | 319 | if(ps.token.type != Token::Tsemicolon) 320 | { 321 | ps.errorAt(ps.token, "expected ';'"); 322 | // try to recover by pretending we had a semi 323 | ps.state(ps); 324 | } 325 | } 326 | 327 | static void psMaybeAssign(Parser & ps) 328 | { 329 | switch(ps.token.type) 330 | { 331 | case Token::Tassign: 332 | reduce(ps, P_assign-1); defer(ps); ps.state = psExpr; break; 333 | 334 | case Token::Tdefine: 335 | reduce(ps, P_assign-1); defer(ps); ps.state = psExpr; break; 336 | 337 | default: ps.state = psInfix; psInfix(ps); break; 338 | } 339 | } 340 | 341 | static void psInfix(Parser & ps) 342 | { 343 | switch(ps.token.type) 344 | { 345 | case Token::ToParen: 346 | ps.token.nArgs = 0; 347 | deferAs(ps, Token::Tfuncall); 348 | ps.state = psExpr; break; 349 | 350 | case Token::TcParen: 351 | reduce(ps, P_flow-1); 352 | if(!ps.defer.size()) 353 | { 354 | ps.errorAt(ps.token, "mismatched ')'"); 355 | break; 356 | } 357 | switch(ps.defer.back().type) 358 | { 359 | case Token::ToParen: 360 | ps.defer.pop_back(); break; 361 | 362 | case Token::Tfuncall: 363 | ++ps.defer.back().nArgs; 364 | fragment(ps, ps.defer.back()); 365 | ps.defer.pop_back(); break; 366 | 367 | case Token::Tif: // is this a condition for if-statement? 368 | ps.defer.back().type = Token::TifBody; 369 | ps.state = psStatement; 370 | break; 371 | case Token::Twhile: // is this a condition for while-statement? 
372 | ps.defer.back().type = Token::TwhileBody; 373 | ps.state = psStatement; 374 | break; 375 | 376 | default: 377 | ps.errorAt(ps.token, "mismatched ')'"); 378 | break; 379 | } 380 | break; 381 | 382 | case Token::Tcomma: 383 | reduce(ps, P_comma); 384 | // check that this is a context where comma is valid 385 | switch(ps.defer.back().type) 386 | { 387 | case Token::Tfuncall: ++ps.defer.back().nArgs; ps.state = psExpr; break; 388 | default: ps.errorAt(ps.token, "unexpected ','"); 389 | } 390 | break; 391 | 392 | case Token::ToIndex: defer(ps); ps.state = psExpr; break; 393 | 394 | case Token::TcIndex: 395 | reduce(ps, P_flow-1); 396 | if(!ps.defer.size()) 397 | { 398 | ps.errorAt(ps.token, "mismatched ']'"); 399 | break; 400 | } 401 | switch(ps.defer.back().type) 402 | { 403 | case Token::ToIndex: 404 | fragment(ps, ps.defer.back()); 405 | ps.defer.pop_back(); break; 406 | 407 | default: 408 | ps.errorAt(ps.token, "mismatched ']'"); 409 | break; 410 | } 411 | break; 412 | 413 | case Token::Tsemicolon: 414 | reduce(ps, P_assign); 415 | reduceStatement(ps); 416 | break; 417 | 418 | case Token::Tadd: 419 | case Token::Tsub: 420 | reduce(ps, P_sum); defer(ps); ps.state = psExpr; break; 421 | 422 | case Token::Tmul: 423 | case Token::Tdiv: 424 | case Token::Tmod: 425 | reduce(ps, P_product); defer(ps); ps.state = psExpr; break; 426 | 427 | case Token::TshiftL: 428 | case Token::TshiftR: 429 | reduce(ps, P_shift); defer(ps); ps.state = psExpr; break; 430 | 431 | case Token::TbitOr: 432 | reduce(ps, P_bitOr); defer(ps); ps.state = psExpr; break; 433 | 434 | case Token::TbitAnd: 435 | reduce(ps, P_bitAnd); defer(ps); ps.state = psExpr; break; 436 | 437 | case Token::TbitXor: 438 | reduce(ps, P_bitXor); defer(ps); ps.state = psExpr; break; 439 | 440 | case Token::TlogOr: 441 | reduce(ps, P_logOr); defer(ps); ps.state = psExpr; break; 442 | 443 | case Token::TlogAnd: 444 | reduce(ps, P_logAnd); defer(ps); ps.state = psExpr; break; 445 | 446 | case Token::Tless: 447 | case 
Token::TlessEq: 448 | case Token::Tgreater: 449 | case Token::TgreaterEq: 450 | reduce(ps, P_compare); defer(ps); ps.state = psExpr; break; 451 | 452 | case Token::Teq: 453 | case Token::TnotEq: 454 | reduce(ps, P_equal); defer(ps); ps.state = psExpr; break; 455 | 456 | 457 | case Token::TcBlock: 458 | // this is always an error, but we'll recover the case where 459 | // we are simply missing a semicolon 460 | reduce(ps, P_assign); 461 | if(ps.defer.back().type == Token::ToBlock) 462 | { 463 | ps.errorAt(ps.token, "missing ';'"); 464 | ++ps.defer.back().nArgs; 465 | psStatement(ps); 466 | return; 467 | } 468 | // fall-thru to default error 469 | default: 470 | ps.errorAt(ps.token, "unexpected token - expecting operator"); break; 471 | } 472 | } 473 | 474 | static void psExpr(Parser & ps) 475 | { 476 | // special case for closing paren of function calls 477 | if(ps.token.type == Token::TcParen) 478 | { 479 | if(ps.defer.back().type == Token::Tfuncall && ps.defer.back().nArgs == 0) 480 | { 481 | reduce(ps, P_flow); ps.state = psInfix; return; 482 | } 483 | } 484 | 485 | switch(ps.token.type) 486 | { 487 | case Token::ToParen: defer(ps); break; 488 | 489 | // no reduce for unary operators as they are already the highest 490 | // and should be reduced right-to-left 491 | case Token::Tadd: deferAs(ps, Token::Tpos); break; 492 | case Token::Tsub: deferAs(ps, Token::Tneg); break; 493 | case Token::TlogNot: defer(ps); break; 494 | case Token::TbitNot: defer(ps); break; 495 | 496 | case Token::Tint: 497 | case Token::Tuint: 498 | case Token::Tfloat: 499 | case Token::Tsymbol: 500 | fragment(ps, ps.token); ps.state = psInfix; break; 501 | 502 | default: ps.errorAt(ps.token, "unexpected token - expecting expression"); break; 503 | } 504 | } 505 | 506 | static void psCondition(Parser & ps) 507 | { 508 | assert(ps.defer.back().type == Token::Tif 509 | || ps.defer.back().type == Token::Twhile); 510 | 511 | switch(ps.token.type) 512 | { 513 | case Token::ToParen: 514 | ps.state = 
// printf into std::vector<char>
//
// appends to the end of the vector, does NOT zero-terminate
//
// If the format cannot be processed (vsnprintf reports an error by
// returning a negative length) the vector is left untouched.
static inline void vformat(std::vector<char> & out, const char * fmt, ...)
{
    va_list va;

    // get length - we need the va_list twice, because the first
    // vsnprintf call consumes it
    va_start(va, fmt);
    int len = vsnprintf(0, 0, fmt, va);
    va_end(va);

    // negative means encoding error: without this check the resize
    // below would not grow the vector and the final pop_back would
    // drop a byte that was already in the vector (or pop an empty one)
    if(len < 0) return;

    // get the offset
    const size_t vOff = out.size();
    // resize to fit string + null
    out.resize(vOff + (size_t) len + 1);

    va_start(va, fmt);
    vsnprintf(out.data() + vOff, (size_t) len + 1, fmt, va);
    va_end(va);

    // remove null-termination
    out.pop_back();
}
std::vector formatBuffer; 108 | 109 | int nErrors = 0; 110 | 111 | // FIXME: redirect to the error buffer 112 | void doError(const char * file, int line, int col, 113 | const char * type, const char * what) 114 | { 115 | formatBuffer.clear(); 116 | vformat(formatBuffer, "%s:%d:%d: %s: %s\n", 117 | file, line, col, type, what); 118 | vformat(formatBuffer, " "); 119 | assert(line < inputLines.size()); 120 | int i = inputLines[line-1]; 121 | int j = inputLines[line]; 122 | while(i < j) formatBuffer.push_back(inputBuffer[i++]); 123 | if(formatBuffer.back() != '\n') formatBuffer.push_back('\n'); 124 | vformat(formatBuffer, "%*s^\n", 4+col, ""); 125 | 126 | // append to collected error buffer 127 | errorBuffer.insert(errorBuffer.end(), 128 | formatBuffer.begin(), formatBuffer.end()); 129 | 130 | // then print .. need null terminate for puts 131 | formatBuffer.push_back(0); 132 | fflush(stdout); // keep debugs cleaner 133 | fprintf(stderr, "%s", formatBuffer.data()); 134 | } 135 | 136 | void errorAt(Token & t, const char * what) 137 | { 138 | ++nErrors; 139 | doError("", t.posLine, t.posChar, "error", what); 140 | } 141 | 142 | void warningAt(Token & t, const char * what) 143 | { 144 | doError("", t.posLine, t.posChar, "warning", what); 145 | } 146 | 147 | // current token 148 | Token token; 149 | 150 | // defer is a stack of tokens not yet reduced 151 | std::vector defer; 152 | 153 | // frags is a stack of AST fragments that eventually get 154 | // consumed when the defer-stack is reduced 155 | std::vector> frags; 156 | 157 | void (*state)(Parser &) = 0; 158 | }; 159 | 160 | void lexToken(Parser & ps); 161 | 162 | void parse(std::vector & codeOut); 163 | } -------------------------------------------------------------------------------- /run-tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | # This is random collection of very simple tests that are crafted 4 | # to expose potential problems mostly in CSE 
ruleset 5 | 6 | bin/test_add_ii 7 | bin/test_add_ff 8 | bin/test_sub_ii # test parameter order, mostly 9 | 10 | bin/test_shift 11 | bin/test_divmod 12 | 13 | bin/test_ci2f_cf2i 14 | bin/test_sx_zx 15 | bin/test_load_store 16 | 17 | bin/test_callp 18 | bin/test_calln 19 | 20 | bin/test_fib 21 | bin/test_call_stub 22 | bin/test_loop # this tries to confuse opt_jump_be 23 | 24 | bin/test_mem_opt 25 | 26 | cat << END | bin/bjit 27 | x := 0/0; y := x/1u; 28 | while(x < 10) { if(y != 2) x = x+1; x = x+1; } return x; 29 | END 30 | 31 | cat << END | bin/bjit 32 | x := 0/0; y := x/1u; 33 | while(x < 10) { if(y != 2) y = x+1; x = x+1; } return x; 34 | END 35 | 36 | cat << END | bin/bjit 37 | x := 0/0; y := x/1u; 38 | while(x < 10) { if(y != 2) x = x+1; else x = x+1; x=x+1; } return x; 39 | END 40 | 41 | cat << END | bin/bjit 42 | x := 0/0; y := x/1u; 43 | while(x < 10) { if(y != 2) x = x+1; else x = x+1; } return x+1; 44 | END 45 | 46 | cat << END | bin/bjit 47 | x := 0; y := 0/0; while(x < 10) { if(y != 2) x = x+1; else x = x+1; } return (x+1); 48 | END 49 | 50 | cat << END | bin/bjit 51 | x := 0; y := 0/0; while(x < 10) { x = x+(y/0); } return (x+1); 52 | END 53 | 54 | cat << END | bin/bjit 55 | x := 1; while(1) { x = x+1; if (x < 10) continue; break; } 56 | END 57 | 58 | cat << END | bin/bjit 59 | y := 2/0; x := 1; while(1) { x = x+1; if ((y+x+y) < (y+10+y)) continue; break; } 60 | END 61 | 62 | cat << END | bin/bjit 63 | y := 2/0; z := 3/0; x := 1; while(1) { x = x+1; if (((y+x)+(x+z)) < ((y+10)+(z+10))) continue; break; } 64 | END 65 | 66 | # fuzzfold generates tons of garbage, so throw it into /dev/null 67 | # and then run the thing manually if it fails 68 | echo "Fuzzing..." 69 | bin/test_fuzzfold 2> /dev/null 70 | echo "Fuzz passed." 71 | 72 | bin/test_sieve 73 | 74 | echo "Looks like it didn't crash, at least... 
;-)" 75 | 76 | -------------------------------------------------------------------------------- /src/arch-arm64-asm.h: -------------------------------------------------------------------------------- 1 | 2 | #pragma once 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "arch-arm64.h" 9 | 10 | namespace bjit 11 | { 12 | 13 | static const uint8_t PC = 0xff; // otherwise invalid, used for RIP relative 14 | 15 | // encode register from our naming to X64 values 16 | static uint8_t REG(int r) 17 | { 18 | using namespace regs; 19 | 20 | switch(r) 21 | { 22 | case x0: case v0: return 0; 23 | case x1: case v1: return 1; 24 | case x2: case v2: return 2; 25 | case x3: case v3: return 3; 26 | case x4: case v4: return 4; 27 | case x5: case v5: return 5; 28 | case x6: case v6: return 6; 29 | case x7: case v7: return 7; 30 | case x8: case v8: return 8; 31 | case x9: case v9: return 9; 32 | 33 | case x10: case v10: return 10; 34 | case x11: case v11: return 11; 35 | case x12: case v12: return 12; 36 | case x13: case v13: return 13; 37 | case x14: case v14: return 14; 38 | case x15: case v15: return 15; 39 | case x16: case v16: return 16; 40 | case x17: case v17: return 17; 41 | case x18: case v18: return 18; 42 | case x19: case v19: return 19; 43 | 44 | case x20: case v20: return 20; 45 | case x21: case v21: return 21; 46 | case x22: case v22: return 22; 47 | case x23: case v23: return 23; 48 | case x24: case v24: return 24; 49 | case x25: case v25: return 25; 50 | case x26: case v26: return 26; 51 | case x27: case v27: return 27; 52 | case x28: case v28: return 28; 53 | 54 | case fp: case v29: return 29; 55 | 56 | case lr: case v30: return 30; 57 | case sp: case v31: return 31; 58 | 59 | // this is only used internally 60 | case PC: return PC; 61 | } 62 | 63 | BJIT_ASSERT(false); 64 | return 0; 65 | } 66 | 67 | // return the 4-bit Condition Code part of conditional ops 68 | uint8_t _CC(uint8_t opcode) 69 | { 70 | switch(opcode) 71 | { 72 | case ops::jilt: return 0xB; 
73 | case ops::jige: case ops::jdge: case ops::jfge: return 0xA; 74 | case ops::jigt: case ops::jdgt: case ops::jfgt: return 0xC; 75 | case ops::jile: case ops::jdle: case ops::jfle: return 0xD; 76 | 77 | // 0xE = always, 0xF = ??? 78 | 79 | // on ARM using signed LT for floats also matches unordered 80 | // where as using unsigned only allows proper match 81 | case ops::jult: case ops::jdlt: case ops::jflt: return 0x3; 82 | case ops::juge: return 0x2; 83 | case ops::jugt: return 0x8; 84 | case ops::jule: return 0x9; 85 | 86 | case ops::jine: case ops::jdne: case ops::jfne: case ops::jnz: return 0x1; 87 | case ops::jieq: case ops::jdeq: case ops::jfeq: case ops::jz: return 0x0; 88 | 89 | default: break; 90 | } 91 | // silence warning if assert is nop 92 | BJIT_ASSERT(false); return 0; 93 | } 94 | 95 | struct AsmArm64 96 | { 97 | std::vector & out; 98 | 99 | AsmArm64(std::vector & out, unsigned nBlocks) : out(out) 100 | { 101 | rodata32_index = nBlocks++; 102 | rodata64_index = nBlocks++; 103 | //rodata128_index = nBlocks++; 104 | blockOffsets.resize(nBlocks); 105 | } 106 | 107 | // separate .rodata for 128/64/32 bit constants 108 | // we will place the most aligned block first 109 | /* 110 | std::vector<__m128> rodata128; 111 | uint32_t rodata128_index; // index into blockOffsets 112 | */ 113 | std::vector rodata64; 114 | uint32_t rodata64_index; // index into blockOffsets 115 | 116 | std::vector rodata32; 117 | uint32_t rodata32_index; // index into blockOffsets 118 | 119 | // stores byteOffsets to each basic block for relocation 120 | std::vector blockOffsets; 121 | 122 | struct Reloc 123 | { 124 | uint32_t codeOffset; 125 | uint32_t blockIndex; 126 | }; 127 | 128 | std::vector relocations; 129 | 130 | void emit(uint8_t byte) { out.push_back(byte); } 131 | void emit32(uint32_t data) 132 | { 133 | out.push_back(data & 0xff); data >>= 8; 134 | out.push_back(data & 0xff); data >>= 8; 135 | out.push_back(data & 0xff); data >>= 8; 136 | out.push_back(data & 0xff); 137 
| } 138 | 139 | // add relocation entry 140 | void addReloc(uint32_t block) 141 | { 142 | relocations.resize(relocations.size()+1); 143 | relocations.back().codeOffset = out.size(); 144 | relocations.back().blockIndex = block; 145 | } 146 | 147 | // store 32-bit constant into .rodata32 148 | // add relocation and return offset for RIP relative 149 | uint32_t data32(uint32_t data) 150 | { 151 | unsigned index = rodata32.size(); 152 | // try to find an existing constant with same value 153 | for(unsigned i = 0; i < rodata32.size(); ++i) 154 | { 155 | if(rodata32[i] == data) { index = i; break; } 156 | } 157 | 158 | if(index == rodata32.size()) rodata32.push_back(data); 159 | addReloc(rodata32_index); 160 | return index*sizeof(uint32_t); 161 | } 162 | 163 | // store 64-bit constant into .rodata64 164 | // add relocation and return offset for RIP relative 165 | uint32_t data64(uint64_t data) 166 | { 167 | unsigned index = rodata64.size(); 168 | // try to find an existing constant with same value 169 | for(unsigned i = 0; i < rodata64.size(); ++i) 170 | { 171 | if(rodata64[i] == data) { index = i; break; } 172 | } 173 | 174 | if(index == rodata64.size()) rodata64.push_back(data); 175 | addReloc(rodata64_index); 176 | return index*sizeof(uint64_t); 177 | } 178 | 179 | uint32_t data32f(float data) 180 | { 181 | return data32(*reinterpret_cast(&data)); 182 | } 183 | 184 | uint32_t data64f(double data) 185 | { 186 | return data64(*reinterpret_cast(&data)); 187 | } 188 | 189 | /* 190 | uint32_t data128(__m128 data) 191 | { 192 | unsigned index = rodata128.size(); 193 | // try to find an existing constant with same value 194 | for(unsigned i = 0; i < rodata128.size(); ++i) 195 | { 196 | if(!memcmp(&rodata128[i],&data,sizeof(__m128))) { index = i; break; } 197 | } 198 | 199 | if(index == rodata128.size()) rodata128.push_back(data); 200 | addReloc(rodata128_index); 201 | return index*sizeof(__m128); 202 | } 203 | */ 204 | 205 | void MOVri(int r, int64_t imm64) 206 | { 207 | 
if(imm64 == (0xffff & imm64)) 208 | { 209 | // MOVZ 210 | emit32(0xD2800000 | REG(r) | ((0xffff & imm64) << 5)); 211 | return; 212 | } 213 | 214 | if(imm64 == ~(0xffff & ~imm64)) 215 | { 216 | // MOVN 217 | emit32(0x92800000 | REG(r) | ((0xffff & ~imm64) << 5)); 218 | return; 219 | } 220 | 221 | if(imm64 == (uint32_t) imm64) 222 | { 223 | // LDR pc-relative .. imm19 224 | auto off = (data32(imm64) - out.size()) >> 2; 225 | emit32(0x18000000 | REG(r) | ((0x7ffff & off) << 5)); 226 | return; 227 | } 228 | 229 | if(imm64 == (int32_t) imm64) 230 | { 231 | // LDR pc-relative .. imm19 232 | auto off = (data32(imm64) - out.size()) >> 2; 233 | emit32(0x98000000 | REG(r) | ((0x7ffff & off) << 5)); 234 | return; 235 | } 236 | 237 | // general 64-bit LDR pc-relative .. imm19 238 | auto off = (data64(imm64) - out.size()) >> 2; 239 | emit32(0x58000000 | REG(r) | ((0x7ffff & off) << 5)); 240 | 241 | } 242 | 243 | void _mem(uint32_t op, int r0, int r1, int32_t offset, int shift) 244 | { 245 | if(offset < 0 || offset > (0x3ff << shift) 246 | || (offset & ~((~0u)<>shift)&0x1ff) << 10)); 257 | } 258 | 259 | void _mem2(uint32_t op, int r0, int r1, int r2, int32_t offset) 260 | { 261 | if(offset) 262 | { 263 | // need some magic 264 | MOVri(regs::x16, offset); 265 | _rrr(_ADD, regs::x16, regs::x16, r1); 266 | 267 | r1 = regs::x16; 268 | } 269 | 270 | emit32(op | REG(r0) | (REG(r1)<<5) | (REG(r2) << 16)); 271 | } 272 | 273 | void _rrr(uint32_t op, int r0, int r1, int r2) 274 | { 275 | emit32(op | REG(r0) | (REG(r1)<<5) | (REG(r2) << 16)); 276 | 277 | } 278 | 279 | // ADD/SUB have 12-bit immediate versions 280 | void _rri12(uint32_t immop, int r0, int r1, int32_t imm32) 281 | { 282 | BJIT_ASSERT(imm32 == (imm32 & 0xfff)); 283 | 284 | _rrr(immop | (imm32 << 10), r0, r1, regs::x0); 285 | } 286 | 287 | // NOTE: bit0 is set and needs to be invert, so XOR the condition code 288 | static const uint32_t _CSET = 0x9A9F17E0; 289 | 290 | void CMPrr(int r0, int r1) { _rrr(0xEB000000, regs::sp, r0, 
r1); } 291 | void TSTrr(int r0, int r1) { _rrr(0xEA000000, regs::sp, r0, r1); } 292 | 293 | void FCMPss(int r0, int r1) { _rrr(0x1E202000, regs::x0, r0, r1); } 294 | void FCMPdd(int r0, int r1) { _rrr(0x1E602000, regs::x0, r0, r1); } 295 | 296 | void MOVrr(int r0, int r1) { _rrr(0xAA0003E0, r0, 0, r1); } 297 | 298 | static const uint32_t _ADD = 0x8B000000; 299 | static const uint32_t _SUB = 0xCB000000; 300 | 301 | // SUB from zero reg 302 | void NEGr(int r0, int r1) { _rrr(_SUB, r0, regs::sp, r1); } 303 | 304 | static const uint32_t _MUL = 0x9B007C00; 305 | static const uint32_t _SDIV = 0x9AC00C00; 306 | static const uint32_t _UDIV = 0x9AC00800; 307 | 308 | void MSUBrrrr(int r0, int r1, int r2, int r3) 309 | { _rrr(0x9B008000 | (REG(r3)<<10), r0, r1, r2); } 310 | 311 | // this uses EON with zero register 312 | void NOTr(int r0, int r1) { _rrr(0xCA3F0000, r0, r1, 0); } 313 | 314 | static const uint32_t _AND = 0x8A000000; 315 | static const uint32_t _OR = 0xAA000000; 316 | static const uint32_t _XOR = 0xCA000000; 317 | 318 | }; 319 | 320 | } -------------------------------------------------------------------------------- /src/arch-arm64-ops.cpp: -------------------------------------------------------------------------------- 1 | 2 | #ifdef __aarch64__ 3 | 4 | #include "bjit.h" 5 | 6 | using namespace bjit; 7 | using namespace bjit::impl; 8 | 9 | RegMask Op::regsMask() 10 | { 11 | switch(flags.type) 12 | { 13 | case _ptr: return regs::mask_int; 14 | case _f32: return regs::mask_float; 15 | case _f64: return regs::mask_float; 16 | 17 | default: BJIT_LOG("%s\n", strOpcode()); 18 | } 19 | // silence warning if assert is nop 20 | BJIT_ASSERT(false); return 0; 21 | } 22 | 23 | RegMask Op::regsOut() 24 | { 25 | // only deal with anything that isn't regs::mask_int explicit 26 | switch(opcode) 27 | { 28 | default: return regsMask(); // no special case -> any valid 29 | 30 | // special 31 | case ops::alloc: return R2Mask(regs::sp); 32 | 33 | case ops::icallp: case ops::icalln: 
return R2Mask(regs::x0); 34 | 35 | case ops::fcallp: case ops::fcalln: 36 | case ops::dcallp: case ops::dcalln: return R2Mask(regs::v0); 37 | 38 | // we have in[0] = index in type, in[1] = index total 39 | // which one we want to use varies by platform 40 | case ops::iarg: 41 | switch(indexType) // AArch64 uses position by type 42 | { 43 | case 0: return R2Mask(regs::x0); 44 | case 1: return R2Mask(regs::x1); 45 | case 2: return R2Mask(regs::x2); 46 | case 3: return R2Mask(regs::x3); 47 | case 4: return R2Mask(regs::x4); 48 | case 5: return R2Mask(regs::x5); 49 | case 6: return R2Mask(regs::x6); 50 | case 7: return R2Mask(regs::x7); 51 | 52 | // FIXME: We need to teach RA about stack parameters. 53 | default: BJIT_ASSERT(false); 54 | } 55 | case ops::farg: 56 | case ops::darg: 57 | switch(indexType) // AArch64 uses position by type 58 | { 59 | case 0: return R2Mask(regs::v0); 60 | case 1: return R2Mask(regs::v1); 61 | case 2: return R2Mask(regs::v2); 62 | case 3: return R2Mask(regs::v3); 63 | case 4: return R2Mask(regs::v4); 64 | case 5: return R2Mask(regs::v5); 65 | case 6: return R2Mask(regs::v6); 66 | case 7: return R2Mask(regs::v7); 67 | 68 | // FIXME: We need to teach RA about stack parameters. 
69 | default: BJIT_ASSERT(false); 70 | } 71 | } 72 | 73 | // silence warning if assert is nop 74 | BJIT_ASSERT(false); return 0; 75 | } 76 | 77 | RegMask Op::regsIn(int i) 78 | { 79 | switch(opcode) 80 | { 81 | default: return regsMask(); // no special case -> same as output 82 | 83 | // indirect calls can theoretically take any GP register 84 | // but we don't want to use x0-x8 used for passing arguments 85 | // and we MUST have a caller saved register for tcallp 86 | // 87 | // so practically it makes sense to use x9-x15 88 | case ops::icallp: case ops::dcallp: 89 | case ops::fcallp: case ops::tcallp: 90 | return R2Mask(regs::x9) 91 | |R2Mask(regs::x10) |R2Mask(regs::x11) 92 | |R2Mask(regs::x12) |R2Mask(regs::x13) 93 | |R2Mask(regs::x14) |R2Mask(regs::x15); 94 | 95 | // loads and stores allow stack pointer as their first argument 96 | // FIXME: we do NOT want to rename to RSP though :D 97 | case ops::li8: case ops::li16: case ops::li32: case ops::li64: 98 | case ops::lu8: case ops::lu16: case ops::lu32: 99 | case ops::lf32: case ops::lf64: 100 | case ops::si8: case ops::si16: case ops::si32: case ops::si64: 101 | case ops::s2i8: case ops::s2i16: case ops::s2i32: case ops::s2i64: 102 | return regs::mask_int | (i ? 0 : R2Mask(regs::sp)); 103 | case ops::sf32: case ops::sf64: 104 | case ops::s2f32: case ops::s2f64: 105 | return i ? 
regs::mask_float : ((regs::mask_int) | R2Mask(regs::sp)); 106 | 107 | // jumps and float compares need explicit types 108 | case ops::jilt: case ops::jige: 109 | case ops::jigt: case ops::jile: 110 | case ops::jieq: case ops::jine: 111 | case ops::jiltI: case ops::jigeI: 112 | case ops::jigtI: case ops::jileI: 113 | case ops::jieqI: case ops::jineI: 114 | case ops::jz: case ops::jnz: 115 | return regs::mask_int; 116 | 117 | case ops::jdlt: case ops::jdge: 118 | case ops::jdgt: case ops::jdle: 119 | case ops::jdeq: case ops::jdne: 120 | 121 | case ops::flt: case ops::fge: 122 | case ops::fgt: case ops::fle: 123 | case ops::feq: case ops::fne: 124 | 125 | case ops::lcf: case ops::cf2i: 126 | 127 | case ops::dlt: case ops::dge: 128 | case ops::dgt: case ops::dle: 129 | case ops::deq: case ops::dne: 130 | 131 | case ops::lcd: case ops::cd2i: 132 | case ops::bcd2i: case ops::bcf2i: 133 | return regs::mask_float; 134 | 135 | // explicit with casts (duh) 136 | case ops::ci2f: case ops::bci2f: 137 | case ops::ci2d: case ops::bci2d: 138 | return regs::mask_int; 139 | 140 | case ops::ipass: 141 | switch(indexType) // AArch64 uses position by type 142 | { 143 | case 0: return R2Mask(regs::x0); 144 | case 1: return R2Mask(regs::x1); 145 | case 2: return R2Mask(regs::x2); 146 | case 3: return R2Mask(regs::x3); 147 | case 4: return R2Mask(regs::x4); 148 | case 5: return R2Mask(regs::x5); 149 | case 6: return R2Mask(regs::x6); 150 | case 7: return R2Mask(regs::x7); 151 | 152 | // FIXME: We need to teach RA about stack parameters. 
153 | default: BJIT_ASSERT(false); 154 | } 155 | case ops::fpass: 156 | case ops::dpass: 157 | switch(indexType) // AArch64 uses position by type 158 | { 159 | case 0: return R2Mask(regs::v0); 160 | case 1: return R2Mask(regs::v1); 161 | case 2: return R2Mask(regs::v2); 162 | case 3: return R2Mask(regs::v3); 163 | case 4: return R2Mask(regs::v4); 164 | case 5: return R2Mask(regs::v5); 165 | case 6: return R2Mask(regs::v6); 166 | case 7: return R2Mask(regs::v7); 167 | 168 | // FIXME: We need to teach RA about stack parameters. 169 | default: BJIT_ASSERT(false); 170 | } 171 | 172 | // these are fixed 173 | case ops::iret: return R2Mask(regs::x0); 174 | case ops::fret: return R2Mask(regs::v0); 175 | case ops::dret: return R2Mask(regs::v0); 176 | 177 | } 178 | } 179 | 180 | RegMask Op::regsLost() 181 | { 182 | switch(opcode) 183 | { 184 | // for now, collect registers used by previous args 185 | // this should help convince RA to do the right thing 186 | case ops::ipass: 187 | case ops::dpass: 188 | { 189 | RegMask used = 0; 190 | for(int i = 0; i < in[1]; ++i) 191 | { 192 | used |= regsIn(i); 193 | } 194 | return used; 195 | } 196 | 197 | case ops::icalln: case ops::fcalln: case ops::dcalln: 198 | case ops::icallp: case ops::fcallp: case ops::dcallp: 199 | return regs::caller_saved | R2Mask(regs::lr); 200 | 201 | default: return 0; 202 | } 203 | } 204 | 205 | #endif -------------------------------------------------------------------------------- /src/arch-arm64.h: -------------------------------------------------------------------------------- 1 | 2 | #pragma once 3 | 4 | namespace bjit 5 | { 6 | // this is a hint for opt-ra 7 | static const bool arch_explicit_output_regs = true; 8 | 9 | // we use this for types, etc 10 | typedef uint64_t RegMask; 11 | 12 | // convert single-register to a mask 13 | static RegMask R2Mask(int r) { return ((RegMask)1)< any valid 29 | 30 | // special 31 | case ops::alloc: return R2Mask(regs::rsp); 32 | 33 | // divisions are fixed 
registers 34 | case ops::idiv: case ops::udiv: return R2Mask(regs::rax); 35 | case ops::imod: case ops::umod: return R2Mask(regs::rdx); 36 | 37 | case ops::icallp: case ops::icalln: return R2Mask(regs::rax); 38 | 39 | case ops::fcallp: case ops::fcalln: 40 | case ops::dcallp: case ops::dcalln: return R2Mask(regs::xmm0); 41 | 42 | // we have in[0] = index in type, in[1] = index total 43 | // which one we want to use varies by platform 44 | case ops::iarg: 45 | #ifdef _WIN32 46 | switch(indexTotal) // Win64 wants the total position 47 | { 48 | case 0: return R2Mask(regs::rcx); 49 | case 1: return R2Mask(regs::rdx); 50 | case 2: return R2Mask(regs::r8); 51 | case 3: return R2Mask(regs::r9); 52 | 53 | // FIXME: We need to teach RA about stack parameters. 54 | default: BJIT_ASSERT(false); 55 | } 56 | #else 57 | switch(indexType) // SysV uses position by type 58 | { 59 | case 0: return R2Mask(regs::rdi); 60 | case 1: return R2Mask(regs::rsi); 61 | case 2: return R2Mask(regs::rdx); 62 | case 3: return R2Mask(regs::rcx); 63 | case 4: return R2Mask(regs::r8); 64 | case 5: return R2Mask(regs::r9); 65 | 66 | // FIXME: We need to teach RA about stack parameters. 67 | default: BJIT_ASSERT(false); 68 | } 69 | #endif 70 | case ops::farg: 71 | case ops::darg: 72 | #ifdef _WIN32 73 | switch(indexTotal) // Win64 wants the total position 74 | { 75 | case 0: return R2Mask(regs::xmm0); 76 | case 1: return R2Mask(regs::xmm1); 77 | case 2: return R2Mask(regs::xmm2); 78 | case 3: return R2Mask(regs::xmm3); 79 | 80 | // FIXME: We need to teach RA about stack parameters. 
81 | default: BJIT_ASSERT(false); 82 | } 83 | #else 84 | switch(indexType) // SysV uses position by type 85 | { 86 | case 0: return R2Mask(regs::xmm0); 87 | case 1: return R2Mask(regs::xmm1); 88 | case 2: return R2Mask(regs::xmm2); 89 | case 3: return R2Mask(regs::xmm3); 90 | case 4: return R2Mask(regs::xmm4); 91 | case 5: return R2Mask(regs::xmm5); 92 | case 6: return R2Mask(regs::xmm6); 93 | case 7: return R2Mask(regs::xmm7); 94 | 95 | // FIXME: We need to teach RA about stack parameters. 96 | default: BJIT_ASSERT(false); 97 | } 98 | #endif 99 | } 100 | 101 | // silence warning if assert is nop 102 | BJIT_ASSERT(false); return 0; 103 | } 104 | 105 | RegMask Op::regsIn(int i) 106 | { 107 | switch(opcode) 108 | { 109 | default: return regsMask(); // no special case -> same as input 110 | 111 | // indirect calls can theoretically take any GP register 112 | // but force RAX so we hopefully don't globber stuff 113 | case ops::icallp: case ops::dcallp: 114 | case ops::fcallp: case ops::tcallp: 115 | return R2Mask(regs::rax); 116 | 117 | // loads and stores allow stack pointer as their first argument 118 | // FIXME: we do NOT want to rename to RSP though :D 119 | case ops::li8: case ops::li16: case ops::li32: case ops::li64: 120 | case ops::lu8: case ops::lu16: case ops::lu32: 121 | case ops::lf32: case ops::lf64: 122 | case ops::si8: case ops::si16: case ops::si32: case ops::si64: 123 | case ops::s2i8: case ops::s2i16: case ops::s2i32: case ops::s2i64: 124 | return regs::mask_int | (i ? R2Mask(regs::rsp) : 0); 125 | case ops::sf32: case ops::sf64: 126 | case ops::s2f32: case ops::s2f64: 127 | return i ? 
((regs::mask_int) | R2Mask(regs::rsp)) : regs::mask_float; 128 | 129 | // allow iadd and iaddI to take RSP too, saves moves if we use LEA 130 | case ops::iadd: case ops::iaddI: 131 | return regs::mask_int | R2Mask(regs::rsp); 132 | 133 | // integer division takes RDX:RAX as 128-bit first operand 134 | // we only do 64-bit, but force RAX on 1st and forbid RDX on 2nd 135 | case ops::idiv: case ops::udiv: 136 | case ops::imod: case ops::umod: 137 | return (!i) ? R2Mask(regs::rax) 138 | : (regs::mask_int & ~R2Mask(regs::rdx)); 139 | 140 | case ops::jilt: case ops::jige: 141 | case ops::jigt: case ops::jile: 142 | case ops::jieq: case ops::jine: 143 | case ops::jiltI: case ops::jigeI: 144 | case ops::jigtI: case ops::jileI: 145 | case ops::jieqI: case ops::jineI: 146 | case ops::jz: case ops::jnz: 147 | return regs::mask_int; 148 | 149 | case ops::jdlt: case ops::jdge: 150 | case ops::jdgt: case ops::jdle: 151 | case ops::jdeq: case ops::jdne: 152 | 153 | case ops::flt: case ops::fge: 154 | case ops::fgt: case ops::fle: 155 | case ops::feq: case ops::fne: 156 | 157 | case ops::lcf: case ops::cf2i: 158 | 159 | case ops::dlt: case ops::dge: 160 | case ops::dgt: case ops::dle: 161 | case ops::deq: case ops::dne: 162 | 163 | case ops::lcd: case ops::cd2i: 164 | case ops::bcd2i: case ops::bcf2i: 165 | return regs::mask_float; 166 | 167 | case ops::ci2f: case ops::bci2f: 168 | case ops::ci2d: case ops::bci2d: 169 | return regs::mask_int; 170 | 171 | // shifts want their second operand in CL 172 | case ops::ishl: case ops::ishr: case ops::ushr: 173 | return i ? 
R2Mask(regs::rcx) : 174 | (regs::mask_int &~ R2Mask(regs::rcx)); 175 | 176 | case ops::ipass: 177 | #ifdef _WIN32 178 | switch(indexTotal) // Win64 wants the total position 179 | { 180 | case 0: return R2Mask(regs::rcx); 181 | case 1: return R2Mask(regs::rdx); 182 | case 2: return R2Mask(regs::r8); 183 | case 3: return R2Mask(regs::r9); 184 | 185 | default: BJIT_ASSERT(false); // FIXME: RA can't handle 186 | } 187 | #else 188 | switch(indexType) // SysV uses position by type 189 | { 190 | case 0: return R2Mask(regs::rdi); 191 | case 1: return R2Mask(regs::rsi); 192 | case 2: return R2Mask(regs::rdx); 193 | case 3: return R2Mask(regs::rcx); 194 | case 4: return R2Mask(regs::r8); 195 | case 5: return R2Mask(regs::r9); 196 | 197 | default: BJIT_ASSERT(false); // FIXME: RA can't handle 198 | } 199 | #endif 200 | case ops::fpass: 201 | case ops::dpass: 202 | #ifdef _WIN32 203 | switch(indexTotal) // Win64 wants the total index 204 | { 205 | case 0: return R2Mask(regs::xmm0); 206 | case 1: return R2Mask(regs::xmm1); 207 | case 2: return R2Mask(regs::xmm2); 208 | case 3: return R2Mask(regs::xmm3); 209 | 210 | default: BJIT_ASSERT(false); // FIXME: RA can't handle 211 | } 212 | #else 213 | switch(indexType) // SysV uses position by type 214 | { 215 | case 0: return R2Mask(regs::xmm0); 216 | case 1: return R2Mask(regs::xmm1); 217 | case 2: return R2Mask(regs::xmm2); 218 | case 3: return R2Mask(regs::xmm3); 219 | case 4: return R2Mask(regs::xmm4); 220 | case 5: return R2Mask(regs::xmm5); 221 | case 6: return R2Mask(regs::xmm6); 222 | case 7: return R2Mask(regs::xmm7); 223 | 224 | default: BJIT_ASSERT(false); // FIXME: RA can't handle 225 | } 226 | #endif 227 | 228 | // these are fixed 229 | case ops::iret: return R2Mask(regs::rax); 230 | case ops::fret: return R2Mask(regs::xmm0); 231 | case ops::dret: return R2Mask(regs::xmm0); 232 | 233 | } 234 | } 235 | 236 | RegMask Op::regsLost() 237 | { 238 | switch(opcode) 239 | { 240 | case ops::idiv: case ops::udiv: 241 | case 
ops::imod: case ops::umod: 242 | // mark the output as lost as well, so RA tries to save 243 | // if we still need the value after the division 244 | return R2Mask(regs::rax)|R2Mask(regs::rdx); 245 | 246 | // for now, collect registers used by previous args 247 | // this should help convince RA to do the right thing 248 | case ops::ipass: 249 | case ops::dpass: 250 | { 251 | RegMask used = 0; 252 | for(int i = 0; i < in[1]; ++i) 253 | { 254 | used |= regsIn(i); 255 | } 256 | return used; 257 | } 258 | 259 | case ops::icalln: case ops::fcalln: case ops::dcalln: 260 | case ops::icallp: case ops::fcallp: case ops::dcallp: 261 | return regs::caller_saved; 262 | 263 | default: return 0; 264 | } 265 | } 266 | 267 | #endif -------------------------------------------------------------------------------- /src/arch-x64.h: -------------------------------------------------------------------------------- 1 | 2 | #pragma once 3 | 4 | /* 5 | 6 | // 7 | // Basic instruction order is: 8 | // REX | OP | ModRM | SIB | DISP | IMM 9 | // 10 | // REX encoding: 0 1 0 0 W R X B 11 | // - W: op size override (1 = 64 bit) 12 | // - R: prefix ModRM.reg field 13 | // - X: prefix SIB.index 14 | // - B: prefix ModRM.rm field 15 | 16 | This is the table for 32-bit mode, with RIP-relative patched in, 17 | since rest of it is exactly the same except for REX bytes 18 | 19 | [---] means SIB byte follows 20 | 21 | r8(/r) AL CL DL BL AH CH DH BH 22 | r16(/r) AX CX DX BX SP BP SI DI 23 | r32(/r) EAX ECX EDX EBX ESP EBP ESI EDI 24 | mm(/r) MM0 MM1 MM2 MM3 MM4 MM5 MM6 MM7 25 | xmm(/r) XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 26 | (In decimal) /digit (Opcode) 0 1 2 3 4 5 6 7 27 | (In binary) REG = 000 001 010 011 100 101 110 111 28 | Effective Address Mod R/M Value of ModR/M Byte (in Hexadecimal) 29 | [EAX] 00 000 00 08 10 18 20 28 30 38 30 | [ECX] 001 01 09 11 19 21 29 31 39 31 | [EDX] 010 02 0A 12 1A 22 2A 32 3A 32 | [EBX] 011 03 0B 13 1B 23 2B 33 3B 33 | [---] 100 04 0C 14 1C 24 2C 34 3C 34 | 
[RIP]+disp32 101 05 0D 15 1D 25 2D 35 3D 35 | [ESI] 110 06 0E 16 1E 26 2E 36 3E 36 | [EDI] 111 07 0F 17 1F 27 2F 37 3F 37 | [EAX]+disp8 01 000 40 48 50 58 60 68 70 78 38 | [ECX]+disp8 001 41 49 51 59 61 69 71 79 39 | [EDX]+disp8 010 42 4A 52 5A 62 6A 72 7A 40 | [EBX]+disp8 011 43 4B 53 5B 63 6B 73 7B 41 | [---]+disp8 100 44 4C 54 5C 64 6C 74 7C 42 | [EBP]+disp8 101 45 4D 55 5D 65 6D 75 7D 43 | [ESI]+disp8 110 46 4E 56 5E 66 6E 76 7E 44 | [EDI]+disp8 111 47 4F 57 5F 67 6F 77 7F 45 | [EAX]+disp32 10 000 80 88 90 98 A0 A8 B0 B8 46 | [ECX]+disp32 001 81 89 91 99 A1 A9 B1 B9 47 | [EDX]+disp32 010 82 8A 92 9A A2 AA B2 BA 48 | [EBX]+disp32 011 83 8B 93 9B A3 AB B3 BB 49 | [---]+disp32 100 84 8C 94 9C A4 AC B4 BC 50 | [EBP]+disp32 101 85 8D 95 9D A5 AD B5 BD 51 | [ESI]+disp32 110 86 8E 96 9E A6 AE B6 BE 52 | [EDI]+disp32 111 87 8F 97 9F A7 AF B7 BF 53 | EAX/AX/AL/MM0/XMM0 11 000 C0 C8 D0 D8 E0 E8 F0 F8 54 | ECX/CX/CL/MM/XMM1 001 C1 C9 D1 D9 E1 E9 F1 F9 55 | EDX/DX/DL/MM2/XMM2 010 C2 CA D2 DA E2 EA F2 FA 56 | EBX/BX/BL/MM3/XMM3 011 C3 CB D3 DB E3 EB F3 FB 57 | ESP/SP/AH/MM4/XMM4 100 C4 CC D4 DC E4 EC F4 FC 58 | EBP/BP/CH/MM5/XMM5 101 C5 CD D5 DD E5 ED F5 FD 59 | ESI/SI/DH/MM6/XMM6 110 C6 CE D6 DE E6 EE F6 FE 60 | EDI/DI/BH/MM7/XMM7 111 C7 CF D7 DF E7 EF F7 FF 61 | 62 | */ 63 | 64 | namespace bjit 65 | { 66 | // this is a hint for opt-ra 67 | static const bool arch_explicit_output_regs = false; 68 | 69 | // we use this for types, etc 70 | typedef uint64_t RegMask; 71 | 72 | // convert single-register to a mask 73 | static RegMask R2Mask(int r) { return ((RegMask)1)<= 1 ? op.in[0] : noVal; 197 | in[1] = op.nInputs() >= 2 ? 
op.in[1] : noVal; 198 | 199 | BJIT_ASSERT(op.nInputs() <= 2); 200 | } 201 | } 202 | 203 | // NOTE: we need temporary to force the "noVals" 204 | bool isEqual(Op const & op) const 205 | { OpCSE tmp(noVal, op); return isEqual(tmp); } 206 | 207 | // NOTE: we need temporary to force the "noVals" 208 | static uint64_t getHash(Op const & op) 209 | { OpCSE tmp(noVal, op); return getHash(tmp); } 210 | 211 | bool isEqual(OpCSE const & op) const 212 | { return i64 == op.i64 && opcode == op.opcode; } 213 | 214 | static uint64_t getHash(OpCSE const & op) 215 | { return hash64(op.i64 + op.opcode); } 216 | }; 217 | 218 | // Variable rename tracker 219 | struct Rename 220 | { 221 | struct Map { 222 | uint16_t src, dst; 223 | Map(uint16_t s, uint16_t d) : src(s), dst(d) {} 224 | }; 225 | std::vector map; 226 | 227 | void add(uint16_t s, uint16_t d) { map.emplace_back(s,d); } 228 | 229 | Op & operator()(Op & op) 230 | { 231 | int n = op.nInputs(); 232 | if(!n) return op; 233 | for(auto & r : map) 234 | { 235 | switch(n) 236 | { 237 | case 3: if(op.in[2] == r.src) op.in[2] = r.dst; 238 | case 2: if(op.in[1] == r.src) op.in[1] = r.dst; 239 | case 1: if(op.in[0] == r.src) op.in[0] = r.dst; 240 | case 0: break; 241 | default: BJIT_ASSERT(false); 242 | } 243 | } 244 | return op; 245 | } 246 | }; 247 | 248 | // Use by Block to track actual phi-alternatives 249 | struct Phi 250 | { 251 | uint16_t phiop; 252 | uint16_t tmp; // used by DCE 253 | 254 | Phi() : phiop(noVal) {} 255 | Phi(uint16_t phiop) : phiop(phiop) {} 256 | }; 257 | 258 | struct PhiAlt 259 | { 260 | uint16_t phi; 261 | uint16_t src; 262 | uint16_t val; 263 | }; 264 | 265 | // One basic block 266 | struct Block 267 | { 268 | std::vector code; 269 | std::vector args; 270 | std::vector alts; 271 | 272 | void newAlt(uint16_t phi, uint16_t src, uint16_t val) 273 | { 274 | alts.emplace_back(PhiAlt{phi, src, val}); 275 | } 276 | 277 | std::vector livein; 278 | std::vector comeFrom; // which blocks we come from? 
// Print a single op (by index into 'ops') as one line of log output.
//
// The line is built up left to right: code position, SCC / spill slot,
// op index, output register, opcode name, no_opt marker, type and use
// count, the inputs (register:index pairs), then any immediate, memory
// offset, memory tag, constants, phi alternatives, argument indices and
// jump labels the opcode carries.
//
// Passing noVal prints a placeholder line for a removed op.
void bjit::Proc::debugOp(uint16_t iop) const
{
    if(iop == noVal) { BJIT_LOG(" -- removed op -- \n"); return; }
    auto & op = ops[iop];

    // position of the op
    BJIT_LOG("%4x:", op.pos);

    // SCC column: '=[..]=' when the value is flagged as spilled,
    // dashes when no SCC has been assigned, plain '(..)' otherwise
    if(op.hasOutput())
    {
        if(op.flags.spill) BJIT_LOG("=[%04x]= ", op.scc);
        else if(op.scc == noSCC) BJIT_LOG(" ---- ");
        else BJIT_LOG(" (%04x) ", op.scc);
        //else BJIT_LOG(" ");
    }
    else BJIT_LOG(" ");

    // make it clear which renames actually cause moves
    // (a rename whose input already sits in the same register is printed
    // with a dash instead of the opcode name)
    bool nopRename = false;
    if(op.opcode == ops::rename && op.reg == ops[op.in[0]].reg)
        nopRename = true;
    BJIT_LOG("%04x %6s %8s %c", iop,
        op.hasOutput() ? regName(op.reg) : "",
        nopRename ? " - " : op.strOpcode(),
        op.flags.no_opt ? '*' : ' ');

    // value type and number of uses
    switch(op.flags.type)
    {
        case Op::_none: BJIT_LOG(" "); break;
        case Op::_ptr: BJIT_LOG(" %3d ptr ", op.nUse); break;
        case Op::_f32: BJIT_LOG(" %3d f32 ", op.nUse); break;
        case Op::_f64: BJIT_LOG(" %3d f64 ", op.nUse); break;
    };

    // this should now hold
    // (unused input slots are expected to be noVal, unless the op
    // carries a 64-bit constant or a memory tag)
    if(!op.hasI64() && !op.hasF64())
    {
        if(op.nInputs() < 1) BJIT_ASSERT(op.in[0] == noVal);
        if(!op.hasMemTag()
        && op.nInputs() < 2) BJIT_ASSERT(op.in[1] == noVal);
    }

    // special-case reload to not print register
    if(op.opcode == ops::reload)
        BJIT_LOG(" [%04x]:%04x", ops[op.in[0]].scc, op.in[0]);
    else
    {
        // print each input as register:index; an extra marker string is
        // logged for any input that refers to a nop
        switch(op.nInputs())
        {
            case 1: BJIT_LOG(" %s:%04x",
                regNames[ops[op.in[0]].reg], op.in[0]);
                if(ops[op.in[0]].opcode == ops::nop) BJIT_LOG(" ");
                break;
            case 2: BJIT_LOG(" %s:%04x %s:%04x",
                regNames[ops[op.in[0]].reg], op.in[0],
                regNames[ops[op.in[1]].reg], op.in[1]);
                if(ops[op.in[0]].opcode == ops::nop) BJIT_LOG(" ");
                if(ops[op.in[1]].opcode == ops::nop) BJIT_LOG(" ");
                break;
            case 3: BJIT_LOG(" %s:%04x %s:%04x %s:%04x",
                regNames[ops[op.in[0]].reg], op.in[0],
                regNames[ops[op.in[1]].reg], op.in[1],
                regNames[ops[op.in[2]].reg], op.in[2]);
                if(ops[op.in[0]].opcode == ops::nop) BJIT_LOG(" ");
                if(ops[op.in[1]].opcode == ops::nop) BJIT_LOG(" ");
                if(ops[op.in[2]].opcode == ops::nop) BJIT_LOG(" ");
                break;
            case 0: break;
            default: BJIT_ASSERT(false);
        }
    }

    // near calls print imm32 as the target index rather than as a
    // signed immediate
    if(op.opcode == ops::icalln
    || op.opcode == ops::fcalln
    || op.opcode == ops::dcalln
    || op.opcode == ops::tcalln) BJIT_LOG(" near: %d", op.imm32);
    else if(op.hasImm32()) BJIT_LOG(" %+d", op.imm32);
    else if(op.hasMem()) BJIT_LOG(" %d", op.off16);

    if(op.hasMemTag()) BJIT_LOG(" mem(%04x)", op.memtag);

    // constants
    if(op.hasI64()) BJIT_LOG(" i64:%" PRId64, op.i64);
    if(op.hasF32()) BJIT_LOG(" f32:%.8e", op.f32);
    if(op.hasF64()) BJIT_LOG(" f64:%.8e", op.f64);

    // for a phi, list every alternative recorded for it in its block
    // as source-label:[scc]:value
    if(op.opcode == ops::phi)
    {
        for(auto & a : blocks[op.block].alts)
        {
            if(a.phi != iop) continue;
            if(ops[a.val].scc != noSCC)
                BJIT_LOG(" L%d:[%04x]:%04x", a.src, ops[a.val].scc, a.val);
            else
                BJIT_LOG(" L%d:[----]:%04x", a.src, a.val);
        }

        if(op.iv != noVal) BJIT_LOG(" IV:%04x", op.iv);
    }

    // procedure arguments: index within type and total index
    if(op.opcode == ops::iarg || op.opcode == ops::farg || op.opcode == ops::darg)
    {
        BJIT_LOG(" #%d total #%d", op.indexType, op.indexTotal);
    }

    // opcodes up to and including jmp carry one label,
    // those strictly before jmp (the conditional jumps) carry a second
    if(op.opcode <= ops::jmp) BJIT_LOG(" L%d", op.label[0]);
    if(op.opcode < ops::jmp) BJIT_LOG(" L%d", op.label[1]);

    BJIT_LOG("\n");
}
// This is a closed hashtable that stores a set of items
// and which allows searching by any key for which the
// Item-type provides methods getHash(Key) and isEqual(Key),
// which means it can be used as either a set or a map.
//
// Requirements on Item:
//  - default constructible and move assignable
//  - static uint64_t getHash(const Key &) for every Key type used,
//    including Item itself (used by insert)
//  - bool isEqual(const Key &) const for the same Key types
//
// The low 2 bits of every stored hash are replaced by the slot state,
// so only the upper 62 bits of getHash() take part in comparisons.
template <typename Item>
struct HashTable
{
    // minimum number of slots to use
    static const unsigned minSlots = 4;

    // resize when less than 1/freeFactor slots are free
    // this should normally be between 2 and 4
    //
    static const unsigned freeFactor = 3;

    // this implements visitor pattern
    // calls fn(item) for each stored item
    template <typename Visitor>
    void foreach(Visitor && fn)
    {
        for(unsigned i = 0; i < slots.size(); ++i)
        {
            Slot & s = slots[i];
            if(slotInUse == (s.hash & 0x3))
            {
                fn(s.item);
            }
        }
    }

    // number of items currently stored
    unsigned size() { return nUsed; }

    // return total capacity before resize
    unsigned capacity()
    {
        // FIXME: properly test that this isn't off-by-one or something?
        return (slots.size() * (freeFactor-1)) / freeFactor;
    }

    HashTable(unsigned reserveCapacity = 0) { reserve(reserveCapacity); }

    // make sure at least fitSize items fit without a resize
    // (never shrinks the table)
    void reserve(unsigned fitSize)
    {
        if(fitSize < minSlots) fitSize = minSlots;

        // FIXME: properly test that this isn't off-by-one or something?
        fitSize = 1 + fitSize * (freeFactor+1) / freeFactor;

        // slot count is always a power of two (probe() relies on this)
        unsigned wantSize = minSlots;
        while(wantSize < fitSize) wantSize <<= 1;

        if(wantSize > slots.size()) resize(wantSize);
    }

    // return existing item matching key or null
    template <typename Key>
    Item * find(const Key & k)
    {
        Slot & s = internalFind(k, false);
        if((s.hash & 0x3) != slotInUse) return 0;
        return &s.item;
    }

    // remove existing item matching key if any
    template <typename Key>
    void remove(const Key & k)
    {
        Slot & s = internalFind(k, false);
        if((s.hash & 0x3) == slotInUse)
        {
            // leave a tombstone so probe chains through this slot survive
            s.hash = slotRemoved | (s.hash & ~(uint64_t)0x3);
            s.item = Item();
            --nUsed;
        }
    }

    // add a new item - any existing matching key is replaced
    // NOTE: the item is moved from
    void insert(Item & i)
    {
        const uint64_t hash = Item::getHash(i);

        Slot & s = locate(i, hash);

        // replacing an existing item must not change the item count,
        // otherwise size() drifts and we resize for no reason
        const bool replacing = (slotInUse == (s.hash & 0x3));

        s.hash = slotInUse | (hash & ~(uint64_t)0x3);
        s.item = std::move(i);

        if(replacing) return;

        // check for resize, multiply out the divides from:
        //   (nSlots - nUsed) / nSlots < 1 / freeFactor
        if((slots.size() - (++nUsed)) * freeFactor < slots.size())
        {
            resize(slots.size() << 1);
        }
    }

    // clear the whole table (keeps the allocated slots)
    void clear()
    {
        for(unsigned i = 0; i < slots.size(); ++i)
        {
            Slot & s = slots[i];
            s.hash = slotFree;
            s.item = Item();
        }

        // nothing is stored anymore
        nUsed = 0;
    }

    // explicit rehash is useful in some situations
    // to clear lazily deleted junk that leads to long probes
    //
    // this can also optionally attempt to compact the table
    //
    void rehash(bool compact = false)
    {
        unsigned wantSlots = slots.size();
        if(compact)
        {
            // reserve at least minSlots before resize
            unsigned needSlots = nUsed + minSlots;

            // can we halve the size?
            while(wantSlots > minSlots)
            {
                // next candidate size
                unsigned halfSlots = wantSlots >> 1;

                // the items must actually fit; this also keeps the
                // unsigned subtraction below from wrapping around
                if(halfSlots <= needSlots) break;

                // would this cause a resize up
                if(halfSlots
                > (halfSlots - needSlots) * freeFactor) break;

                // accept the smaller size and iterate
                wantSlots = halfSlots;
            }
        }

        // do the actual rehash
        resize(wantSlots);
    }

private:
    // these are stored in low 2-bits of Slot's hash
    enum { slotFree, slotInUse, slotRemoved };
    struct Slot
    {
        Item        item;
        uint64_t    hash;

        // default to a free slot
        Slot() : hash(slotFree) {}
    };

    unsigned nUsed = 0; // for resize control

    // use std::vector for memory management
    std::vector<Slot>   slots;

    // the ultimate hash probe of death:
    //  - use a 2nd hash (upper 32 bits) to seed the probe
    //  - force the 2nd hash to be odd (all slots for pow2)
    //  - then use quadratic probe order on that
    //  - terrible for cache, but shouldn't cluster
    uint32_t probe(uint64_t hash, unsigned j)
    {
        return (hash + ((hash>>32)|1)*((j+j*j)/2)) & (slots.size() - 1);
    }

    // does this slot currently hold an item matching the key?
    // tombstones keep their old hash bits, so the state must be checked
    template <typename Key>
    static bool slotMatches(const Slot & s, const Key & k, uint64_t hash)
    {
        return slotInUse == (s.hash & 0x3)
            && s.hash>>2 == hash>>2 && s.item.isEqual(k);
    }

    // find the slot holding the key, or else the slot an insert of
    // this key should go into (a free slot or a reusable tombstone)
    // never modifies the slot it returns
    template <typename Key>
    Slot & locate(const Key & k, uint64_t hash)
    {
        // probe loop
        for(unsigned j = 0; j < slots.size(); ++j)
        {
            unsigned i = probe(hash, j);

            Slot & s = slots[i];

            // is this a free slot or a slot for this key
            if((slotFree == (s.hash & 0x3)) || slotMatches(s, k, hash))
            {
                return s;
            }

            // if this is a removed slot, then we need to
            // do a further probe
            if(slotRemoved == (s.hash & 0x3))
            {
                while(++j < slots.size())
                {
                    i = probe(hash, j);

                    Slot & ss = slots[i];

                    // if we find free slot, then key not found
                    // and we can reuse the removed slot
                    if(slotFree == (ss.hash & 0x3)) break;

                    // if we find legit match, return this slot
                    if(slotMatches(ss, k, hash)) return ss;
                }

                // didn't find a legit match
                // so reuse the first candidate
                return s;
            }
        }

        fprintf(stderr, "bjit::HashTable warning: probe failed\n");

        // if we are here then something is wrong with
        // our probing function.. but grow the table to play safe
        // (resize, not rehash: rehash takes a bool, not a size)
        resize(slots.size() << 1);

        // recursively try again, should never happen
        return locate(k, hash);
    }

    // find a slot for a given key, or a free slot to insert into
    // with doInsert the returned slot is marked in use with the key's hash
    template <typename Key>
    Slot & internalFind(const Key & k, bool doInsert)
    {
        const uint64_t hash = Item::getHash(k);

        Slot & s = locate(k, hash);
        if(doInsert)
        {
            // mark the slot as in use and set hash
            s.hash = slotInUse | (hash & ~(uint64_t)0x3);
        }
        return s;
    }

    // replace the slot array with one of newSize slots (power of two)
    // and reinsert all live items; tombstones are dropped
    void resize(unsigned newSize)
    {
        // create a new vector with new size
        std::vector<Slot>   tmp(newSize);

        // swap with the existing slots
        slots.swap(tmp);

        // fast-path resize on empty table
        if(!nUsed) return;

        // reset load factor, this gets recalculated
        nUsed = 0;

        // loop the old table to rehash
        for(unsigned i = 0; i < tmp.size(); ++i)
        {
            Slot & s = tmp[i];
            if(slotInUse == (s.hash&0x3))
            {
                insert(s.item);
            }
        }
    }
};
const char * name; 10 | 11 | unsigned outputs; 12 | unsigned inputs; 13 | } opData[] = { BJIT_OPS(BJIT_DATA) }; 14 | 15 | const char * bjit::impl::Op::strOpcode() const 16 | { 17 | return opData[this->opcode].name; 18 | } 19 | 20 | bool bjit::impl::Op::hasOutput() const 21 | { 22 | return 0 != (opData[this->opcode].outputs & 0x3); 23 | } 24 | 25 | unsigned bjit::impl::Op::nInputs() const 26 | { 27 | return opData[this->opcode].inputs & 0x3; // mask the flags 28 | } 29 | 30 | bool bjit::impl::Op::hasImm32() const 31 | { 32 | return 0 != (opData[this->opcode].inputs & BJIT_IMM32); 33 | } 34 | 35 | bool bjit::impl::Op::hasI64() const 36 | { 37 | return 0 != (opData[this->opcode].inputs & BJIT_I64); 38 | } 39 | 40 | bool bjit::impl::Op::hasF64() const 41 | { 42 | return 0 != (opData[this->opcode].inputs & BJIT_F64); 43 | } 44 | 45 | bool bjit::impl::Op::hasF32() const 46 | { 47 | return 0 != (opData[this->opcode].inputs & BJIT_F32); 48 | } 49 | 50 | bool bjit::impl::Op::hasMem() const 51 | { 52 | return (opData[this->opcode].inputs & BJIT_MEM); 53 | } 54 | 55 | bool bjit::impl::Op::hasMemTag() const 56 | { 57 | return hasMem() && canCSE(); 58 | } 59 | 60 | bool bjit::impl::Op::hasSideFX() const 61 | { 62 | return !opData[this->opcode].outputs 63 | || (opData[this->opcode].outputs & BJIT_SIDEFX); 64 | } 65 | 66 | bool bjit::impl::Op::canCSE() const 67 | { 68 | return (opData[this->opcode].outputs & BJIT_CSE); 69 | } 70 | 71 | bool bjit::impl::Op::canMove() const 72 | { 73 | return !(opData[this->opcode].outputs & BJIT_NOMOVE); 74 | } 75 | 76 | bool bjit::impl::Op::anyOutReg() const 77 | { 78 | return (opData[this->opcode].outputs & BJIT_ANYREG); 79 | } 80 | -------------------------------------------------------------------------------- /src/ir-ops.h: -------------------------------------------------------------------------------- 1 | 2 | #pragma once 3 | 4 | // List of operations, order is significant 5 | // _(name, nOutputs, nInputs) 6 | 7 | // output flags: 8 | // 9 
// if both SIDEFX and CSE are defined (eg. idiv) then we treat it as
// having SIDEFX for "safe" optimization, and CSE for "unsafe" optimizations
//
// The output flags below are added to the output count in the second
// column of BJIT_OPS (the low 2 bits hold the count itself).

#define BJIT_SIDEFX 0x10 // never DCE, don't move loads across
#define BJIT_CSE    0x20 // can CSE
#define BJIT_NOMOVE 0x40 // must be in the beginning of a block
#define BJIT_ANYREG 0x80 // ignore 2-reg ISA (eg. can swap operands)

// input flags (lowest 2 bits are nInputs)
// these are added to the input count in the third column of BJIT_OPS
#define BJIT_MEM    0x08 // offset16 + memtag
#define BJIT_IMM32  0x10 // has imm32 operand
#define BJIT_I64    0x20 // has 64-bit integer constant
#define BJIT_F64    0x40 // has double constant
#define BJIT_F32    0x80 // has single constant

// X-macro: expands the caller-supplied _(name, outputs, inputs) once per
// opcode, producing a comma separated list in opcode order.
//
// The relative order of entries is load-bearing (see the notes inside the
// list), so new opcodes must not be inserted in the middle of a group
// without checking every group that is documented as matching it.
#define BJIT_OPS(_) \
    /* CAREFUL WITH THE ORDER HERE (see below also) */ \
    /* (xor 1): branch signed integer comparisons */ \
    /* (xor 2): operations swapped */ \
    _(jilt, 0, 2), \
    _(jige, 0, 2), \
    _(jigt, 0, 2), \
    _(jile, 0, 2), \
    /* (xor 1): branch unsigned integer comparisons */ \
    /* (xor 2): operations swapped */ \
    _(jult, 0, 2), \
    _(juge, 0, 2), \
    _(jugt, 0, 2), \
    _(jule, 0, 2), \
    /* (xor 1): branch integer equality (equal, not equal) */ \
    _(jieq, 0, 2), \
    _(jine, 0, 2), \
    /* (xor 1): branch double equality (equal, not equal) */ \
    _(jdeq, 0, 2), \
    _(jdne, 0, 2), \
    /* (xor 1): branch double comparisons */ \
    /* (xor 2): operations swapped */ \
    _(jdlt, 0, 2), \
    _(jdge, 0, 2), \
    _(jdgt, 0, 2), \
    _(jdle, 0, 2), \
    /* (xor 1): branch float comparisons */ \
    /* (xor 2): operations swapped */ \
    _(jflt, 0, 2), \
    _(jfge, 0, 2), \
    _(jfgt, 0, 2), \
    _(jfle, 0, 2), \
    /* (xor 1): branch float equality (equal, not equal) */ \
    _(jfeq, 0, 2), \
    _(jfne, 0, 2), \
    /* (xor 1): integer zero, not-zero tests */ \
    _(jz, 0, 1), \
    _(jnz, 0, 1), \
    /* */ \
    /* NOTE: THESE SHOULD MATCH THOSE STARTING FROM 'jilt' */ \
    /* SO MAKE SURE THE POSITIONS STAY RELATIVE */ \
    /* */ \
    /* (xor 1): branch signed integer comparisons */ \
    _(jiltI, 0, 1+BJIT_IMM32), \
    _(jigeI, 0, 1+BJIT_IMM32), \
    _(jigtI, 0, 1+BJIT_IMM32), \
    _(jileI, 0, 1+BJIT_IMM32), \
    /* (xor 1): branch unsigned integer comparisons */ \
    _(jultI, 0, 1+BJIT_IMM32), \
    _(jugeI, 0, 1+BJIT_IMM32), \
    _(jugtI, 0, 1+BJIT_IMM32), \
    _(juleI, 0, 1+BJIT_IMM32), \
    /* (xor 1): branch integer equality comparisons */ \
    _(jieqI, 0, 1+BJIT_IMM32), \
    _(jineI, 0, 1+BJIT_IMM32), \
    /* control flow, jump must come after conditionals! */ \
    /* make sure there are even number of these (for xor1 below) */ \
    /* (currently eight entries, dummy_align being the padding one) */ \
    _(jmp, 0, 0), \
    _(dret, 0, 1), \
    _(fret, 0, 1), \
    _(iret, 0, 1), \
    _(iretI, 0, BJIT_IMM32), /* opt-dce needs to know which one is last */ \
    _(tcallp, 0, 1), \
    _(tcalln, 0, BJIT_IMM32), \
    _(dummy_align, 0, 0), \
    /* */ \
    /* NOTE: THESE SHOULD MATCH THOSE STARTING FROM 'jilt' */ \
    /* SO MAKE SURE THE POSITIONS STAY RELATIVE */ \
    /* */ \
    /* (xor 1): signed integer comparisons */ \
    _(ilt, BJIT_ANYREG+BJIT_CSE+1, 2), \
    _(ige, BJIT_ANYREG+BJIT_CSE+1, 2), \
    _(igt, BJIT_ANYREG+BJIT_CSE+1, 2), \
    _(ile, BJIT_ANYREG+BJIT_CSE+1, 2), \
    /* (xor 1): unsigned integer comparisons */ \
    _(ult, BJIT_ANYREG+BJIT_CSE+1, 2), \
    _(uge, BJIT_ANYREG+BJIT_CSE+1, 2), \
    _(ugt, BJIT_ANYREG+BJIT_CSE+1, 2), \
    _(ule, BJIT_ANYREG+BJIT_CSE+1, 2), \
    /* (xor 1): integer equality (equal, not equal) */ \
    _(ieq, BJIT_ANYREG+BJIT_CSE+1, 2), \
    _(ine, BJIT_ANYREG+BJIT_CSE+1, 2), \
    /* (xor 1): double equality (equal, not equal) */ \
    _(deq, BJIT_ANYREG+BJIT_CSE+1, 2), \
    _(dne, BJIT_ANYREG+BJIT_CSE+1, 2), \
    /* (xor 1): double comparisons */ \
    _(dlt, BJIT_ANYREG+BJIT_CSE+1, 2), \
    _(dge, BJIT_ANYREG+BJIT_CSE+1, 2), \
    _(dgt, BJIT_ANYREG+BJIT_CSE+1, 2), \
    _(dle, BJIT_ANYREG+BJIT_CSE+1, 2), \
    /* (xor 1): floating point comparisons */ \
    _(flt, BJIT_ANYREG+BJIT_CSE+1, 2), \
    _(fge, BJIT_ANYREG+BJIT_CSE+1, 2), \
    _(fgt, BJIT_ANYREG+BJIT_CSE+1, 2), \
    _(fle, BJIT_ANYREG+BJIT_CSE+1, 2), \
    /* (xor 1): float equality (equal, not equal) */ \
    _(feq, BJIT_ANYREG+BJIT_CSE+1, 2), \
    _(fne, BJIT_ANYREG+BJIT_CSE+1, 2), \
    /* */ \
    /* NOTE: THESE SHOULD MATCH THOSE STARTING FROM 'jilt' */ \
    /* SO MAKE SURE THE POSITIONS STAY RELATIVE */ \
    /* */ \
    /* (xor 1): signed integer comparisons */ \
    _(iltI, BJIT_CSE+1, 1+BJIT_IMM32), \
    _(igeI, BJIT_CSE+1, 1+BJIT_IMM32), \
    _(igtI, BJIT_CSE+1, 1+BJIT_IMM32), \
    _(ileI, BJIT_CSE+1, 1+BJIT_IMM32), \
    /* (xor 1): unsigned integer comparisons */ \
    _(ultI, BJIT_CSE+1, 1+BJIT_IMM32), \
    _(ugeI, BJIT_CSE+1, 1+BJIT_IMM32), \
    _(ugtI, BJIT_CSE+1, 1+BJIT_IMM32), \
    _(uleI, BJIT_CSE+1, 1+BJIT_IMM32), \
    /* (xor 1): integer equality (equal, not equal) */ \
    _(ieqI, BJIT_CSE+1, 1+BJIT_IMM32), \
    _(ineI, BJIT_CSE+1, 1+BJIT_IMM32), \
    /* integer arithmetic */ \
    _(iadd, BJIT_ANYREG+BJIT_CSE+1, 2), \
    _(isub, BJIT_CSE+1, 2), \
    _(ineg, BJIT_CSE+1, 1), \
    _(imul, BJIT_ANYREG+BJIT_CSE+1, 2), \
    /* division by zero is a "side-effect" */ \
    _(idiv, BJIT_SIDEFX+BJIT_CSE+1, 2), \
    _(imod, BJIT_SIDEFX+BJIT_CSE+1, 2), \
    /* unsigned integer arithmetic */ \
    _(udiv, BJIT_SIDEFX+BJIT_CSE+1, 2), \
    _(umod, BJIT_SIDEFX+BJIT_CSE+1, 2), \
    /* integer bitwise */ \
    _(inot, BJIT_CSE+1, 1), \
    _(iand, BJIT_ANYREG+BJIT_CSE+1, 2), \
    _(ior, BJIT_ANYREG+BJIT_CSE+1, 2), \
    _(ixor, BJIT_ANYREG+BJIT_CSE+1, 2), \
    /* integer shifts */ \
    _(ishl, BJIT_CSE+1, 2), \
    _(ishr, BJIT_CSE+1, 2), \
    _(ushr, BJIT_CSE+1, 2), \
    /* integer arithmetic */ \
    _(iaddI, BJIT_CSE+1, 1+BJIT_IMM32), \
    _(isubI, BJIT_CSE+1, 1+BJIT_IMM32), \
    _(imulI, BJIT_ANYREG+BJIT_CSE+1, 1+BJIT_IMM32), \
    /* integer bitwise */ \
    _(iandI, BJIT_CSE+1, 1+BJIT_IMM32), \
    _(iorI, BJIT_CSE+1, 1+BJIT_IMM32), \
    _(ixorI, BJIT_CSE+1, 1+BJIT_IMM32), \
    /* integer shifts */ \
    _(ishlI, BJIT_CSE+1, 1+BJIT_IMM32), \
    _(ishrI, BJIT_CSE+1, 1+BJIT_IMM32), \
    _(ushrI, BJIT_CSE+1, 1+BJIT_IMM32), \
    /* double arithmetic */ \
    _(dadd, BJIT_ANYREG+BJIT_CSE+1, 2), \
    _(dsub, BJIT_CSE+1, 2), \
    _(dneg, BJIT_CSE+1, 1), \
    _(dabs, BJIT_CSE+1, 1), \
    _(dmul, BJIT_ANYREG+BJIT_CSE+1, 2), \
    _(ddiv, BJIT_CSE+1, 2), \
    /* float arithmetic */ \
    _(fadd, BJIT_ANYREG+BJIT_CSE+1, 2), \
    _(fsub, BJIT_CSE+1, 2), \
    _(fneg, BJIT_CSE+1, 1), \
    _(fabs, BJIT_CSE+1, 1), \
    _(fmul, BJIT_ANYREG+BJIT_CSE+1, 2), \
    _(fdiv, BJIT_CSE+1, 2), \
    /* type conversions */ \
    _(ci2d, BJIT_CSE+1, 1), \
    _(cd2i, BJIT_CSE+1, 1), \
    _(ci2f, BJIT_CSE+1, 1), \
    _(cf2i, BJIT_CSE+1, 1), \
    _(cf2d, BJIT_CSE+1, 1), \
    _(cd2f, BJIT_CSE+1, 1), \
    /* reinterpret bitcasts */ \
    _(bci2d, BJIT_CSE+1, 1), \
    _(bcd2i, BJIT_CSE+1, 1), \
    _(bci2f, BJIT_CSE+1, 1), \
    _(bcf2i, BJIT_CSE+1, 1), \
    /* load constants */ \
    _(lci, BJIT_CSE+1, BJIT_I64), \
    _(lcf, BJIT_CSE+1, BJIT_F32), \
    _(lcd, BJIT_CSE+1, BJIT_F64), \
    /* load near proc address */ \
    _(lnp, BJIT_CSE+1, BJIT_IMM32), \
    /* sign-extend values (cast to smaller type) */ \
    _(i8, BJIT_CSE+1, 1), \
    _(i16, BJIT_CSE+1, 1), \
    _(i32, BJIT_CSE+1, 1), \
    /* unsigned variants (zero-extend) */ \
    _(u8, BJIT_CSE+1, 1), \
    _(u16, BJIT_CSE+1, 1), \
    _(u32, BJIT_CSE+1, 1), \
    /* memory loads: load out <- [in0+offset] */ \
    /* ANYREG 'cos typically explicit output reg */ \
    /* integer variants: sign-extended */ \
    /* treat as potentially causing side-effects */ \
    /* NOTE(review): none of the loads below carry BJIT_SIDEFX, */ \
    /* so the line above may be stale - confirm */ \
    _(li8, BJIT_ANYREG+BJIT_CSE+1, 1+BJIT_MEM), \
    _(li16, BJIT_ANYREG+BJIT_CSE+1, 1+BJIT_MEM), \
    _(li32, BJIT_ANYREG+BJIT_CSE+1, 1+BJIT_MEM), \
    _(li64, BJIT_ANYREG+BJIT_CSE+1, 1+BJIT_MEM), \
    /* unsigned variants (zero-extend) */ \
    _(lu8, BJIT_ANYREG+BJIT_CSE+1, 1+BJIT_MEM), \
    _(lu16, BJIT_ANYREG+BJIT_CSE+1, 1+BJIT_MEM), \
    _(lu32, BJIT_ANYREG+BJIT_CSE+1, 1+BJIT_MEM), \
    /* float */ \
    _(lf32, BJIT_ANYREG+BJIT_CSE+1, 1+BJIT_MEM), \
    _(lf64, BJIT_ANYREG+BJIT_CSE+1, 1+BJIT_MEM), \
    /* two reg versions - NOTE: must be in same order! */ \
    _(l2i8, BJIT_ANYREG+BJIT_CSE+1, 2+BJIT_MEM), \
    _(l2i16, BJIT_ANYREG+BJIT_CSE+1, 2+BJIT_MEM), \
    _(l2i32, BJIT_ANYREG+BJIT_CSE+1, 2+BJIT_MEM), \
    _(l2i64, BJIT_ANYREG+BJIT_CSE+1, 2+BJIT_MEM), \
    /* unsigned variants (zero-extend) */ \
    _(l2u8, BJIT_ANYREG+BJIT_CSE+1, 2+BJIT_MEM), \
    _(l2u16, BJIT_ANYREG+BJIT_CSE+1, 2+BJIT_MEM), \
    _(l2u32, BJIT_ANYREG+BJIT_CSE+1, 2+BJIT_MEM), \
    /* float */ \
    _(l2f32, BJIT_ANYREG+BJIT_CSE+1, 2+BJIT_MEM), \
    _(l2f64, BJIT_ANYREG+BJIT_CSE+1, 2+BJIT_MEM), \
    /* memory stores: store [in0+offset] <- in1 */ \
    _(si8, 0, 2+BJIT_MEM), \
    _(si16, 0, 2+BJIT_MEM), \
    _(si32, 0, 2+BJIT_MEM), \
    _(si64, 0, 2+BJIT_MEM), \
    /* floating point */ \
    _(sf32, 0, 2+BJIT_MEM), \
    _(sf64, 0, 2+BJIT_MEM), \
    /* two reg versions - NOTE: must be in same order! */ \
    _(s2i8, 0, 3+BJIT_MEM), \
    _(s2i16, 0, 3+BJIT_MEM), \
    _(s2i32, 0, 3+BJIT_MEM), \
    _(s2i64, 0, 3+BJIT_MEM), \
    /* floating point */ \
    _(s2f32, 0, 3+BJIT_MEM), \
    _(s2f64, 0, 3+BJIT_MEM), \
    /* procedure arguments */ \
    _(iarg, 1+BJIT_NOMOVE, 0), \
    _(farg, 1+BJIT_NOMOVE, 0), \
    _(darg, 1+BJIT_NOMOVE, 0), \
    /* Call arguments - right to left before call */ \
    _(ipass, 0, 1), \
    _(fpass, 0, 1), \
    _(dpass, 0, 1), \
    /* Indirect calls: typed for return value */ \
    _(icallp, 1+BJIT_SIDEFX, 1), \
    _(fcallp, 1+BJIT_SIDEFX, 1), \
    _(dcallp, 1+BJIT_SIDEFX, 1), \
    /* Module local "near" calls, relocated */ \
    _(icalln, 1+BJIT_SIDEFX, BJIT_IMM32), \
    _(fcalln, 1+BJIT_SIDEFX, BJIT_IMM32), \
    _(dcalln, 1+BJIT_SIDEFX, BJIT_IMM32), \
    /* this is user-requested allocation with reg = stack pointer */ \
    _(alloc, 1+BJIT_SIDEFX+BJIT_NOMOVE, BJIT_IMM32), \
    /* this keeps the compiler from moving loads across */ \
    _(fence, BJIT_SIDEFX, 0), \
    /* pseudo-ops: polymorphic */ \
    _(phi, 1+BJIT_NOMOVE, 0), \
    _(rename, 1, 1), \
    _(reload, 1, 1), \
    _(nop, 0, 0) /* removed by DCE */
4 | #if defined(__unix__) || defined(__LINUX__) || defined(__APPLE__) 5 | # define BJIT_USE_MMAP 6 | # define BJIT_CAN_LOAD 7 | # include 8 | #endif 9 | 10 | #if defined(_WIN32) 11 | # define BJIT_CAN_LOAD 12 | # include 13 | #endif 14 | 15 | #ifdef __APPLE__ 16 | # define MAP_ANONYMOUS MAP_ANON // the joy of being different 17 | #endif 18 | 19 | #include 20 | 21 | #include "bjit.h" 22 | 23 | using namespace bjit; 24 | 25 | static void flush_cache(char * exec_mem, uint32_t mmapSize) 26 | { 27 | #if defined(__GNUC__) || defined(__clang__) 28 | // flush icache with a "portable" builtin 29 | __builtin___clear_cache(exec_mem, exec_mem + mmapSize); 30 | #elif defined(__aarch64__) 31 | # warning arm64 i-cache might be left stale 32 | #endif 33 | } 34 | 35 | uintptr_t Module::load(unsigned mmapSizeMin) 36 | { 37 | BJIT_ASSERT(!exec_mem); 38 | 39 | #ifndef BJIT_CAN_LOAD 40 | return 0; 41 | #endif 42 | 43 | // compute sizes 44 | mmapSize = mmapSizeMin; 45 | loadSize = bytes.size(); 46 | 47 | if(mmapSize < loadSize) mmapSize = loadSize; 48 | 49 | #ifdef BJIT_USE_MMAP 50 | // get a block of memory we can mess with, read+write 51 | exec_mem = mmap(NULL, mmapSize, PROT_READ | PROT_WRITE, 52 | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); 53 | if(!exec_mem) 54 | { 55 | BJIT_LOG("error: mmap failed in bjit::Module::load()\n"); 56 | return 0; 57 | } 58 | #endif 59 | #ifdef _WIN32 60 | exec_mem = VirtualAlloc(0, mmapSize, MEM_COMMIT, PAGE_READWRITE); 61 | if(!exec_mem) 62 | { 63 | BJIT_LOG("error: VirtualAlloc failed in bjit::Module::load()\n"); 64 | return 0; 65 | } 66 | #endif 67 | 68 | // copy & relocate 69 | memcpy(exec_mem, bytes.data(), bytes.size()); 70 | for(auto & r : relocs) 71 | { 72 | BJIT_ASSERT(r.procIndex < offsets.size()); 73 | arch_patchNear(r.codeOffset+(uint8_t*)exec_mem, offsets[r.procIndex]); 74 | } 75 | 76 | #ifdef BJIT_USE_MMAP 77 | // return zero on success 78 | if(mprotect(exec_mem, mmapSize, PROT_READ | PROT_EXEC)) 79 | { 80 | BJIT_LOG("error: mprotect failed in 
bjit::Module::load()\n"); 81 | // if we can't set executable, then try to unload 82 | unload(); 83 | return 0; 84 | } 85 | 86 | flush_cache((char*)exec_mem, mmapSize); 87 | 88 | #endif 89 | #ifdef _WIN32 90 | // Note that VirtualProtect REQUIRES oldFlags to be a valid pointer! 91 | // returns non-zero on success 92 | DWORD oldFlags = 0; 93 | if(!VirtualProtect(exec_mem, mmapSize, PAGE_EXECUTE_READ, &oldFlags)) 94 | { 95 | BJIT_LOG("error: mprotect failed in bjit::Module::load()\n"); 96 | // if we can't set executable, then try to unload 97 | unload(); 98 | return 0; 99 | } 100 | #endif 101 | 102 | return (uintptr_t) exec_mem; 103 | } 104 | 105 | bool Module::patch() 106 | { 107 | BJIT_ASSERT(exec_mem); 108 | 109 | // check if patching is going to work? 110 | if(mmapSize < bytes.size()) return false; 111 | 112 | #ifdef BJIT_USE_MMAP 113 | // return zero on success 114 | BJIT_ASSERT(!mprotect(exec_mem, mmapSize, PROT_READ | PROT_WRITE)); 115 | #endif 116 | #ifdef _WIN32 117 | // Note that VirtualProtect REQUIRES oldFlags to be a valid pointer! 
118 | // returns non-zero on success 119 | DWORD oldFlags = 0; 120 | BJIT_ASSERT(VirtualProtect(exec_mem, mmapSize, PAGE_READWRITE, &oldFlags)); 121 | #endif 122 | 123 | // copy and relocate, only new ones 124 | memcpy(loadSize+(uint8_t*)exec_mem, loadSize+bytes.data(), 125 | bytes.size()-loadSize); 126 | for(auto & r : relocs) 127 | { 128 | if(r.codeOffset < loadSize) continue; 129 | 130 | BJIT_ASSERT(r.procIndex < offsets.size()); 131 | arch_patchNear(r.codeOffset+(uint8_t*)exec_mem, offsets[r.procIndex]); 132 | } 133 | loadSize = bytes.size(); 134 | 135 | // do all pending stub-patches 136 | for(auto & p : stubPatches) 137 | { 138 | arch_patchStub(offsets[p.procIndex] + (uint8_t*)exec_mem, p.newAddress); 139 | } 140 | stubPatches.clear(); 141 | 142 | // near patches 143 | for(auto & p : nearPatches) 144 | { 145 | uint32_t delta = offsets[p.newTarget] - offsets[p.oldTarget]; 146 | for(auto & r : relocs) 147 | { 148 | if(r.codeOffset < p.offsetStart 149 | || r.codeOffset >= p.offsetEnd) continue; 150 | 151 | if(r.procIndex == p.oldTarget) 152 | { 153 | r.procIndex = p.newTarget; 154 | // relocate 155 | arch_patchNear(r.codeOffset+(uint8_t*)exec_mem, delta); 156 | } 157 | } 158 | } 159 | nearPatches.clear(); 160 | 161 | #ifdef BJIT_USE_MMAP 162 | // return zero on success 163 | BJIT_ASSERT(!mprotect(exec_mem, mmapSize, PROT_READ | PROT_EXEC)); 164 | 165 | flush_cache((char*)exec_mem, mmapSize); 166 | 167 | #endif 168 | #ifdef _WIN32 169 | // Note that VirtualProtect REQUIRES oldFlags to be a valid pointer! 
170 | // returns non-zero on success 171 | BJIT_ASSERT(VirtualProtect(exec_mem, mmapSize, PAGE_EXECUTE_READ, &oldFlags)); 172 | #endif 173 | 174 | return true; 175 | } 176 | 177 | uintptr_t Module::unload() 178 | { 179 | BJIT_ASSERT(exec_mem); 180 | 181 | #ifdef BJIT_USE_MMAP 182 | munmap(exec_mem, mmapSize); 183 | #endif 184 | #ifdef _WIN32 185 | VirtualFree(exec_mem, 0, MEM_RELEASE); 186 | #endif 187 | 188 | uintptr_t ret = (uintptr_t) exec_mem; 189 | 190 | // do near-patches on unload if any 191 | for(auto & p : nearPatches) 192 | { 193 | for(auto & r : relocs) 194 | { 195 | if(r.codeOffset < p.offsetStart 196 | || r.codeOffset >= p.offsetEnd) continue; 197 | 198 | if(r.procIndex == p.oldTarget) 199 | { 200 | r.procIndex = p.newTarget; 201 | } 202 | } 203 | } 204 | 205 | // patches are not useful after unload 206 | stubPatches.clear(); 207 | nearPatches.clear(); 208 | 209 | exec_mem = 0; 210 | mmapSize = 0; 211 | loadSize = 0; 212 | 213 | return ret; 214 | } -------------------------------------------------------------------------------- /src/opt-dce.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "bjit.h" 3 | 4 | using namespace bjit; 5 | 6 | void Proc::opt_dce(bool unsafeOpt) 7 | { 8 | bool progress = true; 9 | 10 | int iters = 0; 11 | while(progress) 12 | { 13 | ++iters; 14 | progress = false; 15 | for(auto & b : blocks) 16 | { 17 | if(!b.flags.live) continue; 18 | b.flags.live = false; 19 | 20 | for(auto & i : b.code) 21 | { 22 | if(i == noVal) continue; 23 | // NOTE: nUse aliases on labels 24 | if(ops[i].hasOutput()) ops[i].nUse = 0; 25 | } 26 | } 27 | 28 | todo.clear(); 29 | live.clear(); 30 | 31 | todo.push_back(0); 32 | live.push_back(todo.back()); 33 | blocks[0].flags.live = true; 34 | 35 | while(todo.size()) 36 | { 37 | auto b = todo.back(); todo.pop_back(); 38 | bool deadTail = false; 39 | 40 | for(auto i : blocks[b].code) 41 | { 42 | if(i == noVal) continue; 43 | 44 | if(deadTail) 45 | { 46 | 
ops[i].makeNOP(); 47 | continue; 48 | } 49 | 50 | switch(ops[i].nInputs()) 51 | { 52 | case 3: ++ops[ops[i].in[2]].nUse; 53 | case 2: ++ops[ops[i].in[1]].nUse; 54 | case 1: ++ops[ops[i].in[0]].nUse; 55 | case 0: break; 56 | default: BJIT_ASSERT(false); 57 | } 58 | 59 | // only need to look at last op 60 | if(ops[i].opcode <= ops::jmp) 61 | for(int k = 0; k < 2; ++k) 62 | { 63 | if(k && ops[i].opcode == ops::jmp) break; 64 | 65 | while(true) 66 | { 67 | auto bsrc = ops[i].label[k]; 68 | auto & kc = blocks[bsrc].code; 69 | auto tjmp = noVal; 70 | 71 | // threading conditional jumps through empty loop preheaders 72 | // with multiple entry-blocks causes havoc with IV logic 73 | // so avoid conditional jumps into targets with phis for now 74 | if(ops[i].opcode < ops::jmp 75 | && (kc[0] == noVal || ops[kc[0]].opcode == ops::phi)) break; 76 | 77 | // skip over phis 78 | for(int i = 0; i < kc.size(); ++i) 79 | { 80 | tjmp = kc[i]; 81 | if(tjmp == noVal || ops[tjmp].opcode != ops::phi) 82 | break; 83 | } 84 | 85 | // can we thread this? 
86 | if(tjmp == noVal || i == tjmp 87 | || ops[tjmp].opcode != ops::jmp) break; 88 | 89 | auto target = ops[tjmp].label[0]; 90 | 91 | // do another pass 92 | if(false && blocks[target].code[0] == noVal) 93 | { 94 | progress = true; 95 | break; 96 | } 97 | 98 | // if the block we're jumping from has phis 99 | // then validate that target block also has them 100 | bool noPhi = false; 101 | for(auto & p : blocks[bsrc].args) 102 | { 103 | // this can happen 104 | if(p.phiop == noVal 105 | || ops[p.phiop].opcode == ops::nop) continue; 106 | 107 | bool good = false; 108 | for(auto & a : blocks[target].alts) 109 | { 110 | if(a.src == bsrc && a.val == p.phiop) 111 | { 112 | good = true; 113 | break; 114 | } 115 | } 116 | if(!good) { noPhi = true; break; } 117 | } 118 | 119 | if(noPhi) break; 120 | 121 | // if we are jumping into a block with phis then 122 | // validate that a blocks isn't there for shuffle 123 | //if(ops[blocks[target].code[0]].opcode == ops::phi) 124 | if(blocks[target].alts.size()) 125 | { 126 | bool bad = false; 127 | 128 | // clear temps 129 | for(auto & a : blocks[target].args) a.tmp = noVal; 130 | 131 | // find relevant alternatives 132 | auto & args = blocks[target].args; 133 | for(auto & a : blocks[target].alts) 134 | { 135 | // is this alternative relevant? 136 | if(a.src != ops[i].block 137 | && a.src != ops[i].label[k]) continue; 138 | 139 | auto val = a.val; 140 | 141 | // resolve local phis 142 | if(ops[val].opcode == ops::phi 143 | && ops[val].block == a.src) 144 | { 145 | bool good = false; 146 | for(auto & s : blocks[a.src].alts) 147 | { 148 | if(s.phi != val) continue; 149 | if(s.src != b) continue; 150 | val = s.val; 151 | 152 | good = true; 153 | break; 154 | } 155 | // FIXME: figure out why we might not 156 | // sometimes find a suitable alt? 
157 | if(!good) { bad = true; break; } 158 | } 159 | 160 | // check for duplicate 161 | for(auto & s : blocks[target].alts) 162 | { 163 | if(a.phi == s.phi && b == s.src) 164 | { 165 | if(s.val == val) continue; 166 | 167 | // seems this block exists for shuffle 168 | bad = true; 169 | break; 170 | } 171 | } 172 | 173 | // phi got removed this pass? 174 | if(ops[a.phi].opcode == ops::nop) continue; 175 | 176 | // if we've not seen it, store tmp 177 | if(args[ops[a.phi].phiIndex].tmp == noVal) 178 | { 179 | args[ops[a.phi].phiIndex].tmp = val; 180 | } 181 | else if(args[ops[a.phi].phiIndex].tmp != val) 182 | { 183 | bad = true; 184 | break; 185 | } 186 | } 187 | 188 | if(bad) break; 189 | } 190 | 191 | // patch target phis 192 | for(int ai = 0, sz = blocks[target].alts.size(); 193 | ai < sz; ++ai) 194 | { 195 | auto & a = blocks[target].alts[ai]; 196 | if(a.src == ops[i].label[k]) 197 | { 198 | auto val = a.val; 199 | 200 | // resolve local phis 201 | if(ops[val].opcode == ops::phi 202 | && ops[val].block == a.src) 203 | { 204 | bool good = false; 205 | for(auto & s : blocks[a.src].alts) 206 | { 207 | if(s.phi != val) continue; 208 | if(s.src != b) continue; 209 | val = s.val; 210 | good = true; 211 | break; 212 | } 213 | BJIT_ASSERT_MORE(good); 214 | } 215 | 216 | // check for duplicate 217 | bool dedup = false; 218 | for(auto & s : blocks[target].alts) 219 | { 220 | if(a.phi == s.phi && b == s.src) 221 | { 222 | BJIT_ASSERT_MORE(s.val == val); 223 | dedup = true; 224 | } 225 | } 226 | 227 | if(!dedup) blocks[target].newAlt(a.phi, b, val); 228 | } 229 | } 230 | 231 | ops[i].label[k] = target; 232 | progress = true; // need at least new DOMs 233 | } 234 | 235 | if(!blocks[ops[i].label[k]].flags.live) 236 | { 237 | todo.push_back(ops[i].label[k]); 238 | live.push_back(todo.back()); 239 | blocks[ops[i].label[k]].flags.live = true; 240 | } 241 | } 242 | 243 | if(ops[i].opcode < ops::jmp) 244 | { 245 | if(ops[i].label[0] == ops[i].label[1]) 246 | { 247 | ops[i].opcode = 
ops::jmp; 248 | ops[i].in[0] = noVal; 249 | ops[i].in[1] = noVal; 250 | progress = true; 251 | } 252 | } 253 | 254 | if(ops[i].opcode <= ops::tcallp) deadTail = true; 255 | } 256 | } 257 | 258 | // phi-uses 259 | for(auto & bi : live) 260 | { 261 | auto & b = blocks[bi]; 262 | // cleanup dead sources 263 | { 264 | int j = 0; 265 | for(int i = 0; i < b.alts.size(); ++i) 266 | { 267 | if(ops[b.alts[i].phi].opcode == ops::nop) continue; 268 | if(!blocks[b.alts[i].src].flags.live) continue; 269 | if(j != i) b.alts[j] = b.alts[i]; 270 | ++j; 271 | } 272 | b.alts.resize(j); 273 | } 274 | // set tmp sources to noVal 275 | for(auto & a : b.args) a.tmp = noVal; 276 | 277 | // find which phis have actual uses 278 | for(auto & s : b.alts) 279 | { 280 | // ignore simple loopback 281 | if(s.phi == s.val) continue; 282 | 283 | // if we don't have a value yet, set this as value 284 | if(b.args[ops[s.phi].phiIndex].tmp == noVal) 285 | { 286 | b.args[ops[s.phi].phiIndex].tmp = s.val; 287 | } 288 | else if(b.args[ops[s.phi].phiIndex].tmp != s.val) 289 | { 290 | // had more than one value, need to keep this 291 | b.args[ops[s.phi].phiIndex].tmp = s.phi; 292 | } 293 | } 294 | 295 | // set use-counts for phis we're going to keep 296 | for(auto & s : b.alts) 297 | { 298 | if(b.args[ops[s.phi].phiIndex].tmp == s.phi) 299 | { 300 | ++ops[s.val].nUse; 301 | } 302 | } 303 | } 304 | 305 | for(auto & b : live) 306 | { 307 | // rename 308 | for(auto i : blocks[b].code) 309 | { 310 | if(i == noVal) continue; 311 | 312 | // rename phis we can eliminate 313 | for(int k = 0; k < ops[i].nInputs(); ++k) 314 | { 315 | auto phiIndex = ops[i].in[k]; 316 | auto & phi = ops[phiIndex]; 317 | 318 | if(phi.opcode != ops::phi) continue; 319 | 320 | auto src = blocks[phi.block].args[phi.phiIndex].tmp; 321 | if(src != phiIndex) 322 | { 323 | ops[i].in[k] = src; 324 | ++ops[src].nUse; 325 | progress = true; 326 | } 327 | } 328 | } 329 | 330 | for(auto & a : blocks[b].alts) 331 | { 332 | auto & phi = ops[a.val]; 
333 | if(phi.opcode != ops::phi) continue; 334 | 335 | auto src = blocks[phi.block].args[phi.phiIndex].tmp; 336 | if(src != a.val) 337 | { 338 | a.val = src; 339 | ++ops[src].nUse; 340 | progress = true; 341 | } 342 | } 343 | } 344 | 345 | // count how many ops we have alive 346 | // this is used by CSE for intelligent hash sizing 347 | liveOps = 0; 348 | 349 | for(auto bi : live) 350 | { 351 | auto & b = blocks[bi]; 352 | 353 | // loop backwards to figure out what's dead 354 | for(int i = b.code.size(); i--;) 355 | { 356 | if(b.code[i] == noVal) continue; 357 | 358 | auto & op = ops[b.code[i]]; 359 | if(op.opcode == ops::nop) continue; 360 | 361 | // NOTE: nUse aliases on labels, check other stuff first 362 | if((op.hasSideFX() && (!unsafeOpt || !op.canCSE())) 363 | || op.nUse) continue; 364 | 365 | switch(op.nInputs()) 366 | { 367 | case 3: --ops[op.in[2]].nUse; 368 | case 2: --ops[op.in[1]].nUse; 369 | case 1: --ops[op.in[0]].nUse; 370 | case 0: 371 | op.makeNOP(); 372 | progress = true; 373 | break; 374 | default: BJIT_ASSERT(false); 375 | } 376 | } 377 | 378 | // loop forward to cleanup 379 | int j = 0; 380 | for(int i = 0; i < b.code.size(); ++i) 381 | { 382 | if(b.code[i] == noVal) continue; 383 | if(ops[b.code[i]].opcode == ops::nop) continue; 384 | if(!ops[b.code[i]].hasSideFX() && !ops[b.code[i]].nUse) continue; 385 | 386 | if(j != i) 387 | { 388 | b.code[j] = b.code[i]; 389 | } 390 | ops[b.code[j]].pos = j; 391 | ++j; 392 | } 393 | 394 | if(b.code.size() != j) { b.code.resize(j); progress = true; } 395 | liveOps += j; 396 | } 397 | } 398 | 399 | BJIT_LOG("\n DCE:%d", iters); 400 | } 401 | 402 | void Proc::findUsesBlock(int b, bool inOnly, bool localOnly) 403 | { 404 | // compute which ops are used by this block 405 | // this must be done in reverse 406 | for(int c = blocks[b].code.size(); c--;) 407 | { 408 | if(blocks[b].code[c] == noVal) continue; 409 | auto & op = ops[blocks[b].code[c]]; 410 | 411 | if(!localOnly && op.opcode <= ops::jmp) 412 | for(int 
k = 0; k < 2; ++k) 413 | { 414 | if(k && op.opcode == ops::jmp) break; 415 | 416 | for(auto & s : blocks[op.label[k]].alts) 417 | { 418 | if(s.src != b) continue; 419 | 420 | if(0) BJIT_LOG("live out %d->%d : v%04x\n", 421 | b, op.label[k], s.val); 422 | 423 | ++ops[s.val].nUse; 424 | } 425 | for(auto & a : blocks[op.label[k]].livein) 426 | { 427 | ++ops[a].nUse; 428 | } 429 | } 430 | 431 | switch(op.nInputs()) 432 | { 433 | case 3: ++ops[op.in[2]].nUse; 434 | case 2: ++ops[op.in[1]].nUse; 435 | case 1: ++ops[op.in[0]].nUse; 436 | case 0: break; 437 | default: BJIT_ASSERT(false); 438 | } 439 | 440 | // for ops that define values, set nUse to zero 441 | if(inOnly && op.hasOutput()) op.nUse = 0; 442 | } 443 | } 444 | 445 | void Proc::rebuild_livein() 446 | { 447 | // cleanup stale phis 448 | rebuild_cfg(); 449 | 450 | BJIT_ASSERT(live.size()); 451 | 452 | for(auto & op : ops) 453 | { 454 | // NOTE: nUse aliases on labels 455 | if(op.hasOutput()) op.nUse = 0; 456 | } 457 | 458 | for(auto & b : live) blocks[b].livein.clear(); 459 | 460 | int iter = 0; 461 | bool progress = true; 462 | while(progress) 463 | { 464 | ++iter; 465 | progress = false; 466 | 467 | // reverse live almost always requires less iteration 468 | for(int b = live.size();b--;) 469 | { 470 | auto sz = blocks[live[b]].livein.size(); 471 | 472 | findUsesBlock(live[b], true, false); 473 | blocks[live[b]].livein.clear(); 474 | 475 | for(int i = 0; i < ops.size(); ++i) 476 | { 477 | // is this a variable that we need? 
478 | if(!ops[i].hasOutput() || !ops[i].nUse) continue; 479 | 480 | //BJIT_LOG(" v%04x live in %d\n", i, live[b]); 481 | blocks[live[b]].livein.push_back(i); 482 | ops[i].nUse = 0; 483 | } 484 | 485 | if(blocks[live[b]].livein.size() != sz) progress = true; 486 | } 487 | } 488 | 489 | if(blocks[0].livein.size()) debug(); 490 | BJIT_ASSERT(!blocks[0].livein.size()); 491 | 492 | BJIT_LOG(" Live:%d", iter); 493 | } 494 | -------------------------------------------------------------------------------- /src/opt-dom.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "bjit.h" 3 | 4 | using namespace bjit; 5 | 6 | void Proc::rebuild_cfg() 7 | { 8 | // Redo liveblocks .. sometimes this is called before DCE 9 | for(auto & b : live) blocks[b].flags.live = false; 10 | todo.clear(); 11 | live.clear(); 12 | 13 | todo.push_back(0); 14 | live.push_back(0); 15 | blocks[0].flags.live = true; 16 | while(todo.size()) 17 | { 18 | auto b = todo.back(); 19 | todo.pop_back(); 20 | 21 | auto & jmp = ops[blocks[b].code.back()]; 22 | 23 | if(jmp.opcode <= ops::jmp) 24 | for(int k = 0; k < 2; ++k) 25 | { 26 | if(k && jmp.opcode == ops::jmp) break; 27 | 28 | if(!blocks[jmp.label[k]].flags.live) 29 | { 30 | todo.push_back(jmp.label[k]); 31 | live.push_back(jmp.label[k]); 32 | blocks[jmp.label[k]].flags.live = true; 33 | } 34 | } 35 | } 36 | 37 | // rebuild comeFrom, should delay this until iteration done 38 | for(int b = live.size();b--;) blocks[live[b]].comeFrom.clear(); 39 | for(int b = live.size();b--;) 40 | { 41 | // if this fails, we're probably missing return 42 | BJIT_ASSERT(blocks[live[b]].code.size()); 43 | 44 | BJIT_ASSERT(blocks[live[b]].code.back() != noVal); 45 | 46 | BJIT_ASSERT(ops[blocks[live[b]].code.back()].opcode <= ops::tcalln); 47 | 48 | auto & op = ops[blocks[live[b]].code.back()]; 49 | if(op.opcode < ops::jmp) 50 | { 51 | blocks[op.label[1]].comeFrom.push_back(live[b]); 52 | } 53 | if(op.opcode <= ops::jmp) 54 | { 55 | 
blocks[op.label[0]].comeFrom.push_back(live[b]); 56 | } 57 | } 58 | 59 | // cleanup dead phi alternatives 60 | for(auto & b : live) 61 | { 62 | int j = 0; 63 | auto & alts = blocks[b].alts; 64 | for(int i = 0; i < alts.size(); ++i) 65 | { 66 | if(ops[alts[i].val].opcode == ops::nop) continue; 67 | 68 | bool keep = false; 69 | for(auto s : blocks[b].comeFrom) 70 | { 71 | if(alts[i].src != s) continue; 72 | keep = true; 73 | break; 74 | } 75 | if(!keep) continue; 76 | if(i != j) alts[j] = alts[i]; 77 | ++j; 78 | } 79 | if(j != alts.size()) alts.resize(j); 80 | } 81 | } 82 | 83 | void Proc::rebuild_dom() 84 | { 85 | rebuild_cfg(); 86 | 87 | // find dominator algorithm 88 | // 89 | // start with every node dominating itself 90 | // iterate blocks n until no change: 91 | // tdom(n) = blocks 92 | // for p in comeFrom(n): 93 | // tdom(n) = sdom(n) intersect dom(p) 94 | // dom(n) = { n } union sdom(n) 95 | 96 | 97 | // We run the same algorithm twice, first for post-dominators 98 | // then for dominators. We only keep immediate post-dominators 99 | // but we rebuild (in order) the full-list for dominators. 100 | // 101 | // We do post-dominators first so we can use .dom as temp for both 102 | // which saves some useless per-block allocation. 103 | 104 | int domIters = 0; 105 | bool iterate = true; 106 | std::vector tdom; 107 | 108 | // post dominators first, so we can reuse .dom 109 | for(auto & b : live) 110 | { 111 | // reset postdominators 112 | if(ops[blocks[b].code.back()].opcode > ops::jmp) 113 | { 114 | blocks[b].dom.clear(); 115 | blocks[b].dom.push_back(b); 116 | } 117 | else blocks[b].dom = live; 118 | } 119 | 120 | while(iterate) 121 | { 122 | iterate = false; 123 | ++domIters; 124 | 125 | // backwards 126 | for(int bi = live.size(); bi--;) 127 | { 128 | auto & b = blocks[live[bi]]; 129 | auto & jmp = ops[b.code.back()]; 130 | // this is an exit block 131 | if(jmp.opcode > ops::jmp) continue; 132 | 133 | int nLabel = (jmp.opcode == ops::jmp) ? 
1 : 2; 134 | 135 | tdom = live; 136 | for(int k = 0; k < nLabel; ++k) 137 | { 138 | for(int t = 0; t < tdom.size();) 139 | { 140 | bool found = false; 141 | for(auto & d : blocks[jmp.label[k]].dom) 142 | { 143 | if(d == tdom[t]) found = true; 144 | } 145 | 146 | if(found) { ++t; } 147 | else 148 | { 149 | std::swap(tdom[t], tdom.back()); 150 | tdom.pop_back(); 151 | } 152 | } 153 | } 154 | 155 | bool foundSelf = false; 156 | for(auto & t : tdom) 157 | { if(t != live[bi]) continue; foundSelf = true; break; } 158 | 159 | if(!foundSelf) tdom.push_back(live[bi]); 160 | if(tdom.size() != b.dom.size()) iterate = true; 161 | 162 | // save copy, we'll reset tdom above anyway 163 | std::swap(b.dom, tdom); 164 | } 165 | } 166 | 167 | // push theoretical exit-block (unifies multiple returns) 168 | for(auto & b : live) { blocks[b].dom.push_back(noVal); } 169 | 170 | // find immediate post-dominators: we use the fact that the immediate 171 | // dominator must have exactly one less dominator 172 | for(auto & b : live) 173 | { 174 | blocks[b].pdom = noVal; 175 | for(auto & d : blocks[b].dom) 176 | { 177 | if(d == noVal) continue; // no common post-dominator 178 | if(blocks[d].dom.size() == blocks[b].dom.size() - 1) 179 | { 180 | blocks[b].pdom = d; 181 | break; 182 | } 183 | } 184 | } 185 | 186 | // forward pass 187 | for(auto & b : live) 188 | { 189 | // reset dominators 190 | if(!b) { 191 | blocks[b].dom.clear(); 192 | blocks[b].dom.push_back(b); 193 | } 194 | else blocks[b].dom = live; 195 | } 196 | 197 | iterate = true; 198 | while(iterate) 199 | { 200 | iterate = false; 201 | ++domIters; 202 | 203 | for(auto & b : live) 204 | { 205 | // this is entry block 206 | if(!b) continue; 207 | BJIT_ASSERT(blocks[b].comeFrom.size()); 208 | 209 | tdom = live; 210 | for(auto & f : blocks[b].comeFrom) 211 | { 212 | for(int t = 0; t < tdom.size();) 213 | { 214 | bool found = false; 215 | for(auto & d : blocks[f].dom) 216 | { 217 | if(d == tdom[t]) found = true; 218 | } 219 | 220 | 
if(found) { ++t; } 221 | else 222 | { 223 | std::swap(tdom[t], tdom.back()); 224 | tdom.pop_back(); 225 | } 226 | } 227 | } 228 | 229 | bool foundSelf = false; 230 | for(auto & t : tdom) { if(t != b) continue; foundSelf = true; break; } 231 | 232 | if(!foundSelf) tdom.push_back(b); 233 | if(tdom.size() != blocks[b].dom.size()) iterate = true; 234 | 235 | // save copy, we'll reset tdom above anyway 236 | std::swap(blocks[b].dom, tdom); 237 | } 238 | } 239 | 240 | // find immediate dominators: we use the fact that the immediate 241 | // dominator must have exactly one less dominator 242 | for(auto & b : live) 243 | { 244 | blocks[b].idom = 0; 245 | 246 | for(auto & d : blocks[b].dom) 247 | { 248 | if(blocks[d].dom.size() == blocks[b].dom.size() - 1) 249 | { 250 | blocks[b].idom = d; 251 | break; 252 | } 253 | } 254 | 255 | } 256 | 257 | // order dominators; we use these for CCD in CSE 258 | for(auto & b : live) 259 | { 260 | for(auto & d : blocks[b].dom) d = noVal; 261 | for(int d = b, i = blocks[b].dom.size(); i--;) 262 | { 263 | blocks[b].dom[i] = d; 264 | d = blocks[d].idom; 265 | } 266 | } 267 | 268 | BJIT_LOG(" Dom:%d", domIters); 269 | }; -------------------------------------------------------------------------------- /src/opt-jump.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "bjit.h" 3 | 4 | using namespace bjit; 5 | 6 | /* 7 | 8 | This implements non-trivial control-flow optimizations. 9 | Trivial jump-threading is not done here, we let DCE handle that. 10 | 11 | */ 12 | 13 | static const bool jump_debug = false; 14 | 15 | // This optimizes simple back edges: target block dominates and branches. 16 | // Break critical edges if any. Copy target block into a new block. 17 | // 18 | // Then for each block whose immediate dominator is the target block 19 | // find all live-in variables originating from that block and insert phis. 20 | // Rename the variables in any block dominated by this block. 
21 | // 22 | // This effectively gives us loop-inversion for simple loops. 23 | // 24 | bool Proc::opt_jump_be(uint16_t b) 25 | { 26 | auto & jmp = ops[blocks[b].code.back()]; 27 | 28 | // is this simple jump? 29 | if(jmp.opcode != ops::jmp) 30 | { 31 | if(jump_debug) BJIT_LOG(" JUMP:%d not jump (%s)\n", b, jmp.strOpcode()); 32 | return false; 33 | } 34 | 35 | auto target = jmp.label[0]; 36 | 37 | // does the target dominate? 38 | if(blocks[b].dom.size() <= blocks[target].dom.size() 39 | || blocks[b].dom[blocks[target].dom.size()-1] != target) 40 | { 41 | if(jump_debug) BJIT_LOG(" JUMP:%d target doesn't dominate\n", b); 42 | return false; 43 | } 44 | 45 | // does the target end in a branch? 46 | auto & jcc = ops[blocks[target].code.back()]; 47 | if(jcc.opcode >= ops::jmp) 48 | { 49 | if(jump_debug) BJIT_LOG(" JUMP:%d not branch\n", b); 50 | return false; 51 | } 52 | 53 | if(jump_debug) 54 | BJIT_LOG(" LOOP:%d (%d:%d,%d)", b, target, jcc.label[0], jcc.label[1]); 55 | else 56 | BJIT_LOG(" LOOP:%d", b); 57 | 58 | // break edges if target has phis (valid or not) 59 | if(ops[blocks[jcc.label[0]].code[0]].opcode == ops::phi) 60 | { 61 | jcc.label[0] = breakEdge(target, jcc.label[0]); 62 | } 63 | if(ops[blocks[jcc.label[1]].code[0]].opcode == ops::phi) 64 | { 65 | jcc.label[1] = breakEdge(target, jcc.label[1]); 66 | } 67 | 68 | // Make a carbon-copy of the target block 69 | uint16_t nb = blocks.size(); 70 | blocks.resize(blocks.size() + 1); 71 | if(jump_debug) BJIT_LOG("\n Jump L%d -> L%d (was: L%d)\n", b, nb, target); 72 | 73 | auto & head = blocks[target]; 74 | auto & copy = blocks[nb]; 75 | copy.flags.live = true; 76 | copy.dom = blocks[b].dom; 77 | copy.dom.push_back(nb); 78 | copy.idom = b; 79 | copy.pdom = blocks[b].pdom; // shouldn't REALLY need pdoms, but fix anyway 80 | blocks[b].pdom = nb; 81 | live.push_back(nb); 82 | 83 | jmp.label[0] = nb; 84 | 85 | // we copy all the phis too 86 | copy.args.resize(head.args.size()); 87 | 88 | impl::Rename renameCopy; 89 | 
impl::Rename renameJump; 90 | 91 | BJIT_ASSERT(head.code.size()); 92 | for(int i = 0; i < head.code.size(); ++i) 93 | { 94 | auto opiIndex = head.code[i]; 95 | auto & opi = ops[opiIndex]; 96 | auto opcIndex = addOp(opi.opcode, opi.flags.type, nb); 97 | auto & opc = ops[opcIndex]; 98 | 99 | // copy operands 100 | opc.i64 = opi.i64; 101 | 102 | renameCopy(opc); 103 | 104 | // for jumps copy labels 105 | if(opc.opcode <= ops::jmp) 106 | { 107 | opc.label[0] = opi.label[0]; 108 | opc.label[1] = opi.label[1]; 109 | 110 | // need to fix come from 111 | blocks[opc.label[0]].comeFrom.push_back(nb); 112 | blocks[opc.label[1]].comeFrom.push_back(nb); 113 | 114 | // stop further loop-optimization here 115 | // even if this folds into a simple jmp 116 | opc.flags.no_opt = true; 117 | 118 | break; // never copy dead tails 119 | } 120 | 121 | // for phis, copy sources 122 | if(opc.opcode == ops::phi) 123 | { 124 | BJIT_ASSERT(opc.phiIndex == opi.phiIndex); 125 | copy.args[opc.phiIndex].phiop = opcIndex; 126 | } 127 | 128 | renameCopy.add(opiIndex, opcIndex); 129 | } 130 | 131 | // copy phi alts 132 | copy.alts = head.alts; 133 | for(auto & a : copy.alts) 134 | { 135 | a.phi = blocks[nb].args[ops[a.phi].phiIndex].phiop; 136 | } 137 | 138 | if(jump_debug) BJIT_LOG("Copied %d ops.\n", (int) copy.code.size()); 139 | 140 | BJIT_ASSERT(copy.code.size()); 141 | 142 | // next we need to fix all blocks that target immediately dominates 143 | for(auto fb : live) 144 | { 145 | auto & fixBlock = blocks[fb]; 146 | if(fixBlock.idom != target) continue; 147 | 148 | if(jump_debug) BJIT_LOG("Block %d needs fixup.\n", fb); 149 | 150 | int nPhi = 0; 151 | for(auto & in : fixBlock.livein) 152 | { 153 | // does this come from original head? 154 | if(ops[in].block == target) ++nPhi; 155 | } 156 | 157 | // insert phis 158 | fixBlock.code.insert(fixBlock.code.begin(), nPhi, noVal); 159 | 160 | int iPhi = 0; 161 | for(auto & in : fixBlock.livein) 162 | { 163 | // does this come from original head? 
164 | if(ops[in].block != target) continue; 165 | 166 | fixBlock.code[iPhi] = newOp(ops::phi, ops[in].flags.type, fb); 167 | 168 | // target needs to rename to use the phi 169 | renameJump.add(in, fixBlock.code[iPhi]); 170 | 171 | // setup the new phi 172 | ops[fixBlock.code[iPhi]].phiIndex = fixBlock.args.size(); 173 | ops[fixBlock.code[iPhi]].iv = noVal; 174 | fixBlock.args.emplace_back(impl::Phi(fixBlock.code[iPhi])); 175 | 176 | // add alternatives, they are in our rename map 177 | // we fix the real sources later 178 | for(auto & r : renameCopy.map) 179 | { 180 | if(r.src != in) continue; 181 | 182 | fixBlock.newAlt(fixBlock.code[iPhi], target, r.src); 183 | fixBlock.newAlt(fixBlock.code[iPhi], nb, r.dst); 184 | 185 | break; 186 | } 187 | 188 | ++iPhi; 189 | } 190 | } 191 | 192 | // put the original phis to jump rename list 193 | for(auto & r : renameCopy.map) renameJump.add(r.src, r.dst); 194 | 195 | // do a second pass to actually rename 196 | for(auto fb : live) 197 | { 198 | auto & fixBlock = blocks[fb]; 199 | if(fixBlock.idom != target) continue; 200 | 201 | renameCopy.map.clear(); 202 | // filter renames relevant to this block 203 | for(auto & r : renameJump.map) 204 | { 205 | if(ops[r.dst].block == fb) renameCopy.add(r.src, r.dst); 206 | } 207 | 208 | // find all blocks dominated by this block 209 | for(auto rb : live) 210 | { 211 | // we need to do this the old-fashioned way because 212 | // break-edge doesn't try to fix .dom globally 213 | bool found = false; 214 | for(int db = rb; db; db = blocks[db].idom) 215 | { 216 | if(db != fb) continue; 217 | found = true; 218 | break; 219 | } 220 | if(!found) continue; 221 | 222 | if(jump_debug) 223 | BJIT_LOG("Renaming L%d in branch %d\n", rb, fb); 224 | 225 | // rename livein for better debugs 226 | for(auto & in : blocks[rb].livein) 227 | for(auto & r : renameCopy.map) 228 | { 229 | if(in == r.src) in = r.dst; 230 | } 231 | 232 | for(auto & rop : blocks[rb].code) 233 | { 234 | renameCopy(ops[rop]); 235 | } 
236 | 237 | // don't patch jumps in the copied block, we already fixed these 238 | if(rb == nb) continue; 239 | 240 | auto & rjmp = ops[blocks[rb].code.back()]; 241 | if(rjmp.opcode > ops::jmp) continue; // return or tail-call 242 | 243 | for(int x = 0; x < 2; ++x) 244 | { 245 | if(x && rjmp.opcode == ops::jmp) break; 246 | 247 | if(jump_debug) BJIT_LOG("Patching jump to %d\n", rjmp.label[x]); 248 | for(auto & s : blocks[rjmp.label[x]].alts) 249 | { 250 | if(s.src == rb && ops[s.val].block != target) 251 | { 252 | if(jump_debug) 253 | BJIT_LOG("L:%d:%04x is from %d (keep)\n", 254 | s.src, s.val, ops[s.val].block); 255 | continue; 256 | } 257 | // is this from somewhere else? 258 | if(s.src != rb) continue; 259 | 260 | for(auto & r : renameCopy.map) 261 | { 262 | if(s.val == r.src) 263 | { 264 | if(jump_debug) 265 | BJIT_LOG("L:%d:%04x needs rewrite: ", 266 | s.src, s.val); 267 | 268 | if(s.src == rb) 269 | { 270 | if(jump_debug) 271 | BJIT_LOG("simple: L:%d:%04x\n", 272 | ops[r.dst].block, r.dst); 273 | s.val = r.dst; 274 | } 275 | else BJIT_ASSERT(false); 276 | break; 277 | } 278 | } 279 | } 280 | } 281 | } 282 | } 283 | 284 | if(jump_debug) { debug(); } 285 | 286 | return true; 287 | } 288 | 

// Jump-optimisation pass over the live blocks.
//
// For each live block (in `live` order) the terminating op is inspected and,
// in this order:
//   1. a plain jmp into a block with exactly one predecessor and no leading
//      phi is merged: the target's ops are appended to this block and the
//      phi sources of the new successors are re-pointed at this block;
//   2. a conditional jump whose second label is the block's pdom gets its
//      condition flipped (opcode ^ 1) and its labels swapped;
//   3. terminators flagged no_opt are skipped;
//   4. a conditional jump back to its own block gets that edge broken with
//      breakEdge() and opt_jump_be() is tried on the new block;
//   5. otherwise opt_jump_be() is tried on a plain jmp.
//
// The scan stops at the first merge or successful opt_jump_be(). The pass
// always finishes with rebuild_cfg() and opt_dce().
//
// Returns true if anything was changed.
bool Proc::opt_jump()
{
    rebuild_dom();      // not needed when run right after CSE
    rebuild_livein();   // not needed when run right after sink

    if(jump_debug) debug();

    BJIT_LOG(" JUMP");

    bool changed = false;
    const int nLive = live.size();
    for(int idx = 0; idx < nLive; ++idx)
    {
        auto blk = live[idx];

        const auto lastOp = blocks[blk].code.back();
        if(lastOp == noVal) continue;

        // NOTE(review): `term` is a reference into `ops` and stays in use
        // across breakEdge()/opt_jump_be() below - confirm those calls can
        // never reallocate `ops`.
        auto & term = ops[lastOp];

        // (1) pointless jump: pull the target's contents into this block
        const bool trivialPull = term.opcode == ops::jmp
            && blocks[term.label[0]].comeFrom.size() == 1
            && ops[blocks[term.label[0]].code[0]].opcode != ops::phi;

        if(trivialPull)
        {
            const auto donor = term.label[0];
            auto & into = blocks[blk].code;

            into.pop_back();    // drop the jmp itself
            for(auto & moved : blocks[donor].code)
            {
                into.push_back(moved);
                ops[moved].block = blk;
                moved = noVal;  // dead at the original site
            }

            // phi-sources that named the donor must now name this block;
            // a jmp has one target, a conditional has two, anything
            // else (return / tail-call) has none
            auto & tail = ops[into.back()];
            const int nTargets =
                (tail.opcode < ops::jmp) ? 2
                : (tail.opcode == ops::jmp) ? 1 : 0;

            for(int t = 0; t < nTargets; ++t)
            {
                for(auto & alt : blocks[tail.label[t]].alts)
                {
                    if(alt.src == donor) alt.src = blk;
                }
            }

            BJIT_LOG(" MERGE");
            changed = true;
            break;
        }

        // (2) if second branch is pdom, swap so DFS runs on loops first
        if(term.opcode < ops::jmp && blocks[blk].pdom == term.label[1])
        {
            term.opcode ^= 1;
            std::swap(term.label[0], term.label[1]);
        }

        // (3) already handled earlier
        if(term.flags.no_opt) continue;

        // (4) degenerate loops: a conditional branch straight back to itself
        bool restart = false;
        for(int side = 0; side < 2; ++side)
        {
            if(!(term.opcode < ops::jmp && term.label[side] == blk)) continue;

            term.label[side] = breakEdge(blk, blk);
            term.flags.no_opt = true;
            changed = true;

            // want doms though (FIXME: move this to opt_jmp_be only?)
            rebuild_dom();

            if(jump_debug) BJIT_LOG(" TRY %d\n", term.label[side]);

            if(opt_jump_be(term.label[side]))
            {
                restart = true;
                break;
            }
        }
        if(restart) break;

        // (5) no trivial pull was possible, try the general jump opt,
        // but only once per fold since live info needs updating after
        if(term.opcode == ops::jmp && opt_jump_be(blk))
        {
            changed = true;
            break;
        }
    }

    // we really need this here because it cleans up
    // any stale phis, so DCE doesn't get confused
    rebuild_cfg();
    opt_dce();

    return changed;
}
404 | 405 | void Proc::find_ivs() 406 | { 407 | //debug(); 408 | // detect IVs 409 | rebuild_dom(); 410 | BJIT_LOG(" IV"); 411 | for(auto & b : live) 412 | { 413 | for(auto & p : blocks[b].args) 414 | { 415 | if(p.phiop == noVal) continue; 416 | ops[p.phiop].iv = p.phiop; 417 | } 418 | 419 | auto findSource = [&](uint16_t val) -> uint16_t 420 | { 421 | while(ops[val].opcode == ops::rename) 422 | { 423 | val = ops[val].in[0]; 424 | } 425 | return val; 426 | }; 427 | 428 | for(auto & a : blocks[b].alts) 429 | { 430 | // FIXME: This condition assumes that all loops have a preheader 431 | if(a.src == blocks[b].idom) continue; 432 | 433 | auto avs = findSource(a.val); 434 | auto & val = ops[avs]; 435 | auto & phi = ops[a.phi]; 436 | 437 | if(phi.iv == avs) continue; 438 | if(val.opcode == ops::phi) { phi.iv = noVal; continue; } 439 | 440 | if(phi.iv == noVal) continue; 441 | if(phi.iv != a.phi) { phi.iv = noVal; continue; } 442 | 443 | switch(val.nInputs()) 444 | { 445 | case 2: 446 | if(findSource(val.in[1]) == a.phi) 447 | { 448 | // other operand must dominate PHI 449 | for(auto & d : blocks[blocks[b].idom].dom) 450 | { 451 | if(ops[val.in[0]].block != d) continue; 452 | phi.iv = avs; 453 | break; 454 | } 455 | if(phi.iv != avs) phi.iv = noVal; 456 | } 457 | else if(findSource(val.in[0]) == a.phi) 458 | { 459 | // other operand must dominate PHI 460 | for(auto & d : blocks[blocks[b].idom].dom) 
461 | { 462 | if(ops[val.in[1]].block != d) continue; 463 | phi.iv = avs; 464 | break; 465 | } 466 | if(phi.iv != avs) phi.iv = noVal; 467 | } 468 | break; 469 | case 1: 470 | if(findSource(val.in[0]) == a.phi) phi.iv = avs; 471 | break; 472 | 473 | default: 474 | // if this is not one-op or two-op then 475 | // it's almost certainly not a valid IV 476 | phi.iv = noVal; 477 | break; 478 | } 479 | 480 | } 481 | 482 | for(auto & p : blocks[b].args) 483 | { 484 | if(p.phiop == noVal) continue; 485 | if(ops[p.phiop].iv == p.phiop) ops[p.phiop].iv = noVal; 486 | } 487 | 488 | } 489 | } -------------------------------------------------------------------------------- /src/opt-sink.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "bjit.h" 3 | 4 | using namespace bjit; 5 | 6 | static const bool sink_debug = false; 7 | 8 | bool Proc::opt_sink(bool unsafeOpt) 9 | { 10 | rebuild_livein(); 11 | 12 | // livescan doesn't find phi-inputs, we need them here 13 | for(auto b : live) 14 | { 15 | for(auto & s : blocks[b].alts) 16 | { 17 | blocks[b].livein.push_back(s.val); 18 | } 19 | } 20 | 21 | BJIT_LOG(" SINK"); 22 | 23 | // collect moved ops into tmp (in reverse) 24 | // so that we can merge them all together 25 | std::vector tmp0, tmp1; 26 | 27 | // one pass should be enough 'cos DFS 28 | bool progress = false; 29 | for(int li = 0, liveSz = live.size(); li < liveSz; ++li) 30 | { 31 | auto b = live[li]; 32 | // find local uses 33 | findUsesBlock(b, false, true); 34 | 35 | auto & jmp = ops[blocks[b].code.back()]; 36 | 37 | if(sink_debug) BJIT_LOG("\nSink in L%d?", b); 38 | 39 | // is this a return block? 40 | if(jmp.opcode > ops::jmp) 41 | { 42 | if(sink_debug) BJIT_LOG("\nL%d is exit block", b); 43 | continue; 44 | } 45 | 46 | // is this a straight jmp? 
47 | if(jmp.opcode == ops::jmp) 48 | { 49 | // if we don't dominate the block, then bail out 50 | if(blocks[jmp.label[0]].idom != b) 51 | { 52 | if(sink_debug) 53 | BJIT_LOG("\nL%d doesn't dominate L%d", b, jmp.label[0]); 54 | continue; 55 | } 56 | } 57 | 58 | tmp0.clear(); 59 | tmp1.clear(); 60 | 61 | // loop code backwards 62 | for(int c = blocks[b].code.size(); c--;) 63 | { 64 | auto opIndex = blocks[b].code[c]; 65 | auto op = ops[opIndex]; 66 | 67 | // if this has no local uses, is it something we can sink? 68 | if(op.nUse || !op.canCSE() || (!unsafeOpt && op.hasSideFX())) 69 | { 70 | if(sink_debug) 71 | BJIT_LOG("\n %04x not eligible in L%d", opIndex, b); 72 | continue; 73 | } 74 | 75 | // it must be live-out in at least one block 76 | bool live0 = false, live1 = false; 77 | 78 | for(auto l : blocks[jmp.label[0]].livein) 79 | { 80 | if(opIndex != l) continue; 81 | live0 = true; 82 | continue; 83 | } 84 | 85 | if(jmp.opcode < ops::jmp) 86 | for(auto l : blocks[jmp.label[1]].livein) 87 | { 88 | if(opIndex != l) continue; 89 | live1 = true; 90 | continue; 91 | } 92 | 93 | if(sink_debug) BJIT_LOG("\nLive (%s, %s)...", 94 | live0 ? "yes" : "no", live1 ? 
"yes" : "no"); 95 | 96 | // don't move if live (or dead) in both branches 97 | if(live0 == live1) continue; 98 | 99 | if(sink_debug) BJIT_LOG("\nTry to sink..."); 100 | 101 | // do not move into blocks that merge paths 102 | // this prevents us from sinking loop invariants 103 | // back into the loop, which would be silly 104 | if(blocks[jmp.label[live0?0:1]].comeFrom.size() > 1) 105 | { 106 | // if the edge is not critical, don't sink at all 107 | // NOTE: we check this here (not at the top) because jump-opt 108 | // which we want to try only after finding sinkable op 109 | if(jmp.opcode == ops::jmp) 110 | { 111 | if(sink_debug) BJIT_LOG("\nMerging path not critical..."); 112 | break; // no point scanning the rest 113 | } 114 | 115 | // otherwise break the edge 116 | jmp.label[live0?0:1] = breakEdge(b, jmp.label[live0?0:1]); 117 | if(sink_debug) BJIT_LOG(" L%d", jmp.label[live0?0:1]); 118 | } 119 | 120 | if(sink_debug) BJIT_LOG("\nSinking..."); 121 | 122 | // pick the block where this is live 123 | (live0 ? tmp0 : tmp1).push_back(opIndex); 124 | blocks[b].code[c] = noVal; // dead at original site 125 | 126 | // see if we should be moving inputs too? 127 | for(int k = 0; k < op.nInputs(); ++k) 128 | { 129 | // if this was last use, for something in this block 130 | // then mark it as livein for the block where we moved to 131 | if(ops[op.in[k]].block == b && !--ops[op.in[k]].nUse) 132 | { 133 | if(sink_debug) BJIT_LOG("\nLast use for %04x", op.in[k]); 134 | blocks[jmp.label[live0?0:1]].livein.push_back(op.in[k]); 135 | } 136 | } 137 | } 138 | 139 | // did we move anything? 
140 | if(tmp0.size()) 141 | { 142 | progress = true; 143 | 144 | // skip any ops that must be in the beginning 145 | // really just phis, but try to be future-proof 146 | int insertAt = 0; 147 | int tBlock = jmp.label[0]; 148 | while(insertAt < blocks[tBlock].code.size()) 149 | { 150 | if(ops[blocks[tBlock].code[insertAt]].canMove()) break; 151 | ++insertAt; 152 | } 153 | 154 | // make room and move original ops back 155 | auto & tcode = blocks[tBlock].code; 156 | tcode.resize(tcode.size() + tmp0.size()); 157 | for(int i = tcode.size(); --i > insertAt;) 158 | { 159 | tcode[i] = tcode[i-tmp0.size()]; 160 | } 161 | 162 | //BJIT_LOG("Moving B%d -> B%d:\n", b, jmp.label[0]); 163 | 164 | // then work merge tmp which needs to be reversed 165 | for(int i = insertAt; tmp0.size(); i++) 166 | { 167 | //debugOp(tmp0.back()); 168 | tcode[i] = tmp0.back(); tmp0.pop_back(); 169 | ops[tcode[i]].block = tBlock; 170 | ops[tcode[i]].flags.no_opt = true; // don't hoist further 171 | } 172 | } 173 | 174 | if(tmp1.size()) 175 | { 176 | progress = true; 177 | 178 | BJIT_ASSERT_MORE(jmp.opcode < ops::jmp); 179 | 180 | // skip any ops that must be in the beginning 181 | // really just phis, but try to be future-proof 182 | int insertAt = 0; 183 | int tBlock = jmp.label[1]; 184 | while(insertAt < blocks[tBlock].code.size()) 185 | { 186 | if(ops[blocks[tBlock].code[insertAt]].canMove()) break; 187 | ++insertAt; 188 | } 189 | 190 | // make room and move original ops back 191 | auto & tcode = blocks[tBlock].code; 192 | tcode.resize(tcode.size() + tmp1.size()); 193 | for(int i = tcode.size(); --i > insertAt;) 194 | { 195 | tcode[i] = tcode[i-tmp1.size()]; 196 | } 197 | 198 | //BJIT_LOG("Moving B%d -> B%d:\n", b, jmp.label[1]); 199 | 200 | // then work merge tmp which needs to be reversed 201 | for(int i = insertAt; tmp1.size(); i++) 202 | { 203 | //debugOp(tmp1.back()); 204 | 205 | tcode[i] = tmp1.back(); tmp1.pop_back(); 206 | ops[tcode[i]].block = tBlock; 207 | ops[tcode[i]].flags.no_opt = 
true; // don't hoist further 208 | } 209 | } 210 | } 211 | 212 | //debug(); 213 | 214 | return progress; 215 | } 216 | -------------------------------------------------------------------------------- /src/sanity.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "bjit.h" 3 | 4 | using namespace bjit; 5 | 6 | // Internal sanity checker 7 | // 8 | // This checks (some of the) invariants that we rely on. 9 | // 10 | // Note that in some cases we can violate invariants temporarily, 11 | // but they should always check out after each DCE pass. 12 | // 13 | void Proc::sanity() 14 | { 15 | BJIT_ASSERT(live.size()); // must have one pass DCE 16 | 17 | rebuild_livein(); 18 | opt_dce(); // do another round to get use counts? 19 | rebuild_dom(); 20 | 21 | debug(); 22 | 23 | for(auto & b : live) 24 | { 25 | for(auto & c : blocks[b].code) 26 | { 27 | auto & op = ops[c]; 28 | 29 | //debugOp(c); 30 | 31 | // sanity check that block/index are correct 32 | BJIT_ASSERT(op.block == b); 33 | 34 | if(op.opcode == ops::phi) 35 | { 36 | int nPhiSrc = 0; 37 | for(auto & a : blocks[b].alts) if(a.phi == c) ++nPhiSrc; 38 | BJIT_ASSERT(nPhiSrc == blocks[b].comeFrom.size()); 39 | 40 | int phiSourcesFound = 0; 41 | for(auto & s : blocks[b].alts) 42 | { 43 | if(s.phi != c) continue; 44 | 45 | bool phiSourceInComeFrom = false; 46 | for(auto cf : blocks[b].comeFrom) 47 | { 48 | if(s.src != cf) continue; 49 | phiSourceInComeFrom = true; 50 | break; 51 | } 52 | BJIT_ASSERT(phiSourceInComeFrom); 53 | ++phiSourcesFound; 54 | } 55 | BJIT_ASSERT(phiSourcesFound == blocks[b].comeFrom.size()); 56 | } 57 | 58 | // sanity check that definitions dominate uses 59 | // also check that non-locals are marked as livein 60 | for(int i = 0; i < op.nInputs(); ++i) 61 | { 62 | bool inputDominates = false; 63 | for(auto & d : blocks[b].dom) 64 | { 65 | if(d == ops[op.in[i]].block) 66 | { 67 | inputDominates = true; 68 | break; 69 | } 70 | } 71 | 72 | 
BJIT_ASSERT(inputDominates); 73 | 74 | bool liveIn = (ops[op.in[i]].block == b); 75 | if(!liveIn) 76 | { 77 | for(auto & in : blocks[b].livein) 78 | { 79 | if(in == op.in[i]) liveIn = true; 80 | } 81 | } 82 | BJIT_ASSERT(liveIn); 83 | } 84 | } 85 | } 86 | 87 | BJIT_LOG(" SANE\n"); 88 | } -------------------------------------------------------------------------------- /tests/test_add_ff.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "bjit.h" 3 | 4 | int main() 5 | { 6 | 7 | bjit::Module module; 8 | 9 | { 10 | bjit::Proc proc(0, "ff"); 11 | proc.fret(proc.fadd(proc.env[0], proc.env[1])); 12 | module.compile(proc); 13 | } 14 | { 15 | bjit::Proc proc(0, "dd"); 16 | proc.dret(proc.dadd(proc.env[0], proc.env[1])); 17 | module.compile(proc); 18 | } 19 | { 20 | bjit::Proc proc(0, "dd"); 21 | proc.dret(proc.cf2d(proc.fadd( 22 | proc.cd2f(proc.env[0]), proc.cd2f(proc.env[1])))); 23 | module.compile(proc); 24 | } 25 | 26 | auto & codeOut = module.getBytes(); 27 | 28 | FILE * f = fopen("out.bin", "wb"); 29 | fwrite(codeOut.data(), 1, codeOut.size(), f); 30 | fclose(f); 31 | 32 | printf(" - Wrote out.bin\n"); 33 | 34 | BJIT_ASSERT(module.load()); 35 | 36 | BJIT_ASSERT(module.getPointer(0)(1.f, 5.5f) == 6.5f); 37 | BJIT_ASSERT(module.getPointer(1)(2.5, 3.25) == 5.75); 38 | BJIT_ASSERT(module.getPointer(2)(3.25, 4.5) == 7.75); 39 | 40 | return 0; 41 | } 42 | -------------------------------------------------------------------------------- /tests/test_add_ii.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "bjit.h" 3 | 4 | int main() 5 | { 6 | 7 | bjit::Module module; 8 | bjit::Proc proc(0, "ii"); 9 | 10 | proc.iret(proc.iadd(proc.env[0], proc.env[1])); 11 | 12 | int i = module.compile(proc); 13 | 14 | BJIT_ASSERT(module.load()); 15 | 16 | auto ptr = module.getPointer(i); 17 | 18 | printf(" 2 + 5 = %d\n", ptr(2, 5)); 19 | 20 | BJIT_ASSERT(ptr(2,5) == 7); 21 | 22 | return 0; 
23 | } 24 | -------------------------------------------------------------------------------- /tests/test_call_stub.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "bjit.h" 3 | 4 | #include 5 | #include 6 | 7 | int hello() 8 | { 9 | printf("Hello world\n"); 10 | 11 | return 42; 12 | } 13 | 14 | int helloAgain() 15 | { 16 | printf("Hello world, again\n"); 17 | 18 | return 45; 19 | } 20 | 21 | int main() 22 | { 23 | 24 | bjit::Module module; 25 | 26 | // proc 0, stub 27 | module.compileStub(0); 28 | 29 | // proc 1, near-call stub 30 | { 31 | bjit::Proc proc(0, ""); 32 | proc.iret(proc.icalln(0, 0)); 33 | module.compile(proc); 34 | } 35 | 36 | BJIT_ASSERT(module.load()); 37 | 38 | auto codeOut = module.getBytes(); 39 | if(codeOut.size()) 40 | { 41 | FILE * f = fopen("out.bin", "wb"); 42 | fwrite(codeOut.data(), 1, codeOut.size(), f); 43 | fclose(f); 44 | 45 | printf(" - Wrote out.bin\n"); 46 | } 47 | 48 | module.patchStub(0, (uintptr_t)&hello); 49 | module.patch(); 50 | 51 | BJIT_ASSERT(module.getPointer(1)() == 42); 52 | 53 | module.unload(); 54 | 55 | module.patchStub(0, (uintptr_t)&helloAgain); 56 | module.load(); 57 | BJIT_ASSERT(module.getPointer(1)() == 45); 58 | 59 | return 0; 60 | } 61 | -------------------------------------------------------------------------------- /tests/test_calln.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "bjit.h" 3 | 4 | #include 5 | 6 | 7 | int main() 8 | { 9 | 10 | bjit::Module module; 11 | 12 | // proc 0, does actual sub 13 | { 14 | bjit::Proc proc(0, "ii"); 15 | proc.iret(proc.isub(proc.env[0], proc.env[1])); 16 | module.compile(proc); 17 | } 18 | // proc 1, icalln 19 | { 20 | bjit::Proc proc(0, "ii"); 21 | proc.iret(proc.icalln(2, 2)); 22 | module.compile(proc); 23 | } 24 | // proc 2: tcalln 25 | { 26 | bjit::Proc proc(0, "ii"); 27 | proc.tcalln(0, 2); 28 | module.compile(proc); 29 | } 30 | 31 | 
BJIT_ASSERT(module.load()); 32 | 33 | auto codeOut = module.getBytes(); 34 | if(codeOut.size()) 35 | { 36 | FILE * f = fopen("out.bin", "wb"); 37 | fwrite(codeOut.data(), 1, codeOut.size(), f); 38 | fclose(f); 39 | 40 | printf(" - Wrote out.bin\n"); 41 | } 42 | 43 | BJIT_ASSERT(module.getPointer(1)(5, 2) == 3); 44 | BJIT_ASSERT(module.getPointer(2)(7, 3) == 4); 45 | 46 | return 0; 47 | } 48 | -------------------------------------------------------------------------------- /tests/test_callp.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "bjit.h" 3 | 4 | #include 5 | #include 6 | 7 | int isub2(int a, int b) 8 | { 9 | printf("%d - %d = %d\n", a, b, a-b); 10 | return a - b; 11 | } 12 | 13 | float fsub2(float a, float b) 14 | { 15 | printf("%f - %f = %f\n", a, b, a-b); 16 | return a - b; 17 | } 18 | 19 | double dsub2(double a, double b) 20 | { 21 | printf("%f - %f = %f\n", a, b, a-b); 22 | return a - b; 23 | } 24 | 25 | int main() 26 | { 27 | 28 | bjit::Module module; 29 | 30 | printf("isub2 %" PRIuPTR ", fsub2 %p\n", (uintptr_t)isub2, fsub2); 31 | 32 | { 33 | bjit::Proc proc(0, "ii"); 34 | proc.env[0] = proc.iadd(proc.env[0], proc.lci(1)); 35 | proc.iret(proc.icallp(proc.lci(uintptr_t(isub2)), 2)); 36 | proc.debug(); 37 | module.compile(proc); 38 | } 39 | { 40 | bjit::Proc proc(0, "ii"); 41 | proc.env[0] = proc.iadd(proc.env[0], proc.lci(1)); 42 | proc.tcallp(proc.lci(uintptr_t(isub2)), 2); 43 | proc.debug(); 44 | module.compile(proc); 45 | } 46 | { 47 | bjit::Proc proc(0, "ff"); 48 | proc.fret(proc.fcallp(proc.lci(uintptr_t(fsub2)), 2)); 49 | proc.debug(); 50 | module.compile(proc); 51 | } 52 | { 53 | bjit::Proc proc(0, "dd"); 54 | proc.dret(proc.dcallp(proc.lci(uintptr_t(dsub2)), 2)); 55 | proc.debug(); 56 | module.compile(proc); 57 | } 58 | 59 | BJIT_ASSERT(module.load()); 60 | 61 | auto codeOut = module.getBytes(); 62 | if(codeOut.size()) 63 | { 64 | FILE * f = fopen("out.bin", "wb"); 65 | 
fwrite(codeOut.data(), 1, codeOut.size(), f); 66 | fclose(f); 67 | 68 | printf(" - Wrote out.bin\n"); 69 | } 70 | 71 | printf("icall\n"); 72 | BJIT_ASSERT(module.getPointer(0)(5, 2) == 4); 73 | printf("tcall\n"); 74 | BJIT_ASSERT(module.getPointer(1)(7, 1) == 7); 75 | printf("fcall\n"); 76 | BJIT_ASSERT(module.getPointer(2)(15.5f, 6.f) == 9.5f); 77 | printf("dcall\n"); 78 | BJIT_ASSERT(module.getPointer(3)(5.5, 2) == 3.5); 79 | 80 | return 0; 81 | } 82 | -------------------------------------------------------------------------------- /tests/test_ci2f_cf2i.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "bjit.h" 3 | 4 | int main() 5 | { 6 | 7 | bjit::Module module; 8 | 9 | { 10 | bjit::Proc proc(0, "if"); 11 | proc.iret(proc.cf2i(proc.fadd(proc.ci2f(proc.env[0]), proc.env[1]))); 12 | 13 | module.compile(proc); 14 | } 15 | 16 | 17 | { 18 | bjit::Proc proc(0, "id"); 19 | proc.iret(proc.cd2i(proc.dadd(proc.ci2d(proc.env[0]), proc.env[1]))); 20 | 21 | module.compile(proc); 22 | } 23 | 24 | 25 | BJIT_ASSERT(module.load()); 26 | { 27 | auto ptr = module.getPointer(0); 28 | printf(" 2 + 5 = %d\n", ptr(2, 5)); 29 | BJIT_ASSERT(ptr(2,5) == 7); 30 | } 31 | 32 | { 33 | auto ptr = module.getPointer(1); 34 | printf(" 2 + 5 = %d\n", ptr(2, 5)); 35 | BJIT_ASSERT(ptr(2,5) == 7); 36 | } 37 | 38 | return 0; 39 | } 40 | -------------------------------------------------------------------------------- /tests/test_divmod.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "bjit.h" 3 | 4 | int main() 5 | { 6 | 7 | bjit::Module module; 8 | 9 | { 10 | bjit::Proc proc(0, "ii"); 11 | proc.iret(proc.idiv(proc.env[0], proc.env[1])); 12 | module.compile(proc); 13 | } 14 | { 15 | bjit::Proc proc(0, "ii"); 16 | proc.iret(proc.imod(proc.env[0], proc.env[1])); 17 | module.compile(proc); 18 | } 19 | { 20 | bjit::Proc proc(0, "ii"); 21 | proc.iret(proc.udiv(proc.env[0], proc.env[1])); 22 | 
module.compile(proc); 23 | } 24 | { 25 | bjit::Proc proc(0, "ii"); 26 | proc.iret(proc.umod(proc.env[0], proc.env[1])); 27 | module.compile(proc); 28 | } 29 | 30 | auto & codeOut = module.getBytes(); 31 | 32 | FILE * f = fopen("out.bin", "wb"); 33 | fwrite(codeOut.data(), 1, codeOut.size(), f); 34 | fclose(f); 35 | 36 | printf(" - Wrote out.bin\n"); 37 | 38 | BJIT_ASSERT(module.load()); 39 | 40 | int64_t s = -3249421; 41 | uint64_t u = 55425439; 42 | 43 | BJIT_ASSERT(module.getPointer(0)(s,3) == (s/3)); 44 | BJIT_ASSERT(module.getPointer(1)(s,3) == (s%3)); 45 | BJIT_ASSERT(module.getPointer(1)(s,-3) == (s%-3)); 46 | 47 | BJIT_ASSERT(module.getPointer(2)(u,3) == (u/3)); 48 | BJIT_ASSERT(module.getPointer(3)(u,3) == (u%3)); 49 | BJIT_ASSERT(module.getPointer(3)(u,-3) == (u%-3)); 50 | 51 | return 0; 52 | } 53 | -------------------------------------------------------------------------------- /tests/test_fib.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "bjit.h" 3 | 4 | int fib(int x) 5 | { 6 | if( x <= 1 ) return 1; 7 | return fib(x-1) + fib(x-2); 8 | } 9 | 10 | int main() 11 | { 12 | 13 | bjit::Module module; 14 | { 15 | bjit::Proc pr(0, "i"); 16 | 17 | auto lt = pr.newLabel(); 18 | auto le = pr.newLabel(); 19 | 20 | pr.jnz(pr.ile(pr.env[0], pr.lci(1)), lt, le); 21 | 22 | pr.emitLabel(lt); 23 | pr.iret(pr.lci(1)); 24 | 25 | pr.emitLabel(le); 26 | 27 | pr.env.push_back(pr.isub(pr.env[0], pr.lci(1))); 28 | auto a = pr.icalln(0, 1); // recursive fib(x-1) 29 | pr.env.pop_back(); 30 | 31 | pr.env.push_back(pr.isub(pr.env[0], pr.lci(2))); 32 | auto b = pr.icalln(0, 1); // recursive fib(x-2) 33 | pr.env.pop_back(); 34 | 35 | pr.iret(pr.iadd(a,b)); 36 | 37 | module.compile(pr); 38 | } 39 | auto & codeOut = module.getBytes(); 40 | FILE * f = fopen("out.bin", "wb"); 41 | fwrite(codeOut.data(), 1, codeOut.size(), f); 42 | fclose(f); 43 | printf(" - Wrote out.bin\n"); 44 | BJIT_ASSERT(module.load()); 45 | 46 | auto x = 16; 
47 | auto y = fib(x); 48 | printf("C-fib: %d\n", y); 49 | 50 | BJIT_ASSERT(y == module.getPointer(0)(x)); 51 | 52 | return 0; 53 | } 54 | -------------------------------------------------------------------------------- /tests/test_fuzzfold.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "bjit.h" 3 | 4 | // Define to only run one (eg. known to fail) case 5 | // and enable dumping the code into a file 6 | //#define ONECASE 93495 7 | 8 | uintptr_t iFuzzSeed(uint64_t seed, int opt) 9 | { 10 | bjit::Module module; 11 | bjit::Proc proc(0, "iiii"); 12 | 13 | auto random = [&]() -> uint64_t { return bjit::hash64(seed++); }; 14 | 15 | for(int i = 0; i < 64; ++i) 16 | { 17 | int op = random() % 11; 18 | 19 | // skip the alloc 20 | int a0 = 1 + (random() % (proc.env.size()-1)); 21 | int a1 = 1 + (random() % (proc.env.size()-1)); 22 | 23 | switch(op) 24 | { 25 | case 0: proc.env.push_back(proc.lci(random())); break; 26 | case 1: proc.env.push_back(proc.lci(0x1<<31)); break; 27 | case 2: proc.env.push_back(proc.lci((uint32_t)random())); break; 28 | 29 | case 3: proc.env.push_back(proc.iadd(proc.env[a0], proc.env[a1])); break; 30 | case 4: proc.env.push_back(proc.isub(proc.env[a0], proc.env[a1])); break; 31 | case 5: proc.env.push_back(proc.imul(proc.env[a0], proc.env[a1])); break; 32 | 33 | case 6: proc.env.push_back(proc.iand(proc.env[a0], proc.env[a1])); break; 34 | case 7: proc.env.push_back(proc.ior(proc.env[a0], proc.env[a1])); break; 35 | case 8: proc.env.push_back(proc.ixor(proc.env[a0], proc.env[a1])); break; 36 | 37 | case 9: proc.env.push_back(proc.ineg(proc.env[a0])); break; 38 | case 10: proc.env.push_back(proc.inot(proc.env[a0])); break; 39 | } 40 | } 41 | 42 | proc.iret(proc.env[1 + (random() % (proc.env.size()-1))]); 43 | module.compile(proc, opt); 44 | #ifdef ONECASE 45 | if(opt) 46 | #else 47 | if(false) 48 | #endif 49 | { 50 | auto & codeOut = module.getBytes(); 51 | FILE * f = fopen("out.bin", "wb"); 52 | 
fwrite(codeOut.data(), 1, codeOut.size(), f); 53 | fclose(f); 54 | printf(" - Wrote out.bin\n"); 55 | } 56 | module.load(); 57 | auto ptr = module.getPointer(0); 58 | 59 | int p0 = random(); 60 | int p1 = random(); 61 | int p2 = random(); 62 | int p3 = random(); 63 | return ptr(p0, p1, p2, p3); 64 | } 65 | 66 | int main() 67 | { 68 | #ifdef ONECASE 69 | for(int i = ONECASE; i == ONECASE; ++i) 70 | #else 71 | for(int i = 0; i < 123456; ++i) 72 | #endif 73 | { 74 | auto seed = bjit::hash64(i); 75 | auto fuzz0 = iFuzzSeed(seed, 0); 76 | auto fuzz2 = iFuzzSeed(seed, 2); 77 | BJIT_LOG("\nTest iter %d\n", i); 78 | if(fuzz0 != fuzz2) 79 | BJIT_LOG(" %p != %p\n", (void*)fuzz0, (void*)fuzz2); 80 | BJIT_ASSERT(fuzz0 == fuzz2); 81 | 82 | BJIT_LOG(" OK: %d\n", i); 83 | } 84 | 85 | return 0; 86 | } 87 | -------------------------------------------------------------------------------- /tests/test_load_store.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "bjit.h" 3 | 4 | #include 5 | 6 | struct TestData 7 | { 8 | union 9 | { 10 | int8_t i8; 11 | uint8_t u8; 12 | uint64_t i8_64; 13 | }; 14 | union 15 | { 16 | int16_t i16; 17 | uint16_t u16; 18 | uint64_t i16_64; 19 | }; 20 | union 21 | { 22 | int32_t i32; 23 | uint32_t u32; 24 | uint64_t i32_64; 25 | }; 26 | uint64_t i64; 27 | 28 | float f32; 29 | double f64; 30 | }; 31 | 32 | int main() 33 | { 34 | 35 | bjit::Module module; 36 | 37 | { 38 | bjit::Proc proc(0, "i"); 39 | proc.iret(proc.li8(proc.env[0], offsetof(TestData,i8))); 40 | module.compile(proc); 41 | } 42 | { 43 | bjit::Proc proc(0, "i"); 44 | proc.iret(proc.li16(proc.env[0], offsetof(TestData,i16))); 45 | module.compile(proc); 46 | } 47 | { 48 | bjit::Proc proc(0, "i"); 49 | proc.iret(proc.li32(proc.env[0], offsetof(TestData,i32))); 50 | module.compile(proc); 51 | } 52 | { 53 | bjit::Proc proc(0, "i"); 54 | proc.iret(proc.lu8(proc.env[0], offsetof(TestData,u8))); 55 | module.compile(proc); 56 | } 57 | { 58 | 
bjit::Proc proc(0, "i"); 59 | proc.iret(proc.lu16(proc.env[0], offsetof(TestData,u16))); 60 | module.compile(proc); 61 | } 62 | { 63 | bjit::Proc proc(0, "i"); 64 | proc.iret(proc.lu32(proc.env[0], offsetof(TestData,u32))); 65 | module.compile(proc); 66 | } 67 | { 68 | bjit::Proc proc(0, "i"); 69 | proc.iret(proc.li64(proc.env[0], offsetof(TestData,i64))); 70 | module.compile(proc); 71 | } 72 | { 73 | bjit::Proc proc(0, "i"); 74 | proc.fret(proc.lf32(proc.env[0], offsetof(TestData,f32))); 75 | module.compile(proc); 76 | } 77 | { 78 | bjit::Proc proc(0, "i"); 79 | proc.dret(proc.lf64(proc.env[0], offsetof(TestData,f64))); 80 | module.compile(proc); 81 | } 82 | 83 | BJIT_ASSERT(module.load(0x10000)); 84 | 85 | { 86 | auto & codeOut = module.getBytes(); 87 | FILE * f = fopen("out.bin", "wb"); 88 | fwrite(codeOut.data(), 1, codeOut.size(), f); 89 | fclose(f); 90 | printf(" - Wrote out.bin\n"); 91 | } 92 | 93 | TestData test; 94 | test.i8_64 = 0xc0c1c2c3c4c5c6c7; 95 | test.i16_64 = 0xd0d1d2d3d4d5d6d7; 96 | test.i32_64 = 0xe0e1e2e3e4e5e6e7; 97 | test.i64 = 0xf0f1f2f3f4f5f6f7; 98 | test.f32 = 1.5f; 99 | test.f64 = 3.14; 100 | 101 | BJIT_ASSERT(module.getPointer(0)(&test) == test.i8); 102 | BJIT_ASSERT(module.getPointer(1)(&test) == test.i16); 103 | BJIT_ASSERT(module.getPointer(2)(&test) == test.i32); 104 | 105 | BJIT_ASSERT(module.getPointer(3)(&test) == test.u8); 106 | BJIT_ASSERT(module.getPointer(4)(&test) == test.u16); 107 | BJIT_ASSERT(module.getPointer(5)(&test) == test.u32); 108 | 109 | BJIT_ASSERT(module.getPointer(6)(&test) == test.i64); 110 | 111 | BJIT_ASSERT(module.getPointer(7)(&test) == test.f32); 112 | BJIT_ASSERT(module.getPointer(8)(&test) == test.f64); 113 | 114 | { 115 | bjit::Proc proc(0, "ii"); 116 | proc.si8(proc.env[1], proc.env[0], offsetof(TestData,i8)); 117 | proc.iret(proc.lci(0)); 118 | module.compile(proc); 119 | } 120 | { 121 | bjit::Proc proc(0, "ii"); 122 | proc.si16(proc.env[1], proc.env[0], offsetof(TestData,i16)); 123 | 
proc.iret(proc.lci(0)); 124 | module.compile(proc); 125 | } 126 | { 127 | bjit::Proc proc(0, "ii"); 128 | proc.si32(proc.env[1], proc.env[0], offsetof(TestData,i32)); 129 | proc.iret(proc.lci(0)); 130 | module.compile(proc); 131 | } 132 | { 133 | bjit::Proc proc(0, "ii"); 134 | proc.si64(proc.env[1], proc.env[0], offsetof(TestData,i64)); 135 | proc.iret(proc.lci(0)); 136 | module.compile(proc); 137 | } 138 | { 139 | bjit::Proc proc(0, "if"); 140 | proc.sf32(proc.env[1], proc.env[0], offsetof(TestData,f32)); 141 | proc.iret(proc.lci(0)); 142 | module.compile(proc); 143 | } 144 | { 145 | bjit::Proc proc(0, "id"); 146 | proc.sf64(proc.env[1], proc.env[0], offsetof(TestData,f64)); 147 | proc.iret(proc.lci(0)); 148 | module.compile(proc); 149 | } 150 | 151 | { 152 | auto & codeOut = module.getBytes(); 153 | FILE * f = fopen("out.bin", "wb"); 154 | fwrite(codeOut.data(), 1, codeOut.size(), f); 155 | fclose(f); 156 | printf(" - Wrote out.bin\n"); 157 | } 158 | 159 | BJIT_ASSERT(module.patch()); 160 | uint64_t v = 0xf0f1f2f3f4f5f6f7; 161 | 162 | module.getPointer(9)(&test, v); 163 | BJIT_ASSERT(test.i8 == (int8_t) v); 164 | 165 | module.getPointer(10)(&test, v); 166 | BJIT_ASSERT(test.i16 == (int16_t) v); 167 | 168 | module.getPointer(11)(&test, v); 169 | BJIT_ASSERT(test.i32 == (int32_t) v); 170 | 171 | module.getPointer(12)(&test, v); 172 | BJIT_ASSERT(test.i64 == v); 173 | 174 | module.getPointer(13)(&test, 3.14f); 175 | BJIT_ASSERT(test.f32 == 3.14f); 176 | 177 | module.getPointer(14)(&test, 1.5); 178 | BJIT_ASSERT(test.f64 == 1.5); 179 | 180 | module.unload(); 181 | 182 | return 0; 183 | } 184 | -------------------------------------------------------------------------------- /tests/test_loop.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "bjit.h" 3 | 4 | int proc(int x, int y) 5 | { 6 | int i = 0; 7 | 8 | while(true) 9 | { 10 | if((++i) >= x) break; 11 | if((++i) >= y) break; 12 | 13 | ++i; 14 | }; 15 | 16 | 
return i; 17 | } 18 | 19 | int main() 20 | { 21 | 22 | bjit::Module module; 23 | { 24 | bjit::Proc pr(0, "ii"); 25 | 26 | pr.env.push_back(pr.lci(0)); 27 | 28 | auto la = pr.newLabel(); 29 | auto lb = pr.newLabel(); 30 | auto lc = pr.newLabel(); 31 | auto le = pr.newLabel(); 32 | 33 | pr.jmp(la); 34 | 35 | pr.emitLabel(la); 36 | pr.env[2] = pr.iadd(pr.env[2], pr.lci(1)); 37 | pr.jnz(pr.ige(pr.env[2], pr.env[0]), le, lb); 38 | 39 | pr.emitLabel(lb); 40 | pr.env[2] = pr.iadd(pr.env[2], pr.lci(1)); 41 | pr.jnz(pr.ige(pr.env[2], pr.env[1]), le, lc); 42 | 43 | pr.emitLabel(lc); 44 | pr.env[2] = pr.iadd(pr.env[2], pr.lci(1)); 45 | pr.jmp(la); 46 | 47 | pr.emitLabel(le); 48 | pr.iret(pr.env[2]); 49 | 50 | pr.debug(); 51 | module.compile(pr); 52 | } 53 | auto & codeOut = module.getBytes(); 54 | FILE * f = fopen("out.bin", "wb"); 55 | fwrite(codeOut.data(), 1, codeOut.size(), f); 56 | fclose(f); 57 | printf(" - Wrote out.bin\n"); 58 | BJIT_ASSERT(module.load()); 59 | 60 | for(int i = 0; i < 16; ++i) 61 | { 62 | auto h = bjit::hash64(i+1); 63 | int x = h&0xff; 64 | int y = (h>>8)&0xff; 65 | int z = proc(x,y); 66 | int zjit = module.getPointer(0)(x,y); 67 | printf("proc(%d,%d) = %d (jit says %d)\n", x, y, z, zjit); 68 | BJIT_ASSERT(z == zjit); 69 | } 70 | 71 | return 0; 72 | } 73 | -------------------------------------------------------------------------------- /tests/test_mem_opt.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "bjit.h" 3 | 4 | int main() 5 | { 6 | 7 | bjit::Module module; 8 | { 9 | bjit::Proc pr(0, "i"); 10 | 11 | // these should get CSE 12 | auto a = pr.iadd(pr.li32(pr.env[0],0), pr.li32(pr.env[0],0)); 13 | 14 | // this should break CSE on loads 15 | pr.si32(pr.lci(1), pr.env[0], 0); 16 | 17 | pr.iret(pr.iadd(a, pr.li32(pr.env[0],0))); 18 | 19 | pr.debug(); 20 | module.compile(pr); 21 | } 22 | { 23 | bjit::Proc pr(0, "i"); 24 | 25 | // these should get CSE 26 | auto a = pr.iadd(pr.li32(pr.env[0],0), 
pr.li32(pr.env[0],0)); 27 | 28 | // this should break CSE on loads 29 | pr.si32(pr.lci(1), pr.env[0], 0); 30 | 31 | pr.iret(pr.iadd(a, pr.li32(pr.env[0],0))); 32 | 33 | pr.debug(); 34 | module.compile(pr); 35 | } 36 | auto & codeOut = module.getBytes(); 37 | FILE * f = fopen("out.bin", "wb"); 38 | fwrite(codeOut.data(), 1, codeOut.size(), f); 39 | fclose(f); 40 | printf(" - Wrote out.bin\n"); 41 | BJIT_ASSERT(module.load()); 42 | 43 | int v = 42; 44 | 45 | BJIT_ASSERT((2*42+1) == module.getPointer(0)(&v)); 46 | BJIT_ASSERT(v == 1); 47 | 48 | return 0; 49 | } 50 | -------------------------------------------------------------------------------- /tests/test_shift.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "bjit.h" 3 | 4 | int main() 5 | { 6 | 7 | bjit::Module module; 8 | 9 | { 10 | bjit::Proc proc(0, "ii"); 11 | proc.iret(proc.ishl(proc.env[0], proc.env[1])); 12 | module.compile(proc); 13 | } 14 | { 15 | bjit::Proc proc(0, "ii"); 16 | proc.iret(proc.ishr(proc.env[0], proc.env[1])); 17 | module.compile(proc); 18 | } 19 | { 20 | bjit::Proc proc(0, "ii"); 21 | proc.iret(proc.ushr(proc.env[0], proc.env[1])); 22 | module.compile(proc); 23 | } 24 | 25 | { 26 | bjit::Proc proc(0, "i"); 27 | proc.iret(proc.ishl(proc.env[0], proc.lci(3))); 28 | module.compile(proc); 29 | } 30 | { 31 | bjit::Proc proc(0, "i"); 32 | proc.iret(proc.ishr(proc.env[0], proc.lci(3))); 33 | module.compile(proc); 34 | } 35 | { 36 | bjit::Proc proc(0, "i"); 37 | proc.iret(proc.ushr(proc.env[0], proc.lci(3))); 38 | module.compile(proc); 39 | } 40 | 41 | auto & codeOut = module.getBytes(); 42 | 43 | FILE * f = fopen("out.bin", "wb"); 44 | fwrite(codeOut.data(), 1, codeOut.size(), f); 45 | fclose(f); 46 | 47 | printf(" - Wrote out.bin\n"); 48 | 49 | BJIT_ASSERT(module.load()); 50 | 51 | int64_t s = 3; 52 | uint64_t u = 5; 53 | 54 | BJIT_ASSERT(module.getPointer(0)(s,3) == (s<<3)); 55 | BJIT_ASSERT(module.getPointer(1)(~s,3) == (~s>>3)); 56 | 
BJIT_ASSERT(module.getPointer(2)(~u,3) == (~u>>3)); 57 | 58 | BJIT_ASSERT(module.getPointer(3)(s) == (s<<3)); 59 | BJIT_ASSERT(module.getPointer(4)(~s) == (~s>>3)); 60 | BJIT_ASSERT(module.getPointer(5)(~u) == (~u>>3)); 61 | 62 | return 0; 63 | } 64 | -------------------------------------------------------------------------------- /tests/test_sieve.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "bjit.h" 3 | 4 | #ifdef _WIN32 5 | #pragma comment(lib,"winmm.lib") 6 | #include <windows.h> 7 | static inline unsigned getTimeMs() 8 | { 9 | return timeGetTime(); 10 | } 11 | #else 12 | #include <sys/time.h> 13 | static inline unsigned getTimeMs() 14 | { 15 | timeval time; 16 | gettimeofday(&time, NULL); 17 | return (time.tv_sec * 1000) + (time.tv_usec / 1000); 18 | } 19 | #endif 20 | 21 | int sieve(char * flags, int size) 22 | { 23 | int count = 0; 24 | 25 | for (int i = 0; i < size; ++i) flags[i] = true; 26 | for (int i = 2; i < size; ++i) 27 | { 28 | if (flags[i]) 29 | { 30 | int prime = i + 1; 31 | int k = i + prime; 32 | 33 | while (k < size) 34 | { 35 | flags[k] = false; 36 | k += prime; 37 | } 38 | 39 | ++count; 40 | } 41 | } 42 | 43 | return count; 44 | } 45 | 46 | static char data[819000]; 47 | 48 | int main() 49 | { 50 | 51 | bjit::Module module; 52 | { 53 | bjit::Proc pr(0, "ii"); 54 | 55 | int _flags = 0; 56 | int _size = 1; 57 | 58 | // i = 0 59 | int _i = pr.env.size(); pr.env.push_back(pr.lci(0)); 60 | // count = 0 61 | int _count = pr.env.size(); pr.env.push_back(pr.lci(0)); 62 | 63 | auto ls0 = pr.newLabel(); 64 | auto lb0 = pr.newLabel(); 65 | auto le0 = pr.newLabel(); 66 | 67 | pr.jmp(ls0); 68 | 69 | pr.emitLabel(ls0); 70 | // while i < size 71 | pr.jz(pr.ilt(pr.env[_i], pr.env[_size]), le0, lb0); 72 | pr.emitLabel(lb0); 73 | 74 | // *(flags + i) = 1 75 | pr.si8(pr.lci(1), pr.iadd(pr.env[_flags], pr.env[_i]), 0); 76 | // ++i 77 | pr.env[_i] = pr.iadd(pr.env[_i], pr.lci(1)); 78 | pr.jmp(ls0); 79 | 80 | pr.emitLabel(le0); 81 | 
82 | // i = 2 83 | pr.env[_i] = pr.lci(2); 84 | auto ls1 = pr.newLabel(); 85 | auto lb1 = pr.newLabel(); 86 | auto le1 = pr.newLabel(); 87 | 88 | pr.jmp(ls1); 89 | pr.emitLabel(ls1); 90 | 91 | // while i < size 92 | pr.jz(pr.ilt(pr.env[_i], pr.env[_size]), le1, lb1); 93 | pr.emitLabel(lb1); 94 | 95 | auto bt = pr.newLabel(); 96 | auto be = pr.newLabel(); 97 | 98 | // if flags[i] != 0 99 | pr.jnz(pr.li8(pr.iadd(pr.env[_flags], pr.env[_i]), 0), bt, be); 100 | pr.emitLabel(bt); 101 | 102 | // prime = i + 1 103 | int _prime = pr.env.size(); 104 | pr.env.push_back(pr.iadd(pr.env[_i], pr.lci(1))); 105 | 106 | // k = i + prime 107 | int _k = pr.env.size(); 108 | pr.env.push_back(pr.iadd(pr.env[_i], pr.env[_prime])); 109 | 110 | auto ls2 = pr.newLabel(); 111 | auto lb2 = pr.newLabel(); 112 | auto le2 = pr.newLabel(); 113 | 114 | pr.jmp(ls2); 115 | pr.emitLabel(ls2); 116 | 117 | // while k < size 118 | pr.jnz(pr.ilt(pr.env[_k], pr.env[_size]), lb2, le2); 119 | pr.emitLabel(lb2); 120 | 121 | // flags[k] = false; 122 | pr.si8(pr.lci(0), pr.iadd(pr.env[_flags], pr.env[_k]), 0); 123 | 124 | // k = k + prime 125 | pr.env[_k] = pr.iadd(pr.env[_k], pr.env[_prime]); 126 | 127 | pr.jmp(ls2); 128 | 129 | pr.emitLabel(le2); 130 | 131 | pr.env.pop_back(); // k 132 | pr.env.pop_back(); // prime 133 | 134 | pr.env[_count] = pr.iadd(pr.env[_count], pr.lci(1)); 135 | 136 | pr.jmp(be); 137 | 138 | pr.emitLabel(be); 139 | 140 | // ++i 141 | pr.env[_i] = pr.iadd(pr.env[_i], pr.lci(1)); 142 | 143 | pr.jmp(ls1); 144 | 145 | pr.emitLabel(le1); 146 | 147 | pr.iret(pr.env[_count]); 148 | 149 | pr.debug(); 150 | 151 | module.compile(pr); 152 | } 153 | 154 | auto & codeOut = module.getBytes(); 155 | FILE * f = fopen("out.bin", "wb"); 156 | fwrite(codeOut.data(), 1, codeOut.size(), f); 157 | fclose(f); 158 | printf(" - Wrote out.bin\n"); 159 | BJIT_ASSERT(module.load()); 160 | 161 | auto proc = module.getPointer(0); 162 | 163 | printf("C-sieve: %d primes\n", sieve(data, sizeof(data))); 164 | 
printf("BJIT-sieve: %d primes\n", proc(data, sizeof(data))); 165 | 166 | BJIT_ASSERT(sieve(data, sizeof(data)) == proc(data, sizeof(data))); 167 | 168 | printf("Iterating 1000 times...\n"); 169 | { 170 | auto start = getTimeMs(); 171 | 172 | for(int i = 0; i < 1000; ++i) 173 | { 174 | sieve(data, sizeof(data)); 175 | } 176 | 177 | printf("C time: %dms\n", getTimeMs() - start); 178 | } 179 | 180 | { 181 | auto start = getTimeMs(); 182 | 183 | for(int i = 0; i < 1000; ++i) 184 | { 185 | proc(data, sizeof(data)); 186 | } 187 | 188 | printf("BJIT time: %dms\n", getTimeMs() - start); 189 | } 190 | } -------------------------------------------------------------------------------- /tests/test_sub_ii.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "bjit.h" 3 | 4 | int main() 5 | { 6 | 7 | bjit::Module module; 8 | bjit::Proc proc(0, "ii"); 9 | 10 | proc.iret(proc.isub(proc.env[0], proc.env[1])); 11 | 12 | int i = module.compile(proc); 13 | 14 | BJIT_ASSERT(module.load()); 15 | 16 | auto ptr = module.getPointer(i); 17 | 18 | printf(" 5 - 2 = %d\n", ptr(5, 2)); 19 | 20 | BJIT_ASSERT(ptr(5,2) == 3); 21 | 22 | return 0; 23 | } 24 | -------------------------------------------------------------------------------- /tests/test_sx_zx.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "bjit.h" 3 | 4 | #include 5 | 6 | int main() 7 | { 8 | 9 | bjit::Module module; 10 | 11 | { 12 | bjit::Proc proc(0, "i"); 13 | proc.iret(proc.u8(proc.env[0])); 14 | module.compile(proc); 15 | } 16 | { 17 | bjit::Proc proc(0, "i"); 18 | proc.iret(proc.u16(proc.env[0])); 19 | module.compile(proc); 20 | } 21 | { 22 | bjit::Proc proc(0, "i"); 23 | proc.iret(proc.u32(proc.env[0])); 24 | module.compile(proc); 25 | } 26 | { 27 | bjit::Proc proc(0, "i"); 28 | proc.iret(proc.i8(proc.env[0])); 29 | module.compile(proc); 30 | } 31 | { 32 | bjit::Proc proc(0, "i"); 33 | proc.iret(proc.i16(proc.env[0])); 34 | 
module.compile(proc); 35 | } 36 | { 37 | bjit::Proc proc(0, "i"); 38 | proc.iret(proc.i32(proc.env[0])); 39 | module.compile(proc); 40 | } 41 | 42 | auto & codeOut = module.getBytes(); 43 | 44 | FILE * f = fopen("out.bin", "wb"); 45 | fwrite(codeOut.data(), 1, codeOut.size(), f); 46 | fclose(f); 47 | 48 | printf(" - Wrote out.bin\n"); 49 | 50 | BJIT_ASSERT(module.load()); 51 | 52 | uint64_t v = 0xfedcba9876543210ull; 53 | 54 | BJIT_ASSERT(module.getPointer(0)(v) == 0x10); 55 | BJIT_ASSERT(module.getPointer(1)(v) == 0x3210); 56 | BJIT_ASSERT(module.getPointer(2)(v) == 0x76543210); 57 | 58 | BJIT_ASSERT(module.getPointer(3)(0x2ff) == ~0ull); 59 | BJIT_ASSERT(module.getPointer(4)(0x2ffff) == ~0ull); 60 | BJIT_ASSERT(module.getPointer(5)(0x2ffffffff) == ~0ull); 61 | 62 | return 0; 63 | } 64 | -------------------------------------------------------------------------------- /win/README.md: -------------------------------------------------------------------------------- 1 | These files are used to support `make` on Windows. 2 | -------------------------------------------------------------------------------- /win/mkdir-p.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | setlocal enableextensions 3 | if not exist %1 mkdir %1 4 | endlocal 5 | -------------------------------------------------------------------------------- /win/rm-rf.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | setlocal enableextensions 3 | if exist %1 rmdir /S /Q %1 4 | endlocal 5 | --------------------------------------------------------------------------------