├── .gitignore
├── Makefile
├── README.md
├── dump-bin-arm.sh
├── dump-bin-x64.sh
├── front
    ├── bjit.cpp
    ├── front-ast.h
    ├── front-lexer.cpp
    ├── front-lexer.h
    ├── front-parse.cpp
    └── front-parse.h
├── run-tests.sh
├── src
    ├── arch-arm64-asm.h
    ├── arch-arm64-emit.cpp
    ├── arch-arm64-ops.cpp
    ├── arch-arm64.h
    ├── arch-x64-asm.h
    ├── arch-x64-emit.cpp
    ├── arch-x64-ops.cpp
    ├── arch-x64.h
    ├── bjit-impl.h
    ├── bjit.h
    ├── debug.cpp
    ├── hash.h
    ├── ir-ops.cpp
    ├── ir-ops.h
    ├── module.cpp
    ├── opt-cse.cpp
    ├── opt-dce.cpp
    ├── opt-dom.cpp
    ├── opt-fold.cpp
    ├── opt-jump.cpp
    ├── opt-ra.cpp
    ├── opt-reassoc.cpp
    ├── opt-sink.cpp
    └── sanity.cpp
├── tests
    ├── test_add_ff.cpp
    ├── test_add_ii.cpp
    ├── test_call_stub.cpp
    ├── test_calln.cpp
    ├── test_callp.cpp
    ├── test_ci2f_cf2i.cpp
    ├── test_divmod.cpp
    ├── test_fib.cpp
    ├── test_fuzzfold.cpp
    ├── test_load_store.cpp
    ├── test_loop.cpp
    ├── test_mem_opt.cpp
    ├── test_shift.cpp
    ├── test_sieve.cpp
    ├── test_sub_ii.cpp
    └── test_sx_zx.cpp
└── win
    ├── README.md
    ├── mkdir-p.bat
    └── rm-rf.bat


/.gitignore:
--------------------------------------------------------------------------------
 1 | *
 2 | !.gitignore
 3 | !Makefile
 4 | !*.md
 5 | !/src
 6 | !/tests
 7 | !/front
 8 | !**/*.h
 9 | !**/*.c
10 | !**/*.cpp
11 | 
12 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
  1 | 
  2 | # No default rules
  3 | .SUFFIXES:
  4 | 
  5 | # Generic compilation flags, both C and C++
  6 | CFLAGS := -Isrc -g -ferror-limit=5 #-fsanitize=address
  7 | CFLAGS += -Ofast -fomit-frame-pointer
  8 | CFLAGS += -Wall -Werror -Wfloat-conversion -Wno-unused-function
  9 | 
 10 | # C++ specific flags
 11 | CXXFLAGS := -std=c++11 -fno-exceptions
 12 | 
 13 | # this is just so local.make can add flags
 14 | LINKFLAGS := #-fsanitize=address
 15 | 
 16 | # if local.make exists, then include it for local configuration
 17 | -include local.make
 18 | 
 19 | BJIT_BINDIR ?= bin
 20 | BJIT_BUILDDIR ?= build
 21 | 
 22 | # We assume clang on all platforms
 23 | BJIT_USE_CC ?= clang
 24 | CC := $(BJIT_USE_CC)
 25 | 
 26 | TARGET := bjit
 27 | 
 28 | # FIXME: Windows
 29 | BINEXT :=
 30 | 
 31 | # Windows specific
 32 | ifeq ($(OS),Windows_NT)
 33 |     LIBRARY := $(BJIT_BUILDDIR)/$(TARGET).lib
 34 | 
 35 |     MAKEDIR := win\mkdir-p.bat
 36 |     BJIT_LINKLIB ?= llvm-lib $(LINKFLAGS) /out:$(LIBRARY)
 37 |     CLEANALL := win\rm-rf.bat $(BJIT_BUILDDIR) && win\rm-rf.bat $(BJIT_BINDIR)
 38 | 
 39 |     # Link flags
 40 |     BJIT_LINKFLAGS ?= $(LINKFLAGS) $(LIBRARY)
 41 | 
 42 |     CFLAGS += -D_CRT_SECURE_NO_WARNINGS
 43 |     
 44 |     BINEXT := .exe
 45 | 
 46 | else
 47 |     LIBRARY := $(BJIT_BUILDDIR)/$(TARGET).a
 48 | 
 49 |     BJIT_LINKFLAGS ?= $(LINKFLAGS) $(LIBRARY) -lc++
 50 | 
 51 |     MAKEDIR := mkdir -p
 52 |     CLEANALL := rm -rf $(BJIT_BUILDDIR) $(BJIT_BINDIR)
 53 |     BJIT_LINKLIB ?= libtool -static -o $(LIBRARY)
 54 | 
 55 |     ifeq ($(shell uname),Darwin)
 56 |         BJIT_LINKLIB += -no_warning_for_no_symbols
 57 |     endif
 58 |     
 59 | endif
 60 | 
 61 | # this works with clang on Windows too
 62 | BJIT_LINKBIN ?= $(CC)
 63 | 
 64 | # Automatically figure out source files
 65 | LIB_SOURCES := $(wildcard src/*.cpp)
 66 | 
 67 | LIB_OBJECTS := $(patsubst %,$(BJIT_BUILDDIR)/%.o,$(LIB_SOURCES))
 68 | DEPENDS := $(LIB_OBJECTS:.o=.d)
 69 | 
 70 | # Front-end
 71 | FRONTEND := $(BJIT_BINDIR)/$(TARGET)$(BINEXT)
 72 | FRONT_OBJECTS := $(patsubst %,$(BJIT_BUILDDIR)/%.o,$(wildcard front/*.cpp))
 73 | DEPENDS += $(FRONT_OBJECTS:.o=.d)
 74 | 
 75 | # automatic target generation for any .cpp files in tests/
 76 | define TestTarget
 77 |  DEPENDS += $(patsubst %,$(BJIT_BUILDDIR)/%.d,$1)
 78 |  $(BJIT_BINDIR)/$(patsubst tests/%.cpp,%,$1)$(BINEXT): $(LIBRARY) \
 79 |   $(patsubst %,$(BJIT_BUILDDIR)/%.o,$1)
 80 | 	@echo LINK $$@
 81 | 	@$(MAKEDIR) "$(BJIT_BINDIR)"
 82 | 	@$(BJIT_LINKBIN) -o $$@ $(patsubst %,$(BJIT_BUILDDIR)/%.o,$1) $(BJIT_LINKFLAGS)
 83 | endef
 84 | 
 85 | TESTS_CPP := $(wildcard tests/*.cpp)
 86 | TESTS := $(patsubst tests/%.cpp,$(BJIT_BINDIR)/%$(BINEXT),$(TESTS_CPP))
 87 | 
 88 | .PHONY: all test clean
 89 | 
 90 | all: $(LIBRARY) $(FRONTEND)
 91 | 	@echo DONE
 92 | 
 93 | test: all $(TESTS)
 94 | 	@echo Running tests with output to 'test.out'
 95 | 	@/bin/bash -e ./run-tests.sh > test.out 2>&1 || /bin/bash -e ./run-tests.sh
 96 | 	@echo Tests done.
 97 |     
 98 | clean:
 99 | 	@$(CLEANALL)
100 | 	@echo Removed '$(BJIT_BUILDDIR)' and '$(BJIT_BINDIR)'
101 | 
102 | $(foreach i,$(TESTS_CPP),$(eval $(call TestTarget,$(i))))
103 | 
104 | $(FRONTEND): $(FRONT_OBJECTS) $(LIBRARY)
105 | 	@echo LINK $@
106 | 	@$(MAKEDIR) "$(BJIT_BINDIR)"
107 | 	@$(BJIT_LINKBIN) -o $@ $(FRONT_OBJECTS) $(BJIT_LINKFLAGS)
108 | 
109 | $(LIBRARY): $(LIB_OBJECTS)
110 | 	@echo LIB $@
111 | 	@$(MAKEDIR) "$(dir $@)"
112 | 	@$(BJIT_LINKLIB) $(LIB_OBJECTS)
113 | 
114 | $(BJIT_BUILDDIR)/%.c.o: %.c
115 | 	@echo CC $<
116 | 	@$(MAKEDIR) "$(dir $@)"
117 | 	@$(CC) -MMD -MP $(CFLAGS) -c $< -o $@
118 | 
119 | $(BJIT_BUILDDIR)/%.cpp.o: %.cpp
120 | 	@echo CC $<
121 | 	@$(MAKEDIR) "$(dir $@)"
122 | 	@$(CC) -MMD -MP $(CFLAGS) $(CXXFLAGS) -c $< -o $@
123 | 
124 | -include $(DEPENDS)
125 | 


--------------------------------------------------------------------------------
/dump-bin-arm.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | gobjdump -maarch64 -d -D -b binary out.bin
3 | 


--------------------------------------------------------------------------------
/dump-bin-x64.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | gobjdump --insn-width=16 -mi386:x86-64:intel -d -D -b binary out.bin
3 | 


--------------------------------------------------------------------------------
/front/bjit.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "bjit.h"
 3 | 
 4 | #include "front-parse.h"
 5 | 
 6 | int main()
 7 | {
 8 |     std::vector<uint8_t> codeOut;
 9 | 
10 |     bjit::parse(codeOut);
11 | 
12 |     if(codeOut.size())
13 |     {
14 |         FILE * f = fopen("out.bin", "wb");
15 |         fwrite(codeOut.data(), 1, codeOut.size(), f);
16 |         fclose(f);
17 |         
18 |         BJIT_LOG(" - Wrote out.bin\n");
19 |         return 0;
20 |     }
21 |     else
22 |     {
23 |         return 1;   // probably syntax errors
24 |     }
25 | }


--------------------------------------------------------------------------------
/front/front-lexer.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "front-parse.h"
  3 | #include "front-lexer.h"
  4 | 
  5 | #include <cmath>
  6 | 
  7 | using namespace bjit;
  8 | 
  9 | static bool isDigit(int ch)
 10 | {
 11 |     return ch >= '0' && ch <= '9';
 12 | }
 13 | 
 14 | static bool isSymbolChar(int ch)
 15 | {
 16 |     return (ch >= 'a' && ch <= 'z')
 17 |         || (ch >= 'A' && ch <= 'Z')
 18 |         || (ch >= '0' && ch <= '9')
 19 |         || (ch == '_')
 20 |         ;
 21 | }
 22 | 
 23 | static void eatSpace(Parser & ps)
 24 | {
 25 |     while(true)
 26 |     {
 27 |         int ch = ps.peek();
 28 | 
 29 |         // anything from # to end of line is a comment
 30 |         if(ch == '#')
 31 |         {
 32 |             while(ps.peek() != '\n') ps.consume();
 33 |             continue;
 34 |         }
 35 | 
 36 |         // break if this is not one of the whitespace characters
 37 |         if(ch != ' '
 38 |         && ch != '\n'
 39 |         && ch != '\r'
 40 |         && ch != '\t') break;
 41 | 
 42 |         ps.consume();
 43 |     }
 44 | }
 45 | 
 46 | static void lexNumber(Parser & ps, bool leadingDot)
 47 | {
 48 |     bool valid = false; // do we have at least one digit
 49 | 
 50 |     // if there are too many digits, then we want
 51 |     // different behavior depending on int/float
 52 |     // so collect first, until we know what to do
 53 |     std::vector<char>   mantissaDigits;
 54 | 
 55 |     // this is only relevant for integers
 56 |     int base = 10;
 57 | 
 58 |     // this checks for hex contants
 59 |     if(!leadingDot && ps.peek() == '0')
 60 |     {
 61 |         // integers are octals
 62 |         ps.consume();
 63 | 
 64 |         base = 8;
 65 | 
 66 |         if(ps.peek() == 'x' || ps.peek() == 'X')
 67 |         {
 68 |             base = 16;
 69 | 
 70 |             while(true)
 71 |             {
 72 |                 ps.consume();
 73 |                 int ch = ps.peek();
 74 |                 
 75 |                 if(isDigit(ch))
 76 |                 {
 77 |                     mantissaDigits.push_back(ch - '0');
 78 |                     continue;
 79 |                 }
 80 | 
 81 |                 if(ch >= 'a' && ch <= 'f')
 82 |                 {
 83 |                     mantissaDigits.push_back(ch + 10 - 'a');
 84 |                     continue;
 85 |                 }
 86 |                 
 87 |                 if(ch >= 'A' && ch <= 'F')
 88 |                 {
 89 |                     mantissaDigits.push_back(ch + 10 - 'A');
 90 |                     continue;
 91 |                 }
 92 |                 
 93 |                 break;
 94 |             }
 95 |         }
 96 |         else
 97 |         {
 98 |             mantissaDigits.push_back(0);
 99 |         }
100 |     }
101 | 
102 |     // if this is not hex, parse digits
103 |     if(!leadingDot && base < 16)
104 |     {
105 |         while(true)
106 |         {
107 |             int ch = ps.peek();
108 |             if(!isDigit(ch)) break;
109 |     
110 |             mantissaDigits.push_back(ch - '0');
111 |             ps.consume();
112 |         }
113 |     }
114 | 
115 |     if(mantissaDigits.size()) valid = true;
116 | 
117 |     // do we have a decimal point or exponent?
118 |     if((base < 16)
119 |     && (leadingDot || ps.peek() == '.' || ps.peek() == 'e' || ps.peek() == 'E'))
120 |     {
121 |         double m = 0, d = 1;
122 |         // collect the integer part of mantissa
123 |         for(auto & d : mantissaDigits) { m = 10*m + d; }
124 | 
125 |         // handle digits after decimal point if any
126 |         if(leadingDot || ps.peek() == '.')
127 |         {
128 |             if(!leadingDot) ps.consume();
129 |             while(true)
130 |             {
131 |                 int ch = ps.peek();
132 |                 if(!isDigit(ch)) break;
133 | 
134 |                 m = 10*m + (ch - '0');
135 |                 d = 10*d;
136 | 
137 |                 valid = true;
138 | 
139 |                 ps.consume();
140 |             }
141 | 
142 |             // if there are no digits around,
143 |             // then a dot is a binary operator
144 |             if(!valid)
145 |             {
146 |                 ps.token.type = Token::Tdot;
147 |                 return;
148 |             }
149 | 
150 |             // fix the decimal point
151 |             m /= d;
152 |         }
153 | 
154 |         double e = 0;
155 |         if(ps.peek() == 'e' || ps.peek() == 'E')
156 |         {
157 |             ps.consume();
158 | 
159 |             // get sign of exponent
160 |             bool negate = false;
161 |             if(ps.peek() == '-') negate = true;
162 |             if(negate || ps.peek() == '+') ps.consume();
163 | 
164 |             // consume digits
165 |             while(true)
166 |             {
167 |                 int ch = ps.peek();
168 |                 if(!isDigit(ch)) break;
169 | 
170 |                 e = 10*e + (ch - '0');
171 |                 ps.consume();
172 |             }
173 | 
174 |             // apply exponent to mantissa
175 |             m *= pow(10., negate ? -e : e);
176 |         }
177 | 
178 |         ps.token.type = Token::Tfloat;
179 |         ps.token.vFloat = m;
180 |     }
181 |     else
182 |     {
183 |         
184 |         // plain old integer
185 |         int64_t i = 0;
186 | 
187 |         // here we use variable base
188 |         for(auto & d : mantissaDigits)
189 |         {
190 |             if(d >= base) valid = false;
191 |             i = base*i + d;
192 |         }
193 | 
194 |         ps.token.type = Token::Tint;
195 |         ps.token.vInt = i;
196 | 
197 |         if(ps.peek() == 'U' || ps.peek() == 'u')
198 |         {
199 |             ps.token.type = Token::Tuint;
200 |             ps.consume();
201 |         }
202 | 
203 |         // this can happen with 0x without digits
204 |         // or when octal has digits 8 or 9
205 |         if(!valid)
206 |         {
207 |             ps.errorAt(ps.token, "invalid numeric literal");
208 | 
209 |             ps.token.type = Token::Terror;
210 |         }
211 |     }
212 | 
213 |     // check that there isn't trailing garbage
214 |     if(isSymbolChar(ps.peek()))
215 |     {
216 |         ps.errorAt(ps.token, "invalid character in numeric literal");
217 | 
218 |         // eat it all, so we can try to continue
219 |         while(isSymbolChar(ps.peek())) ps.consume();
220 |     }
221 | }
222 | 
// Reserved words and the token type each one lexes to.
// lexSymbol() scans this table linearly and stops at the entry
// whose str is null, so that entry must stay last.
static struct {
    const char * str;       // keyword spelling, null for the end marker
    Token::Type  ttype;     // token type produced for this keyword
} keywords[] = {
    { "if",         Token::Tif          },
    { "else",       Token::Telse        },

    { "while",      Token::Twhile       },
    { "break",      Token::Tbreak       },
    { "continue",   Token::Tcontinue    },

    { "return",     Token::Treturn      },

    { 0, Token::Terror } // end of list marker
};
238 | 
239 | static void lexSymbol(Parser & ps)
240 | {
241 |     std::vector<char>   symbol;
242 | 
243 |     while(true)
244 |     {
245 |         int ch = ps.peek();
246 |         if(!isSymbolChar(ch)) break;
247 | 
248 |         symbol.push_back(ch);
249 |         ps.consume();
250 |     }
251 | 
252 |     if(!symbol.size())
253 |     {
254 |         ps.errorAt(ps.token, "invalid syntax");
255 |         ps.consume();
256 |         return;
257 |     }
258 | 
259 |     symbol.push_back(0);    // null-termination
260 |     
261 |     ps.token.type = Token::Tsymbol;
262 | 
263 |     for(int i = 0; keywords[i].str; ++i)
264 |     {
265 |         if(!strcmp(symbol.data(), keywords[i].str))
266 |         {
267 |             ps.token.type = keywords[i].ttype;
268 |             return;
269 |         }
270 |     }
271 | 
272 |     // if we didn't match a keyboard, intern symbol
273 |     SymbolPtr * sptr = ps.symbols.find(symbol);
274 |     if(sptr) ps.token.symbol = sptr->ptr.get();
275 |     else
276 |     {
277 |         ps.token.symbol = new Symbol;
278 |         std::swap(ps.token.symbol->string, symbol);
279 |         
280 |         SymbolPtr newSym;
281 |         newSym.ptr.reset(ps.token.symbol);
282 |         ps.symbols.insert(newSym);
283 |     }
284 | }
285 | 
286 | void bjit::lexToken(Parser & ps)
287 | {
288 |     eatSpace(ps);
289 | 
290 |     ps.token.posChar = ps.posChar;
291 |     ps.token.posLine = ps.posLine;
292 | 
293 |     ps.token.type = Token::Terror;
294 |     
295 |     switch(int ch = ps.peek())
296 |     {
297 |     case EOF: ps.token.type = Token::Teof; return;
298 | 
299 |     case '!':
300 |         ps.consume();
301 |         switch(ps.peek())
302 |         {
303 |         case '=': ps.token.type = Token::TnotEq; ps.consume(); return;
304 |         default: ps.token.type = Token::TlogNot; return;
305 |         }
306 |     case '<':
307 |         ps.consume();
308 |         switch(ps.peek())
309 |         {
310 |         case '=': ps.token.type = Token::TlessEq; ps.consume(); return;
311 |         case '<': ps.token.type = Token::TshiftL; ps.consume(); return;
312 |         default: ps.token.type = Token::Tless; return;
313 |         }
314 |     case '>':
315 |         ps.consume();
316 |         switch(ps.peek())
317 |         {
318 |         case '=': ps.token.type = Token::TgreaterEq; ps.consume(); return;
319 |         case '>': ps.token.type = Token::TshiftR; ps.consume(); return;
320 |         default: ps.token.type = Token::Tgreater; return;
321 |         }
322 |     case '=':
323 |         ps.consume();
324 |         switch(ps.peek())
325 |         {
326 |         case '=': ps.token.type = Token::Teq; ps.consume(); return;
327 |         default: ps.token.type = Token::Tassign; return;
328 |         }
329 | 
330 |     case '{': ps.token.type = Token::ToBlock; ps.consume(); return;
331 |     case '}': ps.token.type = Token::TcBlock; ps.consume(); return;
332 | 
333 |     case '[': ps.token.type = Token::ToIndex; ps.consume(); return;
334 |     case ']': ps.token.type = Token::TcIndex; ps.consume(); return;
335 |     
336 |     case '(': ps.token.type = Token::ToParen; ps.consume(); return;
337 |     case ')': ps.token.type = Token::TcParen; ps.consume(); return;
338 | 
339 |     case '+': ps.token.type = Token::Tadd; ps.consume(); return;
340 |     case '-': ps.token.type = Token::Tsub; ps.consume(); return;
341 | 
342 |     case '*': ps.token.type = Token::Tmul; ps.consume(); return;
343 |     case '/': ps.token.type = Token::Tdiv; ps.consume(); return;
344 |     case '%': ps.token.type = Token::Tmod; ps.consume(); return;
345 | 
346 |     case '~': ps.token.type = Token::TbitNot; ps.consume(); return;
347 |     case '^': ps.token.type = Token::TbitXor; ps.consume(); return;
348 |     
349 |     case '&':
350 |         ps.consume();
351 |         switch(ps.peek())
352 |         {
353 |         case '&': ps.token.type = Token::TlogAnd; ps.consume(); return;
354 |         default: ps.token.type = Token::TbitAnd; return;
355 |         }
356 |     case '|':
357 |         ps.consume();
358 |         switch(ps.peek())
359 |         {
360 |         case '|': ps.token.type = Token::TlogOr; ps.consume(); return;
361 |         default: ps.token.type = Token::TbitOr; return;
362 |         }
363 | 
364 |     case '.':
365 |         ps.consume();
366 |         if(isDigit(ps.peek())) lexNumber(ps, true);
367 |         else ps.token.type = Token::Tdot;
368 |         return;
369 |     case ':':
370 |         ps.consume();
371 |         switch(ps.peek())
372 |         {
373 |         case '=': ps.token.type = Token::Tdefine; ps.consume(); return;
374 |         default: ps.token.type = Token::Tcolon; return;
375 |         }
376 |         
377 |     case ',': ps.token.type = Token::Tcomma; ps.consume(); return;
378 |     case ';': ps.token.type = Token::Tsemicolon; ps.consume(); return;
379 |     default:
380 |         if(isDigit(ps.peek())) lexNumber(ps, false);
381 |         else lexSymbol(ps);
382 |         return;
383 |     }
384 | 
385 | }


--------------------------------------------------------------------------------
/front/front-lexer.h:
--------------------------------------------------------------------------------
  1 | 
  2 | #pragma once
  3 | 
#include <cstdint>
#include <cstring>
#include <vector>
  6 | 
namespace bjit
{
    
    // An identifier as it appears in the source.
    // The lexer stores the characters including a terminating zero.
    struct Symbol
    {
        std::vector<char>   string;
    };
    
    // lexer tokens
    //
    // Besides the tokens produced by the lexer itself, the Type enum also
    // holds pseudo-token types that only the parser creates.
    struct Token
    {
        enum Type
        {
            Teof,       // end of file

            // literals
            Tint,
            Tuint,
            Tfloat,

            Tsymbol,

            // keywords
            Tif,
            Telse,

            Twhile,
            Tbreak,
            Tcontinue,

            Treturn,

            // operators - only one use-case is defined here
            // typically this should be the binary operator
            
            ToParen,    // (
            TcParen,    // )

            ToIndex,    // [
            TcIndex,    // ]

            ToBlock,    // {
            TcBlock,    // }

            Tdot,       // .
            Tcolon,     // :
            
            Tcomma,     // ,
            Tsemicolon, // ;

            Tadd,       // +
            Tsub,       // -

            Tmul,       // *
            Tdiv,       // /
            Tmod,       // %

            TshiftL,    // <<
            TshiftR,    // >>

            TbitOr,     // |
            TbitAnd,    // &
            TbitXor,    // ^
            TbitNot,    // ~

            TlogNot,    // !
            TlogAnd,    // &&
            TlogOr,     // ||

            Tassign,    // =
            Tdefine,    // :=

            Teq,        // ==
            TnotEq,     // !=
            Tless,      // <
            TlessEq,    // <=
            Tgreater,   // >
            TgreaterEq, // >=

            // pseudo-token types for the parser
            // these are mostly alternatives to the above
            Tpos,       // unary +
            Tneg,       // unary -

            Tfuncall,   // opening paren for function calls

            TifBody,    // Tif after condition is done
            TwhileBody, // Twhile after condition is done

            Terror      // invalid token
            
        } type;

        // source position of the token, copied from the parser's
        // posChar / posLine when lexing of the token starts
        int posChar;
        int posLine;

        // payload; which member is meaningful depends on the token type
        union
        {
            int64_t     vInt;   // value of Tint / Tuint literals
            double      vFloat; // value of Tfloat literals

            int32_t     nArgs;  // for funcalls

            Symbol *    symbol; // interned symbol for Tsymbol
        };
    };
}


--------------------------------------------------------------------------------
/front/front-parse.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "front-parse.h"
  3 | #include "front-ast.h"
  4 | 
  5 | using namespace bjit;
  6 | 
// precedence classes, order from high to low
//
// A smaller enum value binds tighter: reduce() emits deferred operators
// whose class value is less than or equal to the requested one.
//
// NOTE: parens are logically in the highest precedence class
// but we treat them as the lowest internally, so that we can
// reduce everything up to the opening paren before checking match
enum Precedence
{
    P_unary,    // unary operators
    
    P_product,  // mul / div / mod
    P_sum,      // add / sub
    P_shift,    // bitshifts

    P_compare,  // relative comparisons
    P_equal,    // equality comparisons

    P_bitAnd,   // bitwise and
    P_bitXor,   // bitwise xor
    P_bitOr,    // bitwise or

    P_logAnd,   // logical and
    P_logOr,    // logical or

    P_assign,   // assignments
    
    P_comma,    // comma

    P_flow      // parens, control flow constructs
};
 36 | 
 37 | static int getPrecede(Token const & t)
 38 | {
 39 |     switch(t.type)
 40 |     {
 41 |     case Token::Tpos: case Token::Tneg:
 42 |         return P_unary;
 43 |         
 44 |     case Token::Tmul: case Token::Tdiv: case Token::Tmod:
 45 |         return P_product;
 46 |         
 47 |     case Token::Tadd: case Token::Tsub:
 48 |         return P_sum;
 49 | 
 50 |     case Token::TshiftL: case Token::TshiftR:
 51 |         return P_shift;
 52 | 
 53 |     case Token::Tless: case Token::TlessEq:
 54 |     case Token::Tgreater: case Token::TgreaterEq:
 55 |         return P_compare;
 56 | 
 57 |     case Token::Teq: case Token::TnotEq:
 58 |         return P_equal;
 59 | 
 60 |     case Token::TbitAnd: return P_bitAnd;
 61 |     case Token::TbitXor: return P_bitXor;
 62 |     case Token::TbitOr: return P_bitOr;
 63 |     case Token::TlogAnd: return P_logAnd;
 64 |     case Token::TlogOr: return P_logOr;
 65 |     case Token::Tassign: case Token::Tdefine: return P_assign;
 66 |     case Token::Tcomma: return P_comma;
 67 | 
 68 |     case Token::ToParen: case Token::ToBlock: case Token::ToIndex:
 69 |     case Token::Tif: case Token::TifBody: case Token::Telse:
 70 |     case Token::Twhile: case Token::TwhileBody:
 71 |     case Token::Tfuncall: case Token::Treturn:
 72 |         return P_flow;
 73 | 
 74 |     default: BJIT_LOG("TT: %d\n", t.type); assert(false); return 0;
 75 |     }
 76 | 
 77 | }
 78 | 
 79 | static void defer(Parser & ps)
 80 | {
 81 |     ps.defer.push_back(ps.token);
 82 | }
 83 | 
 84 | static void deferAs(Parser & ps, Token::Type t)
 85 | {
 86 |     ps.token.type = t;
 87 |     ps.defer.push_back(ps.token);
 88 | }
 89 | 
 90 | static void fragment(Parser & ps, Token const & t)
 91 | {
 92 |     switch(t.type)
 93 |     {
 94 |     case Token::Tint: ps.frags.emplace_back(new EConst(t)); break;
 95 |     case Token::Tuint: ps.frags.emplace_back(new EConst(t)); break;
 96 |     case Token::Tfloat: ps.frags.emplace_back(new EConst(t)); break;
 97 |     case Token::Tsymbol: ps.frags.emplace_back(new ESymbol(t)); break;
 98 | 
 99 |     case Token::Tadd: ps.frags.emplace_back(new EBinary(t, ps.frags)); break;
100 |     case Token::Tsub: ps.frags.emplace_back(new EBinary(t, ps.frags)); break;
101 |     case Token::Tmul: ps.frags.emplace_back(new EBinary(t, ps.frags)); break;
102 |     case Token::Tdiv: ps.frags.emplace_back(new EBinary(t, ps.frags)); break;
103 |     case Token::Tmod: ps.frags.emplace_back(new EBinary(t, ps.frags)); break;
104 | 
105 |     case Token::TshiftL: ps.frags.emplace_back(new EBinary(t, ps.frags)); break;
106 |     case Token::TshiftR: ps.frags.emplace_back(new EBinary(t, ps.frags)); break;
107 | 
108 |     case Token::TbitOr: ps.frags.emplace_back(new EBinary(t, ps.frags)); break;
109 |     case Token::TbitAnd: ps.frags.emplace_back(new EBinary(t, ps.frags)); break;
110 |     case Token::TbitXor: ps.frags.emplace_back(new EBinary(t, ps.frags)); break;
111 |     
112 |     case Token::TbitNot: ps.frags.emplace_back(new EUnary(t, ps.frags)); break;
113 |     case Token::TlogNot: ps.frags.emplace_back(new EUnary(t, ps.frags)); break;
114 |     
115 |     case Token::TlogAnd: ps.frags.emplace_back(new EBinary(t, ps.frags)); break;
116 |     case Token::TlogOr: ps.frags.emplace_back(new EBinary(t, ps.frags)); break;
117 |     
118 |     case Token::Tassign: ps.frags.emplace_back(new EBinary(t, ps.frags)); break;
119 |     case Token::Tdefine: ps.frags.emplace_back(new EDefine(t, ps.frags)); break;
120 | 
121 |     case Token::Teq: ps.frags.emplace_back(new EBinary(t, ps.frags)); break;
122 |     case Token::TnotEq: ps.frags.emplace_back(new EBinary(t, ps.frags)); break;
123 | 
124 |     case Token::Tless: ps.frags.emplace_back(new EBinary(t, ps.frags)); break;
125 |     case Token::TlessEq: ps.frags.emplace_back(new EBinary(t, ps.frags)); break;
126 |     case Token::Tgreater: ps.frags.emplace_back(new EBinary(t, ps.frags)); break;
127 |     case Token::TgreaterEq: ps.frags.emplace_back(new EBinary(t, ps.frags)); break;
128 | 
129 |     case Token::Tpos: ps.frags.emplace_back(new EUnary(t, ps.frags)); break;
130 |     case Token::Tneg: ps.frags.emplace_back(new EUnary(t, ps.frags)); break;
131 | 
132 |     case Token::Treturn: ps.frags.emplace_back(new EReturn(t, ps.frags)); break;
133 |     case Token::Tfuncall: ps.frags.emplace_back(new ECall(t, ps.frags)); break;
134 | 
135 |     case Token::ToIndex: ps.frags.emplace_back(new EBinary(t, ps.frags)); break;
136 | 
137 |     case Token::TifBody: ps.frags.emplace_back(new EIf(t, ps.frags, false)); break;
138 |     case Token::Telse: ps.frags.emplace_back(new EIf(t, ps.frags, true)); break;
139 | 
140 |     case Token::TwhileBody: ps.frags.emplace_back(new EWhile(t, ps.frags)); break;
141 |     case Token::Tbreak: ps.frags.emplace_back(new EBreak(t)); break;
142 |     case Token::Tcontinue: ps.frags.emplace_back(new EContinue(t)); break;
143 |     case Token::ToBlock: ps.frags.emplace_back(new EBlock(t, ps.frags)); break;
144 | 
145 |     default: BJIT_LOG("TT: %d\n", t.type); assert(false);
146 |     }
147 |     
148 | }
149 | 
150 | void reduce(Parser & ps, int precede)
151 | {
152 |     // then reduce everything at current or higher priority
153 |     while(ps.defer.size())
154 |     {
155 |         auto & t = ps.defer.back();
156 |         if(getPrecede(t) > precede) return;
157 | 
158 |         fragment(ps, ps.defer.back());
159 |         ps.defer.pop_back();
160 |     }
161 | }
162 | 
163 | // forward declare all states
164 | static void psStatement(Parser & ps);       // function top-level statement
165 | static void psStatementEnd(Parser & ps);    // only ending ; accepted
166 | static void psMaybeAssign(Parser & ps);     // expect variable value or comma
167 | static void psInfix(Parser & ps);           // infix operators
168 | static void psExpr(Parser & ps);            // expression
169 | static void psCondition(Parser & ps);       // if/while condition context
170 | static void psMaybeElse(Parser & ps);       // statement or else for an if
171 | 
// Parse a whole program and compile it.
//
// Tokens are pulled from the lexer one at a time and handed to the current
// parser state function until end of input. All resulting fragments are
// wrapped into a single block, which is type-checked, dumped for debugging
// and, if no errors were recorded, turned into code.
//
// codeOut is passed to Proc::compile() to receive the result; when errors
// were recorded (ps.nErrors) this function returns before compiling and
// does not touch codeOut.
void bjit::parse(std::vector<uint8_t> & codeOut)
{
    Parser ps;

    ps.state = psStatement;
    
    while(true)
    {
        lexToken(ps);

        if(ps.token.type == Token::Teof)
        {
            // FIXME: this should be removed once statements go into functions
            // (flushes 'if' statements still waiting for a possible 'else')
            while(ps.defer.size() && ps.defer.back().type == Token::TifBody)
            {
                fragment(ps, ps.defer.back()); ps.defer.pop_back();
            }
            break;
        }
        // skip tokens the lexer could not make sense of
        if(ps.token.type == Token::Terror) continue;

        ps.state(ps);
    }

    // create Proc for the initial env-size
    Proc p(0, "");
    Env env(p.env.size());

    // wrap all expressions into a block
    // (nArgs tells the block how many fragments it contains)
    ps.token.type = Token::ToBlock;
    ps.token.nArgs = ps.frags.size();
    fragment(ps, ps.token);

    assert(ps.frags.size() == 1);
    auto & ast = ps.frags.back();
    ast->typecheck(ps, env);
    ast->debug(0);
    BJIT_LOG("\n");

    // don't generate code for a program with errors
    if(ps.nErrors) return;

    CodeGen cg(p);

    ast->codeGen(cg);

    // always force return
    p.iret(p.lci(0));
    p.debug();

    BJIT_LOG("-- Compiling:\n");
    // NOTE(review): meaning of the second argument (2) is defined by
    // Proc::compile, which is not visible here - confirm
    p.compile(codeOut, 2);
}
224 | 
225 | // helper to figure out what to do with statements
226 | // we'll need this also from psStatement once we add blocks
227 | static void reduceStatement(Parser & ps)
228 | {
229 |     ps.state = psStatement; // default to statement
230 |     
231 |     while(ps.defer.size())
232 |     {
233 |         switch(ps.defer.back().type)
234 |         {
235 |         // for these we've completed the control-flow construct
236 |         case Token::Telse:
237 |         case Token::TwhileBody:
238 |         case Token::Treturn:
239 |             fragment(ps, ps.defer.back());
240 |             ps.defer.pop_back();
241 |             // only break, don't return, we might need to reduce more
242 |             break;
243 | 
244 |         case Token::TifBody: ps.state = psMaybeElse; return;
245 |         case Token::ToBlock: ++ps.defer.back().nArgs; return;
246 | 
247 |         default:
248 |             ps.errorAt(ps.token, "unexpected ';'");
249 |             ps.errorAt(ps.defer.back(), "incomplete expression here");
250 |             
251 |             // reset state to avoid error cascade
252 |             while(getPrecede(ps.defer.back()) < P_flow)
253 |                 ps.defer.pop_back();
254 |             return;
255 |         }
256 |     }
257 | }
258 | 
259 | static void psStatement(Parser & ps)
260 | {
261 |     switch(ps.token.type)
262 |     {
263 |     case Token::TcBlock:
264 |         if(!ps.defer.size() || ps.defer.back().type != Token::ToBlock)
265 |         {
266 |             ps.errorAt(ps.token, "unexpected '}'"); return;
267 |         }
268 |         fragment(ps, ps.defer.back()); ps.defer.pop_back();
269 |         reduceStatement(ps);
270 |         break;
271 |         
272 |     case Token::ToBlock: ps.token.nArgs = 0; defer(ps); break;
273 |     case Token::Tif: defer(ps); ps.state = psCondition; break;
274 |     case Token::Twhile: defer(ps); ps.state = psCondition; break;
275 |     case Token::Treturn: defer(ps); ps.state = psExpr; break;
276 | 
277 |     case Token::Tbreak:
278 |         {
279 |             // sanity check that we are in a valid context
280 |             bool good = false;
281 |             for(auto & t : ps.defer)
282 |             {
283 |                 if(t.type == Token::TwhileBody) good = true;
284 |                 if(good) break;
285 |             }
286 | 
287 |             if(!good) ps.errorAt(ps.token, "'break' not within a loop");
288 |             else fragment(ps, ps.token);
289 |         }
290 |         ps.state = psStatementEnd;
291 |         break;
292 |     
293 |     case Token::Tcontinue:  // separate in case we add other breakable constructs
294 |         {
295 |             // sanity check that we are in a valid context
296 |             bool good = false;
297 |             for(auto & t : ps.defer)
298 |             {
299 |                 if(t.type == Token::TwhileBody) good = true;
300 |                 if(good) break;
301 |             }
302 | 
303 |             if(!good) ps.errorAt(ps.token, "'continue' not within a loop");
304 |             else fragment(ps, ps.token);
305 |         }
306 |         ps.state = psStatementEnd;
307 |         break;
308 | 
309 |     case Token::Tsymbol: fragment(ps, ps.token); ps.state = psMaybeAssign; break;
310 |     
311 |     default: ps.state = psExpr; psExpr(ps); break;
312 |     }
313 | }
314 | 
315 | static void psStatementEnd(Parser & ps)
316 | {
317 |     reduceStatement(ps);
318 | 
319 |     if(ps.token.type != Token::Tsemicolon)
320 |     {
321 |         ps.errorAt(ps.token, "expected ';'");
322 |         // try to recover by pretending we had a semi
323 |         ps.state(ps);
324 |     }
325 | }
326 | 
327 | static void psMaybeAssign(Parser & ps)
328 | {
329 |     switch(ps.token.type)
330 |     {
331 |     case Token::Tassign:
332 |         reduce(ps, P_assign-1); defer(ps); ps.state = psExpr; break;
333 | 
334 |     case Token::Tdefine:
335 |         reduce(ps, P_assign-1); defer(ps); ps.state = psExpr; break;
336 | 
337 |     default: ps.state = psInfix; psInfix(ps); break;
338 |     }
339 | }
340 | 
341 | static void psInfix(Parser & ps)
342 | {
343 |     switch(ps.token.type)
344 |     {
345 |     case Token::ToParen:
346 |         ps.token.nArgs = 0;
347 |         deferAs(ps, Token::Tfuncall);
348 |         ps.state = psExpr; break;
349 |     
350 |     case Token::TcParen:
351 |         reduce(ps, P_flow-1);
352 |         if(!ps.defer.size())
353 |         {
354 |             ps.errorAt(ps.token, "mismatched ')'");
355 |             break;
356 |         }
357 |         switch(ps.defer.back().type)
358 |         {
359 |             case Token::ToParen:
360 |                 ps.defer.pop_back(); break;
361 | 
362 |             case Token::Tfuncall:
363 |                 ++ps.defer.back().nArgs;
364 |                 fragment(ps, ps.defer.back());
365 |                 ps.defer.pop_back(); break;
366 | 
367 |             case Token::Tif:    // is this a condition for if-statement?
368 |                 ps.defer.back().type = Token::TifBody;
369 |                 ps.state = psStatement;
370 |                 break;
371 |             case Token::Twhile: // is this a condition for while-statement?
372 |                 ps.defer.back().type = Token::TwhileBody;
373 |                 ps.state = psStatement;
374 |                 break;
375 | 
376 |             default:
377 |                 ps.errorAt(ps.token, "mismatched ')'");
378 |                 break;
379 |         }
380 |         break;
381 | 
382 |     case Token::Tcomma:
383 |         reduce(ps, P_comma);
384 |         // check that this is a context where comma is valid
385 |         switch(ps.defer.back().type)
386 |         {
387 |         case Token::Tfuncall: ++ps.defer.back().nArgs; ps.state = psExpr; break;
388 |         default: ps.errorAt(ps.token, "unexpected ','");
389 |         }
390 |         break;
391 | 
392 |     case Token::ToIndex: defer(ps); ps.state = psExpr; break;
393 |     
394 |     case Token::TcIndex:
395 |         reduce(ps, P_flow-1);
396 |         if(!ps.defer.size())
397 |         {
398 |             ps.errorAt(ps.token, "mismatched ']'");
399 |             break;
400 |         }
401 |         switch(ps.defer.back().type)
402 |         {
403 |             case Token::ToIndex:
404 |                 fragment(ps, ps.defer.back());
405 |                 ps.defer.pop_back(); break;
406 | 
407 |             default:
408 |                 ps.errorAt(ps.token, "mismatched ']'");
409 |                 break;
410 |         }
411 |         break;
412 |     
413 |     case Token::Tsemicolon:
414 |         reduce(ps, P_assign);
415 |         reduceStatement(ps);
416 |         break;
417 | 
418 |     case Token::Tadd:
419 |     case Token::Tsub:
420 |         reduce(ps, P_sum); defer(ps); ps.state = psExpr; break;
421 |         
422 |     case Token::Tmul:
423 |     case Token::Tdiv:
424 |     case Token::Tmod:
425 |         reduce(ps, P_product); defer(ps); ps.state = psExpr; break;
426 | 
427 |     case Token::TshiftL:
428 |     case Token::TshiftR:
429 |         reduce(ps, P_shift); defer(ps); ps.state = psExpr; break;
430 |     
431 |     case Token::TbitOr:
432 |         reduce(ps, P_bitOr); defer(ps); ps.state = psExpr; break;
433 |         
434 |     case Token::TbitAnd:
435 |         reduce(ps, P_bitAnd); defer(ps); ps.state = psExpr; break;
436 |         
437 |     case Token::TbitXor:
438 |         reduce(ps, P_bitXor); defer(ps); ps.state = psExpr; break;
439 |         
440 |     case Token::TlogOr:
441 |         reduce(ps, P_logOr); defer(ps); ps.state = psExpr; break;
442 |         
443 |     case Token::TlogAnd:
444 |         reduce(ps, P_logAnd); defer(ps); ps.state = psExpr; break;
445 | 
446 |     case Token::Tless:
447 |     case Token::TlessEq:
448 |     case Token::Tgreater:
449 |     case Token::TgreaterEq:
450 |         reduce(ps, P_compare); defer(ps); ps.state = psExpr; break;
451 | 
452 |     case Token::Teq:
453 |     case Token::TnotEq:
454 |         reduce(ps, P_equal); defer(ps); ps.state = psExpr; break;
455 | 
456 |         
457 |     case Token::TcBlock:
458 |         // this is always an error, but we'll recover the case where
459 |         // we are simply missing a semicolon
460 |         reduce(ps, P_assign);
461 |         if(ps.defer.back().type == Token::ToBlock)
462 |         {
463 |             ps.errorAt(ps.token, "missing ';'");
464 |             ++ps.defer.back().nArgs;
465 |             psStatement(ps);
466 |             return;
467 |         }
468 |         // fall-thru to default error
469 |     default:
470 |         ps.errorAt(ps.token, "unexpected token - expecting operator"); break;
471 |     }
472 | }
473 | 
474 | static void psExpr(Parser & ps)
475 | {
476 |     // special case for closing paren of function calls
477 |     if(ps.token.type == Token::TcParen)
478 |     {
479 |         if(ps.defer.back().type == Token::Tfuncall && ps.defer.back().nArgs == 0)
480 |         {
481 |             reduce(ps, P_flow); ps.state = psInfix; return;
482 |         }
483 |     }
484 | 
485 |     switch(ps.token.type)
486 |     {
487 |     case Token::ToParen: defer(ps); break;
488 | 
489 |     // no reduce for unary operators as they are already the highest
490 |     // and should be reduced right-to-left
491 |     case Token::Tadd: deferAs(ps, Token::Tpos); break;
492 |     case Token::Tsub: deferAs(ps, Token::Tneg); break;
493 |     case Token::TlogNot: defer(ps); break;
494 |     case Token::TbitNot: defer(ps); break;
495 | 
496 |     case Token::Tint: 
497 |     case Token::Tuint:
498 |     case Token::Tfloat:
499 |     case Token::Tsymbol:
500 |         fragment(ps, ps.token); ps.state = psInfix; break;
501 |         
502 |     default: ps.errorAt(ps.token, "unexpected token - expecting expression"); break;
503 |     }
504 | }
505 | 
506 | static void psCondition(Parser & ps)
507 | {
508 |     assert(ps.defer.back().type == Token::Tif
509 |     || ps.defer.back().type == Token::Twhile);
510 |     
511 |     switch(ps.token.type)
512 |     {
513 |     case Token::ToParen:
514 |         ps.state = psExpr;
515 |         break;
516 | 
517 |     default:
518 |         ps.errorAt(ps.token, "expected '(' for condition");
519 |         // try to recover by ignoring the previous keyword
520 |         ps.defer.pop_back();
521 |         psStatement(ps);
522 |         break;
523 |     }
524 | }
525 | 
526 | static void psMaybeElse(Parser & ps)
527 | {
528 |     if(ps.token.type == Token::Telse)
529 |     {
530 |         assert(ps.defer.back().type == Token::TifBody);
531 |         ps.defer.back().type = Token::Telse;
532 |         ps.state = psStatement;
533 |         return;
534 |     }
535 | 
536 |     // no else keyword, reduce any conditions
537 |     while(ps.defer.size() && ps.defer.back().type == Token::TifBody)
538 |     {
539 |         fragment(ps, ps.defer.back());
540 |         ps.defer.pop_back();
541 |     }
542 | 
543 |     reduceStatement(ps);
544 |     
545 |     psStatement(ps);
546 | }
547 | 


--------------------------------------------------------------------------------
/front/front-parse.h:
--------------------------------------------------------------------------------
  1 | 
  2 | #pragma once
  3 | 
  4 | #include <cassert>
  5 | #include <cstdint>
  6 | #include <cstdarg>
  7 | #include <vector>
  8 | #include <memory>
  9 | #ifndef _WIN32
 10 | #include <unistd.h>
 11 | #endif
 12 | 
 13 | #include "hash.h"
 14 | #include "front-lexer.h"
 15 | 
 16 | namespace bjit
 17 | {
 18 | 
 19 |     // printf into std::vector<char>
 20 |     //
 21 |     // appends to the end of the vector, does NOT zero-terminate
 22 |     static inline void vformat(std::vector<char> & out, const char * fmt, ...)
 23 |     {
 24 |         va_list va;
 25 |     
 26 |         // get length - we need the va_crap twice on x64
 27 |         va_start(va, fmt);
 28 |         int len = vsnprintf(0, 0, fmt, va);
 29 |         va_end(va);
 30 |     
 31 |         // get the offset
 32 |         int vOff = out.size();
 33 |         // resize to fit string + null
 34 |         out.resize(vOff + len + 1);
 35 |     
 36 |         va_start(va, fmt);
 37 |         vsnprintf(out.data() + vOff, len + 1, fmt, va);
 38 |         va_end(va);
 39 |     
 40 |         // remove null-termination
 41 |         out.pop_back();
 42 |     }
 43 | 
 44 |     struct SymbolPtr
 45 |     {
 46 |         std::unique_ptr<Symbol> ptr;
 47 | 
 48 |         bool isEqual(const SymbolPtr & s) { return isEqual(s.ptr->string); }
 49 |         bool isEqual(const std::vector<char> & k)
 50 |         {
 51 |             return (k.size() == ptr->string.size())
 52 |                 && !(memcmp(k.data(), ptr->string.data(), k.size()));
 53 |         }
 54 | 
 55 |         static uint64_t getHash(const SymbolPtr & s) { return getHash(s.ptr->string); }
 56 |         static uint64_t getHash(const std::vector<char> & k)
 57 |         {
 58 |             return stringHash64((uint8_t*)k.data(), k.size());
 59 |         }
 60 |     };
 61 | 
 62 |     struct Parser
 63 |     {
 64 |         // keep input for error reporting
 65 |         std::vector<char>   inputBuffer;
 66 |         std::vector<int>    inputLines = { 0 };
 67 | 
 68 |         HashTable<SymbolPtr> symbols;
 69 |         
 70 |         int peek() { return peekChar; }
 71 |         void consume()
 72 |         {
 73 |             if(peekChar == '\n')
 74 |             {
 75 |                 ++posLine; posChar = 0;
 76 |     
 77 |                 // interactive prompt only on TTY
 78 |                 #ifndef _WIN32
 79 |                 if (isatty(fileno(stdin)))
 80 |                 #endif
 81 |                     printf("%6d> ", posLine);
 82 |                 
 83 |                 peekPos = inputBuffer.size();
 84 |                 while(true)
 85 |                 {
 86 |                     int ch = fgetc(stdin);
 87 |                     if(0 <= ch) inputBuffer.push_back(ch);
 88 |                     if(ch < 0 || ch == '\n') break;
 89 |                 }
 90 |                 inputLines.push_back(inputBuffer.size());
 91 |             }
 92 |             else ++posChar;
 93 |     
 94 |             if(peekPos < inputBuffer.size())
 95 |                 peekChar = inputBuffer[peekPos++];
 96 |             else peekChar = -1;
 97 |         }
 98 |     
 99 |         unsigned    peekPos = 0;
100 |         int peekChar = '\n';    // make initial consume do a prompt
101 |     
102 |         int posLine = 0;
103 |         int posChar = 0;
104 |     
105 |         // record all errors
106 |         std::vector<char>   errorBuffer;
107 |         std::vector<char>   formatBuffer;
108 | 
109 |         int nErrors = 0;
110 | 
111 |         // FIXME: redirect to the error buffer
112 |         void doError(const char * file, int line, int col,
113 |             const char * type, const char * what)
114 |         {
115 |             formatBuffer.clear();
116 |             vformat(formatBuffer, "%s:%d:%d: %s: %s\n",
117 |                 file, line, col, type, what);
118 |             vformat(formatBuffer, "    ");
119 |             assert(line < inputLines.size());
120 |             int i = inputLines[line-1];
121 |             int j = inputLines[line];
122 |             while(i < j) formatBuffer.push_back(inputBuffer[i++]);
123 |             if(formatBuffer.back() != '\n') formatBuffer.push_back('\n');
124 |             vformat(formatBuffer, "%*s^\n", 4+col, "");
125 |     
126 |             // append to collected error buffer
127 |             errorBuffer.insert(errorBuffer.end(),
128 |                 formatBuffer.begin(), formatBuffer.end());
129 |     
130 |             // then print .. need null terminate for puts
131 |             formatBuffer.push_back(0);
132 |             fflush(stdout); // keep debugs cleaner
133 |             fprintf(stderr, "%s", formatBuffer.data());
134 |         }
135 | 
136 |         void errorAt(Token & t, const char * what)
137 |         {
138 |             ++nErrors;
139 |             doError("<stdin>", t.posLine, t.posChar, "error", what);
140 |         }
141 | 
142 |         void warningAt(Token & t, const char * what)
143 |         {
144 |             doError("<stdin>", t.posLine, t.posChar, "warning", what);
145 |         }
146 |         
147 |         // current token
148 |         Token   token;
149 | 
150 |         // defer is a stack of tokens not yet reduced
151 |         std::vector<Token>  defer;
152 | 
153 |         // frags is a stack of AST fragments that eventually get
154 |         // consumed when the defer-stack is reduced
155 |         std::vector<std::unique_ptr<struct Expr>> frags;
156 | 
157 |         void (*state)(Parser &) = 0;
158 |     };
159 | 
160 |     void lexToken(Parser & ps);
161 | 
162 |     void parse(std::vector<uint8_t> & codeOut);
163 | }


--------------------------------------------------------------------------------
/run-tests.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | # This is random collection of very simple tests that are crafted
 4 | # to expose potential problems mostly in CSE ruleset
 5 | 
 6 | bin/test_add_ii
 7 | bin/test_add_ff
 8 | bin/test_sub_ii # test parameter order, mostly
 9 | 
10 | bin/test_shift
11 | bin/test_divmod
12 | 
13 | bin/test_ci2f_cf2i
14 | bin/test_sx_zx
15 | bin/test_load_store
16 | 
17 | bin/test_callp
18 | bin/test_calln
19 | 
20 | bin/test_fib
21 | bin/test_call_stub
22 | bin/test_loop   # this tries to confuse opt_jump_be
23 | 
24 | bin/test_mem_opt
25 | 
26 | cat << END | bin/bjit
27 |     x := 0/0; y := x/1u;
28 |     while(x < 10) { if(y != 2) x = x+1; x = x+1; } return x;
29 | END
30 | 
31 | cat << END | bin/bjit
32 |     x := 0/0; y := x/1u;
33 |     while(x < 10) { if(y != 2) y = x+1; x = x+1; } return x;
34 | END
35 | 
36 | cat << END | bin/bjit
37 |     x := 0/0; y := x/1u;
38 |     while(x < 10) { if(y != 2) x = x+1; else x = x+1; x=x+1; } return x;
39 | END
40 | 
41 | cat << END | bin/bjit
42 |     x := 0/0; y := x/1u;
43 |     while(x < 10) { if(y != 2) x = x+1; else x = x+1; } return x+1;
44 | END
45 | 
46 | cat << END | bin/bjit
47 |     x := 0; y := 0/0; while(x < 10) { if(y != 2) x = x+1; else x = x+1; } return (x+1);
48 | END
49 | 
50 | cat << END | bin/bjit
51 |     x := 0; y := 0/0; while(x < 10) { x = x+(y/0); } return (x+1);
52 | END
53 | 
54 | cat << END | bin/bjit
55 | x := 1; while(1) { x = x+1; if (x < 10) continue; break; }
56 | END
57 | 
58 | cat << END | bin/bjit
59 | y := 2/0; x := 1; while(1) { x = x+1; if ((y+x+y) < (y+10+y)) continue; break; }
60 | END
61 | 
62 | cat << END | bin/bjit
63 | y := 2/0; z := 3/0; x := 1; while(1) { x = x+1; if (((y+x)+(x+z)) < ((y+10)+(z+10))) continue; break; }
64 | END
65 | 
66 | # fuzzfold generates tons of garbage, so throw it into /dev/null
67 | # and then run the thing manually if it fails
68 | echo "Fuzzing..."
69 | bin/test_fuzzfold 2> /dev/null
70 | echo "Fuzz passed."
71 | 
72 | bin/test_sieve
73 | 
74 | echo "Looks like it didn't crash, at least... ;-)"
75 | 
76 | 


--------------------------------------------------------------------------------
/src/arch-arm64-asm.h:
--------------------------------------------------------------------------------
  1 | 
  2 | #pragma once
  3 | 
  4 | #include <cstdint>
  5 | #include <cstring>
  6 | #include <vector>
  7 | 
  8 | #include "arch-arm64.h"
  9 | 
 10 | namespace bjit
 11 | {
 12 | 
 13 | static const uint8_t PC = 0xff;    // otherwise invalid, used for RIP relative
 14 | 
 15 | // encode register from our naming to X64 values
 16 | static uint8_t REG(int r)
 17 | {
 18 |     using namespace regs;
 19 | 
 20 |     switch(r)
 21 |     {
 22 |         case x0: case v0: return 0;
 23 |         case x1: case v1: return 1;
 24 |         case x2: case v2: return 2;
 25 |         case x3: case v3: return 3;
 26 |         case x4: case v4: return 4;
 27 |         case x5: case v5: return 5;
 28 |         case x6: case v6: return 6;
 29 |         case x7: case v7: return 7;
 30 |         case x8: case v8: return 8;
 31 |         case x9: case v9: return 9;
 32 |         
 33 |         case x10: case v10: return 10;
 34 |         case x11: case v11: return 11;
 35 |         case x12: case v12: return 12;
 36 |         case x13: case v13: return 13;
 37 |         case x14: case v14: return 14;
 38 |         case x15: case v15: return 15;
 39 |         case x16: case v16: return 16;
 40 |         case x17: case v17: return 17;
 41 |         case x18: case v18: return 18;
 42 |         case x19: case v19: return 19;
 43 |         
 44 |         case x20: case v20: return 20;
 45 |         case x21: case v21: return 21;
 46 |         case x22: case v22: return 22;
 47 |         case x23: case v23: return 23;
 48 |         case x24: case v24: return 24;
 49 |         case x25: case v25: return 25;
 50 |         case x26: case v26: return 26;
 51 |         case x27: case v27: return 27;
 52 |         case x28: case v28: return 28;
 53 |         
 54 |         case fp: case v29: return 29;
 55 | 
 56 |         case lr: case v30: return 30;
 57 |         case sp: case v31: return 31;
 58 | 
 59 |         // this is only used internally
 60 |         case PC: return PC;
 61 |     }
 62 | 
 63 |     BJIT_ASSERT(false);
 64 |     return 0;
 65 | }
 66 | 
 67 | // return the 4-bit Condition Code part of conditional ops
 68 | uint8_t _CC(uint8_t opcode)
 69 | {
 70 |     switch(opcode)
 71 |     {
 72 |         case ops::jilt: return 0xB;
 73 |         case ops::jige: case ops::jdge: case ops::jfge: return 0xA;
 74 |         case ops::jigt: case ops::jdgt: case ops::jfgt: return 0xC;
 75 |         case ops::jile: case ops::jdle: case ops::jfle: return 0xD;
 76 | 
 77 |         // 0xE = always, 0xF = ???
 78 | 
 79 |         // on ARM using signed LT for floats also matches unordered
 80 |         // where as using unsigned only allows proper match
 81 |         case ops::jult: case ops::jdlt: case ops::jflt: return 0x3;
 82 |         case ops::juge: return 0x2;
 83 |         case ops::jugt: return 0x8;
 84 |         case ops::jule: return 0x9;
 85 | 
 86 |         case ops::jine: case ops::jdne: case ops::jfne: case ops::jnz: return 0x1;
 87 |         case ops::jieq: case ops::jdeq: case ops::jfeq: case ops::jz:  return 0x0;
 88 | 
 89 |         default: break;
 90 |     }
 91 |     // silence warning if assert is nop
 92 |     BJIT_ASSERT(false); return 0;
 93 | }
 94 | 
 95 | struct AsmArm64
 96 | {
 97 |     std::vector<uint8_t>   & out;
 98 | 
 99 |     AsmArm64(std::vector<uint8_t> & out, unsigned nBlocks) : out(out)
100 |     {
101 |         rodata32_index = nBlocks++;
102 |         rodata64_index = nBlocks++;
103 |         //rodata128_index = nBlocks++;
104 |         blockOffsets.resize(nBlocks);
105 |     }
106 | 
107 |     // separate .rodata for 128/64/32 bit constants
108 |     // we will place the most aligned block first
109 | /*
110 |     std::vector<__m128>     rodata128;
111 |     uint32_t                rodata128_index;    // index into blockOffsets
112 | */    
113 |     std::vector<uint64_t>   rodata64;
114 |     uint32_t                rodata64_index;     // index into blockOffsets
115 |     
116 |     std::vector<uint64_t>   rodata32;
117 |     uint32_t                rodata32_index;     // index into blockOffsets
118 | 
119 |     // stores byteOffsets to each basic block for relocation
120 |     std::vector<uint32_t>   blockOffsets;
121 | 
122 |     struct Reloc
123 |     {
124 |         uint32_t    codeOffset;
125 |         uint32_t    blockIndex;
126 |     };
127 | 
128 |     std::vector<Reloc>      relocations;
129 | 
130 |     void emit(uint8_t byte) { out.push_back(byte); }
131 |     void emit32(uint32_t data)
132 |     {
133 |         out.push_back(data & 0xff); data >>= 8;
134 |         out.push_back(data & 0xff); data >>= 8;
135 |         out.push_back(data & 0xff); data >>= 8;
136 |         out.push_back(data & 0xff);
137 |     }
138 | 
139 |     // add relocation entry
140 |     void addReloc(uint32_t block)
141 |     {
142 |         relocations.resize(relocations.size()+1);
143 |         relocations.back().codeOffset = out.size();
144 |         relocations.back().blockIndex = block;
145 |     }
146 |     
147 |     // store 32-bit constant into .rodata32
148 |     // add relocation and return offset for RIP relative
149 |     uint32_t data32(uint32_t data)
150 |     {
151 |         unsigned index = rodata32.size();
152 |         // try to find an existing constant with same value
153 |         for(unsigned i = 0; i < rodata32.size(); ++i)
154 |         {
155 |             if(rodata32[i] == data) { index = i; break; }
156 |         }
157 |         
158 |         if(index == rodata32.size()) rodata32.push_back(data);
159 |         addReloc(rodata32_index);
160 |         return index*sizeof(uint32_t);
161 |     }
162 | 
163 |     // store 64-bit constant into .rodata64
164 |     // add relocation and return offset for RIP relative
165 |     uint32_t data64(uint64_t data)
166 |     {
167 |         unsigned index = rodata64.size();
168 |         // try to find an existing constant with same value
169 |         for(unsigned i = 0; i < rodata64.size(); ++i)
170 |         {
171 |             if(rodata64[i] == data) { index = i; break; }
172 |         }
173 |         
174 |         if(index == rodata64.size()) rodata64.push_back(data);
175 |         addReloc(rodata64_index);
176 |         return index*sizeof(uint64_t);
177 |     }
178 | 
179 |     uint32_t data32f(float data)
180 |     {
181 |         return data32(*reinterpret_cast<uint32_t*>(&data));
182 |     }
183 | 
184 |     uint32_t data64f(double data)
185 |     {
186 |         return data64(*reinterpret_cast<uint64_t*>(&data));
187 |     }
188 | 
189 | /*
190 |     uint32_t data128(__m128 data)
191 |     {
192 |         unsigned index = rodata128.size();
193 |         // try to find an existing constant with same value
194 |         for(unsigned i = 0; i < rodata128.size(); ++i)
195 |         {
196 |             if(!memcmp(&rodata128[i],&data,sizeof(__m128))) { index = i; break; }
197 |         }
198 |         
199 |         if(index == rodata128.size()) rodata128.push_back(data);
200 |         addReloc(rodata128_index);
201 |         return index*sizeof(__m128);
202 |     }
203 | */
204 | 
205 |     void MOVri(int r, int64_t imm64)
206 |     {
207 |         if(imm64 == (0xffff & imm64))
208 |         {
209 |             // MOVZ
210 |             emit32(0xD2800000 | REG(r) | ((0xffff & imm64) << 5));
211 |             return;
212 |         }
213 | 
214 |         if(imm64 == ~(0xffff & ~imm64))
215 |         {
216 |             // MOVN
217 |             emit32(0x92800000 | REG(r) | ((0xffff & ~imm64) << 5));
218 |             return;
219 |         }
220 | 
221 |         if(imm64 == (uint32_t) imm64)
222 |         {
223 |             // LDR pc-relative .. imm19
224 |             auto off = (data32(imm64) - out.size()) >> 2;
225 |             emit32(0x18000000 | REG(r) | ((0x7ffff & off) << 5));
226 |             return;
227 |         }
228 |         
229 |         if(imm64 == (int32_t) imm64)
230 |         {
231 |             // LDR pc-relative .. imm19
232 |             auto off = (data32(imm64) - out.size()) >> 2;
233 |             emit32(0x98000000 | REG(r) | ((0x7ffff & off) << 5));
234 |             return;
235 |         }
236 | 
237 |         // general 64-bit LDR pc-relative .. imm19
238 |         auto off = (data64(imm64) - out.size()) >> 2;
239 |         emit32(0x58000000 | REG(r) | ((0x7ffff & off) << 5));
240 | 
241 |     }
242 | 
243 |     void _mem(uint32_t op, int r0, int r1, int32_t offset, int shift)
244 |     {
245 |         if(offset < 0 || offset > (0x3ff << shift)
246 |         || (offset & ~((~0u)<<shift)))
247 |         {
248 |             // need some magic
249 |             MOVri(regs::x16, offset);
250 |             _rrr(_ADD, regs::x16, regs::x16, r1);
251 | 
252 |             r1 = regs::x16;
253 |             offset = 0;
254 |         }
255 | 
256 |         emit32(op | REG(r0) | (REG(r1)<<5) | (((offset>>shift)&0x1ff) << 10));
257 |     }
258 |     
259 |     void _mem2(uint32_t op, int r0, int r1, int r2, int32_t offset)
260 |     {
261 |         if(offset)
262 |         {
263 |             // need some magic
264 |             MOVri(regs::x16, offset);
265 |             _rrr(_ADD, regs::x16, regs::x16, r1);
266 | 
267 |             r1 = regs::x16;
268 |         }
269 | 
270 |         emit32(op | REG(r0) | (REG(r1)<<5) | (REG(r2) << 16));
271 |     }
272 | 
273 |     void _rrr(uint32_t op, int r0, int r1, int r2)
274 |     {
275 |         emit32(op | REG(r0) | (REG(r1)<<5) | (REG(r2) << 16));
276 | 
277 |     }
278 | 
279 |     // ADD/SUB have 12-bit immediate versions
280 |     void _rri12(uint32_t immop, int r0, int r1, int32_t imm32)
281 |     {
282 |         BJIT_ASSERT(imm32 == (imm32 & 0xfff));
283 | 
284 |         _rrr(immop | (imm32 << 10), r0, r1, regs::x0);
285 |     }
286 | 
287 |     // NOTE: bit0 is set and needs to be invert, so XOR the condition code
288 |     static const uint32_t   _CSET   = 0x9A9F17E0;
289 | 
290 |     void CMPrr(int r0, int r1) { _rrr(0xEB000000, regs::sp, r0, r1); }
291 |     void TSTrr(int r0, int r1) { _rrr(0xEA000000, regs::sp, r0, r1); }
292 | 
293 |     void FCMPss(int r0, int r1) { _rrr(0x1E202000, regs::x0, r0, r1); }
294 |     void FCMPdd(int r0, int r1) { _rrr(0x1E602000, regs::x0, r0, r1); }
295 |     
296 |     void MOVrr(int r0, int r1) { _rrr(0xAA0003E0, r0, 0, r1); }
297 | 
298 |     static const uint32_t   _ADD    = 0x8B000000;
299 |     static const uint32_t   _SUB    = 0xCB000000;
300 |     
301 |     // SUB from zero reg
302 |     void NEGr(int r0, int r1) { _rrr(_SUB, r0, regs::sp, r1); }
303 | 
304 |     static const uint32_t   _MUL    = 0x9B007C00;
305 |     static const uint32_t   _SDIV   = 0x9AC00C00;
306 |     static const uint32_t   _UDIV   = 0x9AC00800;
307 |     
308 |     void MSUBrrrr(int r0, int r1, int r2, int r3)
309 |     { _rrr(0x9B008000 | (REG(r3)<<10), r0, r1, r2); }
310 | 
311 |     // this uses EON with zero register
312 |     void NOTr(int r0, int r1) { _rrr(0xCA3F0000, r0, r1, 0); }
313 | 
314 |     static const uint32_t   _AND    = 0x8A000000;
315 |     static const uint32_t   _OR     = 0xAA000000;
316 |     static const uint32_t   _XOR    = 0xCA000000;
317 | 
318 | };
319 | 
320 | }


--------------------------------------------------------------------------------
/src/arch-arm64-ops.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #ifdef __aarch64__
  3 | 
  4 | #include "bjit.h"
  5 | 
  6 | using namespace bjit;
  7 | using namespace bjit::impl;
  8 | 
  9 | RegMask Op::regsMask()
 10 | {
 11 |     switch(flags.type)
 12 |     {
 13 |         case _ptr: return regs::mask_int;
 14 |         case _f32: return regs::mask_float;
 15 |         case _f64: return regs::mask_float;
 16 | 
 17 |         default: BJIT_LOG("%s\n", strOpcode());
 18 |     }
 19 |     // silence warning if assert is nop
 20 |     BJIT_ASSERT(false); return 0;
 21 | }
 22 | 
 23 | RegMask Op::regsOut()
 24 | {
 25 |     // only deal with anything that isn't regs::mask_int explicit
 26 |     switch(opcode)
 27 |     {
 28 |         default: return regsMask(); // no special case -> any valid
 29 | 
 30 |         // special
 31 |         case ops::alloc: return R2Mask(regs::sp);
 32 |     
 33 |         case ops::icallp: case ops::icalln: return R2Mask(regs::x0);
 34 |         
 35 |         case ops::fcallp: case ops::fcalln:
 36 |         case ops::dcallp: case ops::dcalln: return R2Mask(regs::v0);
 37 | 
 38 |         // we have in[0] = index in type, in[1] = index total
 39 |         // which one we want to use varies by platform
 40 |         case ops::iarg:
 41 |             switch(indexType)   // AArch64 uses position by type
 42 |             {
 43 |             case 0: return R2Mask(regs::x0);
 44 |             case 1: return R2Mask(regs::x1);
 45 |             case 2: return R2Mask(regs::x2);
 46 |             case 3: return R2Mask(regs::x3);
 47 |             case 4: return R2Mask(regs::x4);
 48 |             case 5: return R2Mask(regs::x5);
 49 |             case 6: return R2Mask(regs::x6);
 50 |             case 7: return R2Mask(regs::x7);
 51 | 
 52 |             // FIXME: We need to teach RA about stack parameters.
 53 |             default: BJIT_ASSERT(false);
 54 |             }
 55 |         case ops::farg:
 56 |         case ops::darg:
 57 |             switch(indexType)   // AArch64 uses position by type
 58 |             {
 59 |             case 0: return R2Mask(regs::v0);
 60 |             case 1: return R2Mask(regs::v1);
 61 |             case 2: return R2Mask(regs::v2);
 62 |             case 3: return R2Mask(regs::v3);
 63 |             case 4: return R2Mask(regs::v4);
 64 |             case 5: return R2Mask(regs::v5);
 65 |             case 6: return R2Mask(regs::v6);
 66 |             case 7: return R2Mask(regs::v7);
 67 | 
 68 |             // FIXME: We need to teach RA about stack parameters.
 69 |             default: BJIT_ASSERT(false);
 70 |             }
 71 |     }
 72 | 
 73 |     // silence warning if assert is nop
 74 |     BJIT_ASSERT(false); return 0;
 75 | }
 76 | // Register constraint for input operand i of this op (AArch64 backend).
 77 | RegMask Op::regsIn(int i)
 78 | {
 79 |     switch(opcode)
 80 |     {
 81 |         default: return regsMask(); // no special case -> any register that can hold the type
 82 | 
 83 |         // indirect calls can theoretically take any GP register
 84 |         // but we don't want to use x0-x8 used for passing arguments
 85 |         // and we MUST have a caller saved register for tcallp
 86 |         //
 87 |         // so practically it makes sense to use x9-x15
 88 |         case ops::icallp: case ops::dcallp:
 89 |         case ops::fcallp: case ops::tcallp:
 90 |             return R2Mask(regs::x9)
 91 |                 |R2Mask(regs::x10) |R2Mask(regs::x11)
 92 |                 |R2Mask(regs::x12) |R2Mask(regs::x13)
 93 |                 |R2Mask(regs::x14) |R2Mask(regs::x15);
 94 | 
 95 |         // loads and stores allow stack pointer as their first argument
 96 |         // FIXME: we do NOT want to rename to SP though :D
 97 |         case ops::li8: case ops::li16: case ops::li32: case ops::li64:
 98 |         case ops::lu8: case ops::lu16: case ops::lu32:
 99 |         case ops::lf32: case ops::lf64:
100 |         case ops::si8: case ops::si16: case ops::si32: case ops::si64:
101 |         case ops::s2i8: case ops::s2i16: case ops::s2i32: case ops::s2i64:
102 |             return regs::mask_int | (i ? 0 : R2Mask(regs::sp)); // sp only when i == 0
103 |         case ops::sf32: case ops::sf64:
104 |         case ops::s2f32: case ops::s2f64:
105 |             return i ? regs::mask_float : ((regs::mask_int) | R2Mask(regs::sp)); // i == 0: int or sp, else float
106 | 
107 |         // jumps and float compares need explicit types
108 |         case ops::jilt: case ops::jige:
109 |         case ops::jigt: case ops::jile:
110 |         case ops::jieq: case ops::jine:
111 |         case ops::jiltI: case ops::jigeI:
112 |         case ops::jigtI: case ops::jileI:
113 |         case ops::jieqI: case ops::jineI:
114 |         case ops::jz: case ops::jnz:
115 |             return regs::mask_int;
116 | 
117 |         case ops::jdlt: case ops::jdge:
118 |         case ops::jdgt: case ops::jdle:
119 |         case ops::jdeq: case ops::jdne:
120 | 
121 |         case ops::flt: case ops::fge:
122 |         case ops::fgt: case ops::fle:
123 |         case ops::feq: case ops::fne:
124 |         
125 |         case ops::lcf: case ops::cf2i:
126 | 
127 |         case ops::dlt: case ops::dge:
128 |         case ops::dgt: case ops::dle:
129 |         case ops::deq: case ops::dne:
130 |         
131 |         case ops::lcd: case ops::cd2i:
132 |         case ops::bcd2i: case ops::bcf2i:
133 |             return regs::mask_float;
134 | 
135 |         // casts with integer inputs must be explicit too
136 |         case ops::ci2f: case ops::bci2f:
137 |         case ops::ci2d: case ops::bci2d:
138 |             return regs::mask_int;
139 |             
140 |         case ops::ipass:
141 |             switch(indexType)   // AArch64 uses position by type
142 |             {
143 |             case 0: return R2Mask(regs::x0);
144 |             case 1: return R2Mask(regs::x1);
145 |             case 2: return R2Mask(regs::x2);
146 |             case 3: return R2Mask(regs::x3);
147 |             case 4: return R2Mask(regs::x4);
148 |             case 5: return R2Mask(regs::x5);
149 |             case 6: return R2Mask(regs::x6);
150 |             case 7: return R2Mask(regs::x7);
151 | 
152 |             // FIXME: We need to teach RA about stack parameters.
153 |             default: BJIT_ASSERT(false); // NOTE(review): falls through to fpass/dpass if the assert is a nop - confirm
154 |             }
155 |         case ops::fpass:
156 |         case ops::dpass:
157 |             switch(indexType)   // AArch64 uses position by type
158 |             {
159 |             case 0: return R2Mask(regs::v0);
160 |             case 1: return R2Mask(regs::v1);
161 |             case 2: return R2Mask(regs::v2);
162 |             case 3: return R2Mask(regs::v3);
163 |             case 4: return R2Mask(regs::v4);
164 |             case 5: return R2Mask(regs::v5);
165 |             case 6: return R2Mask(regs::v6);
166 |             case 7: return R2Mask(regs::v7);
167 | 
168 |             // FIXME: We need to teach RA about stack parameters.
169 |             default: BJIT_ASSERT(false); // NOTE(review): falls through to iret if the assert is a nop - confirm
170 |             }
171 | 
172 |         // return values go in fixed registers
173 |         case ops::iret: return R2Mask(regs::x0);
174 |         case ops::fret: return R2Mask(regs::v0);
175 |         case ops::dret: return R2Mask(regs::v0);
176 | 
177 |     }
178 | }
179 | 
180 | RegMask Op::regsLost()
181 | {
182 |     // argument passing: for now, OR together what regsIn() reports for
183 |     // every index below in[1]; this should help convince RA to do the
184 |     // right thing (NOTE(review): fpass is not handled here - confirm)
185 |     if(opcode == ops::ipass || opcode == ops::dpass)
186 |     {
187 |         RegMask lost = 0;
188 |         int n = 0;
189 |         while(n < in[1])
190 |         {
191 |             lost |= regsIn(n++);
192 |         }
193 |         return lost;
194 |     }
195 | 
196 |     // the icall/fcall/dcall ops give up the caller saved registers
197 |     // and the link register; no other op loses anything
198 |     const bool isCall
199 |         = opcode == ops::icalln || opcode == ops::fcalln
200 |         || opcode == ops::dcalln || opcode == ops::icallp
201 |         || opcode == ops::fcallp || opcode == ops::dcallp;
202 |     return isCall ? (regs::caller_saved | R2Mask(regs::lr)) : 0;
203 | }
204 | 
205 | #endif


--------------------------------------------------------------------------------
/src/arch-arm64.h:
--------------------------------------------------------------------------------
  1 | 
  2 | #pragma once
  3 | 
  4 | namespace bjit
  5 | {
  6 |     // this is a hint for opt-ra
  7 |     static const bool arch_explicit_output_regs = true;
  8 | 
  9 |     // we use this for types, etc
 10 |     typedef uint64_t    RegMask;
 11 | 
 12 |     // convert single-register to a mask
 13 |     static RegMask R2Mask(int r) { return ((RegMask)1)<<r; }
 14 |         
 15 |     namespace regs
 16 |     {
 17 |         // List of registers for register allocator
 18 |         // These should be in preference order
 19 |         //
 20 |         //  x0 - x7 are used for calls
 21 |         //
 22 |         //  x16 - x17 are linker temporary (can we use them as callee saved?)
 23 |         //  x18 is platform register
 24 |         //
 25 |         //  x29 = frame pointer
 26 |         //  x30 = link register
 27 |         //  x31 = stack pointer or zero register
 28 |         //
 29 | #define BJIT_REGS(_) \
 30 |         /* caller saved */ \
 31 |         _(x0), _(x1), _(x2), _(x3), _(x4), _(x5), _(x6), _(x7), \
 32 |         _(x8), _(x9), _(x10), _(x11), _(x12), _(x13), _(x14), _(x15), \
 33 |         /* special */ \
 34 |         _(x16), _(x17), _(x18), \
 35 |         /* callee saved */ \
 36 |         _(x19), _(x20), _(x21), _(x22), _(x23), \
 37 |         _(x24), _(x25), _(x26), _(x27), _(x28), _(fp), _(lr), _(sp), \
 38 |         /* floating point */ \
 39 |         _(v0), _(v1), _(v2), _(v3), _(v4), _(v5), _(v6), _(v7), \
 40 |         _(v16), _(v17), _(v18), _(v19), _(v20), _(v21), _(v22), _(v23), \
 41 |         _(v24), _(v25), _(v26), _(v27), _(v28), _(v29), _(v30), _(v31), \
 42 |         /* callee-saved floats .. prefer them last */ \
 43 |         _(v8), _(v9), _(v10), _(v11), _(v12), _(v13), _(v14), _(v15), \
 44 |         /* placeholder */ \
 45 |         _(none)
 46 | 
 47 | #define BJIT_REGS_ENUM(x) x
 48 |         // set nregs as the first non-register value
 49 |         enum { BJIT_REGS(BJIT_REGS_ENUM), nregs = none };
 50 | 
 51 |         // Integer register mask (without specials): x0 - x15 and x19 - x28
 52 |         static const RegMask mask_int
 53 |             =R2Mask(x0)
 54 |             |R2Mask(x1)
 55 |             |R2Mask(x2)
 56 |             |R2Mask(x3)
 57 |             |R2Mask(x4)
 58 |             |R2Mask(x5)
 59 |             |R2Mask(x6)
 60 |             |R2Mask(x7)
 61 |             
 62 |             |R2Mask(x8)
 63 |             |R2Mask(x9)
 64 |             |R2Mask(x10)    // by enum name like its neighbours, not a raw index
 65 |             |R2Mask(x11)
 66 |             |R2Mask(x12)
 67 |             |R2Mask(x13)
 68 |             |R2Mask(x14)
 69 |             |R2Mask(x15)
 70 |             // x16 - x18, fp, lr and sp are left out of this mask
 71 |             |R2Mask(x19)
 72 |             |R2Mask(x20)
 73 |             |R2Mask(x21)
 74 |             |R2Mask(x22)
 75 |             |R2Mask(x23)
 76 |             |R2Mask(x24)
 77 |             |R2Mask(x25)
 78 |             |R2Mask(x26)
 79 |             |R2Mask(x27)
 80 |             |R2Mask(x28)
 81 |             ;
 82 | 
 83 |         // Float register mask without v8 - v15: covers v0 - v7 and v16 - v31
 84 |         static const RegMask mask_float_volatile
 85 |             =R2Mask(v0)
 86 |             |R2Mask(v1)
 87 |             |R2Mask(v2)
 88 |             |R2Mask(v3)
 89 |             |R2Mask(v4)
 90 |             |R2Mask(v5)
 91 |             |R2Mask(v6)
 92 |             |R2Mask(v7)
 93 | 
 94 |             |R2Mask(v16)
 95 |             |R2Mask(v17)
 96 |             |R2Mask(v18)
 97 |             |R2Mask(v19)
 98 |             |R2Mask(v20)
 99 |             |R2Mask(v21)
100 |             |R2Mask(v22)
101 |             |R2Mask(v23)
102 | 
103 |             |R2Mask(v24)
104 |             |R2Mask(v25)
105 |             |R2Mask(v26)
106 |             |R2Mask(v27)
107 |             |R2Mask(v28)
108 |             |R2Mask(v29)
109 |             |R2Mask(v30)
110 |             |R2Mask(v31)
111 |             ;
112 | 
113 |         // All float registers: mask_float_volatile plus v8 - v15.
114 |         // NOTE: v8 - v15 are only preserved up to 64 bits, so if we add
115 |         // vector ops, we might want to treat these as scalar only?
116 |         static const RegMask mask_float
117 |             = mask_float_volatile
118 |             |R2Mask(v8)
119 |             |R2Mask(v9)
120 |             |R2Mask(v10)
121 |             |R2Mask(v11)
122 |             |R2Mask(v12)
123 |             |R2Mask(v13)
124 |             |R2Mask(v14)
125 |             |R2Mask(v15);
126 |             
127 |         // Caller saved (lost on function call)
128 |         //
129 |         // NOTE: Only mask_float_volatile is included, so v8 - v15 are not lost.
130 |         // The backend uses separate logic for callee_saved when we are callee.
131 |         //
132 |         // NOTE: This list MUST include any registers used for arguments.
133 |         static const RegMask caller_saved
134 |             =R2Mask(x0)
135 |             |R2Mask(x1)
136 |             |R2Mask(x2)
137 |             |R2Mask(x3)
138 |             |R2Mask(x4)
139 |             |R2Mask(x5)
140 |             |R2Mask(x6)
141 |             |R2Mask(x7)
142 |             |R2Mask(x8)
143 |             |R2Mask(x9)
144 |             |R2Mask(x10)
145 |             |R2Mask(x11)
146 |             |R2Mask(x12)
147 |             |R2Mask(x13)
148 |             |R2Mask(x14)
149 |             |R2Mask(x15)
150 |             |R2Mask(fp)
151 |             |R2Mask(lr)
152 |             | mask_float_volatile
153 |             ;
154 |     };
155 | };
156 | 


--------------------------------------------------------------------------------
/src/arch-x64-ops.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #ifdef __x86_64__
  3 | 
  4 | #include "bjit.h"
  5 | 
  6 | using namespace bjit;
  7 | using namespace bjit::impl;
  8 | 
  9 | RegMask Op::regsMask()
 10 | {
 11 |     // pointer-sized integers live in the general purpose registers
 12 |     if(flags.type == _ptr) return regs::mask_int;
 13 | 
 14 |     // both float precisions share the xmm registers
 15 |     if(flags.type == _f32 || flags.type == _f64) return regs::mask_float;
 16 | 
 17 |     // any other type has no register class, log the offending opcode
 18 |     BJIT_LOG("%s\n", strOpcode());
 19 | 
 20 |     BJIT_ASSERT(false); return 0;   // return silences warning if assert is nop
 21 | }
 22 | // Register constraint for the value produced by this op (x86-64 backend).
 23 | RegMask Op::regsOut()
 24 | {
 25 |     // only handle explicitly what isn't covered by the generic mask of the type
 26 |     switch(opcode)
 27 |     {
 28 |         default: return regsMask(); // no special case -> any valid
 29 | 
 30 |         // special
 31 |         case ops::alloc: return R2Mask(regs::rsp);
 32 |     
 33 |         // divisions are fixed registers
 34 |         case ops::idiv: case ops::udiv: return R2Mask(regs::rax);
 35 |         case ops::imod: case ops::umod: return R2Mask(regs::rdx);
 36 | 
 37 |         case ops::icallp: case ops::icalln: return R2Mask(regs::rax);
 38 |         
 39 |         case ops::fcallp: case ops::fcalln:
 40 |         case ops::dcallp: case ops::dcalln: return R2Mask(regs::xmm0);
 41 | 
 42 |         // argument ops carry indexType (index in type) and indexTotal (index total)
 43 |         // which one we want to use varies by platform
 44 |         case ops::iarg:
 45 | #ifdef _WIN32
 46 |             switch(indexTotal)  // Win64 wants the total position
 47 |             {
 48 |             case 0: return R2Mask(regs::rcx);
 49 |             case 1: return R2Mask(regs::rdx);
 50 |             case 2: return R2Mask(regs::r8);
 51 |             case 3: return R2Mask(regs::r9);
 52 | 
 53 |             // FIXME: We need to teach RA about stack parameters.
 54 |             default: BJIT_ASSERT(false); // NOTE(review): falls through to farg/darg if the assert is a nop - confirm
 55 |             }
 56 | #else
 57 |             switch(indexType)   // SysV uses position by type
 58 |             {
 59 |             case 0: return R2Mask(regs::rdi);
 60 |             case 1: return R2Mask(regs::rsi);
 61 |             case 2: return R2Mask(regs::rdx);
 62 |             case 3: return R2Mask(regs::rcx);
 63 |             case 4: return R2Mask(regs::r8);
 64 |             case 5: return R2Mask(regs::r9);
 65 | 
 66 |             // FIXME: We need to teach RA about stack parameters.
 67 |             default: BJIT_ASSERT(false); // NOTE(review): falls through to farg/darg if the assert is a nop - confirm
 68 |             }
 69 | #endif
 70 |         case ops::farg:
 71 |         case ops::darg:
 72 | #ifdef _WIN32
 73 |             switch(indexTotal)  // Win64 wants the total position
 74 |             {
 75 |             case 0: return R2Mask(regs::xmm0);
 76 |             case 1: return R2Mask(regs::xmm1);
 77 |             case 2: return R2Mask(regs::xmm2);
 78 |             case 3: return R2Mask(regs::xmm3);
 79 | 
 80 |             // FIXME: We need to teach RA about stack parameters.
 81 |             default: BJIT_ASSERT(false);
 82 |             }
 83 | #else
 84 |             switch(indexType)   // SysV uses position by type
 85 |             {
 86 |             case 0: return R2Mask(regs::xmm0);
 87 |             case 1: return R2Mask(regs::xmm1);
 88 |             case 2: return R2Mask(regs::xmm2);
 89 |             case 3: return R2Mask(regs::xmm3);
 90 |             case 4: return R2Mask(regs::xmm4);
 91 |             case 5: return R2Mask(regs::xmm5);
 92 |             case 6: return R2Mask(regs::xmm6);
 93 |             case 7: return R2Mask(regs::xmm7);
 94 | 
 95 |             // FIXME: We need to teach RA about stack parameters.
 96 |             default: BJIT_ASSERT(false);
 97 |             }
 98 | #endif
 99 |     }
100 | 
101 |     // only reached past a failed argument index; return silences warning if assert is nop
102 |     BJIT_ASSERT(false); return 0;
103 | }
104 | // Register constraint for input operand i of this op (x86-64 backend).
105 | RegMask Op::regsIn(int i)
106 | {
107 |     switch(opcode)
108 |     {
109 |         default: return regsMask(); // no special case -> any register that can hold the type
110 | 
111 |         // indirect calls can theoretically take any GP register
112 |         // but force RAX so we hopefully don't clobber stuff
113 |         case ops::icallp: case ops::dcallp:
114 |         case ops::fcallp: case ops::tcallp:
115 |             return R2Mask(regs::rax);
116 |         
117 |         // loads and stores allow stack pointer; the code permits it for i != 0 only
118 |         // FIXME: we do NOT want to rename to RSP though :D
119 |         case ops::li8: case ops::li16: case ops::li32: case ops::li64:
120 |         case ops::lu8: case ops::lu16: case ops::lu32:
121 |         case ops::lf32: case ops::lf64:
122 |         case ops::si8: case ops::si16: case ops::si32: case ops::si64:
123 |         case ops::s2i8: case ops::s2i16: case ops::s2i32: case ops::s2i64:
124 |             return regs::mask_int | (i ? R2Mask(regs::rsp) : 0); // NOTE(review): AArch64 backend allows sp for i == 0 instead - confirm
125 |         case ops::sf32: case ops::sf64:
126 |         case ops::s2f32: case ops::s2f64:
127 |             return i ? ((regs::mask_int) | R2Mask(regs::rsp)) : regs::mask_float; // i == 0: float, else int or rsp
128 | 
129 |         // allow iadd and iaddI to take RSP too, saves moves if we use LEA
130 |         case ops::iadd: case ops::iaddI:
131 |             return regs::mask_int | R2Mask(regs::rsp);
132 |         
133 |         // integer division takes RDX:RAX as 128-bit first operand
134 |         // we only do 64-bit, but force RAX on 1st and forbid RDX on 2nd
135 |         case ops::idiv: case ops::udiv:
136 |         case ops::imod: case ops::umod:
137 |             return (!i) ? R2Mask(regs::rax)
138 |             : (regs::mask_int & ~R2Mask(regs::rdx));
139 | 
140 |         case ops::jilt: case ops::jige:
141 |         case ops::jigt: case ops::jile:
142 |         case ops::jieq: case ops::jine:
143 |         case ops::jiltI: case ops::jigeI:
144 |         case ops::jigtI: case ops::jileI:
145 |         case ops::jieqI: case ops::jineI:
146 |         case ops::jz: case ops::jnz:
147 |             return regs::mask_int;
148 | 
149 |         case ops::jdlt: case ops::jdge:
150 |         case ops::jdgt: case ops::jdle:
151 |         case ops::jdeq: case ops::jdne:
152 | 
153 |         case ops::flt: case ops::fge:
154 |         case ops::fgt: case ops::fle:
155 |         case ops::feq: case ops::fne:
156 |         
157 |         case ops::lcf: case ops::cf2i:
158 | 
159 |         case ops::dlt: case ops::dge:
160 |         case ops::dgt: case ops::dle:
161 |         case ops::deq: case ops::dne:
162 |         
163 |         case ops::lcd: case ops::cd2i:
164 |         case ops::bcd2i: case ops::bcf2i:
165 |             return regs::mask_float;
166 | 
167 |         case ops::ci2f: case ops::bci2f:
168 |         case ops::ci2d: case ops::bci2d:
169 |             return regs::mask_int;
170 |             
171 |         // shifts want their second operand in CL
172 |         case ops::ishl: case ops::ishr: case ops::ushr:
173 |             return i ? R2Mask(regs::rcx) :
174 |                 (regs::mask_int &~ R2Mask(regs::rcx));
175 | 
176 |         case ops::ipass:
177 | #ifdef _WIN32
178 |             switch(indexTotal)  // Win64 wants the total position
179 |             {
180 |             case 0: return R2Mask(regs::rcx);
181 |             case 1: return R2Mask(regs::rdx);
182 |             case 2: return R2Mask(regs::r8);
183 |             case 3: return R2Mask(regs::r9);
184 | 
185 |             default: BJIT_ASSERT(false); // FIXME: RA can't handle (falls through to fpass/dpass if assert is nop)
186 |             }
187 | #else
188 |             switch(indexType)   // SysV uses position by type
189 |             {
190 |             case 0: return R2Mask(regs::rdi);
191 |             case 1: return R2Mask(regs::rsi);
192 |             case 2: return R2Mask(regs::rdx);
193 |             case 3: return R2Mask(regs::rcx);
194 |             case 4: return R2Mask(regs::r8);
195 |             case 5: return R2Mask(regs::r9);
196 | 
197 |             default: BJIT_ASSERT(false); // FIXME: RA can't handle (falls through to fpass/dpass if assert is nop)
198 |             }
199 | #endif
200 |         case ops::fpass:
201 |         case ops::dpass:
202 | #ifdef _WIN32
203 |             switch(indexTotal)  // Win64 wants the total index
204 |             {
205 |             case 0: return R2Mask(regs::xmm0);
206 |             case 1: return R2Mask(regs::xmm1);
207 |             case 2: return R2Mask(regs::xmm2);
208 |             case 3: return R2Mask(regs::xmm3);
209 | 
210 |             default: BJIT_ASSERT(false); // FIXME: RA can't handle (falls through to iret if assert is nop)
211 |             }
212 | #else
213 |             switch(indexType)   // SysV uses position by type
214 |             {
215 |             case 0: return R2Mask(regs::xmm0);
216 |             case 1: return R2Mask(regs::xmm1);
217 |             case 2: return R2Mask(regs::xmm2);
218 |             case 3: return R2Mask(regs::xmm3);
219 |             case 4: return R2Mask(regs::xmm4);
220 |             case 5: return R2Mask(regs::xmm5);
221 |             case 6: return R2Mask(regs::xmm6);
222 |             case 7: return R2Mask(regs::xmm7);
223 | 
224 |             default: BJIT_ASSERT(false); // FIXME: RA can't handle (falls through to iret if assert is nop)
225 |             }
226 | #endif
227 | 
228 |         // return values go in fixed registers
229 |         case ops::iret: return R2Mask(regs::rax);
230 |         case ops::fret: return R2Mask(regs::xmm0);
231 |         case ops::dret: return R2Mask(regs::xmm0);
232 | 
233 |     }
234 | }
235 | 
236 | RegMask Op::regsLost()
237 | {
238 |     // division and modulo: report the output register as lost as
239 |     // well, so that RA tries to save a value which is still needed
240 |     // after the division
241 |     if(opcode == ops::idiv || opcode == ops::udiv
242 |     || opcode == ops::imod || opcode == ops::umod)
243 |     {
244 |         return R2Mask(regs::rdx)|R2Mask(regs::rax);
245 |     }
246 | 
247 |     // argument passing: for now, OR together what regsIn() reports
248 |     // for every index below in[1]; this should help convince RA to
249 |     // do the right thing (NOTE(review): fpass is not handled - confirm)
250 |     if(opcode == ops::ipass || opcode == ops::dpass)
251 |     {
252 |         RegMask lost = 0;
253 |         for(int n = 0; n < in[1]; ++n) lost |= regsIn(n);
254 |         return lost;
255 |     }
256 | 
257 |     // the icall/fcall/dcall ops lose every caller saved register
258 |     const bool isCall
259 |         = opcode == ops::icalln || opcode == ops::fcalln
260 |         || opcode == ops::dcalln || opcode == ops::icallp
261 |         || opcode == ops::fcallp || opcode == ops::dcallp;
262 | 
263 |     // no other op loses anything
264 |     return isCall ? regs::caller_saved : 0;
265 | }
266 | 
267 | #endif


--------------------------------------------------------------------------------
/src/arch-x64.h:
--------------------------------------------------------------------------------
  1 | 
  2 | #pragma once
  3 | 
  4 | /*
  5 | 
  6 | //
  7 | // Basic instruction order is:
  8 | //   REX | OP | ModRM | SIB | DISP | IMM
  9 | //
 10 | // REX encoding: 0 1 0 0 W R X B
 11 | //  - W: op size override (1 = 64 bit)
 12 | //  - R: prefix ModRM.reg field
 13 | //  - X: prefix SIB.index
 14 | //  - B: prefix ModRM.rm field
 15 | 
 16 | This is the table for 32-bit mode, with RIP-relative patched in,
 17 | since rest of it is exactly the same except for REX bytes
 18 | 
 19 | [---] means SIB byte follows
 20 | 
 21 | r8(/r)                       AL   CL   DL   BL   AH   CH   DH   BH
 22 | r16(/r)                      AX   CX   DX   BX   SP   BP   SI   DI
 23 | r32(/r)                      EAX  ECX  EDX  EBX  ESP  EBP  ESI  EDI
 24 | mm(/r)                       MM0  MM1  MM2  MM3  MM4  MM5  MM6  MM7
 25 | xmm(/r)                      XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7
 26 | (In decimal) /digit (Opcode) 0    1    2    3    4    5    6    7
 27 | (In binary) REG =            000  001  010  011  100  101  110  111
 28 | Effective Address Mod R/M    Value of ModR/M Byte (in Hexadecimal)
 29 | [EAX]              00 000    00   08   10   18   20   28   30   38
 30 | [ECX]                 001    01   09   11   19   21   29   31   39
 31 | [EDX]                 010    02   0A   12   1A   22   2A   32   3A
 32 | [EBX]                 011    03   0B   13   1B   23   2B   33   3B
 33 | [---]                 100    04   0C   14   1C   24   2C   34   3C
 34 | [RIP]+disp32          101    05   0D   15   1D   25   2D   35   3D
 35 | [ESI]                 110    06   0E   16   1E   26   2E   36   3E
 36 | [EDI]                 111    07   0F   17   1F   27   2F   37   3F
 37 | [EAX]+disp8        01 000    40   48   50   58   60   68   70   78
 38 | [ECX]+disp8           001    41   49   51   59   61   69   71   79
 39 | [EDX]+disp8           010    42   4A   52   5A   62   6A   72   7A
 40 | [EBX]+disp8           011    43   4B   53   5B   63   6B   73   7B
 41 | [---]+disp8           100    44   4C   54   5C   64   6C   74   7C
 42 | [EBP]+disp8           101    45   4D   55   5D   65   6D   75   7D
 43 | [ESI]+disp8           110    46   4E   56   5E   66   6E   76   7E
 44 | [EDI]+disp8           111    47   4F   57   5F   67   6F   77   7F
 45 | [EAX]+disp32       10 000    80   88   90   98   A0   A8   B0   B8
 46 | [ECX]+disp32          001    81   89   91   99   A1   A9   B1   B9
 47 | [EDX]+disp32          010    82   8A   92   9A   A2   AA   B2   BA
 48 | [EBX]+disp32          011    83   8B   93   9B   A3   AB   B3   BB
 49 | [---]+disp32          100    84   8C   94   9C   A4   AC   B4   BC
 50 | [EBP]+disp32          101    85   8D   95   9D   A5   AD   B5   BD
 51 | [ESI]+disp32          110    86   8E   96   9E   A6   AE   B6   BE
 52 | [EDI]+disp32          111    87   8F   97   9F   A7   AF   B7   BF
 53 | EAX/AX/AL/MM0/XMM0 11 000    C0   C8   D0   D8   E0   E8   F0   F8
 54 | ECX/CX/CL/MM/XMM1     001    C1   C9   D1   D9   E1   E9   F1   F9
 55 | EDX/DX/DL/MM2/XMM2    010    C2   CA   D2   DA   E2   EA   F2   FA
 56 | EBX/BX/BL/MM3/XMM3    011    C3   CB   D3   DB   E3   EB   F3   FB
 57 | ESP/SP/AH/MM4/XMM4    100    C4   CC   D4   DC   E4   EC   F4   FC
 58 | EBP/BP/CH/MM5/XMM5    101    C5   CD   D5   DD   E5   ED   F5   FD
 59 | ESI/SI/DH/MM6/XMM6    110    C6   CE   D6   DE   E6   EE   F6   FE
 60 | EDI/DI/BH/MM7/XMM7    111    C7   CF   D7   DF   E7   EF   F7   FF
 61 | 
 62 | */
 63 | 
 64 | namespace bjit
 65 | {
 66 |     // this is a hint for opt-ra
 67 |     static const bool arch_explicit_output_regs = false;
 68 | 
 69 |     // we use this for types, etc
 70 |     typedef uint64_t    RegMask;
 71 | 
 72 |     // convert single-register to a mask
 73 |     static RegMask R2Mask(int r) { return ((RegMask)1)<<r; }
 74 |         
 75 |     namespace regs
 76 |     {
 77 |         // List of registers for register allocator
 78 |         // These should be in preference order
 79 |         //
 80 |         // FIXME: order is not necessarily ideal, but basic idea is that
 81 |         // we want caller-saved first (ideally avoid creating stack frame)
 82 |         // and opt-ra supposedly understands that they get clobbered by calls
 83 |         //
 84 | #define BJIT_REGS(_) \
 85 |         /* caller saved (rsi & rdi callee saved on win32) */ \
 86 |         _(rax), _(rcx), _(rdx), \
 87 |          _(r8), _(r9), _(r10), _(r11), _(rsi), _(rdi), \
 88 |         /* callee saved */ \
 89 |         _(rbx), _(rbp), _(r12), _(r13), _(r14), _(r15), _(rsp), \
 90 |         /* floating point */ \
 91 |         _(xmm0), _(xmm1), _(xmm2), _(xmm3), \
 92 |         _(xmm4), _(xmm5), _(xmm6), _(xmm7), \
 93 |         _(xmm8), _(xmm9), _(xmm10), _(xmm11), \
 94 |         _(xmm12), _(xmm13), _(xmm14), _(xmm15), \
 95 |         /* placeholder */ \
 96 |         _(none)
 97 | 
 98 | #define BJIT_REGS_ENUM(x) x
 99 |         // set nregs as the first non-register value
100 |         enum { BJIT_REGS(BJIT_REGS_ENUM), nregs = none };
101 | 
102 |         // Integer register mask (without specials): every GP register except rsp
103 |         static const RegMask mask_int
104 |             =R2Mask(rax)
105 |             |R2Mask(rdx)
106 |             |R2Mask(rbx)
107 |             |R2Mask(rcx)
108 |             |R2Mask(rsi)
109 |             |R2Mask(rdi)
110 |             |R2Mask(rbp)
111 |             |R2Mask(r8)
112 |             |R2Mask(r9)
113 |             |R2Mask(r10)
114 |             |R2Mask(r11)
115 |             |R2Mask(r12)
116 |             |R2Mask(r13)
117 |             |R2Mask(r14)
118 |             |R2Mask(r15)
119 |             ;
120 | 
121 |         // Float register mask: all of xmm0 - xmm15
122 |         static const RegMask mask_float
123 |             =R2Mask(xmm0)
124 |             |R2Mask(xmm1)
125 |             |R2Mask(xmm2)
126 |             |R2Mask(xmm3)
127 |             |R2Mask(xmm4)
128 |             |R2Mask(xmm5)
129 |             |R2Mask(xmm6)
130 |             |R2Mask(xmm7)
131 |             |R2Mask(xmm8)
132 |             |R2Mask(xmm9)
133 |             |R2Mask(xmm10)
134 |             |R2Mask(xmm11)
135 |             |R2Mask(xmm12)
136 |             |R2Mask(xmm13)
137 |             |R2Mask(xmm14)
138 |             |R2Mask(xmm15)
139 |             ;
140 | 
141 |         // Caller saved (lost on function call)
142 |         //
143 |         // NOTE: We treat all xmm registers as volatile when calling functions.
144 |         // The backend uses separate logic for callee_saved when we are callee.
145 |         //
146 |         // NOTE: This list MUST include any registers used for arguments.
147 |         static const RegMask caller_saved
148 |             =R2Mask(rax)
149 | #ifndef _WIN32      // rsi & rdi are callee saved on win32
150 |             |R2Mask(rsi)
151 |             |R2Mask(rdi)
152 | #endif
153 |             |R2Mask(rcx)
154 |             |R2Mask(rdx)
155 |             |R2Mask(r8)
156 |             |R2Mask(r9)
157 |             |R2Mask(r10)
158 |             |R2Mask(r11)
159 |             |mask_float;    // every xmm register, as the first NOTE says
160 |     };
161 | 
162 | };
163 | 
164 | 


--------------------------------------------------------------------------------
/src/bjit-impl.h:
--------------------------------------------------------------------------------
  1 | 
  2 | #pragma once
  3 | 
  4 | #include "hash.h"
  5 | #include "ir-ops.h"
  6 | 
  7 | #ifdef __x86_64__
  8 | # define BJIT_ARCH_SUPPORTED
  9 | # include "arch-x64.h"   // FIXME: check for arch
 10 | #endif
 11 | 
 12 | #ifdef __aarch64__
 13 | # define BJIT_ARCH_SUPPORTED
 14 | # include "arch-arm64.h"
 15 | #endif
 16 | 
 17 | #ifndef BJIT_ARCH_SUPPORTED
 18 | #error "Unsupported architecture.
 19 | #endif
 20 | 
 21 | namespace bjit
 22 | {
 23 | 
 24 | #define BJIT_OP_ENUM(name,...) name
 25 |     namespace ops { enum { BJIT_OPS(BJIT_OP_ENUM) }; };
 26 | #undef BJIT_OP_ENUM
 27 | 
 28 |     static const uint16_t   noVal = 0xffff;
 29 |     static const uint16_t   noSCC = 0xffff;
 30 |     
 31 |     namespace impl
 32 |     {
 33 |         struct Op
 34 |         {
 35 |             // input operands
 36 |             //
 37 |             // NOTE: CSE only copies this union when it moves ops
 38 |             //
 39 |             // NOTE: packing is sensitive (also FIXME: rethink this?)
 40 |             union
 41 |             {
 42 |                 // 64-bit constants for load immediate
 43 |                 int64_t     i64;
 44 |                 uint64_t    u64;
 45 |                 double      f64;
 46 |                 
 47 |                 uint16_t    in[3];
 48 |                 
 49 |                 // imm + two values
 50 |                 struct {
 51 |                     uint32_t    __in_pad;
 52 |                     
 53 |                     union
 54 |                     {
 55 |                         float       f32;
 56 | 
 57 |                         // this must be signed!
 58 |                         int32_t     imm32;
 59 | 
 60 |                         struct
 61 |                         {
 62 |                             uint16_t    phiIndex;
 63 |                             uint16_t    iv;
 64 |                         };
 65 |                         
 66 |                         struct  // used by arguments
 67 |                         {
 68 |                             uint16_t    indexType;
 69 |                             uint16_t    indexTotal;
 70 |                         };
 71 | 
 72 |                         struct
 73 |                         {
 74 |                             // memtag will alias on in[2] for stores
 75 |                             // which is fine, stores don't store a tag
 76 |                             uint16_t    memtag;
 77 |                             uint16_t    off16;
 78 |                         };
 79 |                     };
 80 |                 };
 81 |     
 82 |             };
 83 |     
 84 |             // output data
 85 |             // NOTE: packing is sensitive (also relative to above)
 86 |             union
 87 |             {
 88 |                 struct
 89 |                 {
 90 |                     uint16_t    scc;    // stack congruence class
 91 |                     uint16_t    nUse;   // number of users
 92 |                 };
 93 |     
 94 |                 // jumps need labels
 95 |                 uint16_t    label[2];
 96 |             };
 97 |             
 98 |             uint16_t    block;  // block in which the op currently lives
 99 |             uint16_t    pos = noVal;        // position in block after DCE
100 |             
101 |             uint16_t    opcode;             // opcode, see ir-ops.h
102 |             uint8_t     reg = regs::none;   // output register
103 |     
104 |             // Register type, needed for correct renames, etc..
105 |             //
106 |             enum Type
107 |             {
108 |                 _none,  // no output
109 |                 _ptr,   // pointer-sized integer (anything that fits GP regs)
110 |                 _f32,   // single precision float
111 |                 _f64    // double precision float
112 |             };
113 |     
114 |             struct {
115 |                 Type    type    : 4;    // see above, packed into flags
116 |                 bool    spill   : 1;
117 |                 
118 |                 // no_opt on jumps means "don't try to unroll this further"
119 |                 // no_opt on regular ops means "don't hoist this further"
120 |                 // no_opt on phis in RA means "don't try to spill sources"
121 |                 bool    no_opt  : 1;
122 |             } flags = {};
123 |     
124 |     
125 |             // regsMask returns a mask of registers that can hold this type
126 |             RegMask     regsMask();     // in arch-XX-ops.cpp
127 |     
128 |             // regsIn, regsOut and regsLost return masks for the actual operation
129 |             RegMask     regsIn(int i);  // in arch-XX-ops.cpp
130 |             RegMask     regsOut();      // in arch-XX-ops.cpp
131 |             RegMask     regsLost();     // in arch-XX-ops.cpp
132 |     
133 |             // rest are in ir-ops.cpp
134 |             const char* strOpcode() const;
135 |             
136 |             unsigned    nInputs()   const;
137 |             bool        hasOutput() const;
138 |     
139 |             bool    canCSE()        const;
140 |             bool    canMove()       const;
141 |     
142 |             bool    hasSideFX()     const;
143 |     
144 |             bool    hasImm32()      const;
145 |             bool    hasI64()        const;
146 |             
147 |             bool    hasF32()        const;
148 |             bool    hasF64()        const;
149 | 
150 |             bool    hasMem()        const;  // aka. isLoad() / isStore()
151 |             bool    hasMemTag()     const;  // aka. isLoad()?
152 | 
153 |             bool    anyOutReg()     const;
154 |     
155 |             void    makeNOP() { opcode = ops::nop; u64 = ~0ull; }   // becomes a nop, u64 payload set to all one bits
156 |         };
157 |     
158 |         // Hash-table entry describing one op for common subexpression
159 |         // elimination. Only CSE uses it, but it is defined here so that
160 |         // the hash table can be allocated just once.
161 |         struct OpCSE
162 |         {
163 |             // bookkeeping only, ignored by getHash() and isEqual()
164 |             uint16_t index = noVal;
165 |             uint16_t block = noVal;
166 |         
167 |             // everything below takes part in hashing and comparison
168 |             union
169 |             {
170 |                 struct {
171 |                     uint32_t imm32 = 0;
172 |                     uint16_t in[2] = { noVal, noVal };
173 |                 };
174 |         
175 |                 // 64-bit payload for lci/lcf, shares storage with the above
176 |                 int64_t     i64;
177 |             };
178 |             uint16_t opcode = noVal;
179 |         
180 |             OpCSE() {}
181 |             OpCSE(uint16_t opIndex, Op const & op) { set(opIndex, op); }
182 |         
183 |             void set(uint16_t opIndex, Op const & op)
184 |             {
185 |                 index = opIndex;
186 |                 block = op.block;
187 |                 opcode = op.opcode;
188 | 
189 |                 // 64-bit constants occupy the whole hashed payload
190 |                 if(op.hasI64() || op.hasF64()) { i64 = op.i64; return; }
191 | 
192 |                 const unsigned nIn = op.nInputs();
193 |                 const bool keepImm
194 |                     = op.hasMemTag() || op.hasImm32() || op.hasF32();
195 | 
196 |                 // fields the op doesn't use get a fixed filler value
197 |                 imm32 = keepImm ? op.imm32 : 0;
198 |                 in[0] = (nIn >= 1) ? op.in[0] : noVal;
199 |                 in[1] = (nIn >= 2) ? op.in[1] : noVal;
200 |                 BJIT_ASSERT(nIn <= 2);
201 |             }
202 |         
203 |             // NOTE: goes through a temporary to force the "noVals"
204 |             bool isEqual(Op const & op) const
205 |             { return isEqual(OpCSE(noVal, op)); }
206 |         
207 |             // NOTE: goes through a temporary to force the "noVals"
208 |             static uint64_t getHash(Op const & op)
209 |             { return getHash(OpCSE(noVal, op)); }
210 |             
211 |             bool isEqual(OpCSE const & op) const
212 |             { return opcode == op.opcode && i64 == op.i64; }
213 |         
214 |             static uint64_t getHash(OpCSE const & op)
215 |             { return hash64(op.i64 + op.opcode); }
216 |         };
217 |     
218 |         // Tracks value renames and applies them to op inputs
219 |         struct Rename
220 |         {
221 |             struct Map {
222 |                 uint16_t src, dst;
223 |                 Map(uint16_t s, uint16_t d) : src(s), dst(d) {}
224 |             };
225 |             std::vector<Map>    map;
226 |     
227 |             void add(uint16_t s, uint16_t d) { map.push_back(Map(s,d)); }
228 |     
229 |             // rewrite each input of 'op' that matches a recorded source
230 |             Op & operator()(Op & op)
231 |             {
232 |                 const int nIn = op.nInputs();
233 |                 if(nIn == 0) return op;
234 | 
235 |                 for(auto & r : map)
236 |                 {
237 |                     // input counts above three are rejected
238 |                     if(nIn > 3) { BJIT_ASSERT(false); continue; }
239 | 
240 |                     // visit inputs from the last one down to in[0]
241 |                     for(int i = nIn; i--;)
242 |                         if(op.in[i] == r.src) op.in[i] = r.dst;
243 |                 }
244 |                 return op;
245 |             }
246 |         };
247 |         
248 |         // Used by Block to track actual phi-alternatives
249 |         struct Phi
250 |         {
251 |             uint16_t            phiop;          // phi op, noVal when unset
252 |             uint16_t            tmp = noVal;    // used by DCE, never indeterminate
253 | 
254 |             Phi() : phiop(noVal) {}
255 |             Phi(uint16_t phiop) : phiop(phiop) {}
256 |         };
257 | 
258 |         struct PhiAlt  // one alternative value for a phi, see Block::newAlt
259 |         {
260 |             uint16_t    phi;    // the phi op this alternative belongs to
261 |             uint16_t    src;    // source block (debug output prints it as a label)
262 |             uint16_t    val;    // op index of the value
263 |         };
264 |     
265 |         // A single basic block
266 |         struct Block
267 |         {
268 |             std::vector<uint16_t>   code;
269 |             std::vector<Phi>        args;
270 |             std::vector<PhiAlt>     alts;
271 | 
272 |             void newAlt(uint16_t phi, uint16_t src, uint16_t val)
273 |             {
274 |                 alts.push_back(PhiAlt{phi, src, val});   // record one more alternative
275 |             }
276 |     
277 |             std::vector<uint16_t>   livein;
278 |             std::vector<uint16_t>   comeFrom;   // blocks we come from
279 |     
280 |             // per-register state on input
281 |             uint16_t    regsIn[regs::nregs];
282 |     
283 |             // per-register state on output (used for shuffling)
284 |             uint16_t    regsOut[regs::nregs];
285 |     
286 |             // dominators
287 |             std::vector<uint16_t>   dom;
288 |             
289 |             uint16_t    idom;   // immediate dominator
290 |             uint16_t    pdom;   // immediate post-dominator
291 | 
292 |             uint16_t    memtag; // memory version on entry to the block
293 |             uint16_t    memout; // memory version on exit from the block
294 | 
295 |             struct {
296 |                 bool live       : 1;    // used/reset by DCE, RA
297 |                 bool regsDone   : 1;    // used by reg-alloc
298 |                 bool codeDone   : 1;    // used by the backend
299 |             } flags = {};
300 |             // both register tables start out as noVal
301 |             Block()
302 |             {
303 |                 for(int r = regs::nregs; r--;) { regsIn[r] = noVal; regsOut[r] = noVal; }
304 |             }
305 |         };
306 |     
307 |         // used to communicate relocations from Proc to Module
308 |         struct NearReloc
309 |         {
310 |             uint32_t    codeOffset;     // where in the code to add the offset
311 |             uint32_t    procIndex;    // selects which offset to add
312 |         };
313 |     };
314 | };


--------------------------------------------------------------------------------
/src/debug.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "bjit.h"
  3 | 
  4 | #include <inttypes.h>
  5 | 
  6 | using namespace bjit;
  7 | 
  8 | // register names: 'none' is temporarily #defined so that it prints as "---"
  9 | #define none ---
 10 | #define BJIT_STR(x) #x
 11 | #define BJIT_REGS_NAME(x) BJIT_STR(x)   // extra level so 'none' expands before '#'
 12 | static const char * regNames[] = { BJIT_REGS(BJIT_REGS_NAME) };
 13 | #undef none
 14 | // name of register 'r', looked up in regNames (the index is not range-checked)
 15 | const char * bjit::Proc::regName(int r) const { return regNames[r]; }
 16 | // Log one line for op 'iop': position, slot, value index, register, opcode, type, operands
 17 | void bjit::Proc::debugOp(uint16_t iop) const
 18 | {
 19 |     if(iop == noVal) { BJIT_LOG("           -- removed op -- \n"); return; }
 20 |     auto & op = ops[iop];
 21 | 
 22 |     BJIT_LOG("%4x:", op.pos);
 23 | 
 24 |     // slot column: "=[scc]=" for spilled values, "(scc)" otherwise
 25 |     if(op.hasOutput())
 26 |     {
 27 |         if(op.flags.spill) BJIT_LOG("=[%04x]= ", op.scc);
 28 |         else if(op.scc == noSCC) BJIT_LOG("  ----   ");
 29 |         else BJIT_LOG(" (%04x)  ", op.scc);
 30 |     }
 31 |     else BJIT_LOG("         ");
 32 | 
 33 |     // make it clear which renames actually cause moves
 34 |     bool nopRename = false;
 35 |     if(op.opcode == ops::rename && op.reg == ops[op.in[0]].reg)
 36 |         nopRename = true;
 37 |     BJIT_LOG("%04x %6s %8s %c", iop,
 38 |         op.hasOutput() ? regName(op.reg) : "",
 39 |         nopRename ? " - " : op.strOpcode(),
 40 |         op.flags.no_opt ? '*' : ' ');
 41 | 
 42 |     switch(op.flags.type)
 43 |     {
 44 |         case Op::_none: BJIT_LOG("          "); break;
 45 |         case Op::_ptr:  BJIT_LOG(" %3d  ptr ", op.nUse); break;
 46 |         case Op::_f32:  BJIT_LOG(" %3d  f32 ", op.nUse); break;
 47 |         case Op::_f64:  BJIT_LOG(" %3d  f64 ", op.nUse); break;
 48 |     };
 49 | 
 50 |     // sanity: unused input fields are expected to hold noVal
 51 |     if(!op.hasI64() && !op.hasF64())
 52 |     {
 53 |         if(op.nInputs() < 1) BJIT_ASSERT(op.in[0] == noVal);
 54 |         if(!op.hasMemTag()
 55 |         && op.nInputs() < 2) BJIT_ASSERT(op.in[1] == noVal);
 56 |     }
 57 | 
 58 |     // special-case reload to not print register
 59 |     if(op.opcode == ops::reload)
 60 |         BJIT_LOG(" [%04x]:%04x", ops[op.in[0]].scc, op.in[0]);
 61 |     else
 62 |     {
 63 |         // each input prints as reg:value, inputs that are nops get a <BAD:n> tag
 64 |         switch(op.nInputs())
 65 |         {
 66 |             case 1: BJIT_LOG(" %s:%04x", regNames[ops[op.in[0]].reg], op.in[0]);
 67 |                 if(ops[op.in[0]].opcode == ops::nop) BJIT_LOG(" <BAD:0>");
 68 |                 break;
 69 |             case 2: BJIT_LOG(" %s:%04x %s:%04x",
 70 |                 regNames[ops[op.in[0]].reg], op.in[0],
 71 |                 regNames[ops[op.in[1]].reg], op.in[1]);
 72 |                 if(ops[op.in[0]].opcode == ops::nop) BJIT_LOG(" <BAD:0>");
 73 |                 if(ops[op.in[1]].opcode == ops::nop) BJIT_LOG(" <BAD:1>");
 74 |                 break;
 75 |             case 3: BJIT_LOG(" %s:%04x %s:%04x %s:%04x",
 76 |                 regNames[ops[op.in[0]].reg], op.in[0],
 77 |                 regNames[ops[op.in[1]].reg], op.in[1],
 78 |                 regNames[ops[op.in[2]].reg], op.in[2]);
 79 |                 if(ops[op.in[0]].opcode == ops::nop) BJIT_LOG(" <BAD:0>");
 80 |                 if(ops[op.in[1]].opcode == ops::nop) BJIT_LOG(" <BAD:1>");
 81 |                 if(ops[op.in[2]].opcode == ops::nop) BJIT_LOG(" <BAD:2>");
 82 |                 break;
 83 |             case 0: break;
 84 |             default: BJIT_ASSERT(false);
 85 |         }
 86 |     }
 87 | 
 88 |     if(op.opcode == ops::icalln
 89 |     || op.opcode == ops::fcalln
 90 |     || op.opcode == ops::dcalln
 91 |     || op.opcode == ops::tcalln) BJIT_LOG(" near: %d", op.imm32);
 92 |     else if(op.hasImm32()) BJIT_LOG(" %+d", op.imm32);
 93 |     else if(op.hasMem()) BJIT_LOG(" %d", op.off16);
 94 | 
 95 |     if(op.hasMemTag()) BJIT_LOG(" mem(%04x)", op.memtag);
 96 |     
 97 |     if(op.hasI64()) BJIT_LOG(" i64:%" PRId64, op.i64);
 98 |     if(op.hasF32()) BJIT_LOG(" f32:%.8e", op.f32);
 99 |     if(op.hasF64()) BJIT_LOG(" f64:%.8e", op.f64);
100 | 
101 |     if(op.opcode == ops::phi)
102 |     {
103 |         // list the alternatives of this phi as Lsrc:[scc]:value
104 |         for(auto & a : blocks[op.block].alts)
105 |         {
106 |             if(a.phi != iop) continue;
107 |             if(ops[a.val].scc != noSCC)
108 |                 BJIT_LOG(" L%d:[%04x]:%04x", a.src, ops[a.val].scc, a.val);
109 |             else 
110 |                 BJIT_LOG(" L%d:[----]:%04x", a.src, a.val);
111 |         }
112 |         if(op.iv != noVal) BJIT_LOG(" IV:%04x", op.iv);
113 |     }
114 | 
115 |     if(op.opcode == ops::iarg || op.opcode == ops::farg || op.opcode == ops::darg)
116 |     {
117 |         BJIT_LOG(" #%d total #%d", op.indexType, op.indexTotal);
118 |     }
119 | 
120 |     if(op.opcode <= ops::jmp) BJIT_LOG(" L%d", op.label[0]);   // opcodes up to jmp have a label
121 |     if(op.opcode < ops::jmp) BJIT_LOG(" L%d", op.label[1]);    // those before jmp have a second one
122 | 
123 |     BJIT_LOG("\n");
124 | }
125 | // Log every listed block with its header lines, followed by its ops (via debugOp)
126 | void bjit::Proc::debug() const
127 | {
128 |     BJIT_LOG("\n;----");
129 |     if(raDone) BJIT_LOG(" Slots: %d\n", nSlots); else BJIT_LOG("\n");
130 |     
131 |     if(live.size())
132 |     {
133 |         for(auto b : live)
134 |         {
135 |             BJIT_LOG("L%d:", b);
136 |             for(auto s : blocks[b].comeFrom) BJIT_LOG(" <L%d", s);
137 |             BJIT_LOG("\n; IDom: L%d (nDom: %d),",
138 |                 blocks[b].idom, (int)blocks[b].dom.size());
139 |             if(blocks[b].pdom != noVal) BJIT_LOG(" PDom: L%d", blocks[b].pdom);
140 |             else BJIT_LOG(" PDom: exit");
141 |             //BJIT_LOG("\n; "); for(auto s : blocks[b].dom) BJIT_LOG(" ^L%d", s);
142 |             //BJIT_LOG("\n; "); for(auto s : blocks[b].pdom) BJIT_LOG(" L%d^", s);
143 | 
144 |             // print memory tag, but not for entry (always ffff and def at 0000)
145 |             if(b) BJIT_LOG(" mem(%04x)", blocks[b].memtag);
146 |             
147 |             // live-in values, eight per line
148 |             for(unsigned i = 0; i < blocks[b].livein.size(); ++i)
149 |             {
150 |                 if(!(0x7&(i))) BJIT_LOG("\n; Live: ");
151 |                 if(ops[blocks[b].livein[i]].scc != noSCC)
152 |                     BJIT_LOG(" [%04x]:%04x",
153 |                         ops[blocks[b].livein[i]].scc, blocks[b].livein[i]);
154 |                 else BJIT_LOG(" [----]:%04x",blocks[b].livein[i]);
155 |             }
156 |             // register contents on entry, only once RA is done
157 |             if(raDone)
158 |             {
159 |                 BJIT_LOG("\n; In:");
160 |                 for(int i = 0; i < regs::nregs; ++i)
161 |                 {
162 |                     if(blocks[b].regsIn[i] != noVal)
163 |                     {
164 |                         BJIT_LOG(" %s:%04x", regNames[i], blocks[b].regsIn[i]);
165 |                     }
166 |                 }
167 |             }
168 |             BJIT_LOG("\n;      SLOT  VALUE    REG       OP   USE TYPE  ARGS\n");
169 |             for(auto & iop : blocks[b].code) { debugOp(iop); }
170 |             // register contents on exit, only once RA is done
171 |             if(raDone)
172 |             {
173 |                 BJIT_LOG("; Out:");
174 |                 for(int i = 0; i < regs::nregs; ++i)
175 |                 {
176 |                     if(blocks[b].regsOut[i] != noVal)
177 |                     {
178 |                         BJIT_LOG(" %s:%04x", regNames[i], blocks[b].regsOut[i]);
179 |                     }
180 |                 }
181 |                 BJIT_LOG("\n");
182 |             }
183 |             BJIT_LOG("\n");
184 |         }
185 |     }
186 |     else for(int b = 0; b < (int)blocks.size(); ++b)
187 |     {
188 |         if(!blocks[b].flags.live) continue;     // dead blocks are not listed
189 |         BJIT_LOG("L%d:\n", b);
190 |         BJIT_LOG("; SLOT  VALUE    REG       OP USE TYPE  ARGS\n");
191 |         for(auto & iop : blocks[b].code) { debugOp(iop); }
192 |     }
193 |     BJIT_LOG(";----\n");
194 |     
195 | }
196 | 


--------------------------------------------------------------------------------
/src/hash.h:
--------------------------------------------------------------------------------
  1 | 
  2 | #pragma once
  3 | 
  4 | #include <cstdio>
  5 | 
  6 | namespace bjit
  7 | {
  8 |     // 64-bit mixer built on the mix13 constants (splitmix64 uses them too), see
  9 |     // https://zimbry.blogspot.com/2011/09/better-bit-mixing-improving-on.html
 10 |     static uint64_t hash64(uint64_t x)
 11 |     {
 12 |         uint64_t m = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9LLU;
 13 |         m = (m ^ (m >> 27)) * 0x94d049bb133111ebLLU;
 14 |         // last round is a plain xor-shift without a multiply
 15 |         return m ^ (m >> 31);
 16 |     }
 17 | 
 18 |     // This does 32-bits at a time using hash64 with the upper bytes
 19 |     // set to the total length of the string (the seed never changes).
 20 |     //
 21 |     // It's basically a variant of the sponge-construction, except we
 22 |     // xor the length into the capacity on every round.
 23 |     static uint64_t stringHash64(const uint8_t * bytes, uint32_t nBytes)
 24 |     {
 25 |         uint64_t x = 0;
 26 |         uint64_t seed = ((uint64_t)nBytes) << 32;
 27 |         while(nBytes >= 4)
 28 |         {
 29 |             // bytewise little-endian word: no unaligned or type-punned load
 30 |             x ^= seed ^ ((uint32_t)bytes[0] | ((uint32_t)bytes[1] << 8)
 31 |                 | ((uint32_t)bytes[2] << 16) | ((uint32_t)bytes[3] << 24));
 32 |             x = hash64(x);
 33 |             bytes += 4; nBytes -= 4;
 34 |         }
 35 | 
 36 |         switch(nBytes)  // 1-3 trailing bytes, cases fall through on purpose
 37 |         {
 38 |             case 3: x += bytes[2] << 16;
 39 |             case 2: x += bytes[1] << 8;
 40 |             case 1: x += bytes[0];
 41 |                 x ^= seed;
 42 |                 x = hash64(x);
 43 |             default:
 44 |                 break;
 45 |         }
 46 |         
 47 |         return x;
 48 |     }
 49 | 
 50 |     // This is a closed hashtable that stores a set of items
 51 |     // and which allows searching by any key for which the
 52 |     // Item-type provides methods getHash(Key) and isEqual(Key),
 53 |     // which means it can be used as either a set or a map.
 54 |     template <typename Item>
 55 |     struct HashTable
 56 |     {
 57 |         // minimum number of slots to use
 58 |         static const unsigned minSlots = 4;
 59 | 
 60 |         // resize when less than 1/freeFactor slots are free
 61 |         // this should normally be between 2 and 4
 62 |         //
 63 |         static const unsigned freeFactor = 3;
 64 |     
 65 |         // this implements visitor pattern
 66 |         // calls fn(item) for each slot that is in use
 67 |         template <class Visitor>
 68 |         void foreach(Visitor && fn)
 69 |         {
 70 |             for(unsigned i = 0; i < slots.size(); ++i)
 71 |             {
 72 |                 Slot & s = slots[i];
 73 |                 if(slotInUse == (s.hash & 0x3))
 74 |                 {
 75 |                     fn(s.item);
 76 |                 }
 77 |             }
 78 |         }
 79 | 
 80 |         unsigned size() { return nUsed; }
 81 | 
 82 |         // return total capacity before resize
 83 |         unsigned capacity()
 84 |         {
 85 |             // insert() grows once fewer than 1/freeFactor slots are free
 86 |             return (slots.size() * (freeFactor-1)) / freeFactor;
 87 |         }
 88 | 
 89 |         HashTable(unsigned reserveCapacity = 0) { reserve(reserveCapacity); }
 90 | 
 91 |         void reserve(unsigned fitSize)
 92 |         {
 93 |             if(fitSize < minSlots) fitSize = minSlots;
 94 |             
 95 |             // inverse of capacity(): slots needed to hold fitSize items
 96 |             fitSize = 1 + fitSize * freeFactor / (freeFactor-1);
 97 | 
 98 |             unsigned wantSize = minSlots;
 99 |             while(wantSize < fitSize) wantSize <<= 1;
100 |             
101 |             if(wantSize > slots.size()) resize(wantSize);
102 |         }
103 | 
104 |         // return existing item matching key or null
105 |         template <typename Key>
106 |         Item * find(const Key & k)
107 |         {
108 |             Slot & s = internalFind(k, false);
109 |             if((s.hash & 0x3) != slotInUse) return 0;
110 |             return &s.item;
111 |         }
112 | 
113 |         // remove existing item matching key if any
114 |         template <typename Key>
115 |         void remove(const Key & k)
116 |         {
117 |             Slot & s = internalFind(k, false);
118 |             if((s.hash & 0x3) == slotInUse)
119 |             {
120 |                 s.hash = slotRemoved | (s.hash & ~(uint64_t)0x3);
121 |                 s.item = std::move(Item());
122 |                 --nUsed;
123 |             }
124 |         }
125 | 
126 |         // add a new item - any existing matching key is replaced
127 |         void insert(Item & i)
128 |         {
129 |             // find without marking: only a new key may count towards nUsed
130 |             Slot & s = internalFind(i, false);
131 |             bool isNew = (s.hash & 0x3) != slotInUse;
132 |             if(isNew) s.hash = slotInUse | (Item::getHash(i) & ~(uint64_t)0x3);
133 |             s.item = std::move(i);
134 | 
135 |             // resize when (nSlots - nUsed) / nSlots < 1 / freeFactor
136 |             if(isNew && (slots.size() - (++nUsed)) * freeFactor < slots.size())
137 |                 resize(slots.size() << 1);
138 |         }
139 | 
140 |         // clear the whole table
141 |         void clear()
142 |         {
143 |             for(unsigned i = 0; i < slots.size(); ++i)
144 |             {
145 |                 slots[i].hash = slotFree;
146 |                 slots[i].item = std::move(Item());
147 |             }
148 |             nUsed = 0;  // nothing is stored anymore
149 |         }
150 | 
151 |         // explicit rehash is useful in some situations
152 |         // to clear lazily deleted junk that leads to long probes
153 |         //
154 |         // this can also optionally attempt to compact the table
155 |         //
156 |         void rehash(bool compact = false)
157 |         {
158 |             unsigned wantSlots = slots.size();
159 |             if(compact)
160 |             {
161 |                 // reserve at least minSlots before resize
162 |                 unsigned needSlots = nUsed + minSlots;
163 | 
164 |                 // can we halve the size?
165 |                 while(wantSlots > minSlots)
166 |                 {
167 |                     // next candidate size
168 |                     unsigned halfSlots = wantSlots >> 1;
169 | 
170 |                     // would this cause a resize up (test for too small
171 |                     // first, so that the subtraction can't wrap around)
172 |                     if(halfSlots <= needSlots || halfSlots
173 |                         > (halfSlots - needSlots) * freeFactor) break;
174 |                     // accept the smaller size and iterate
175 |                     wantSlots = halfSlots;
176 |                 }
177 |             }
178 | 
179 |             // do the actual rehash
180 |             resize(wantSlots);
181 |         }
182 | 
183 |     private:
184 |         // these are stored in low 2-bits of Slot's hash
185 |         enum { slotFree, slotInUse, slotRemoved };
186 |         struct Slot
187 |         {
188 |             Item        item;
189 |             uint64_t    hash;
190 | 
191 |             // item is default-constructed, slot starts out free
192 |             Slot() : hash(slotFree) {}
193 |         };
194 | 
195 |         unsigned    nUsed   = 0;  // for resize control
196 | 
197 |         // use std::vector for memory management
198 |         std::vector<Slot>   slots;
199 | 
200 |         // the ultimate hash probe of death:
201 |         //   - use a 2nd hash (upper 32 bits) to seed the probe
202 |         //   - force the 2nd hash to be odd (all slots for pow2)
203 |         //   - then use quadratic probe order on that
204 |         //   - terrible for cache, but shouldn't cluster
205 |         uint32_t probe(uint64_t hash, unsigned j)
206 |         {
207 |             return (hash + ((hash>>32)|1)*((j+j*j)/2)) & (slots.size() - 1);
208 |         }
209 | 
210 |         // find a slot for a given key, or a free slot to insert into
211 |         template <typename Key>
212 |         Slot & internalFind(const Key & k, bool doInsert)
213 |         {
214 |             uint64_t hash = Item::getHash(k);
215 | 
216 |             // probe loop
217 |             for(unsigned j = 0; j < slots.size(); ++j)
218 |             {
219 |                 unsigned i = probe(hash, j);
220 | 
221 |                 Slot & s = slots[i];
222 | 
223 |                 // is this a free slot or a slot for this key
224 |                 if((slotFree == (s.hash & 0x3))
225 |                 || (s.hash>>2 == hash>>2 && s.item.isEqual(k)))
226 |                 {
227 |                     if(doInsert)
228 |                     {
229 |                         // mark the slot as in use and set hash
230 |                         s.hash = slotInUse | (hash & ~(uint64_t)0x3);
231 |                     }
232 |                     return s;
233 |                 }
234 | 
235 |                 // if this is a removed slot, then we need to
236 |                 // do a further probe
237 |                 if(slotRemoved == (s.hash & 0x3))
238 |                 {
239 |                     while(++j < slots.size())
240 |                     {
241 |                         i = probe(hash, j);
242 | 
243 |                         Slot & ss = slots[i];
244 | 
245 |                         // if we find free slot, then key not found
246 |                         // and we can reuse the removed slot
247 |                         if(slotFree == (ss.hash & 0x3)) break;
248 | 
249 |                         // if we find legit match, return this slot
250 |                         if(ss.hash>>2 == hash>>2 && ss.item.isEqual(k))
251 |                         {
252 |                             if(doInsert)
253 |                             {
254 |                                 // mark the slot as in use and set hash
255 |                                 ss.hash = slotInUse | (hash & ~(uint64_t)0x3);
256 |                             }
257 |                             return ss;
258 |                         }
259 |                     }
260 | 
261 |                     // didn't find a legit match
262 |                     // so reuse the first candidate
263 |                     if(doInsert)
264 |                     {
265 |                         // mark the slot as in use and set hash
266 |                         s.hash = slotInUse | (hash & ~(uint64_t)0x3);
267 |                     }
268 |                     return s;
269 |                 }
270 |             }
271 | 
272 |             fprintf(stderr, "bjit::HashTable warning: probe failed\n");
273 | 
274 |             // if we are here then something is wrong with our probing
275 |             // function.. double the table (rehash() takes a flag, not a size)
276 |             resize(slots.size() << 1);
277 | 
278 |             // recursively try again, should never happen
279 |             return internalFind(k, doInsert);
280 |         }
281 | 
282 |         void resize(unsigned newSize)
283 |         {
284 |             // create a new vector with new size
285 |             std::vector<Slot> tmp(newSize);
286 | 
287 |             // swap with the existing slots
288 |             slots.swap(tmp);
289 | 
290 |             // fast-path resize on empty table
291 |             if(!nUsed) return;
292 | 
293 |             // reset load factor, this gets recalculated
294 |             nUsed = 0;
295 | 
296 |             // loop the old table to rehash
297 |             for(unsigned i = 0; i < tmp.size(); ++i)
298 |             {
299 |                 Slot & s = tmp[i];
300 |                 if(slotInUse == (s.hash&0x3))
301 |                 {
302 |                     insert(s.item);
303 |                 }
304 |             }
305 |         }
306 |     };
307 | };


--------------------------------------------------------------------------------
/src/ir-ops.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | // Opcode data definitions
 3 | 
 4 | #include "bjit.h"
 5 | 
 6 | #define BJIT_DATA(name, out, in) { #name, out, in }   // one table row per op
 7 | static struct
 8 | {
 9 |     const char * name;      // stringified opcode name
10 |     // low two bits of each hold a count, the remaining bits are flags
11 |     unsigned    outputs;
12 |     unsigned    inputs;
13 | } opData[] = { BJIT_OPS(BJIT_DATA) };
14 | 
15 | const char * bjit::impl::Op::strOpcode() const
16 | {
17 |     return opData[opcode].name;     // printable name from the table
18 | }
19 | 
20 | bool bjit::impl::Op::hasOutput() const
21 | {
22 |     return (opData[opcode].outputs & 0x3) != 0;     // low bits: output count
23 | }
24 | 
25 | unsigned bjit::impl::Op::nInputs() const
26 | {
27 |     return 0x3 & opData[opcode].inputs;     // strip the flag bits
28 | }
29 | 
30 | bool bjit::impl::Op::hasImm32() const
31 | {
32 |     return (opData[opcode].inputs & BJIT_IMM32) != 0;
33 | }
34 | 
35 | bool bjit::impl::Op::hasI64() const
36 | {
37 |     return (opData[opcode].inputs & BJIT_I64) != 0;
38 | }
39 | 
40 | bool bjit::impl::Op::hasF64() const
41 | {
42 |     return (opData[opcode].inputs & BJIT_F64) != 0;
43 | }
44 | 
45 | bool bjit::impl::Op::hasF32() const
46 | {
47 |     return (opData[opcode].inputs & BJIT_F32) != 0;
48 | }
49 | 
50 | bool bjit::impl::Op::hasMem() const
51 | {
52 |     return 0 != (opData[opcode].inputs & BJIT_MEM);
53 | }
54 | 
55 | bool bjit::impl::Op::hasMemTag() const
56 | {
57 |     return hasMem() ? canCSE() : false;     // memory ops that can CSE
58 | }
59 | 
60 | bool bjit::impl::Op::hasSideFX() const
61 | {
62 |     const unsigned out = opData[opcode].outputs;
63 |     return out == 0 || (out & BJIT_SIDEFX) != 0;
64 | }
65 | 
66 | bool bjit::impl::Op::canCSE() const
67 | {
68 |     return 0 != (opData[opcode].outputs & BJIT_CSE);
69 | }
70 | 
71 | bool bjit::impl::Op::canMove() const
72 | {
73 |     return 0 == (opData[opcode].outputs & BJIT_NOMOVE);
74 | }
75 | 
76 | bool bjit::impl::Op::anyOutReg() const
77 | {
78 |     return 0 != (opData[opcode].outputs & BJIT_ANYREG);
79 | }
80 | 


--------------------------------------------------------------------------------
/src/ir-ops.h:
--------------------------------------------------------------------------------
  1 | 
  2 | #pragma once
  3 | 
  4 | // List of operations, order is significant
  5 | //  _(name, nOutputs, nInputs)
  6 | 
  7 | // output flags (lowest 2 bits are tested by Op::hasOutput):
  8 | //
  9 | // if both SIDEFX and CSE are defined (eg. idiv) then we treat it as
 10 | // having SIDEFX for "safe" optimization, and CSE for "unsafe" optimizations
 11 | //
 12 | 
 13 | #define BJIT_SIDEFX 0x10    // never DCE, don't move loads across
 14 | #define BJIT_CSE    0x20    // can CSE
 15 | #define BJIT_NOMOVE 0x40    // must be in the beginning of a block
 16 | #define BJIT_ANYREG 0x80    // ignore 2-reg ISA (eg. can swap operands)
 17 | 
 18 | // input flags (lowest 2 bits are nInputs, masked off by Op::nInputs)
 19 | #define BJIT_MEM    0x08    // offset16 + memtag
 20 | #define BJIT_IMM32  0x10    // has imm32 operand
 21 | #define BJIT_I64    0x20    // has 64-bit integer constant
 22 | #define BJIT_F64    0x40    // has double constant
 23 | #define BJIT_F32    0x80    // has single constant
 24 | // BJIT_OPS(_) applies _ to every operation below, giving a comma-separated list in exactly this order.
 25 | #define BJIT_OPS(_) \
 26 |     /* CAREFUL WITH THE ORDER HERE (see below also): later groups must keep the same relative layout */ \
 27 |     /* (xor 1): branch signed integer comparisons */ \
 28 |     /* (xor 2): operands swapped */ \
 29 |     _(jilt, 0, 2), \
 30 |     _(jige, 0, 2), \
 31 |     _(jigt, 0, 2), \
 32 |     _(jile, 0, 2), \
 33 |     /* (xor 1): branch unsigned integer comparisons */ \
 34 |     /* (xor 2): operands swapped */ \
 35 |     _(jult, 0, 2), \
 36 |     _(juge, 0, 2), \
 37 |     _(jugt, 0, 2), \
 38 |     _(jule, 0, 2), \
 39 |     /* (xor 1): branch integer equality (equal, not equal) */ \
 40 |     _(jieq, 0, 2), \
 41 |     _(jine, 0, 2), \
 42 |     /* (xor 1): branch double equality (equal, not equal) */ \
 43 |     _(jdeq, 0, 2), \
 44 |     _(jdne, 0, 2), \
 45 |     /* (xor 1): branch double comparisons */ \
 46 |     /* (xor 2): operands swapped */ \
 47 |     _(jdlt, 0, 2), \
 48 |     _(jdge, 0, 2), \
 49 |     _(jdgt, 0, 2), \
 50 |     _(jdle, 0, 2), \
 51 |     /* (xor 1): branch float comparisons */ \
 52 |     /* (xor 2): operands swapped */ \
 53 |     _(jflt, 0, 2), \
 54 |     _(jfge, 0, 2), \
 55 |     _(jfgt, 0, 2), \
 56 |     _(jfle, 0, 2), \
 57 |     /* (xor 1): branch float equality (equal, not equal) */ \
 58 |     _(jfeq, 0, 2), \
 59 |     _(jfne, 0, 2), \
 60 |     /* (xor 1): integer zero, not-zero tests */ \
 61 |     _(jz,  0, 1), \
 62 |     _(jnz, 0, 1), \
 63 |     /* */ \
 64 |     /* NOTE: THESE SHOULD MATCH THOSE STARTING FROM 'jilt' */ \
 65 |     /* SO MAKE SURE THE POSITIONS STAY RELATIVE */ \
 66 |     /* */ \
 67 |     /* (xor 1): branch signed integer comparisons */ \
 68 |     _(jiltI, 0, 1+BJIT_IMM32), \
 69 |     _(jigeI, 0, 1+BJIT_IMM32), \
 70 |     _(jigtI, 0, 1+BJIT_IMM32), \
 71 |     _(jileI, 0, 1+BJIT_IMM32), \
 72 |     /* (xor 1): branch unsigned integer comparisons */ \
 73 |     _(jultI, 0, 1+BJIT_IMM32), \
 74 |     _(jugeI, 0, 1+BJIT_IMM32), \
 75 |     _(jugtI, 0, 1+BJIT_IMM32), \
 76 |     _(juleI, 0, 1+BJIT_IMM32), \
 77 |     /* (xor 1): branch integer equality comparisons */ \
 78 |     _(jieqI, 0, 1+BJIT_IMM32), \
 79 |     _(jineI, 0, 1+BJIT_IMM32), \
 80 |     /* control flow, jump must come after conditionals!  */ \
 81 |     /* make sure there is an even number of these (for xor1 below)  */ \
 82 |     _(jmp, 0, 0), \
 83 |     _(dret, 0, 1), \
 84 |     _(fret, 0, 1), \
 85 |     _(iret, 0, 1), \
 86 |     _(iretI, 0, BJIT_IMM32), /* opt-dce needs to know which one is last */ \
 87 |     _(tcallp, 0, 1), \
 88 |     _(tcalln, 0, BJIT_IMM32), /* NOTE(review): opt-dce's dead-tail test is 'opcode <= ops::tcallp'; tcalln is listed after it -- confirm that is intended */ \
 89 |     _(dummy_align, 0, 0), \
 90 |     /* */ \
 91 |     /* NOTE: THESE SHOULD MATCH THOSE STARTING FROM 'jilt' */ \
 92 |     /* SO MAKE SURE THE POSITIONS STAY RELATIVE */ \
 93 |     /* */ \
 94 |     /* (xor 1): signed integer comparisons */ \
 95 |     _(ilt, BJIT_ANYREG+BJIT_CSE+1, 2), \
 96 |     _(ige, BJIT_ANYREG+BJIT_CSE+1, 2), \
 97 |     _(igt, BJIT_ANYREG+BJIT_CSE+1, 2), \
 98 |     _(ile, BJIT_ANYREG+BJIT_CSE+1, 2), \
 99 |     /* (xor 1): unsigned integer comparisons */ \
100 |     _(ult, BJIT_ANYREG+BJIT_CSE+1, 2), \
101 |     _(uge, BJIT_ANYREG+BJIT_CSE+1, 2), \
102 |     _(ugt, BJIT_ANYREG+BJIT_CSE+1, 2), \
103 |     _(ule, BJIT_ANYREG+BJIT_CSE+1, 2), \
104 |     /* (xor 1): integer equality (equal, not equal) */ \
105 |     _(ieq, BJIT_ANYREG+BJIT_CSE+1, 2), \
106 |     _(ine, BJIT_ANYREG+BJIT_CSE+1, 2), \
107 |     /* (xor 1): double equality (equal, not equal) */ \
108 |     _(deq, BJIT_ANYREG+BJIT_CSE+1, 2), \
109 |     _(dne, BJIT_ANYREG+BJIT_CSE+1, 2), \
110 |     /* (xor 1): double comparisons */ \
111 |     _(dlt, BJIT_ANYREG+BJIT_CSE+1, 2), \
112 |     _(dge, BJIT_ANYREG+BJIT_CSE+1, 2), \
113 |     _(dgt, BJIT_ANYREG+BJIT_CSE+1, 2), \
114 |     _(dle, BJIT_ANYREG+BJIT_CSE+1, 2), \
115 |     /* (xor 1): floating point comparisons */ \
116 |     _(flt, BJIT_ANYREG+BJIT_CSE+1, 2), \
117 |     _(fge, BJIT_ANYREG+BJIT_CSE+1, 2), \
118 |     _(fgt, BJIT_ANYREG+BJIT_CSE+1, 2), \
119 |     _(fle, BJIT_ANYREG+BJIT_CSE+1, 2), \
120 |     /* (xor 1): float equality (equal, not equal) */ \
121 |     _(feq, BJIT_ANYREG+BJIT_CSE+1, 2), \
122 |     _(fne, BJIT_ANYREG+BJIT_CSE+1, 2), \
123 |     /* */ \
124 |     /* NOTE: THESE SHOULD MATCH THOSE STARTING FROM 'jilt' */ \
125 |     /* SO MAKE SURE THE POSITIONS STAY RELATIVE */ \
126 |     /* */ \
127 |     /* (xor 1): signed integer comparisons */ \
128 |     _(iltI, BJIT_CSE+1, 1+BJIT_IMM32), \
129 |     _(igeI, BJIT_CSE+1, 1+BJIT_IMM32), \
130 |     _(igtI, BJIT_CSE+1, 1+BJIT_IMM32), \
131 |     _(ileI, BJIT_CSE+1, 1+BJIT_IMM32), \
132 |     /* (xor 1): unsigned integer comparisons */ \
133 |     _(ultI, BJIT_CSE+1, 1+BJIT_IMM32), \
134 |     _(ugeI, BJIT_CSE+1, 1+BJIT_IMM32), \
135 |     _(ugtI, BJIT_CSE+1, 1+BJIT_IMM32), \
136 |     _(uleI, BJIT_CSE+1, 1+BJIT_IMM32), \
137 |     /* (xor 1): integer equality (equal, not equal) */ \
138 |     _(ieqI, BJIT_CSE+1, 1+BJIT_IMM32), \
139 |     _(ineI, BJIT_CSE+1, 1+BJIT_IMM32), \
140 |     /* integer arithmetic */ \
141 |     _(iadd, BJIT_ANYREG+BJIT_CSE+1, 2), \
142 |     _(isub, BJIT_CSE+1, 2), \
143 |     _(ineg, BJIT_CSE+1, 1), \
144 |     _(imul, BJIT_ANYREG+BJIT_CSE+1, 2), \
145 |     /* division by zero is a "side-effect" */ \
146 |     _(idiv, BJIT_SIDEFX+BJIT_CSE+1, 2), \
147 |     _(imod, BJIT_SIDEFX+BJIT_CSE+1, 2), \
148 |     /* unsigned integer arithmetic */ \
149 |     _(udiv, BJIT_SIDEFX+BJIT_CSE+1, 2), \
150 |     _(umod, BJIT_SIDEFX+BJIT_CSE+1, 2), \
151 |     /* integer bitwise */ \
152 |     _(inot, BJIT_CSE+1, 1), \
153 |     _(iand, BJIT_ANYREG+BJIT_CSE+1, 2), \
154 |     _(ior,  BJIT_ANYREG+BJIT_CSE+1, 2),  \
155 |     _(ixor, BJIT_ANYREG+BJIT_CSE+1, 2), \
156 |     /* integer shifts */ \
157 |     _(ishl, BJIT_CSE+1, 2), \
158 |     _(ishr, BJIT_CSE+1, 2), \
159 |     _(ushr, BJIT_CSE+1, 2), \
160 |     /* integer arithmetic */ \
161 |     _(iaddI, BJIT_CSE+1, 1+BJIT_IMM32), \
162 |     _(isubI, BJIT_CSE+1, 1+BJIT_IMM32), \
163 |     _(imulI, BJIT_ANYREG+BJIT_CSE+1, 1+BJIT_IMM32), \
164 |     /* integer bitwise */ \
165 |     _(iandI, BJIT_CSE+1, 1+BJIT_IMM32), \
166 |     _(iorI,  BJIT_CSE+1, 1+BJIT_IMM32), \
167 |     _(ixorI, BJIT_CSE+1, 1+BJIT_IMM32), \
168 |     /* integer shifts */ \
169 |     _(ishlI, BJIT_CSE+1, 1+BJIT_IMM32), \
170 |     _(ishrI, BJIT_CSE+1, 1+BJIT_IMM32), \
171 |     _(ushrI, BJIT_CSE+1, 1+BJIT_IMM32), \
172 |     /* double arithmetic */ \
173 |     _(dadd, BJIT_ANYREG+BJIT_CSE+1, 2), \
174 |     _(dsub, BJIT_CSE+1, 2), \
175 |     _(dneg, BJIT_CSE+1, 1), \
176 |     _(dabs, BJIT_CSE+1, 1), \
177 |     _(dmul, BJIT_ANYREG+BJIT_CSE+1, 2), \
178 |     _(ddiv, BJIT_CSE+1, 2), \
179 |     /* float arithmetic */ \
180 |     _(fadd, BJIT_ANYREG+BJIT_CSE+1, 2), \
181 |     _(fsub, BJIT_CSE+1, 2), \
182 |     _(fneg, BJIT_CSE+1, 1), \
183 |     _(fabs, BJIT_CSE+1, 1), \
184 |     _(fmul, BJIT_ANYREG+BJIT_CSE+1, 2), \
185 |     _(fdiv, BJIT_CSE+1, 2), \
186 |     /* type conversions */ \
187 |     _(ci2d, BJIT_CSE+1, 1), \
188 |     _(cd2i, BJIT_CSE+1, 1), \
189 |     _(ci2f, BJIT_CSE+1, 1), \
190 |     _(cf2i, BJIT_CSE+1, 1), \
191 |     _(cf2d, BJIT_CSE+1, 1), \
192 |     _(cd2f, BJIT_CSE+1, 1), \
193 |     /* reinterpret bitcasts */ \
194 |     _(bci2d, BJIT_CSE+1, 1), \
195 |     _(bcd2i, BJIT_CSE+1, 1), \
196 |     _(bci2f, BJIT_CSE+1, 1), \
197 |     _(bcf2i, BJIT_CSE+1, 1), \
198 |     /* load constants */ \
199 |     _(lci, BJIT_CSE+1, BJIT_I64), \
200 |     _(lcf, BJIT_CSE+1, BJIT_F32), \
201 |     _(lcd, BJIT_CSE+1, BJIT_F64), \
202 |     /* load near proc address */ \
203 |     _(lnp, BJIT_CSE+1, BJIT_IMM32), \
204 |     /* sign-extend values (cast to smaller type) */ \
205 |     _(i8,  BJIT_CSE+1, 1), \
206 |     _(i16, BJIT_CSE+1, 1), \
207 |     _(i32, BJIT_CSE+1, 1), \
208 |     /* unsigned variants (zero-extend) */ \
209 |     _(u8,  BJIT_CSE+1, 1), \
210 |     _(u16, BJIT_CSE+1, 1), \
211 |     _(u32, BJIT_CSE+1, 1), \
212 |     /* memory loads: load out <- [in0+offset] */ \
213 |     /* ANYREG 'cos typically explicit output reg */ \
214 |     /* integer variants: sign-extended */ \
215 |     /* treat as potentially causing side-effects (NOTE(review): the entries carry BJIT_CSE but not BJIT_SIDEFX -- confirm) */ \
216 |     _(li8,  BJIT_ANYREG+BJIT_CSE+1, 1+BJIT_MEM), \
217 |     _(li16, BJIT_ANYREG+BJIT_CSE+1, 1+BJIT_MEM), \
218 |     _(li32, BJIT_ANYREG+BJIT_CSE+1, 1+BJIT_MEM), \
219 |     _(li64, BJIT_ANYREG+BJIT_CSE+1, 1+BJIT_MEM), \
220 |     /* unsigned variants (zero-extend) */ \
221 |     _(lu8,  BJIT_ANYREG+BJIT_CSE+1, 1+BJIT_MEM), \
222 |     _(lu16, BJIT_ANYREG+BJIT_CSE+1, 1+BJIT_MEM), \
223 |     _(lu32, BJIT_ANYREG+BJIT_CSE+1, 1+BJIT_MEM), \
224 |     /* float */ \
225 |     _(lf32, BJIT_ANYREG+BJIT_CSE+1, 1+BJIT_MEM), \
226 |     _(lf64, BJIT_ANYREG+BJIT_CSE+1, 1+BJIT_MEM), \
227 |     /* two reg versions - NOTE: must be in same order! */ \
228 |     _(l2i8,  BJIT_ANYREG+BJIT_CSE+1, 2+BJIT_MEM), \
229 |     _(l2i16, BJIT_ANYREG+BJIT_CSE+1, 2+BJIT_MEM), \
230 |     _(l2i32, BJIT_ANYREG+BJIT_CSE+1, 2+BJIT_MEM), \
231 |     _(l2i64, BJIT_ANYREG+BJIT_CSE+1, 2+BJIT_MEM), \
232 |     /* unsigned variants (zero-extend) */ \
233 |     _(l2u8,  BJIT_ANYREG+BJIT_CSE+1, 2+BJIT_MEM), \
234 |     _(l2u16, BJIT_ANYREG+BJIT_CSE+1, 2+BJIT_MEM), \
235 |     _(l2u32, BJIT_ANYREG+BJIT_CSE+1, 2+BJIT_MEM), \
236 |     /* float */ \
237 |     _(l2f32, BJIT_ANYREG+BJIT_CSE+1, 2+BJIT_MEM), \
238 |     _(l2f64, BJIT_ANYREG+BJIT_CSE+1, 2+BJIT_MEM), \
239 |     /* memory stores: store [in0+offset] <- in1 */ \
240 |     _(si8,  0, 2+BJIT_MEM), \
241 |     _(si16, 0, 2+BJIT_MEM), \
242 |     _(si32, 0, 2+BJIT_MEM), \
243 |     _(si64, 0, 2+BJIT_MEM), \
244 |     /* floating point */ \
245 |     _(sf32, 0, 2+BJIT_MEM), \
246 |     _(sf64, 0, 2+BJIT_MEM), \
247 |     /* two reg versions - NOTE: must be in same order!  */ \
248 |     _(s2i8,  0, 3+BJIT_MEM), \
249 |     _(s2i16, 0, 3+BJIT_MEM), \
250 |     _(s2i32, 0, 3+BJIT_MEM), \
251 |     _(s2i64, 0, 3+BJIT_MEM), \
252 |     /* floating point */ \
253 |     _(s2f32, 0, 3+BJIT_MEM), \
254 |     _(s2f64, 0, 3+BJIT_MEM), \
255 |     /* procedure arguments */ \
256 |     _(iarg, 1+BJIT_NOMOVE, 0), \
257 |     _(farg, 1+BJIT_NOMOVE, 0), \
258 |     _(darg, 1+BJIT_NOMOVE, 0), \
259 |     /* Call arguments - right to left before call */ \
260 |     _(ipass, 0, 1), \
261 |     _(fpass, 0, 1), \
262 |     _(dpass, 0, 1), \
263 |     /* Indirect calls: typed for return value */ \
264 |     _(icallp, 1+BJIT_SIDEFX, 1), \
265 |     _(fcallp, 1+BJIT_SIDEFX, 1), \
266 |     _(dcallp, 1+BJIT_SIDEFX, 1), \
267 |     /* Module local "near" calls, relocated */ \
268 |     _(icalln, 1+BJIT_SIDEFX, BJIT_IMM32), \
269 |     _(fcalln, 1+BJIT_SIDEFX, BJIT_IMM32), \
270 |     _(dcalln, 1+BJIT_SIDEFX, BJIT_IMM32), \
271 |     /* this is user-requested allocation with reg = stack pointer */ \
272 |     _(alloc,  1+BJIT_SIDEFX+BJIT_NOMOVE, BJIT_IMM32), \
273 |     /* this keeps the compiler from moving loads across */ \
274 |     _(fence, BJIT_SIDEFX, 0), \
275 |     /* pseudo-ops: polymorphic */ \
276 |     _(phi,    1+BJIT_NOMOVE, 0), \
277 |     _(rename, 1, 1), \
278 |     _(reload, 1, 1), \
279 |     _(nop,    0, 0) /* removed by DCE */
280 | 


--------------------------------------------------------------------------------
/src/module.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | // We need separate logic for Unix vs. Windows for loading code.
  3 | // Define BJIT_USE_MMAP on platforms where we can use the Unix version.
  4 | #if defined(__unix__) || defined(__LINUX__) || defined(__APPLE__)
  5 | #  define BJIT_USE_MMAP
  6 | #  define BJIT_CAN_LOAD
  7 | #  include <sys/mman.h>
  8 | #endif
  9 | 
 10 | #if defined(_WIN32)
 11 | #  define BJIT_CAN_LOAD
 12 | #  include <windows.h>
 13 | #endif
 14 | 
 15 | #ifdef __APPLE__
 16 | # define MAP_ANONYMOUS MAP_ANON  // the joy of being different
 17 | #endif
 18 | 
 19 | #include <cstring>
 20 | 
 21 | #include "bjit.h"
 22 | 
 23 | using namespace bjit;
 24 | // Clear the instruction cache over [exec_mem, exec_mem + mmapSize) when the compiler offers the builtin.
 25 | static void flush_cache(char * exec_mem, uint32_t mmapSize)
 26 | {
 27 | #if defined(__GNUC__) || defined(__clang__)
 28 |     char * const rangeEnd = exec_mem + mmapSize;
 29 |     __builtin___clear_cache(exec_mem, rangeEnd);    // the "portable" icache flush builtin
 30 | #elif defined(__aarch64__)
 31 | #   warning arm64 i-cache might be left stale
 32 | #endif
 33 | }
 34 | // Load the module: map max(mmapSizeMin, code size) bytes, copy + relocate the code, make it executable. Returns the base address, or 0 on failure.
 35 | uintptr_t Module::load(unsigned mmapSizeMin)
 36 | {
 37 |     BJIT_ASSERT(!exec_mem);     // must not be loaded already
 38 | 
 39 | #ifndef BJIT_CAN_LOAD
 40 |     return 0;                   // no loader for this platform
 41 | #endif
 42 | 
 43 |     // compute sizes: at least mmapSizeMin, but enough to hold the code
 44 |     mmapSize = mmapSizeMin;
 45 |     loadSize = bytes.size();
 46 |     if(mmapSize < loadSize) mmapSize = loadSize;
 47 | 
 48 | #ifdef BJIT_USE_MMAP
 49 |     // get a block of memory we can mess with, read+write
 50 |     exec_mem = mmap(NULL, mmapSize, PROT_READ | PROT_WRITE,
 51 |         MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
 52 |     if(exec_mem == MAP_FAILED)  // mmap reports failure with MAP_FAILED, not NULL
 53 |     {
 54 |         BJIT_LOG("error: mmap failed in bjit::Module::load()\n");
 55 |         exec_mem = 0;           // stay in the "not loaded" state
 56 |         return 0;
 57 |     }
 58 | #endif
 59 | #ifdef _WIN32
 60 |     exec_mem = VirtualAlloc(0, mmapSize, MEM_COMMIT, PAGE_READWRITE);
 61 |     if(!exec_mem)
 62 |     {
 63 |         BJIT_LOG("error: VirtualAlloc failed in bjit::Module::load()\n");
 64 |         return 0;
 65 |     }
 66 | #endif
 67 | 
 68 |     // copy & relocate
 69 |     memcpy(exec_mem, bytes.data(), bytes.size());
 70 |     for(auto & r : relocs)
 71 |     {
 72 |         BJIT_ASSERT(r.procIndex < offsets.size());
 73 |         arch_patchNear(r.codeOffset+(uint8_t*)exec_mem, offsets[r.procIndex]);
 74 |     }
 75 | 
 76 | #ifdef BJIT_USE_MMAP
 77 |     // switch to read+execute; mprotect returns zero on success
 78 |     if(mprotect(exec_mem, mmapSize, PROT_READ | PROT_EXEC))
 79 |     {
 80 |         BJIT_LOG("error: mprotect failed in bjit::Module::load()\n");
 81 |         // if we can't set executable, then try to unload
 82 |         unload();
 83 |         return 0;
 84 |     }
 85 | 
 86 |     flush_cache((char*)exec_mem, mmapSize);
 87 | 
 88 | #endif
 89 | #ifdef _WIN32
 90 |     // Note that VirtualProtect REQUIRES oldFlags to be a valid pointer!
 91 |     // returns non-zero on success
 92 |     DWORD   oldFlags = 0;
 93 |     if(!VirtualProtect(exec_mem, mmapSize, PAGE_EXECUTE_READ, &oldFlags))
 94 |     {
 95 |         BJIT_LOG("error: VirtualProtect failed in bjit::Module::load()\n");
 96 |         // if we can't set executable, then try to unload
 97 |         unload();
 98 |         return 0;
 99 |     }
100 | #endif
101 | 
102 |     return (uintptr_t) exec_mem;
103 | }
104 | // Bring already-loaded code up to date: append new bytes, apply stub and near patches. Returns false if it won't fit.
105 | bool Module::patch()
106 | {
107 |     BJIT_ASSERT(exec_mem);
108 | 
109 |     // the existing mapping has to be large enough for all the code
110 |     if(bytes.size() > mmapSize) return false;
111 | 
112 | #ifdef BJIT_USE_MMAP
113 |     // mprotect returns zero on success (NOTE(review): confirm BJIT_ASSERT always evaluates its argument)
114 |     BJIT_ASSERT(!mprotect(exec_mem, mmapSize, PROT_READ | PROT_WRITE));
115 | #endif
116 | #ifdef _WIN32
117 |     // VirtualProtect returns non-zero on success and REQUIRES
118 |     // a valid pointer for the old protection flags!
119 |     DWORD   oldFlags = 0;
120 |     BJIT_ASSERT(VirtualProtect(exec_mem, mmapSize, PAGE_READWRITE, &oldFlags));
121 | #endif
122 | 
123 |     // append the bytes past loadSize, then relocate only the new code
124 |     uint8_t * const memBase = (uint8_t*)exec_mem;
125 |     memcpy(memBase+loadSize, bytes.data()+loadSize, bytes.size()-loadSize);
126 |     for(auto & rel : relocs)
127 |     {
128 |         // relocs below loadSize belong to code that was loaded before
129 |         if(rel.codeOffset < loadSize) continue;
130 |         BJIT_ASSERT(rel.procIndex < offsets.size());
131 |         arch_patchNear(memBase+rel.codeOffset, offsets[rel.procIndex]);
132 |     }
133 |     loadSize = bytes.size();
134 | 
135 |     // do all pending stub-patches
136 |     for(auto & sp : stubPatches)
137 |     {
138 |         arch_patchStub(memBase+offsets[sp.procIndex], sp.newAddress);
139 |     }
140 |     stubPatches.clear();
141 | 
142 |     // near patches: retarget relocs inside a code range from oldTarget to newTarget
143 |     for(auto & np : nearPatches)
144 |     {
145 |         // difference between the new and the old target offsets
146 |         const uint32_t delta = offsets[np.newTarget] - offsets[np.oldTarget];
147 |         for(auto & rel : relocs)
148 |         {
149 |             const bool inRange = rel.codeOffset >= np.offsetStart
150 |                 && rel.codeOffset < np.offsetEnd;
151 |             if(!inRange || rel.procIndex != np.oldTarget) continue;
152 | 
153 |             // update the record and relocate
154 |             rel.procIndex = np.newTarget;
155 |             arch_patchNear(memBase+rel.codeOffset, delta);
156 |         }
157 |     }
158 | 
159 |     nearPatches.clear();
160 | 
161 | #ifdef BJIT_USE_MMAP
162 |     // back to read+execute; mprotect returns zero on success
163 |     BJIT_ASSERT(!mprotect(exec_mem, mmapSize, PROT_READ | PROT_EXEC));
164 | 
165 |     flush_cache((char*)exec_mem, mmapSize);
166 | 
167 | #endif
168 | #ifdef _WIN32
169 |     // same VirtualProtect rules as above, oldFlags gets reused
170 |     // returns non-zero on success
171 |     BJIT_ASSERT(VirtualProtect(exec_mem, mmapSize, PAGE_EXECUTE_READ, &oldFlags));
172 | #endif
173 | 
174 |     return true;
175 | }
176 | // Release the mapping and reset the load state; returns the address the code was loaded at.
177 | uintptr_t Module::unload()
178 | {
179 |     BJIT_ASSERT(exec_mem);
180 | 
181 |     // remember the old base address for the caller
182 |     const uintptr_t oldBase = (uintptr_t) exec_mem;
183 | 
184 | #ifdef BJIT_USE_MMAP
185 |     munmap(exec_mem, mmapSize);
186 | #endif
187 | #ifdef _WIN32
188 |     VirtualFree(exec_mem, 0, MEM_RELEASE);
189 | #endif
190 | 
191 |     // pending near-patches still retarget the reloc records,
192 |     // only the loaded code itself is not touched here
193 |     for(auto & np : nearPatches)
194 |     {
195 |         for(auto & rel : relocs)
196 |         {
197 |             const bool inRange = rel.codeOffset >= np.offsetStart
198 |                 && rel.codeOffset < np.offsetEnd;
199 | 
200 |             if(inRange && rel.procIndex == np.oldTarget)
201 |                 rel.procIndex = np.newTarget;
202 |         }
203 |     }
204 | 
205 |     // patches are not useful after unload
206 |     stubPatches.clear();
207 |     nearPatches.clear();
208 | 
209 |     exec_mem = 0;
210 |     mmapSize = 0;
211 |     loadSize = 0;
212 | 
213 |     return oldBase;
214 | }


--------------------------------------------------------------------------------
/src/opt-dce.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "bjit.h"
  3 | 
  4 | using namespace bjit;
  5 | 
  6 | void Proc::opt_dce(bool unsafeOpt)
  7 | {
  8 |     bool progress = true;
  9 | 
 10 |     int iters = 0;
 11 |     while(progress)
 12 |     {
 13 |         ++iters;
 14 |         progress = false;
 15 |         for(auto & b : blocks)
 16 |         {
 17 |             if(!b.flags.live) continue;
 18 |             b.flags.live = false;
 19 |     
 20 |             for(auto & i : b.code)
 21 |             {
 22 |                 if(i == noVal) continue;
 23 |                 // NOTE: nUse aliases on labels
 24 |                 if(ops[i].hasOutput()) ops[i].nUse = 0;
 25 |             }
 26 |         }
 27 |     
 28 |         todo.clear();
 29 |         live.clear();
 30 |         
 31 |         todo.push_back(0);
 32 |         live.push_back(todo.back());
 33 |         blocks[0].flags.live = true;
 34 |     
 35 |         while(todo.size())
 36 |         {
 37 |             auto b = todo.back(); todo.pop_back();
 38 |             bool deadTail = false;
 39 |             
 40 |             for(auto i : blocks[b].code)
 41 |             {
 42 |                 if(i == noVal) continue;
 43 |             
 44 |                 if(deadTail)
 45 |                 {
 46 |                     ops[i].makeNOP();
 47 |                     continue;
 48 |                 }
 49 |             
 50 |                 switch(ops[i].nInputs())
 51 |                 {
 52 |                     case 3: ++ops[ops[i].in[2]].nUse;
 53 |                     case 2: ++ops[ops[i].in[1]].nUse;
 54 |                     case 1: ++ops[ops[i].in[0]].nUse;
 55 |                     case 0: break;
 56 |                     default: BJIT_ASSERT(false);
 57 |                 }
 58 |                 
 59 |                 // only need to look at last op
 60 |                 if(ops[i].opcode <= ops::jmp)
 61 |                 for(int k = 0; k < 2; ++k)
 62 |                 {
 63 |                     if(k && ops[i].opcode == ops::jmp) break;
 64 | 
 65 |                     while(true)
 66 |                     {
 67 |                         auto bsrc = ops[i].label[k];
 68 |                         auto & kc = blocks[bsrc].code;
 69 |                         auto tjmp = noVal;
 70 | 
 71 |                         // threading conditional jumps through empty loop preheaders
 72 |                         // with multiple entry-blocks causes havoc with IV logic
 73 |                         // so avoid conditional jumps into targets with phis for now
 74 |                         if(ops[i].opcode < ops::jmp
 75 |                         && (kc[0] == noVal || ops[kc[0]].opcode == ops::phi)) break;
 76 | 
 77 |                         // skip over phis
 78 |                         for(int i = 0; i < kc.size(); ++i)
 79 |                         {
 80 |                             tjmp = kc[i];
 81 |                             if(tjmp == noVal || ops[tjmp].opcode != ops::phi)
 82 |                                 break;
 83 |                         }
 84 | 
 85 |                         // can we thread this?
 86 |                         if(tjmp == noVal || i == tjmp
 87 |                         || ops[tjmp].opcode != ops::jmp) break;
 88 | 
 89 |                         auto target = ops[tjmp].label[0];
 90 | 
 91 |                         // do another pass
 92 |                         if(false && blocks[target].code[0] == noVal)
 93 |                         {
 94 |                             progress = true;
 95 |                             break;
 96 |                         }
 97 | 
 98 |                         // if the block we're jumping from has phis
 99 |                         // then validate that target block also has them
100 |                         bool noPhi = false;
101 |                         for(auto & p : blocks[bsrc].args)
102 |                         {
103 |                             // this can happen
104 |                             if(p.phiop == noVal
105 |                             || ops[p.phiop].opcode == ops::nop) continue;
106 | 
107 |                             bool good = false;
108 |                             for(auto & a : blocks[target].alts)
109 |                             {
110 |                                 if(a.src == bsrc && a.val == p.phiop)
111 |                                 {
112 |                                     good = true;
113 |                                     break;
114 |                                 }
115 |                             }
116 |                             if(!good) { noPhi = true; break; }
117 |                         }
118 | 
119 |                         if(noPhi) break;
120 |                         
121 |                         // if we are jumping into a block with phis then
122 |                         // validate that a blocks isn't there for shuffle
123 |                         //if(ops[blocks[target].code[0]].opcode == ops::phi)
124 |                         if(blocks[target].alts.size())
125 |                         {
126 |                             bool bad = false;
127 | 
128 |                             // clear temps
129 |                             for(auto & a : blocks[target].args) a.tmp = noVal;
130 |                             
131 |                             // find relevant alternatives
132 |                             auto & args = blocks[target].args;
133 |                             for(auto & a : blocks[target].alts)
134 |                             {
135 |                                 // is this alternative relevant?
136 |                                 if(a.src != ops[i].block
137 |                                 && a.src != ops[i].label[k]) continue;
138 | 
139 |                                 auto val = a.val;
140 | 
141 |                                 // resolve local phis
142 |                                 if(ops[val].opcode == ops::phi
143 |                                 && ops[val].block == a.src)
144 |                                 {
145 |                                     bool good = false;
146 |                                     for(auto & s : blocks[a.src].alts)
147 |                                     {
148 |                                         if(s.phi != val) continue;
149 |                                         if(s.src != b) continue;
150 |                                         val = s.val;
151 |                                         
152 |                                         good = true;
153 |                                         break;
154 |                                     }
155 |                                     // FIXME: figure out why we might not
156 |                                     // sometimes find a suitable alt?
157 |                                     if(!good) { bad = true; break; }
158 |                                 }
159 |                                 
160 |                                 // check for duplicate
161 |                                 for(auto & s : blocks[target].alts)
162 |                                 {
163 |                                     if(a.phi == s.phi && b == s.src)
164 |                                     {
165 |                                         if(s.val == val) continue;
166 | 
167 |                                         // seems this block exists for shuffle
168 |                                         bad = true;
169 |                                         break;
170 |                                     }
171 |                                 }
172 | 
173 |                                 // phi got removed this pass?
174 |                                 if(ops[a.phi].opcode == ops::nop) continue;
175 |                                 
176 |                                 // if we've not seen it, store tmp
177 |                                 if(args[ops[a.phi].phiIndex].tmp == noVal)
178 |                                 {
179 |                                     args[ops[a.phi].phiIndex].tmp = val;
180 |                                 }
181 |                                 else if(args[ops[a.phi].phiIndex].tmp != val)
182 |                                 {
183 |                                     bad = true;
184 |                                     break;
185 |                                 }
186 |                             }
187 |                             
188 |                             if(bad) break;
189 |                         }
190 | 
191 |                         // patch target phis
192 |                         for(int ai = 0, sz = blocks[target].alts.size();
193 |                             ai < sz; ++ai)
194 |                         {
195 |                             auto & a = blocks[target].alts[ai];
196 |                             if(a.src == ops[i].label[k])
197 |                             {
198 |                                 auto val = a.val;
199 | 
200 |                                 // resolve local phis
201 |                                 if(ops[val].opcode == ops::phi
202 |                                 && ops[val].block == a.src)
203 |                                 {
204 |                                     bool good = false;
205 |                                     for(auto & s : blocks[a.src].alts)
206 |                                     {
207 |                                         if(s.phi != val) continue;
208 |                                         if(s.src != b) continue;
209 |                                         val = s.val;
210 |                                         good = true;
211 |                                         break;
212 |                                     }
213 |                                     BJIT_ASSERT_MORE(good);
214 |                                 }
215 | 
216 |                                 // check for duplicate
217 |                                 bool dedup = false;
218 |                                 for(auto & s : blocks[target].alts)
219 |                                 {
220 |                                     if(a.phi == s.phi && b == s.src)
221 |                                     {
222 |                                         BJIT_ASSERT_MORE(s.val == val);
223 |                                         dedup = true;
224 |                                     }
225 |                                 }
226 | 
227 |                                 if(!dedup) blocks[target].newAlt(a.phi, b, val);
228 |                             }
229 |                         }
230 | 
231 |                         ops[i].label[k] = target;
232 |                         progress = true;    // need at least new DOMs
233 |                     }
234 |                     
235 |                     if(!blocks[ops[i].label[k]].flags.live)
236 |                     {
237 |                         todo.push_back(ops[i].label[k]);
238 |                         live.push_back(todo.back());
239 |                         blocks[ops[i].label[k]].flags.live = true;
240 |                     }
241 |                 }
242 |                 
243 |                 if(ops[i].opcode < ops::jmp)
244 |                 {
245 |                     if(ops[i].label[0] == ops[i].label[1])
246 |                     {
247 |                         ops[i].opcode = ops::jmp;
248 |                         ops[i].in[0] = noVal;
249 |                         ops[i].in[1] = noVal;
250 |                         progress = true;
251 |                     }
252 |                 }
253 |                 
254 |                 if(ops[i].opcode <= ops::tcallp) deadTail = true;
255 |             }
256 |         }
257 |     
258 |         // phi-uses
259 |         for(auto & bi : live)
260 |         {
261 |             auto & b = blocks[bi];
262 |             // cleanup dead sources
263 |             {
264 |                 int j = 0;
265 |                 for(int i = 0; i < b.alts.size(); ++i)
266 |                 {
267 |                     if(ops[b.alts[i].phi].opcode == ops::nop) continue;
268 |                     if(!blocks[b.alts[i].src].flags.live) continue;
269 |                     if(j != i) b.alts[j] = b.alts[i];
270 |                     ++j;
271 |                 }
272 |                 b.alts.resize(j);
273 |             }
274 |             // set tmp sources to noVal
275 |             for(auto & a : b.args) a.tmp = noVal;
276 | 
277 |             // find which phis have actual uses
278 |             for(auto & s : b.alts)
279 |             {
280 |                 // ignore simple loopback
281 |                 if(s.phi == s.val) continue;
282 | 
283 |                 // if we don't have a value yet, set this as value
284 |                 if(b.args[ops[s.phi].phiIndex].tmp == noVal)
285 |                 {
286 |                     b.args[ops[s.phi].phiIndex].tmp = s.val;
287 |                 }
288 |                 else if(b.args[ops[s.phi].phiIndex].tmp != s.val)
289 |                 {
290 |                     // had more than one value, need to keep this
291 |                     b.args[ops[s.phi].phiIndex].tmp = s.phi;
292 |                 }
293 |             }
294 | 
295 |             // set use-counts for phis we're going to keep
296 |             for(auto & s : b.alts)
297 |             {
298 |                 if(b.args[ops[s.phi].phiIndex].tmp == s.phi)
299 |                 {
300 |                     ++ops[s.val].nUse;
301 |                 }
302 |             }
303 |         }
304 |         
305 |         for(auto & b : live)
306 |         {
307 |             // rename
308 |             for(auto i : blocks[b].code)
309 |             {
310 |                 if(i == noVal) continue;
311 | 
312 |                 // rename phis we can eliminate
313 |                 for(int k = 0; k < ops[i].nInputs(); ++k)
314 |                 {
315 |                     auto phiIndex = ops[i].in[k];
316 |                     auto & phi = ops[phiIndex];
317 | 
318 |                     if(phi.opcode != ops::phi) continue;
319 | 
320 |                     auto src = blocks[phi.block].args[phi.phiIndex].tmp;
321 |                     if(src != phiIndex)
322 |                     {
323 |                         ops[i].in[k] = src;
324 |                         ++ops[src].nUse;
325 |                         progress = true;
326 |                     }
327 |                 }
328 |             }
329 | 
330 |             for(auto & a : blocks[b].alts)
331 |             {
332 |                 auto & phi = ops[a.val];
333 |                 if(phi.opcode != ops::phi) continue;
334 | 
335 |                 auto src = blocks[phi.block].args[phi.phiIndex].tmp;
336 |                 if(src != a.val)
337 |                 {
338 |                     a.val = src;
339 |                     ++ops[src].nUse;
340 |                     progress = true;
341 |                 }
342 |             }
343 |         }
344 | 
345 |         // count how many ops we have alive
346 |         // this is used by CSE for intelligent hash sizing
347 |         liveOps = 0;
348 |         
349 |         for(auto bi : live)
350 |         {
351 |             auto & b = blocks[bi];
352 |             
353 |             // loop backwards to figure out what's dead
354 |             for(int i = b.code.size(); i--;)
355 |             {
356 |                 if(b.code[i] == noVal) continue;
357 |                 
358 |                 auto & op = ops[b.code[i]];
359 |                 if(op.opcode == ops::nop) continue;
360 |                 
361 |                 // NOTE: nUse aliases on labels, check other stuff first
362 |                 if((op.hasSideFX() && (!unsafeOpt || !op.canCSE()))
363 |                 || op.nUse) continue;
364 | 
365 |                 switch(op.nInputs())
366 |                 {
367 |                 case 3: --ops[op.in[2]].nUse;
368 |                 case 2: --ops[op.in[1]].nUse;
369 |                 case 1: --ops[op.in[0]].nUse;
370 |                 case 0:
371 |                     op.makeNOP();
372 |                     progress = true;
373 |                     break;
374 |                 default: BJIT_ASSERT(false);
375 |                 }
376 |             }
377 | 
378 |             // loop forward to cleanup
379 |             int j = 0;
380 |             for(int i = 0; i < b.code.size(); ++i)
381 |             {
382 |                 if(b.code[i] == noVal) continue;
383 |                 if(ops[b.code[i]].opcode == ops::nop) continue;
384 |                 if(!ops[b.code[i]].hasSideFX() && !ops[b.code[i]].nUse) continue;
385 |                 
386 |                 if(j != i)
387 |                 {
388 |                     b.code[j] = b.code[i];
389 |                 }
390 |                 ops[b.code[j]].pos = j;
391 |                 ++j;
392 |             }
393 | 
394 |             if(b.code.size() != j) { b.code.resize(j); progress = true; }
395 |             liveOps += j;
396 |         }
397 |     }
398 |     
399 |     BJIT_LOG("\n DCE:%d", iters);
400 | }
401 | 
402 | void Proc::findUsesBlock(int b, bool inOnly, bool localOnly)
403 | {
404 |     // Add the uses made by block b to the nUse counters of the values
405 |     // involved. The code is walked backwards, from last op to first.
406 |     //
407 |     //  localOnly: ignore the uses implied by outgoing jumps (the phi
408 |     //             sources and livein of the target blocks)
409 |     //  inOnly:    zero nUse on reaching an op that defines a value, so
410 |     //             values defined in this block do not stay counted
411 |     auto & code = blocks[b].code;
412 |     for(int pos = code.size(); pos-- > 0;)
413 |     {
414 |         auto index = code[pos];
415 |         if(index == noVal) continue;
416 |         auto & op = ops[index];
417 |         if(!localOnly && op.opcode <= ops::jmp)
418 |         {
419 |             // plain jmp carries one label, the other jumps carry two
420 |             int nLabel = (op.opcode == ops::jmp) ? 1 : 2;
421 |             for(int k = 0; k < nLabel; ++k)
422 |             {
423 |                 auto & target = blocks[op.label[k]];
424 |                 for(auto & s : target.alts)
425 |                 {
426 |                     // phi sources passed along the edge from b
427 |                     if(s.src == b) ++ops[s.val].nUse;
428 |                 }
429 |                 for(auto & v : target.livein) ++ops[v].nUse;
430 |             }
431 |         }
432 | 
433 |         switch(op.nInputs())
434 |         {
435 |         case 3: ++ops[op.in[2]].nUse;
436 |         case 2: ++ops[op.in[1]].nUse;
437 |         case 1: ++ops[op.in[0]].nUse;
438 |         case 0: break;
439 |         default: BJIT_ASSERT(false);
440 |         }
441 |         if(inOnly && op.hasOutput()) op.nUse = 0;
442 |     }
443 | }
444 | 
445 | void Proc::rebuild_livein()
446 | {
447 |     // Recompute livein for every live block: iterate findUsesBlock()
448 |     // over the blocks until no list changes size.
449 |     rebuild_cfg();      // also cleans up stale phis
450 |     BJIT_ASSERT(live.size());
451 | 
452 |     // NOTE: nUse aliases on labels, only reset ops with an output
453 |     for(auto & op : ops) { if(op.hasOutput()) op.nUse = 0; }
454 |     for(auto & b : live) { blocks[b].livein.clear(); }
455 | 
456 |     int iter = 0;
457 |     bool progress;
458 |     do
459 |     {
460 |         ++iter;
461 |         progress = false;
462 | 
463 |         // walking live in reverse almost always needs fewer rounds
464 |         for(int li = live.size(); li-- > 0;)
465 |         {
466 |             auto & livein = blocks[live[li]].livein;
467 |             auto oldSize = livein.size();
468 | 
469 |             // count uses first (this reads the livein of jump targets),
470 |             // then rebuild the list from the non-zero counters
471 |             findUsesBlock(live[li], true, false);
472 |             livein.clear();
473 | 
474 |             for(int i = 0; i < ops.size(); ++i)
475 |             {
476 |                 if(ops[i].hasOutput() && ops[i].nUse)
477 |                 {
478 |                     livein.push_back(i);
479 |                     ops[i].nUse = 0;    // clean counter for next block
480 |                 }
481 |             }
482 | 
483 |             if(livein.size() != oldSize) progress = true;
484 |         }
485 |     }
486 |     while(progress);
487 | 
488 |     // the entry block must not need any value from elsewhere
489 |     if(blocks[0].livein.size()) debug();
490 |     BJIT_ASSERT(!blocks[0].livein.size());
491 | 
492 |     BJIT_LOG(" Live:%d", iter);
493 | }
494 | 


--------------------------------------------------------------------------------
/src/opt-dom.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "bjit.h"
  3 | 
  4 | using namespace bjit;
  5 | 
  6 | void Proc::rebuild_cfg()
  7 | {
  8 |     // Recompute reachability from block 0 (live list and flags.live),
  9 |     // then the comeFrom lists, and finally prune stale phi alternatives.
 10 |     //
 11 |     // Redo liveblocks .. sometimes this is called before DCE
 12 |     for(auto & b : live) blocks[b].flags.live = false;
 13 |     todo.clear();
 14 |     live.clear();
 15 |     
 16 |     todo.push_back(0);
 17 |     live.push_back(0);
 18 |     blocks[0].flags.live = true;
 19 |     while(todo.size())
 20 |     {
 21 |         auto b = todo.back();
 22 |         todo.pop_back();
 23 | 
 24 |         // validate the block before its terminator is read below
 25 |         // if this fails, we're probably missing return
 26 |         BJIT_ASSERT(blocks[b].code.size());
 27 |         BJIT_ASSERT(blocks[b].code.back() != noVal);
 28 | 
 29 |         auto & jmp = ops[blocks[b].code.back()];
 30 |         BJIT_ASSERT(jmp.opcode <= ops::tcalln);
 31 | 
 32 |         // labels are followed for opcodes up to jmp; plain jmp has one
 33 |         if(jmp.opcode <= ops::jmp)
 34 |         for(int k = 0; k < 2; ++k)
 35 |         {
 36 |             if(k && jmp.opcode == ops::jmp) break;
 37 | 
 38 |             if(!blocks[jmp.label[k]].flags.live)
 39 |             {
 40 |                 todo.push_back(jmp.label[k]);
 41 |                 live.push_back(jmp.label[k]);
 42 |                 blocks[jmp.label[k]].flags.live = true;
 43 |             }
 44 |         }
 45 |     }
 46 | 
 47 |     // rebuild comeFrom, should delay this until iteration done
 48 |     for(int b = live.size();b--;) blocks[live[b]].comeFrom.clear();
 49 |     for(int b = live.size();b--;)
 50 |     {
 51 |         auto & op = ops[blocks[live[b]].code.back()];
 52 |         if(op.opcode < ops::jmp) blocks[op.label[1]].comeFrom.push_back(live[b]);
 53 |         if(op.opcode <= ops::jmp) blocks[op.label[0]].comeFrom.push_back(live[b]);
 54 |     }
 55 | 
 56 |     // cleanup dead phi alternatives: an alternative is dropped when its
 57 |     // value became a nop or its source is no longer in comeFrom
 58 |     for(auto & b : live)
 59 |     {
 60 |         int j = 0;
 61 |         auto & alts = blocks[b].alts;
 62 |         for(int i = 0; i < alts.size(); ++i)
 63 |         {
 64 |             if(ops[alts[i].val].opcode == ops::nop) continue;
 65 |             
 66 |             bool keep = false;
 67 |             for(auto s : blocks[b].comeFrom)
 68 |             {
 69 |                 if(alts[i].src != s) continue;
 70 |                 keep = true;
 71 |                 break;
 72 |             }
 73 |             if(!keep) continue;
 74 | 
 75 |             // compact the survivors in place
 76 |             if(i != j) alts[j] = alts[i];
 77 |             ++j;
 78 |         }
 79 |         if(j != alts.size()) alts.resize(j);
 80 |     }
 81 | }
 82 | 
 83 | void Proc::rebuild_dom()
 84 | {
 85 |     rebuild_cfg();
 86 |     
 87 |     // find dominator algorithm
 88 |     //
 89 |     // start with dom(entry) = { entry }, dom(n) = all live blocks
 90 |     // iterate blocks n until no change:
 91 |     //   tdom(n) = all live blocks
 92 |     //   for p in comeFrom(n):
 93 |     //     tdom(n) = tdom(n) intersect dom(p)
 94 |     //   dom(n) = { n } union tdom(n)
 95 |     // (for post-dominators: exit blocks and jump targets instead)
 96 | 
 97 |     // We run the same algorithm twice, first for post-dominators
 98 |     // then for dominators. We only keep immediate post-dominators
 99 |     // but we rebuild (in order) the full-list for dominators.
100 |     //
101 |     // We do post-dominators first so we can use .dom as temp for both
102 |     // which saves some useless per-block allocation.
103 |     
104 |     int domIters = 0;
105 |     bool iterate = true;
106 |     std::vector<uint16_t>   tdom;
107 |     
108 |     // post dominators first, so we can reuse .dom
109 |     for(auto & b : live)
110 |     {
111 |         // reset postdominators: exit blocks start as just themselves
112 |         if(ops[blocks[b].code.back()].opcode > ops::jmp)
113 |         {
114 |             blocks[b].dom.clear();
115 |             blocks[b].dom.push_back(b);
116 |         }
117 |         else blocks[b].dom = live;
118 |     }    
119 |     
120 |     while(iterate)
121 |     {
122 |         iterate = false;
123 |         ++domIters;
124 | 
125 |         // backwards
126 |         for(int bi = live.size(); bi--;)
127 |         {
128 |             auto & b = blocks[live[bi]];
129 |             auto & jmp = ops[b.code.back()];
130 |             // this is an exit block
131 |             if(jmp.opcode > ops::jmp) continue;
132 | 
133 |             int nLabel = (jmp.opcode == ops::jmp) ? 1 : 2;
134 | 
135 |             tdom = live;
136 |             for(int k = 0; k < nLabel; ++k)
137 |             {
138 |                 for(int t = 0; t < tdom.size();)
139 |                 {
140 |                     bool found = false;
141 |                     for(auto & d : blocks[jmp.label[k]].dom)
142 |                     {
143 |                         if(d == tdom[t]) { found = true; break; }
144 |                     }
145 |                     
146 |                     if(found) { ++t; }
147 |                     else
148 |                     {
149 |                         std::swap(tdom[t], tdom.back());
150 |                         tdom.pop_back();
151 |                     }
152 |                 }
153 |             }
154 |             
155 |             bool foundSelf = false;
156 |             for(auto & t : tdom)
157 |             { if(t != live[bi]) continue; foundSelf = true; break; }
158 |     
159 |             if(!foundSelf) tdom.push_back(live[bi]);
160 |             if(tdom.size() != b.dom.size()) iterate = true;
161 | 
162 |             // save copy, we'll reset tdom above anyway
163 |             std::swap(b.dom, tdom);
164 |         }
165 |     }
166 | 
167 |     // push theoretical exit-block (unifies multiple returns)
168 |     for(auto & b : live) { blocks[b].dom.push_back(noVal); }
169 | 
170 |     // find immediate post-dominators: we use the fact that the immediate
171 |     // dominator must have exactly one less dominator
172 |     for(auto & b : live)
173 |     {
174 |         blocks[b].pdom = noVal;
175 |         for(auto & d : blocks[b].dom)
176 |         {
177 |             if(d == noVal) continue;   // no common post-dominator
178 |             if(blocks[d].dom.size() == blocks[b].dom.size() - 1)
179 |             {
180 |                 blocks[b].pdom = d;
181 |                 break;
182 |             }
183 |         }
184 |     }
185 |     
186 |     // forward pass
187 |     for(auto & b : live)
188 |     {
189 |         // reset dominators: the entry block (0) starts as just itself
190 |         if(!b) {
191 |             blocks[b].dom.clear();
192 |             blocks[b].dom.push_back(b);
193 |         }
194 |         else blocks[b].dom = live;
195 |     }
196 |     
197 |     iterate = true;
198 |     while(iterate)
199 |     {
200 |         iterate = false;
201 |         ++domIters;
202 | 
203 |         for(auto & b : live)
204 |         {
205 |             // this is entry block
206 |             if(!b) continue;
207 |             BJIT_ASSERT(blocks[b].comeFrom.size());
208 | 
209 |             tdom = live;
210 |             for(auto & f : blocks[b].comeFrom)
211 |             {
212 |                 for(int t = 0; t < tdom.size();)
213 |                 {
214 |                     bool found = false;
215 |                     for(auto & d : blocks[f].dom)
216 |                     {
217 |                         if(d == tdom[t]) { found = true; break; }
218 |                     }
219 |                     
220 |                     if(found) { ++t; }
221 |                     else
222 |                     {
223 |                         std::swap(tdom[t], tdom.back());
224 |                         tdom.pop_back();
225 |                     }
226 |                 }
227 |             }
228 |             
229 |             bool foundSelf = false;
230 |             for(auto & t : tdom) { if(t != b) continue; foundSelf = true; break; }
231 |     
232 |             if(!foundSelf) tdom.push_back(b);
233 |             if(tdom.size() != blocks[b].dom.size()) iterate = true;
234 | 
235 |             // save copy, we'll reset tdom above anyway
236 |             std::swap(blocks[b].dom, tdom);
237 |         }
238 |     }
239 |     
240 |     // find immediate dominators: we use the fact that the immediate
241 |     // dominator must have exactly one less dominator
242 |     for(auto & b : live)
243 |     {
244 |         blocks[b].idom = 0;
245 |         
246 |         for(auto & d : blocks[b].dom)
247 |         {
248 |             if(blocks[d].dom.size() == blocks[b].dom.size() - 1)
249 |             {
250 |                 blocks[b].idom = d;
251 |                 break;
252 |             }
253 |         }
254 |         
255 |     }
256 | 
257 |     // order dominators; we use these for CCD in CSE
258 |     for(auto & b : live)
259 |     {
260 |         for(auto & d : blocks[b].dom) d = noVal;
261 |         for(int d = b, i = blocks[b].dom.size(); i--;)
262 |         {
263 |             blocks[b].dom[i] = d;
264 |             d = blocks[d].idom;
265 |         }
266 |     }
267 | 
268 |     BJIT_LOG(" Dom:%d", domIters);
269 | }


--------------------------------------------------------------------------------
/src/opt-jump.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "bjit.h"
  3 | 
  4 | using namespace bjit;
  5 | 
  6 | /*
  7 | 
  8 |  This implements non-trivial control-flow optimizations.
  9 |  Trivial jump-threading is not done here, we let DCE handle that.
 10 | 
 11 | */
 12 | 
 13 | static const bool jump_debug = false;
 14 | 
 15 | // Optimize a simple back edge: block b ends in a plain jmp to a target
 16 | // block that dominates b and itself ends in a conditional branch.
 17 | // Break the edges out of the target that lead to phis, then copy the
 18 | // target into a new block and make b jump to the copy instead.
 19 | //
 20 | // Then for each block whose immediate dominator is the target block
 21 | // find all live-in variables originating from the target and insert phis.
 22 | // Rename the variables in any block dominated by such a block. This
 23 | // effectively gives us loop-inversion for simple loops. Returns true if done.
 24 | bool Proc::opt_jump_be(uint16_t b)
 25 | {
 26 |     auto & jmp = ops[blocks[b].code.back()];
 27 | 
 28 |     // is this simple jump?
 29 |     if(jmp.opcode != ops::jmp)
 30 |     {
 31 |         if(jump_debug) BJIT_LOG(" JUMP:%d not jump (%s)\n", b, jmp.strOpcode());
 32 |         return false;
 33 |     }
 34 | 
 35 |     auto target = jmp.label[0];
 36 | 
 37 |     // does the target dominate? (dom lists are ordered root first)
 38 |     if(blocks[b].dom.size() <= blocks[target].dom.size()
 39 |     || blocks[b].dom[blocks[target].dom.size()-1] != target)
 40 |     {
 41 |         if(jump_debug) BJIT_LOG(" JUMP:%d target doesn't dominate\n", b);
 42 |         return false;
 43 |     }
 44 | 
 45 |     // does the target end in a branch?
 46 |     auto & jcc = ops[blocks[target].code.back()];
 47 |     if(jcc.opcode >= ops::jmp)
 48 |     {
 49 |         if(jump_debug) BJIT_LOG(" JUMP:%d not branch\n", b);
 50 |         return false;
 51 |     }
 52 | 
 53 |     if(jump_debug)
 54 |         BJIT_LOG(" LOOP:%d (%d:%d,%d)", b, target, jcc.label[0], jcc.label[1]);
 55 |     else
 56 |         BJIT_LOG(" LOOP:%d", b);
 57 |     // NOTE(review): jmp/jcc reference into ops; confirm breakEdge()/addOp() cannot reallocate ops
 58 |     // break the edges out of target whose destination starts with a phi
 59 |     if(ops[blocks[jcc.label[0]].code[0]].opcode == ops::phi)
 60 |     {
 61 |         jcc.label[0] = breakEdge(target, jcc.label[0]);
 62 |     }
 63 |     if(ops[blocks[jcc.label[1]].code[0]].opcode == ops::phi)
 64 |     {
 65 |         jcc.label[1] = breakEdge(target, jcc.label[1]);
 66 |     }
 67 |     
 68 |     // Make a carbon-copy of the target block
 69 |     uint16_t nb = blocks.size();
 70 |     blocks.resize(blocks.size() + 1);
 71 |     if(jump_debug) BJIT_LOG("\n Jump L%d -> L%d (was: L%d)\n", b, nb, target);
 72 |     // these references are taken only after blocks has been resized
 73 |     auto & head = blocks[target];
 74 |     auto & copy = blocks[nb];
 75 |     copy.flags.live = true;
 76 |     copy.dom = blocks[b].dom;
 77 |     copy.dom.push_back(nb);
 78 |     copy.idom = b;
 79 |     copy.pdom = blocks[b].pdom; // shouldn't REALLY need pdoms, but fix anyway
 80 |     blocks[b].pdom = nb;
 81 |     live.push_back(nb);
 82 |     // redirect the back edge: b now jumps to the copy
 83 |     jmp.label[0] = nb;
 84 | 
 85 |     // we copy all the phis too
 86 |     copy.args.resize(head.args.size());
 87 | 
 88 |     impl::Rename    renameCopy;
 89 |     impl::Rename    renameJump;
 90 |     
 91 |     BJIT_ASSERT(head.code.size());
 92 |     for(int i = 0; i < head.code.size(); ++i)
 93 |     {
 94 |         auto opiIndex = head.code[i];
 95 |         auto & opi = ops[opiIndex];
 96 |         auto opcIndex = addOp(opi.opcode, opi.flags.type, nb);
 97 |         auto & opc = ops[opcIndex];
 98 | 
 99 |         // copy operands
100 |         opc.i64 = opi.i64;
101 |         // presumably rewrites operands of opc through the map built so far (see impl::Rename)
102 |         renameCopy(opc);
103 |         
104 |         // for jumps copy labels
105 |         if(opc.opcode <= ops::jmp)
106 |         {
107 |             opc.label[0] = opi.label[0];
108 |             opc.label[1] = opi.label[1];
109 | 
110 |             // need to fix come from
111 |             blocks[opc.label[0]].comeFrom.push_back(nb);
112 |             blocks[opc.label[1]].comeFrom.push_back(nb);
113 | 
114 |             // stop further loop-optimization here
115 |             // even if this folds into a simple jmp
116 |             opc.flags.no_opt = true;
117 | 
118 |             break;  // never copy dead tails
119 |         }
120 | 
121 |         // for phis, copy sources
122 |         if(opc.opcode == ops::phi)
123 |         {
124 |             BJIT_ASSERT(opc.phiIndex == opi.phiIndex);
125 |             copy.args[opc.phiIndex].phiop = opcIndex;
126 |         }
127 |         // remember original -> copy, later ops in the copy refer to the copy
128 |         renameCopy.add(opiIndex, opcIndex);
129 |     }
130 |     
131 |     // copy phi alts, pointing each at the matching phi of the copy
132 |     copy.alts = head.alts;
133 |     for(auto & a : copy.alts)
134 |     {
135 |         a.phi = blocks[nb].args[ops[a.phi].phiIndex].phiop;
136 |     }
137 |     
138 |     if(jump_debug) BJIT_LOG("Copied %d ops.\n", (int) copy.code.size());
139 | 
140 |     BJIT_ASSERT(copy.code.size());
141 | 
142 |     // next we need to fix all blocks that target immediately dominates
143 |     for(auto fb : live)
144 |     {
145 |         auto & fixBlock = blocks[fb];
146 |         if(fixBlock.idom != target) continue;
147 |         
148 |         if(jump_debug) BJIT_LOG("Block %d needs fixup.\n", fb);
149 |         // count the live-in values that need a phi
150 |         int nPhi = 0;
151 |         for(auto & in : fixBlock.livein)
152 |         {
153 |             // does this come from original head?
154 |             if(ops[in].block == target) ++nPhi;
155 |         }
156 | 
157 |         // insert phis: reserve nPhi slots at the front of the block
158 |         fixBlock.code.insert(fixBlock.code.begin(), nPhi, noVal);
159 |         
160 |         int iPhi = 0;
161 |         for(auto & in : fixBlock.livein)
162 |         {
163 |             // does this come from original head?
164 |             if(ops[in].block != target) continue;
165 | 
166 |             fixBlock.code[iPhi] = newOp(ops::phi, ops[in].flags.type, fb);
167 | 
168 |             // target needs to rename to use the phi
169 |             renameJump.add(in, fixBlock.code[iPhi]);
170 | 
171 |             // setup the new phi
172 |             ops[fixBlock.code[iPhi]].phiIndex = fixBlock.args.size();
173 |             ops[fixBlock.code[iPhi]].iv = noVal;
174 |             fixBlock.args.emplace_back(impl::Phi(fixBlock.code[iPhi]));
175 | 
176 |             // add alternatives, they are in our rename map
177 |             // we fix the real sources later
178 |             for(auto & r : renameCopy.map)
179 |             {
180 |                 if(r.src != in) continue;
181 |                 // one alternative per version: original from target, copy from nb
182 |                 fixBlock.newAlt(fixBlock.code[iPhi], target, r.src);
183 |                 fixBlock.newAlt(fixBlock.code[iPhi], nb, r.dst);
184 | 
185 |                 break;
186 |             }
187 | 
188 |             ++iPhi;
189 |         }
190 |     }
191 | 
192 |     // add every original -> copy pair to the jump rename list too
193 |     for(auto & r : renameCopy.map) renameJump.add(r.src, r.dst);
194 | 
195 |     // do a second pass to actually rename
196 |     for(auto fb : live)
197 |     {
198 |         auto & fixBlock = blocks[fb];
199 |         if(fixBlock.idom != target) continue;
200 |         // renameCopy is reused from here on as the per-block rename map
201 |         renameCopy.map.clear();
202 |         // filter renames relevant to this block (destination lives in fb)
203 |         for(auto & r : renameJump.map)
204 |         {
205 |             if(ops[r.dst].block == fb) renameCopy.add(r.src, r.dst);
206 |         }
207 | 
208 |         // find all blocks dominated by this block
209 |         for(auto rb : live)
210 |         {
211 |             // we need to do this the old-fashioned way because
212 |             // break-edge doesn't try to fix .dom globally
213 |             bool found = false;
214 |             for(int db = rb; db; db = blocks[db].idom)
215 |             {
216 |                 if(db != fb) continue;
217 |                 found = true;
218 |                 break;
219 |             }
220 |             if(!found) continue;
221 |             
222 |             if(jump_debug)
223 |                 BJIT_LOG("Renaming L%d in branch %d\n", rb, fb);
224 | 
225 |             // rename livein for better debugs
226 |             for(auto & in : blocks[rb].livein)
227 |             for(auto & r : renameCopy.map)
228 |             {
229 |                 if(in == r.src) in = r.dst;
230 |             }
231 |                 
232 |             for(auto & rop : blocks[rb].code)
233 |             {
234 |                 renameCopy(ops[rop]);
235 |             }
236 | 
237 |             // don't patch jumps in the copied block, we already fixed these
238 |             if(rb == nb) continue;
239 |             
240 |             auto & rjmp = ops[blocks[rb].code.back()];
241 |             if(rjmp.opcode > ops::jmp) continue;   // return or tail-call
242 |             // patch the phi sources that rb passes to its jump targets
243 |             for(int x = 0; x < 2; ++x)
244 |             {
245 |                 if(x && rjmp.opcode == ops::jmp) break;
246 | 
247 |                 if(jump_debug) BJIT_LOG("Patching jump to %d\n", rjmp.label[x]);
248 |                 for(auto & s : blocks[rjmp.label[x]].alts)
249 |                 {
250 |                     if(s.src == rb && ops[s.val].block != target)
251 |                     {
252 |                         if(jump_debug)
253 |                             BJIT_LOG("L:%d:%04x is from %d (keep)\n",
254 |                                 s.src, s.val, ops[s.val].block);
255 |                         continue;
256 |                     }
257 |                     // is this from somewhere else?
258 |                     if(s.src != rb) continue;
259 |                     // here s comes from rb and its value is defined in target
260 |                     for(auto & r : renameCopy.map)
261 |                     {
262 |                         if(s.val == r.src)
263 |                         {
264 |                             if(jump_debug)
265 |                                 BJIT_LOG("L:%d:%04x needs rewrite: ",
266 |                                     s.src, s.val);
267 |                                     
268 |                             if(s.src == rb)
269 |                             {
270 |                                 if(jump_debug)
271 |                                     BJIT_LOG("simple: L:%d:%04x\n",
272 |                                         ops[r.dst].block, r.dst);
273 |                                 s.val = r.dst;
274 |                             }
275 |                             else BJIT_ASSERT(false);
276 |                             break;
277 |                         }
278 |                     }
279 |                 }
280 |             }
281 |         }
282 |     }
283 | 
284 |     if(jump_debug) { debug(); }
285 | 
286 |     return true;
287 | }
288 | 
289 | bool Proc::opt_jump()
290 | {
291 |     // One round of control-flow cleanup: pull a block into its single
292 |     // predecessor, split degenerate self-loops and try opt_jump_be().
293 |     // Returns true when a merge, an edge split or a loop copy was done.
294 | 
295 |     rebuild_dom();      // don't need this if after CSE
296 |     rebuild_livein();   // don't need this if after sink
297 | 
298 |     if(jump_debug) debug();
299 |     
300 |     BJIT_LOG(" JUMP");
301 |     
302 |     bool progress = false;
303 |     for(int li = 0, liveSz = live.size(); li < liveSz; ++li)
304 |     {
305 |         auto b = live[li];
306 |         if(blocks[b].code.back() == noVal) continue;
307 | 
308 |         auto & op = ops[blocks[b].code.back()];
309 |     
310 |         // if this is a pointless jump then pull the contents
311 |         //
312 |         // Never pull the entry block (0) or the block itself: rebuild_cfg()
313 |         // always starts from block 0 so it has to keep its code, and pulling
314 |         // a block into itself would push_back into the vector being iterated.
315 |         if(op.opcode == ops::jmp
316 |         && op.label[0] != 0 && op.label[0] != b
317 |         && blocks[op.label[0]].comeFrom.size() == 1
318 |         && ops[blocks[op.label[0]].code[0]].opcode != ops::phi)
319 |         {
320 |             blocks[b].code.pop_back();
321 |             for(auto & tc : blocks[op.label[0]].code)
322 |             {
323 |                 blocks[b].code.push_back(tc);
324 |                 ops[tc].block = b;
325 |                 tc = noVal;
326 |             }
327 | 
328 |             auto & jmp = ops[blocks[b].code.back()];
329 |             // rewrite phi-sources
330 |             if(jmp.opcode <= ops::jmp)
331 |             {
332 |                 for(auto & s : blocks[jmp.label[0]].alts)
333 |                 {
334 |                     if(s.src == op.label[0]) s.src = b;
335 |                 }
336 |             }
337 |             if(jmp.opcode < ops::jmp)
338 |             {
339 |                 for(auto & s : blocks[jmp.label[1]].alts)
340 |                 {
341 |                     if(s.src == op.label[0]) s.src = b;
342 |                 }
343 |             }
344 | 
345 |             BJIT_LOG(" MERGE");
346 |             progress = true;
347 |             break;
348 |         }
349 | 
350 |         // if second branch is pdom, swap so DFS runs on loops first
351 |         if(op.opcode < ops::jmp && blocks[b].pdom == op.label[1])
352 |         {
353 |             op.opcode ^= 1;
354 |             std::swap(op.label[0], op.label[1]);
355 |         }
356 | 
357 |         if(op.flags.no_opt) continue;
358 | 
359 |         // handle degenerate loops too?
360 |         if(op.opcode < ops::jmp && op.label[0] == b)
361 |         {
362 |             op.label[0] = breakEdge(b, b);
363 |             op.flags.no_opt = true;
364 |             progress = true;
365 | 
366 |             // want doms though (FIXME: move this to opt_jump_be only?)
367 |             rebuild_dom();
368 | 
369 |             if(jump_debug) BJIT_LOG(" TRY %d\n", op.label[0]);
370 | 
371 |             if(opt_jump_be(op.label[0])) break;
372 |         }
373 |         
374 |         if(op.opcode < ops::jmp && op.label[1] == b)
375 |         {
376 |             op.label[1] = breakEdge(b, b);
377 |             op.flags.no_opt = true;
378 |             progress = true;
379 |             
380 |             // want doms though (FIXME: move this to opt_jump_be only?)
381 |             rebuild_dom();
382 |             
383 |             if(jump_debug) BJIT_LOG(" TRY %d\n", op.label[1]);
384 |             
385 |             if(opt_jump_be(op.label[1])) break;
386 |         }
387 | 
388 |         // if we didn't do a trivial pull, try opt_jump
389 |         // but only once per fold, we need to update live info
390 |         if(op.opcode == ops::jmp && opt_jump_be(b))
391 |         {
392 |             progress = true;
393 |             break;
394 |         }
395 |     }
396 | 
397 |     // we really need this here because it cleans up
398 |     // any stale phis, so DCE doesn't get confused
399 |     rebuild_cfg();
400 |     opt_dce();
401 | 
402 |     return progress;
403 | }
404 | 
405 | void Proc::find_ivs()
406 | {
407 |     // Detect IVs: for each phi, iv ends up naming the op that computes
408 |     // the value arriving from a source other than the immediate dominator,
409 |     // or noVal when no single one-op or two-op update of the phi is found.
410 |     // rename ops are looked through when tracing a value to its source
411 |     auto findSource = [&](uint16_t v) -> uint16_t
412 |     {
413 |         while(ops[v].opcode == ops::rename) v = ops[v].in[0];
414 |         return v;
415 |     };
416 | 
417 |     rebuild_dom();
418 |     BJIT_LOG(" IV");
419 | 
420 |     for(auto & b : live)
421 |     {
422 |         auto & block = blocks[b];
423 | 
424 |         // every phi starts out naming itself, meaning "undecided"
425 |         for(auto & p : block.args)
426 |         {
427 |             if(p.phiop != noVal) ops[p.phiop].iv = p.phiop;
428 |         }
429 | 
430 |         for(auto & a : block.alts)
431 |         {
432 |             // FIXME: This condition assumes that all loops have a preheader
433 |             if(a.src == block.idom) continue;
434 | 
435 |             auto avs = findSource(a.val);
436 |             auto & val = ops[avs];
437 |             auto & phi = ops[a.phi];
438 | 
439 |             // same update seen again: nothing to do
440 |             if(phi.iv == avs) continue;
441 |             if(val.opcode == ops::phi) { phi.iv = noVal; continue; }
442 |             if(phi.iv == noVal) continue;
443 | 
444 |             // a second, different update disqualifies the phi
445 |             if(phi.iv != a.phi) { phi.iv = noVal; continue; }
446 | 
447 |             // accept avs if 'other' is defined in a block found in the
448 |             // dom list of our immediate dominator, reject it otherwise
449 |             auto acceptIfDominates = [&](uint16_t other)
450 |             {
451 |                 for(auto & d : blocks[block.idom].dom)
452 |                 {
453 |                     if(ops[other].block == d) { phi.iv = avs; break; }
454 |                 }
455 |                 if(phi.iv != avs) phi.iv = noVal;
456 |             };
457 | 
458 |             switch(val.nInputs())
459 |             {
460 |             case 2:
461 |                 // one operand must be the phi, the other must dominate it
462 |                 if(findSource(val.in[1]) == a.phi)
463 |                 {
464 |                     acceptIfDominates(val.in[0]);
465 |                 }
466 |                 else if(findSource(val.in[0]) == a.phi)
467 |                 {
468 |                     acceptIfDominates(val.in[1]);
469 |                 }
470 |                 break;
471 |             case 1:
472 |                 if(findSource(val.in[0]) == a.phi) phi.iv = avs;
473 |                 break;
474 |             default:
475 |                 // if this is not one-op or two-op then
476 |                 // it's almost certainly not a valid IV
477 |                 phi.iv = noVal;
478 |                 break;
479 |             }
480 |         }
481 | 
482 |         // phis left undecided are not induction variables
483 |         for(auto & p : block.args)
484 |         {
485 |             if(p.phiop == noVal) continue;
486 |             if(ops[p.phiop].iv == p.phiop) ops[p.phiop].iv = noVal;
487 |         }
488 |     }
489 | }


--------------------------------------------------------------------------------
/src/opt-sink.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "bjit.h"
  3 | 
  4 | using namespace bjit;
  5 | 
  6 | static const bool sink_debug = false;
  7 | 
  8 | bool Proc::opt_sink(bool unsafeOpt)
  9 | {
 10 |     rebuild_livein();
 11 | 
 12 |     // livescan doesn't find phi-inputs, we need them here
 13 |     for(auto b : live)
 14 |     {
 15 |         for(auto & s : blocks[b].alts)
 16 |         {
 17 |             blocks[b].livein.push_back(s.val);
 18 |         }
 19 |     }
 20 | 
 21 |     BJIT_LOG(" SINK");
 22 | 
 23 |     // collect moved ops into tmp (in reverse)
 24 |     // so that we can merge them all together
 25 |     std::vector<uint16_t>   tmp0, tmp1;
 26 | 
 27 |     // one pass should be enough 'cos DFS
 28 |     bool progress = false;
 29 |     for(int li = 0, liveSz = live.size(); li < liveSz; ++li)
 30 |     {
 31 |         auto b = live[li];
 32 |         // find local uses
 33 |         findUsesBlock(b, false, true);
 34 | 
 35 |         auto & jmp = ops[blocks[b].code.back()];
 36 | 
 37 |         if(sink_debug) BJIT_LOG("\nSink in L%d?", b);
 38 | 
 39 |         // is this a return block?
 40 |         if(jmp.opcode > ops::jmp)
 41 |         {
 42 |             if(sink_debug) BJIT_LOG("\nL%d is exit block", b);
 43 |             continue;
 44 |         }
 45 | 
 46 |         // is this a straight jmp?
 47 |         if(jmp.opcode == ops::jmp)
 48 |         {
 49 |             // if we don't dominate the block, then bail out
 50 |             if(blocks[jmp.label[0]].idom != b)
 51 |             {
 52 |                 if(sink_debug)
 53 |                     BJIT_LOG("\nL%d doesn't dominate L%d", b, jmp.label[0]);
 54 |                 continue;
 55 |             }
 56 |         }
 57 | 
 58 |         tmp0.clear();
 59 |         tmp1.clear();
 60 | 
 61 |         // loop code backwards
 62 |         for(int c = blocks[b].code.size(); c--;)
 63 |         {
 64 |             auto opIndex = blocks[b].code[c];
 65 |             auto op = ops[opIndex];
 66 | 
 67 |             // if this has no local uses, is it something we can sink?
 68 |             if(op.nUse || !op.canCSE() || (!unsafeOpt && op.hasSideFX()))
 69 |             {
 70 |                 if(sink_debug)
 71 |                     BJIT_LOG("\n %04x not eligible in L%d", opIndex, b);
 72 |                 continue;
 73 |             }
 74 | 
 75 |             // it must be live-out in at least one block
 76 |             bool live0 = false, live1 = false;
 77 | 
 78 |             for(auto l : blocks[jmp.label[0]].livein)
 79 |             {
 80 |                 if(opIndex != l) continue;
 81 |                 live0 = true;
 82 |                 continue;
 83 |             }
 84 | 
 85 |             if(jmp.opcode < ops::jmp)
 86 |                 for(auto l : blocks[jmp.label[1]].livein)
 87 |             {
 88 |                 if(opIndex != l) continue;
 89 |                 live1 = true;
 90 |                 continue;
 91 |             }
 92 |             
 93 |             if(sink_debug) BJIT_LOG("\nLive (%s, %s)...",
 94 |                 live0 ? "yes" : "no", live1 ? "yes" : "no");
 95 | 
 96 |             // don't move if live (or dead) in both branches
 97 |             if(live0 == live1) continue;
 98 |             
 99 |             if(sink_debug) BJIT_LOG("\nTry to sink...");
100 |             
101 |             // do not move into blocks that merge paths
102 |             // this prevents us from sinking loop invariants
103 |             // back into the loop, which would be silly
104 |             if(blocks[jmp.label[live0?0:1]].comeFrom.size() > 1)
105 |             {
106 |                 // if the edge is not critical, don't sink at all
107 |                 // NOTE: we check this here (not at the top) because jump-opt
108 |                 // which we want to try only after finding sinkable op
109 |                 if(jmp.opcode == ops::jmp)
110 |                 {
111 |                     if(sink_debug) BJIT_LOG("\nMerging path not critical...");
112 |                     break;  // no point scanning the rest
113 |                 }
114 | 
115 |                 // otherwise break the edge
116 |                 jmp.label[live0?0:1] = breakEdge(b, jmp.label[live0?0:1]);
117 |                 if(sink_debug) BJIT_LOG(" L%d", jmp.label[live0?0:1]);
118 |             }
119 |             
120 |             if(sink_debug) BJIT_LOG("\nSinking...");
121 | 
122 |             // pick the block where this is live
123 |             (live0 ? tmp0 : tmp1).push_back(opIndex);
124 |             blocks[b].code[c] = noVal;  // dead at original site
125 |             
126 |             // see if we should be moving inputs too?
127 |             for(int k = 0; k < op.nInputs(); ++k)
128 |             {
129 |                 // if this was last use, for something in this block
130 |                 // then mark it as livein for the block where we moved to
131 |                 if(ops[op.in[k]].block == b && !--ops[op.in[k]].nUse)
132 |                 {
133 |                     if(sink_debug) BJIT_LOG("\nLast use for %04x", op.in[k]);
134 |                     blocks[jmp.label[live0?0:1]].livein.push_back(op.in[k]);
135 |                 }
136 |             }
137 |         }
138 | 
139 |         // did we move anything?
140 |         if(tmp0.size())
141 |         {
142 |             progress = true;
143 |             
144 |             // skip any ops that must be in the beginning
145 |             // really just phis, but try to be future-proof
146 |             int insertAt = 0;
147 |             int tBlock = jmp.label[0];
148 |             while(insertAt < blocks[tBlock].code.size())
149 |             {
150 |                 if(ops[blocks[tBlock].code[insertAt]].canMove()) break;
151 |                 ++insertAt;
152 |             }
153 | 
154 |             // make room and move original ops back
155 |             auto & tcode = blocks[tBlock].code;
156 |             tcode.resize(tcode.size() + tmp0.size());
157 |             for(int i = tcode.size(); --i > insertAt;)
158 |             {
159 |                 tcode[i] = tcode[i-tmp0.size()];
160 |             }
161 |             
162 |             //BJIT_LOG("Moving B%d -> B%d:\n", b, jmp.label[0]);
163 |             
164 |             // then work merge tmp which needs to be reversed
165 |             for(int i = insertAt; tmp0.size(); i++)
166 |             {
167 |                 //debugOp(tmp0.back());
168 |                 tcode[i] = tmp0.back(); tmp0.pop_back();
169 |                 ops[tcode[i]].block = tBlock;
170 |                 ops[tcode[i]].flags.no_opt = true;  // don't hoist further
171 |             }
172 |         }
173 |         
174 |         if(tmp1.size())
175 |         {
176 |             progress = true;
177 |             
178 |             BJIT_ASSERT_MORE(jmp.opcode < ops::jmp);
179 |             
180 |             // skip any ops that must be in the beginning
181 |             // really just phis, but try to be future-proof
182 |             int insertAt = 0;
183 |             int tBlock = jmp.label[1];
184 |             while(insertAt < blocks[tBlock].code.size())
185 |             {
186 |                 if(ops[blocks[tBlock].code[insertAt]].canMove()) break;
187 |                 ++insertAt;
188 |             }
189 |             
190 |             // make room and move original ops back
191 |             auto & tcode = blocks[tBlock].code;
192 |             tcode.resize(tcode.size() + tmp1.size());
193 |             for(int i = tcode.size(); --i > insertAt;)
194 |             {
195 |                 tcode[i] = tcode[i-tmp1.size()];
196 |             }
197 | 
198 |             //BJIT_LOG("Moving B%d -> B%d:\n", b, jmp.label[1]);
199 |             
200 |             // then work merge tmp which needs to be reversed
201 |             for(int i = insertAt; tmp1.size(); i++)
202 |             {
203 |                 //debugOp(tmp1.back());
204 |                 
205 |                 tcode[i] = tmp1.back(); tmp1.pop_back();
206 |                 ops[tcode[i]].block = tBlock;
207 |                 ops[tcode[i]].flags.no_opt = true;  // don't hoist further
208 |             }
209 |         }
210 |     }
211 | 
212 |     //debug();
213 |     
214 |     return progress;
215 | }
216 | 


--------------------------------------------------------------------------------
/src/sanity.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "bjit.h"
 3 | 
 4 | using namespace bjit;
 5 | 
 6 | // Internal sanity checker
 7 | //
 8 | // This checks (some of the) invariants that we rely on.
 9 | //
10 | // Note that in some cases we can violate invariants temporarily,
11 | // but they should always check out after each DCE pass.
12 | // 
13 | void Proc::sanity()
14 | {
15 |     BJIT_ASSERT(live.size());    // must have one pass DCE
16 | 
17 |     rebuild_livein();
18 |     opt_dce();  // do another round to get use counts?
19 |     rebuild_dom();
20 |     
21 |     debug();
22 |     
23 |     for(auto & b : live)
24 |     {
25 |         for(auto & c : blocks[b].code)
26 |         {
27 |             auto & op = ops[c];
28 | 
29 |             //debugOp(c);
30 | 
31 |             // sanity check that block/index are correct
32 |             BJIT_ASSERT(op.block == b);
33 | 
34 |             if(op.opcode == ops::phi)
35 |             {
36 |                 int nPhiSrc = 0;
37 |                 for(auto & a : blocks[b].alts) if(a.phi == c) ++nPhiSrc;
38 |                 BJIT_ASSERT(nPhiSrc == blocks[b].comeFrom.size());
39 |             
40 |                 int phiSourcesFound = 0;
41 |                 for(auto & s : blocks[b].alts)
42 |                 {
43 |                     if(s.phi != c) continue;
44 |                     
45 |                     bool phiSourceInComeFrom = false;
46 |                     for(auto cf : blocks[b].comeFrom)
47 |                     {
48 |                         if(s.src != cf) continue;
49 |                         phiSourceInComeFrom = true;
50 |                         break;
51 |                     }
52 |                     BJIT_ASSERT(phiSourceInComeFrom);
53 |                     ++phiSourcesFound;
54 |                 }
55 |                 BJIT_ASSERT(phiSourcesFound == blocks[b].comeFrom.size());
56 |             }
57 |             
58 |             // sanity check that definitions dominate uses
59 |             // also check that non-locals are marked as livein
60 |             for(int i = 0; i < op.nInputs(); ++i)
61 |             {
62 |                 bool inputDominates = false;
63 |                 for(auto & d : blocks[b].dom)
64 |                 {
65 |                     if(d == ops[op.in[i]].block)
66 |                     {
67 |                         inputDominates = true;
68 |                         break;
69 |                     }
70 |                 }
71 |                 
72 |                 BJIT_ASSERT(inputDominates);
73 |                 
74 |                 bool liveIn = (ops[op.in[i]].block == b);
75 |                 if(!liveIn)
76 |                 {
77 |                     for(auto & in : blocks[b].livein)
78 |                     {
79 |                         if(in == op.in[i]) liveIn = true;
80 |                     }
81 |                 }
82 |                 BJIT_ASSERT(liveIn);
83 |             }
84 |         }
85 |     }
86 | 
87 |     BJIT_LOG(" SANE\n");
88 | }


--------------------------------------------------------------------------------
/tests/test_add_ff.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "bjit.h"
 3 | 
 4 | int main()
 5 | {
 6 | 
 7 |     bjit::Module    module;
 8 | 
 9 |     {
10 |         bjit::Proc      proc(0, "ff");
11 |         proc.fret(proc.fadd(proc.env[0], proc.env[1]));
12 |         module.compile(proc);
13 |     }
14 |     {
15 |         bjit::Proc      proc(0, "dd");
16 |         proc.dret(proc.dadd(proc.env[0], proc.env[1]));
17 |         module.compile(proc);
18 |     }
19 |     {
20 |         bjit::Proc      proc(0, "dd");
21 |         proc.dret(proc.cf2d(proc.fadd(
22 |             proc.cd2f(proc.env[0]), proc.cd2f(proc.env[1]))));
23 |         module.compile(proc);
24 |     }
25 |     
26 |     auto & codeOut = module.getBytes();
27 |     
28 |     FILE * f = fopen("out.bin", "wb");
29 |     fwrite(codeOut.data(), 1, codeOut.size(), f);
30 |     fclose(f);
31 |     
32 |     printf(" - Wrote out.bin\n");
33 | 
34 |     BJIT_ASSERT(module.load());
35 |     
36 |     BJIT_ASSERT(module.getPointer<float(float,float)>(0)(1.f, 5.5f) == 6.5f);
37 |     BJIT_ASSERT(module.getPointer<double(double,double)>(1)(2.5, 3.25) == 5.75);
38 |     BJIT_ASSERT(module.getPointer<double(double,double)>(2)(3.25, 4.5) == 7.75);
39 | 
40 |     return 0;
41 | }
42 | 


--------------------------------------------------------------------------------
/tests/test_add_ii.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "bjit.h"
 3 | 
 4 | int main()
 5 | {
 6 | 
 7 |     bjit::Module    module;
 8 |     bjit::Proc      proc(0, "ii");
 9 | 
10 |     proc.iret(proc.iadd(proc.env[0], proc.env[1]));
11 | 
12 |     int i = module.compile(proc);
13 | 
14 |     BJIT_ASSERT(module.load());
15 | 
16 |     auto ptr = module.getPointer<int(int,int)>(i);
17 | 
18 |     printf(" 2 + 5 = %d\n", ptr(2, 5));
19 | 
20 |     BJIT_ASSERT(ptr(2,5) == 7);
21 | 
22 |     return 0;
23 | }
24 | 


--------------------------------------------------------------------------------
/tests/test_call_stub.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "bjit.h"
 3 | 
 4 | #include <cstdint>
 5 | #include <cstdio>
 6 | 
 7 | int hello()
 8 | {
 9 |     printf("Hello world\n");
10 | 
11 |     return 42;
12 | }
13 | 
// Second stub target: prints a different greeting and returns 45.
int helloAgain()
{
    fputs("Hello world, again\n", stdout);
    return 45;
}
20 | 
21 | int main()
22 | {
23 | 
24 |     bjit::Module    module;
25 | 
26 |     // proc 0, stub
27 |     module.compileStub(0);
28 | 
29 |     // proc 1, near-call stub
30 |     {
31 |         bjit::Proc      proc(0, "");
32 |         proc.iret(proc.icalln(0, 0));
33 |         module.compile(proc);
34 |     }
35 |     
36 |     BJIT_ASSERT(module.load());
37 | 
38 |     auto codeOut = module.getBytes();
39 |     if(codeOut.size())
40 |     {
41 |         FILE * f = fopen("out.bin", "wb");
42 |         fwrite(codeOut.data(), 1, codeOut.size(), f);
43 |         fclose(f);
44 |         
45 |         printf(" - Wrote out.bin\n");
46 |     }
47 | 
48 |     module.patchStub(0, (uintptr_t)&hello);
49 |     module.patch();
50 |     
51 |     BJIT_ASSERT(module.getPointer<int()>(1)() == 42);
52 | 
53 |     module.unload();
54 |     
55 |     module.patchStub(0, (uintptr_t)&helloAgain);
56 |     module.load();
57 |     BJIT_ASSERT(module.getPointer<int()>(1)() == 45);
58 | 
59 |     return 0;
60 | }
61 | 


--------------------------------------------------------------------------------
/tests/test_calln.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "bjit.h"
 3 | 
 4 | #include <cstdint>
 5 | 
 6 | 
 7 | int main()
 8 | {
 9 | 
10 |     bjit::Module    module;
11 | 
12 |     // proc 0, does actual sub
13 |     {
14 |         bjit::Proc      proc(0, "ii");
15 |         proc.iret(proc.isub(proc.env[0], proc.env[1]));
16 |         module.compile(proc);
17 |     }
18 |     // proc 1, icalln
19 |     {
20 |         bjit::Proc      proc(0, "ii");
21 |         proc.iret(proc.icalln(2, 2));
22 |         module.compile(proc);
23 |     }
24 |     // proc 2: tcalln
25 |     {
26 |         bjit::Proc      proc(0, "ii");
27 |         proc.tcalln(0, 2);
28 |         module.compile(proc);
29 |     }
30 |     
31 |     BJIT_ASSERT(module.load());
32 | 
33 |     auto codeOut = module.getBytes();
34 |     if(codeOut.size())
35 |     {
36 |         FILE * f = fopen("out.bin", "wb");
37 |         fwrite(codeOut.data(), 1, codeOut.size(), f);
38 |         fclose(f);
39 |         
40 |         printf(" - Wrote out.bin\n");
41 |     }
42 |     
43 |     BJIT_ASSERT(module.getPointer<int(int,int)>(1)(5, 2) == 3);
44 |     BJIT_ASSERT(module.getPointer<int(int,int)>(2)(7, 3) == 4);
45 | 
46 |     return 0;
47 | }
48 | 


--------------------------------------------------------------------------------
/tests/test_callp.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "bjit.h"
 3 | 
 4 | #include <cstdint>
 5 | #include <cinttypes>
 6 | 
 7 | int isub2(int a, int b)
 8 | {
 9 |     printf("%d - %d = %d\n", a, b, a-b);
10 |     return a - b;
11 | }
12 | 
// Single-precision call target: logs the subtraction and returns a - b.
float fsub2(float a, float b)
{
    const float diff = a - b;
    printf("%f - %f = %f\n", a, b, diff);
    return diff;
}
18 | 
// Double-precision call target: logs the subtraction and returns a - b.
double dsub2(double a, double b)
{
    const double diff = a - b;
    printf("%f - %f = %f\n", a, b, diff);
    return diff;
}
24 | 
25 | int main()
26 | {
27 | 
28 |     bjit::Module    module;
29 | 
30 |     printf("isub2 %" PRIuPTR ", fsub2 %p\n", (uintptr_t)isub2, fsub2);
31 | 
32 |     {
33 |         bjit::Proc      proc(0, "ii");
34 |         proc.env[0] = proc.iadd(proc.env[0], proc.lci(1));
35 |         proc.iret(proc.icallp(proc.lci(uintptr_t(isub2)), 2));
36 |         proc.debug();
37 |         module.compile(proc);
38 |     }
39 |     {
40 |         bjit::Proc      proc(0, "ii");
41 |         proc.env[0] = proc.iadd(proc.env[0], proc.lci(1));
42 |         proc.tcallp(proc.lci(uintptr_t(isub2)), 2);
43 |         proc.debug();
44 |         module.compile(proc);
45 |     }
46 |     {
47 |         bjit::Proc      proc(0, "ff");
48 |         proc.fret(proc.fcallp(proc.lci(uintptr_t(fsub2)), 2));
49 |         proc.debug();
50 |         module.compile(proc);
51 |     }
52 |     {
53 |         bjit::Proc      proc(0, "dd");
54 |         proc.dret(proc.dcallp(proc.lci(uintptr_t(dsub2)), 2));
55 |         proc.debug();
56 |         module.compile(proc);
57 |     }
58 | 
59 |     BJIT_ASSERT(module.load());
60 | 
61 |     auto codeOut = module.getBytes();
62 |     if(codeOut.size())
63 |     {
64 |         FILE * f = fopen("out.bin", "wb");
65 |         fwrite(codeOut.data(), 1, codeOut.size(), f);
66 |         fclose(f);
67 |         
68 |         printf(" - Wrote out.bin\n");
69 |     }
70 |     
71 |     printf("icall\n");
72 |     BJIT_ASSERT(module.getPointer<int(int,int)>(0)(5, 2) == 4);
73 |     printf("tcall\n");
74 |     BJIT_ASSERT(module.getPointer<int(int,int)>(1)(7, 1) == 7);
75 |     printf("fcall\n");
76 |     BJIT_ASSERT(module.getPointer<float(float,float)>(2)(15.5f, 6.f) == 9.5f);
77 |     printf("dcall\n");
78 |     BJIT_ASSERT(module.getPointer<double(double,double)>(3)(5.5, 2) == 3.5);
79 | 
80 |     return 0;
81 | }
82 | 


--------------------------------------------------------------------------------
/tests/test_ci2f_cf2i.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "bjit.h"
 3 | 
 4 | int main()
 5 | {
 6 | 
 7 |     bjit::Module    module;
 8 | 
 9 |     {
10 |         bjit::Proc      proc(0, "if");
11 |         proc.iret(proc.cf2i(proc.fadd(proc.ci2f(proc.env[0]), proc.env[1])));
12 |         
13 |         module.compile(proc);
14 |     }
15 | 
16 | 
17 |     {
18 |         bjit::Proc      proc(0, "id");
19 |         proc.iret(proc.cd2i(proc.dadd(proc.ci2d(proc.env[0]), proc.env[1])));
20 |         
21 |         module.compile(proc);
22 |     }
23 | 
24 | 
25 |     BJIT_ASSERT(module.load());
26 |     {
27 |         auto ptr = module.getPointer<int(int,float)>(0);
28 |         printf(" 2 + 5 = %d\n", ptr(2, 5));
29 |         BJIT_ASSERT(ptr(2,5) == 7);
30 |     }
31 | 
32 |     {
33 |         auto ptr = module.getPointer<int(int,double)>(1);
34 |         printf(" 2 + 5 = %d\n", ptr(2, 5));
35 |         BJIT_ASSERT(ptr(2,5) == 7);
36 |     }
37 | 
38 |     return 0;
39 | }
40 | 


--------------------------------------------------------------------------------
/tests/test_divmod.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "bjit.h"
 3 | 
 4 | int main()
 5 | {
 6 | 
 7 |     bjit::Module    module;
 8 | 
 9 |     {
10 |         bjit::Proc      proc(0, "ii");
11 |         proc.iret(proc.idiv(proc.env[0], proc.env[1]));
12 |         module.compile(proc);
13 |     }
14 |     {
15 |         bjit::Proc      proc(0, "ii");
16 |         proc.iret(proc.imod(proc.env[0], proc.env[1]));
17 |         module.compile(proc);
18 |     }
19 |     {
20 |         bjit::Proc      proc(0, "ii");
21 |         proc.iret(proc.udiv(proc.env[0], proc.env[1]));
22 |         module.compile(proc);
23 |     }
24 |    {
25 |         bjit::Proc      proc(0, "ii");
26 |         proc.iret(proc.umod(proc.env[0], proc.env[1]));
27 |         module.compile(proc);
28 |     }
29 |    
30 |     auto & codeOut = module.getBytes();
31 |     
32 |     FILE * f = fopen("out.bin", "wb");
33 |     fwrite(codeOut.data(), 1, codeOut.size(), f);
34 |     fclose(f);
35 |     
36 |     printf(" - Wrote out.bin\n");
37 | 
38 |     BJIT_ASSERT(module.load());
39 | 
40 |     int64_t     s = -3249421;
41 |     uint64_t    u = 55425439;
42 | 
43 |     BJIT_ASSERT(module.getPointer<uint64_t(uint64_t,uint64_t)>(0)(s,3) == (s/3));
44 |     BJIT_ASSERT(module.getPointer<uint64_t(uint64_t,uint64_t)>(1)(s,3) == (s%3));
45 |     BJIT_ASSERT(module.getPointer<uint64_t(uint64_t,uint64_t)>(1)(s,-3) == (s%-3));
46 |     
47 |     BJIT_ASSERT(module.getPointer<uint64_t(uint64_t,uint64_t)>(2)(u,3) == (u/3));
48 |     BJIT_ASSERT(module.getPointer<uint64_t(uint64_t,uint64_t)>(3)(u,3) == (u%3));
49 |     BJIT_ASSERT(module.getPointer<uint64_t(uint64_t,uint64_t)>(3)(u,-3) == (u%-3));
50 | 
51 |     return 0;
52 | }
53 | 


--------------------------------------------------------------------------------
/tests/test_fib.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "bjit.h"
 3 | 
 4 | int fib(int x)
 5 | {
 6 |     if( x <= 1 ) return 1;
 7 |     return fib(x-1) + fib(x-2);
 8 | }
 9 | 
10 | int main()
11 | {
12 | 
13 |     bjit::Module    module;
14 |     {
15 |         bjit::Proc  pr(0, "i");
16 | 
17 |         auto lt = pr.newLabel();
18 |         auto le = pr.newLabel();
19 | 
20 |         pr.jnz(pr.ile(pr.env[0], pr.lci(1)), lt, le);
21 | 
22 |         pr.emitLabel(lt);
23 |         pr.iret(pr.lci(1));
24 | 
25 |         pr.emitLabel(le);
26 | 
27 |         pr.env.push_back(pr.isub(pr.env[0], pr.lci(1)));
28 |         auto a = pr.icalln(0, 1);   // recursive fib(x-1)
29 |         pr.env.pop_back();
30 |         
31 |         pr.env.push_back(pr.isub(pr.env[0], pr.lci(2)));
32 |         auto b = pr.icalln(0, 1);   // recursive fib(x-2)
33 |         pr.env.pop_back();
34 | 
35 |         pr.iret(pr.iadd(a,b));
36 | 
37 |         module.compile(pr);
38 |     }
39 |     auto & codeOut = module.getBytes();
40 |     FILE * f = fopen("out.bin", "wb");
41 |     fwrite(codeOut.data(), 1, codeOut.size(), f);
42 |     fclose(f);
43 |     printf(" - Wrote out.bin\n");
44 |     BJIT_ASSERT(module.load());
45 | 
46 |     auto x = 16;
47 |     auto y = fib(x);
48 |     printf("C-fib: %d\n", y);
49 | 
50 |     BJIT_ASSERT(y == module.getPointer<int(int)>(0)(x));
51 | 
52 |     return 0;
53 | }
54 | 


--------------------------------------------------------------------------------
/tests/test_fuzzfold.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "bjit.h"
 3 | 
 4 | // Define to only run one (eg. known to fail) case
 5 | // and enable dumping the code into a file
 6 | //#define ONECASE 93495
 7 | 
 8 | uintptr_t iFuzzSeed(uint64_t seed, int opt)
 9 | {
10 |     bjit::Module    module;
11 |     bjit::Proc      proc(0, "iiii");
12 | 
13 |     auto random = [&]() -> uint64_t { return bjit::hash64(seed++); };
14 | 
15 |     for(int i = 0; i < 64; ++i)
16 |     {
17 |         int op = random() % 11;
18 | 
19 |         // skip the alloc
20 |         int a0 = 1 + (random() % (proc.env.size()-1));
21 |         int a1 = 1 + (random() % (proc.env.size()-1));
22 | 
23 |         switch(op)
24 |         {
25 |         case 0: proc.env.push_back(proc.lci(random())); break;
26 |         case 1: proc.env.push_back(proc.lci(0x1<<31)); break;
27 |         case 2: proc.env.push_back(proc.lci((uint32_t)random())); break;
28 |         
29 |         case 3: proc.env.push_back(proc.iadd(proc.env[a0], proc.env[a1])); break;
30 |         case 4: proc.env.push_back(proc.isub(proc.env[a0], proc.env[a1])); break;
31 |         case 5: proc.env.push_back(proc.imul(proc.env[a0], proc.env[a1])); break;
32 |         
33 |         case 6: proc.env.push_back(proc.iand(proc.env[a0], proc.env[a1])); break;
34 |         case 7: proc.env.push_back(proc.ior(proc.env[a0], proc.env[a1])); break;
35 |         case 8: proc.env.push_back(proc.ixor(proc.env[a0], proc.env[a1])); break;
36 | 
37 |         case 9: proc.env.push_back(proc.ineg(proc.env[a0])); break;
38 |         case 10: proc.env.push_back(proc.inot(proc.env[a0])); break;
39 |         }
40 |     }
41 | 
42 |     proc.iret(proc.env[1 + (random() % (proc.env.size()-1))]);
43 |     module.compile(proc, opt);
44 | #ifdef ONECASE
45 |     if(opt)
46 | #else
47 |     if(false)
48 | #endif
49 |     {
50 |         auto & codeOut = module.getBytes();
51 |         FILE * f = fopen("out.bin", "wb");
52 |         fwrite(codeOut.data(), 1, codeOut.size(), f);
53 |         fclose(f);
54 |         printf(" - Wrote out.bin\n");
55 |     }
56 |     module.load();
57 |     auto ptr = module.getPointer<uintptr_t(int,int,int,int)>(0);
58 | 
59 |     int p0 = random();
60 |     int p1 = random();
61 |     int p2 = random();
62 |     int p3 = random();
63 |     return ptr(p0, p1, p2, p3);
64 | }
65 | 
66 | int main()
67 | {
68 | #ifdef ONECASE
69 |     for(int i = ONECASE; i == ONECASE; ++i)
70 | #else
71 |     for(int i = 0; i < 123456; ++i)
72 | #endif
73 |     {
74 |         auto seed = bjit::hash64(i);
75 |         auto fuzz0 = iFuzzSeed(seed, 0);
76 |         auto fuzz2 = iFuzzSeed(seed, 2);
77 |         BJIT_LOG("\nTest iter %d\n", i);
78 |         if(fuzz0 != fuzz2)
79 |             BJIT_LOG(" %p != %p\n", (void*)fuzz0, (void*)fuzz2);
80 |         BJIT_ASSERT(fuzz0 == fuzz2);
81 | 
82 |         BJIT_LOG(" OK: %d\n", i);
83 |     }
84 | 
85 |     return 0;
86 | }
87 | 


--------------------------------------------------------------------------------
/tests/test_load_store.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "bjit.h"
  3 | 
  4 | #include <cstdint>
  5 | 
// Memory layout that the load/store test procedures read and write.
// The test addresses the fields through offsetof(), so the layout
// (field order and the unions) is part of what is being tested.
struct TestData
{
    // Each narrow integer field shares storage with a 64-bit field, which
    // lets the test initialize all 64 bits of the slot at once and then
    // read back just the narrow part.
    union
    {
        int8_t      i8;
        uint8_t     u8;
        uint64_t    i8_64;      // full-width view of the 8-bit slot
    };
    union
    {
        int16_t     i16;
        uint16_t    u16;
        uint64_t    i16_64;     // full-width view of the 16-bit slot
    };
    union
    {
        int32_t     i32;
        uint32_t    u32;
        uint64_t    i32_64;     // full-width view of the 32-bit slot
    };
    uint64_t        i64;

    float   f32;
    double  f64;
};
 31 | 
 32 | int main()
 33 | {
 34 | 
 35 |     bjit::Module    module;
 36 | 
 37 |     {
 38 |         bjit::Proc      proc(0, "i");
 39 |         proc.iret(proc.li8(proc.env[0], offsetof(TestData,i8)));
 40 |         module.compile(proc);
 41 |     }
 42 |     {
 43 |         bjit::Proc      proc(0, "i");
 44 |         proc.iret(proc.li16(proc.env[0], offsetof(TestData,i16)));
 45 |         module.compile(proc);
 46 |     }
 47 |     {
 48 |         bjit::Proc      proc(0, "i");
 49 |         proc.iret(proc.li32(proc.env[0], offsetof(TestData,i32)));
 50 |         module.compile(proc);
 51 |     }
 52 |     {
 53 |         bjit::Proc      proc(0, "i");
 54 |         proc.iret(proc.lu8(proc.env[0], offsetof(TestData,u8)));
 55 |         module.compile(proc);
 56 |     }
 57 |     {
 58 |         bjit::Proc      proc(0, "i");
 59 |         proc.iret(proc.lu16(proc.env[0], offsetof(TestData,u16)));
 60 |         module.compile(proc);
 61 |     }
 62 |     {
 63 |         bjit::Proc      proc(0, "i");
 64 |         proc.iret(proc.lu32(proc.env[0], offsetof(TestData,u32)));
 65 |         module.compile(proc);
 66 |     }
 67 |     {
 68 |         bjit::Proc      proc(0, "i");
 69 |         proc.iret(proc.li64(proc.env[0], offsetof(TestData,i64)));
 70 |         module.compile(proc);
 71 |     }
 72 |     {
 73 |         bjit::Proc      proc(0, "i");
 74 |         proc.fret(proc.lf32(proc.env[0], offsetof(TestData,f32)));
 75 |         module.compile(proc);
 76 |     }
 77 |     {
 78 |         bjit::Proc      proc(0, "i");
 79 |         proc.dret(proc.lf64(proc.env[0], offsetof(TestData,f64)));
 80 |         module.compile(proc);
 81 |     }
 82 | 
 83 |     BJIT_ASSERT(module.load(0x10000));
 84 | 
 85 |     {
 86 |         auto & codeOut = module.getBytes();
 87 |         FILE * f = fopen("out.bin", "wb");
 88 |         fwrite(codeOut.data(), 1, codeOut.size(), f);
 89 |         fclose(f);
 90 |         printf(" - Wrote out.bin\n");
 91 |     }
 92 |     
 93 |     TestData    test;
 94 |     test.i8_64  = 0xc0c1c2c3c4c5c6c7;
 95 |     test.i16_64 = 0xd0d1d2d3d4d5d6d7;
 96 |     test.i32_64 = 0xe0e1e2e3e4e5e6e7;
 97 |     test.i64    = 0xf0f1f2f3f4f5f6f7;
 98 |     test.f32    = 1.5f;
 99 |     test.f64    = 3.14;
100 | 
101 |     BJIT_ASSERT(module.getPointer<uint64_t(TestData*)>(0)(&test) == test.i8);
102 |     BJIT_ASSERT(module.getPointer<uint64_t(TestData*)>(1)(&test) == test.i16);
103 |     BJIT_ASSERT(module.getPointer<uint64_t(TestData*)>(2)(&test) == test.i32);
104 | 
105 |     BJIT_ASSERT(module.getPointer<uint64_t(TestData*)>(3)(&test) == test.u8);
106 |     BJIT_ASSERT(module.getPointer<uint64_t(TestData*)>(4)(&test) == test.u16);
107 |     BJIT_ASSERT(module.getPointer<uint64_t(TestData*)>(5)(&test) == test.u32);
108 |     
109 |     BJIT_ASSERT(module.getPointer<uint64_t(TestData*)>(6)(&test) == test.i64);
110 |     
111 |     BJIT_ASSERT(module.getPointer<float(TestData*)>(7)(&test) == test.f32);
112 |     BJIT_ASSERT(module.getPointer<double(TestData*)>(8)(&test) == test.f64);
113 | 
114 |     {
115 |         bjit::Proc      proc(0, "ii");
116 |         proc.si8(proc.env[1], proc.env[0], offsetof(TestData,i8));
117 |         proc.iret(proc.lci(0));
118 |         module.compile(proc);
119 |     }
120 |     {
121 |         bjit::Proc      proc(0, "ii");
122 |         proc.si16(proc.env[1], proc.env[0], offsetof(TestData,i16));
123 |         proc.iret(proc.lci(0));
124 |         module.compile(proc);
125 |     }
126 |     {
127 |         bjit::Proc      proc(0, "ii");
128 |         proc.si32(proc.env[1], proc.env[0], offsetof(TestData,i32));
129 |         proc.iret(proc.lci(0));
130 |         module.compile(proc);
131 |     }
132 |     {
133 |         bjit::Proc      proc(0, "ii");
134 |         proc.si64(proc.env[1], proc.env[0], offsetof(TestData,i64));
135 |         proc.iret(proc.lci(0));
136 |         module.compile(proc);
137 |     }
138 |     {
139 |         bjit::Proc      proc(0, "if");
140 |         proc.sf32(proc.env[1], proc.env[0], offsetof(TestData,f32));
141 |         proc.iret(proc.lci(0));
142 |         module.compile(proc);
143 |     }
144 |     {
145 |         bjit::Proc      proc(0, "id");
146 |         proc.sf64(proc.env[1], proc.env[0], offsetof(TestData,f64));
147 |         proc.iret(proc.lci(0));
148 |         module.compile(proc);
149 |     }
150 | 
151 |     {
152 |         auto & codeOut = module.getBytes();
153 |         FILE * f = fopen("out.bin", "wb");
154 |         fwrite(codeOut.data(), 1, codeOut.size(), f);
155 |         fclose(f);
156 |         printf(" - Wrote out.bin\n");
157 |     }
158 |     
159 |     BJIT_ASSERT(module.patch());
160 |     uint64_t v = 0xf0f1f2f3f4f5f6f7;
161 | 
162 |     module.getPointer<void(TestData*,uint64_t)>(9)(&test, v);
163 |     BJIT_ASSERT(test.i8 == (int8_t) v);
164 | 
165 |     module.getPointer<void(TestData*,uint64_t)>(10)(&test, v);
166 |     BJIT_ASSERT(test.i16 == (int16_t) v);
167 |     
168 |     module.getPointer<void(TestData*,uint64_t)>(11)(&test, v);
169 |     BJIT_ASSERT(test.i32 == (int32_t) v);
170 | 
171 |     module.getPointer<void(TestData*,uint64_t)>(12)(&test, v);
172 |     BJIT_ASSERT(test.i64 == v);
173 | 
174 |     module.getPointer<void(TestData*,float)>(13)(&test, 3.14f);
175 |     BJIT_ASSERT(test.f32 == 3.14f);
176 |     
177 |     module.getPointer<void(TestData*,double)>(14)(&test, 1.5);
178 |     BJIT_ASSERT(test.f64 == 1.5);
179 |     
180 |     module.unload();
181 |     
182 |     return 0;
183 | }
184 | 


--------------------------------------------------------------------------------
/tests/test_loop.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "bjit.h"
 3 | 
 4 | int proc(int x, int y)
 5 | {
 6 |     // Reference loop in plain C++: the counter is bumped three times
 7 |     // per round; the first bump is tested against x and the second
 8 |     // against y, and whichever test fires first ends the loop.
 9 |     int count = 0;
10 |     for(;;)
11 |     {
12 |         count += 1; if(count >= x) return count;
13 |         count += 1; if(count >= y) return count;
14 |         count += 1;
15 |     }
16 |     // not reached: the loop only exits through the returns above
17 | }
18 | 
19 | int main()
20 | {
21 |     // JIT-compile the same loop as proc() above, dump the generated code
22 |     // to out.bin, then compare both versions on a set of hashed inputs.
23 |     bjit::Module    module;
24 |     {
25 |         bjit::Proc  pr(0, "ii");
26 | 
27 |         // extra env slot (env[2]) is the counter, starting at 0
28 |         pr.env.push_back(pr.lci(0));
29 | 
30 |         auto la = pr.newLabel();    // first increment, tested against env[0]
31 |         auto lb = pr.newLabel();    // second increment, tested against env[1]
32 |         auto lc = pr.newLabel();    // third increment, then back to la
33 |         auto le = pr.newLabel();    // exit: return the counter
34 | 
35 |         pr.jmp(la);
36 | 
37 |         pr.emitLabel(la);
38 |         pr.env[2] = pr.iadd(pr.env[2], pr.lci(1));
39 |         pr.jnz(pr.ige(pr.env[2], pr.env[0]), le, lb);
40 | 
41 |         pr.emitLabel(lb);
42 |         pr.env[2] = pr.iadd(pr.env[2], pr.lci(1));
43 |         pr.jnz(pr.ige(pr.env[2], pr.env[1]), le, lc);
44 | 
45 |         pr.emitLabel(lc);
46 |         pr.env[2] = pr.iadd(pr.env[2], pr.lci(1));
47 |         pr.jmp(la);
48 | 
49 |         pr.emitLabel(le);
50 |         pr.iret(pr.env[2]);
51 | 
52 |         pr.debug();
53 |         module.compile(pr);
54 |     }
55 | 
56 |     // Dump the generated code for offline inspection. The dump is only a
57 |     // debugging aid, so a failed fopen() is reported and skipped instead
58 |     // of handing a null FILE* to fwrite()/fclose().
59 |     auto & codeOut = module.getBytes();
60 |     FILE * f = fopen("out.bin", "wb");
61 |     if(f)
62 |     {
63 |         fwrite(codeOut.data(), 1, codeOut.size(), f);
64 |         fclose(f);
65 |         printf(" - Wrote out.bin\n");
66 |     }
67 |     else
68 |     {
69 |         fprintf(stderr, " - Could not open out.bin for writing\n");
70 |     }
71 |     BJIT_ASSERT(module.load());
72 | 
73 |     // x and y are taken from the two low bytes of a hash of the iteration
74 |     for(int i = 0; i < 16; ++i)
75 |     {
76 |         auto h = bjit::hash64(i+1);
77 |         int x = h&0xff;
78 |         int y = (h>>8)&0xff;
79 |         int z = proc(x,y);
80 |         int zjit = module.getPointer<int(int,int)>(0)(x,y);
81 |         printf("proc(%d,%d) = %d (jit says %d)\n", x, y, z, zjit);
82 |         BJIT_ASSERT(z == zjit);
83 |     }
84 | 
85 |     return 0;
86 | }
87 | 


--------------------------------------------------------------------------------
/tests/test_mem_opt.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "bjit.h"
 3 | 
 4 | int main()
 5 | {
 6 |     // Checks that a store placed between loads of the same address keeps
 7 |     // the later load from being merged with the earlier ones.
 8 |     bjit::Module    module;
 9 |     {
10 |         bjit::Proc  pr(0, "i");
11 | 
12 |         // these should get CSE
13 |         auto a = pr.iadd(pr.li32(pr.env[0],0), pr.li32(pr.env[0],0));
14 | 
15 |         // this should break CSE on loads
16 |         pr.si32(pr.lci(1), pr.env[0], 0);
17 | 
18 |         pr.iret(pr.iadd(a, pr.li32(pr.env[0],0)));
19 | 
20 |         pr.debug();
21 |         module.compile(pr);
22 |     }
23 |     // NOTE(review): this second procedure is identical to the first and is
24 |     // never called below - presumably it only exercises compiling a second
25 |     // procedure into the same module; confirm the intent.
26 |     {
27 |         bjit::Proc  pr(0, "i");
28 | 
29 |         // these should get CSE
30 |         auto a = pr.iadd(pr.li32(pr.env[0],0), pr.li32(pr.env[0],0));
31 | 
32 |         // this should break CSE on loads
33 |         pr.si32(pr.lci(1), pr.env[0], 0);
34 | 
35 |         pr.iret(pr.iadd(a, pr.li32(pr.env[0],0)));
36 | 
37 |         pr.debug();
38 |         module.compile(pr);
39 |     }
40 | 
41 |     // Dump the generated code for offline inspection. The dump is only a
42 |     // debugging aid, so a failed fopen() is reported and skipped instead
43 |     // of handing a null FILE* to fwrite()/fclose().
44 |     auto & codeOut = module.getBytes();
45 |     FILE * f = fopen("out.bin", "wb");
46 |     if(f)
47 |     {
48 |         fwrite(codeOut.data(), 1, codeOut.size(), f);
49 |         fclose(f);
50 |         printf(" - Wrote out.bin\n");
51 |     }
52 |     else
53 |     {
54 |         fprintf(stderr, " - Could not open out.bin for writing\n");
55 |     }
56 |     BJIT_ASSERT(module.load());
57 | 
58 |     int v = 42;
59 | 
60 |     // two loads of 42 before the store plus one load of 1 after it
61 |     BJIT_ASSERT((2*42+1) == module.getPointer<int(int*)>(0)(&v));
62 |     BJIT_ASSERT(v == 1);
63 | 
64 |     return 0;
65 | }
66 | 


--------------------------------------------------------------------------------
/tests/test_shift.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "bjit.h"
 3 | 
 4 | int main()
 5 | {
 6 |     // Compiles six shift procedures, dumps the code to out.bin and
 7 |     // compares each one against the matching C++ shift expression.
 8 |     bjit::Module    module;
 9 | 
10 |     // procedures 0-2: the shift amount is the second argument
11 |     {
12 |         bjit::Proc      proc(0, "ii");
13 |         proc.iret(proc.ishl(proc.env[0], proc.env[1]));
14 |         module.compile(proc);
15 |     }
16 |     {
17 |         bjit::Proc      proc(0, "ii");
18 |         proc.iret(proc.ishr(proc.env[0], proc.env[1]));
19 |         module.compile(proc);
20 |     }
21 |     {
22 |         bjit::Proc      proc(0, "ii");
23 |         proc.iret(proc.ushr(proc.env[0], proc.env[1]));
24 |         module.compile(proc);
25 |     }
26 | 
27 |     // procedures 3-5: the same operations with a constant shift of 3
28 |     {
29 |         bjit::Proc      proc(0, "i");
30 |         proc.iret(proc.ishl(proc.env[0], proc.lci(3)));
31 |         module.compile(proc);
32 |     }
33 |     {
34 |         bjit::Proc      proc(0, "i");
35 |         proc.iret(proc.ishr(proc.env[0], proc.lci(3)));
36 |         module.compile(proc);
37 |     }
38 |     {
39 |         bjit::Proc      proc(0, "i");
40 |         proc.iret(proc.ushr(proc.env[0], proc.lci(3)));
41 |         module.compile(proc);
42 |     }
43 | 
44 |     // Dump the generated code for offline inspection. The dump is only a
45 |     // debugging aid, so a failed fopen() is reported and skipped instead
46 |     // of handing a null FILE* to fwrite()/fclose().
47 |     auto & codeOut = module.getBytes();
48 |     FILE * f = fopen("out.bin", "wb");
49 |     if(f)
50 |     {
51 |         fwrite(codeOut.data(), 1, codeOut.size(), f);
52 |         fclose(f);
53 |         printf(" - Wrote out.bin\n");
54 |     }
55 |     else
56 |     {
57 |         fprintf(stderr, " - Could not open out.bin for writing\n");
58 |     }
59 | 
60 |     BJIT_ASSERT(module.load());
61 | 
62 |     int64_t     s = 3;
63 |     uint64_t    u = 5;
64 | 
65 |     BJIT_ASSERT(module.getPointer<uint64_t(uint64_t,uint64_t)>(0)(s,3) == (s<<3));
66 |     BJIT_ASSERT(module.getPointer<uint64_t(uint64_t,uint64_t)>(1)(~s,3) == (~s>>3));
67 |     BJIT_ASSERT(module.getPointer<uint64_t(uint64_t,uint64_t)>(2)(~u,3) == (~u>>3));
68 | 
69 |     BJIT_ASSERT(module.getPointer<uint64_t(uint64_t)>(3)(s) == (s<<3));
70 |     BJIT_ASSERT(module.getPointer<uint64_t(uint64_t)>(4)(~s) == (~s>>3));
71 |     BJIT_ASSERT(module.getPointer<uint64_t(uint64_t)>(5)(~u) == (~u>>3));
72 | 
73 |     return 0;
74 | }
75 | 


--------------------------------------------------------------------------------
/tests/test_sieve.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "bjit.h"
  3 | 
  4 | #ifdef _WIN32
  5 | #pragma comment(lib,"winmm.lib")
  6 | #include <windows.h>
  7 | // Millisecond tick count from timeGetTime() (needs winmm.lib, see pragma).
  8 | static inline unsigned getTimeMs() { return timeGetTime(); }
  9 | #else
 10 | #include <sys/time.h>
 11 | // Wall-clock time in milliseconds via gettimeofday(), narrowed to unsigned.
 12 | static inline unsigned getTimeMs()
 13 | {
 14 | 	timeval now;
 15 | 	gettimeofday(&now, NULL);
 16 | 	const auto fromSeconds = now.tv_sec * 1000;
 17 | 	return fromSeconds + (now.tv_usec / 1000);
 18 | }
 19 | #endif
 20 | 
 21 | int sieve(char * flags, int size)
 22 | {
 23 |     // Plain C++ reference for the benchmark: mark every slot, then for
 24 |     // each index i >= 2 still marked, clear slots i + n*(i+1) for n >= 1
 25 |     // that lie below size, and count i. Returns that count.
 26 |     int idx = 0;
 27 |     while (idx < size) flags[idx++] = true;
 28 | 
 29 |     int found = 0;
 30 |     for (int i = 2; i < size; ++i)
 31 |     {
 32 |         if (!flags[i]) continue;
 33 | 
 34 |         // the stride is i + 1 and the first cleared slot is i + stride
 35 |         const int step = i + 1;
 36 |         for (int k = i + step; k < size; k += step)
 37 |         {
 38 |             flags[k] = false;
 39 |         }
 40 | 
 41 |         ++found;
 42 |     }
 43 |     return found;
 44 | }
 45 | 
 46 | static char data[819000];  // flag buffer handed to both sieve() and the jitted procedure
 47 | 
 48 | int main()
 49 | {
 50 |     // Builds the JIT equivalent of sieve() above, checks that both give
 51 |     // the same count on the shared buffer, then times 1000 runs of each.
 52 |     bjit::Module module;
 53 |     {
 54 |         bjit::Proc  pr(0, "ii");
 55 | 
 56 |         // env slots holding the two arguments
 57 |         int _flags = 0;
 58 |         int _size = 1;
 59 | 
 60 |         // i = 0
 61 |         int _i      = pr.env.size(); pr.env.push_back(pr.lci(0));
 62 |         // count = 0
 63 |         int _count  = pr.env.size(); pr.env.push_back(pr.lci(0));
 64 | 
 65 |         auto ls0 = pr.newLabel();
 66 |         auto lb0 = pr.newLabel();
 67 |         auto le0 = pr.newLabel();
 68 | 
 69 |         pr.jmp(ls0);
 70 | 
 71 |         pr.emitLabel(ls0);
 72 |         // while i < size
 73 |         pr.jz(pr.ilt(pr.env[_i], pr.env[_size]), le0, lb0);
 74 |         pr.emitLabel(lb0);
 75 | 
 76 |             // *(flags + i) = 1
 77 |             pr.si8(pr.lci(1), pr.iadd(pr.env[_flags], pr.env[_i]), 0);
 78 |             // ++i
 79 |             pr.env[_i] = pr.iadd(pr.env[_i], pr.lci(1));
 80 |             pr.jmp(ls0);
 81 | 
 82 |         pr.emitLabel(le0);
 83 | 
 84 |         // i = 2
 85 |         pr.env[_i] = pr.lci(2);
 86 |         auto ls1 = pr.newLabel();
 87 |         auto lb1 = pr.newLabel();
 88 |         auto le1 = pr.newLabel();
 89 | 
 90 |         pr.jmp(ls1);
 91 |         pr.emitLabel(ls1);
 92 | 
 93 |         // while i < size
 94 |         pr.jz(pr.ilt(pr.env[_i], pr.env[_size]), le1, lb1);
 95 |         pr.emitLabel(lb1);
 96 | 
 97 |             auto bt = pr.newLabel();
 98 |             auto be = pr.newLabel();
 99 | 
100 |             // if flags[i] != 0
101 |             pr.jnz(pr.li8(pr.iadd(pr.env[_flags], pr.env[_i]), 0), bt, be);
102 |             pr.emitLabel(bt);
103 | 
104 |                 // prime = i + 1
105 |                 int _prime  = pr.env.size();
106 |                 pr.env.push_back(pr.iadd(pr.env[_i], pr.lci(1)));
107 | 
108 |                 // k = i + prime
109 |                 int _k = pr.env.size();
110 |                 pr.env.push_back(pr.iadd(pr.env[_i], pr.env[_prime]));
111 | 
112 |                 auto ls2 = pr.newLabel();
113 |                 auto lb2 = pr.newLabel();
114 |                 auto le2 = pr.newLabel();
115 | 
116 |                 pr.jmp(ls2);
117 |                 pr.emitLabel(ls2);
118 | 
119 |                 // while k < size
120 |                 pr.jnz(pr.ilt(pr.env[_k], pr.env[_size]), lb2, le2);
121 |                 pr.emitLabel(lb2);
122 | 
123 |                     // flags[k] = false;
124 |                     pr.si8(pr.lci(0), pr.iadd(pr.env[_flags], pr.env[_k]), 0);
125 | 
126 |                     // k = k + prime
127 |                     pr.env[_k] = pr.iadd(pr.env[_k], pr.env[_prime]);
128 | 
129 |                     pr.jmp(ls2);
130 | 
131 |                 pr.emitLabel(le2);
132 | 
133 |                 pr.env.pop_back();  // k
134 |                 pr.env.pop_back();  // prime
135 | 
136 |                 pr.env[_count] = pr.iadd(pr.env[_count], pr.lci(1));
137 | 
138 |                 pr.jmp(be);
139 | 
140 |             pr.emitLabel(be);
141 | 
142 |             // ++i
143 |             pr.env[_i] = pr.iadd(pr.env[_i], pr.lci(1));
144 | 
145 |             pr.jmp(ls1);
146 | 
147 |         pr.emitLabel(le1);
148 | 
149 |         pr.iret(pr.env[_count]);
150 | 
151 |         pr.debug();
152 | 
153 |         module.compile(pr);
154 |     }
155 | 
156 |     // Dump the generated code for offline inspection. The dump is only a
157 |     // debugging aid, so a failed fopen() is reported and skipped instead
158 |     // of handing a null FILE* to fwrite()/fclose().
159 |     auto & codeOut = module.getBytes();
160 |     FILE * f = fopen("out.bin", "wb");
161 |     if(f)
162 |     {
163 |         fwrite(codeOut.data(), 1, codeOut.size(), f);
164 |         fclose(f);
165 |         printf(" - Wrote out.bin\n");
166 |     }
167 |     else
168 |     {
169 |         fprintf(stderr, " - Could not open out.bin for writing\n");
170 |     }
171 |     BJIT_ASSERT(module.load());
172 | 
173 |     auto proc = module.getPointer<int(char*,int)>(0);
174 | 
175 |     printf("C-sieve: %d primes\n", sieve(data, sizeof(data)));
176 |     printf("BJIT-sieve: %d primes\n", proc(data, sizeof(data)));
177 | 
178 |     BJIT_ASSERT(sieve(data, sizeof(data)) == proc(data, sizeof(data)));
179 | 
180 |     printf("Iterating 1000 times...\n");
181 |     {
182 |         auto start = getTimeMs();
183 | 
184 |         for(int i = 0; i < 1000; ++i)
185 |         {
186 |             sieve(data, sizeof(data));
187 |         }
188 | 
189 |         // getTimeMs() returns unsigned, so the difference is printed with %u
190 |         printf("C time: %ums\n", getTimeMs() - start);
191 |     }
192 | 
193 |     {
194 |         auto start = getTimeMs();
195 | 
196 |         for(int i = 0; i < 1000; ++i)
197 |         {
198 |             proc(data, sizeof(data));
199 |         }
200 | 
201 |         printf("BJIT time: %ums\n", getTimeMs() - start);
202 |     }
203 | 
204 |     return 0;
205 | }


--------------------------------------------------------------------------------
/tests/test_sub_ii.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "bjit.h"
 3 | 
 4 | int main()
 5 | {
 6 |     // Compiles a two-argument procedure returning env[0] - env[1] and
 7 |     // checks the jitted code on the pair (5, 2).
 8 |     bjit::Module    module;
 9 |     bjit::Proc      proc(0, "ii");
10 | 
11 |     auto difference = proc.isub(proc.env[0], proc.env[1]);
12 |     proc.iret(difference);
13 | 
14 |     const int procIndex = module.compile(proc);
15 |     BJIT_ASSERT(module.load());
16 | 
17 |     auto jitSub = module.getPointer<int(int,int)>(procIndex);
18 |     printf(" 5 - 2 = %d\n", jitSub(5, 2));
19 |     BJIT_ASSERT(jitSub(5,2) == 3);
20 | 
21 |     return 0;
22 | }
23 | 


--------------------------------------------------------------------------------
/tests/test_sx_zx.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "bjit.h"
 3 | 
 4 | #include <cstdint>
 5 | 
 6 | int main()
 7 | {
 8 |     // Compiles one procedure per extension op, dumps the code to out.bin
 9 |     // and checks each result against the expected 64-bit value.
10 |     bjit::Module    module;
11 | 
12 |     // procedures 0-2: u8 / u16 / u32
13 |     {
14 |         bjit::Proc      proc(0, "i");
15 |         proc.iret(proc.u8(proc.env[0]));
16 |         module.compile(proc);
17 |     }
18 |     {
19 |         bjit::Proc      proc(0, "i");
20 |         proc.iret(proc.u16(proc.env[0]));
21 |         module.compile(proc);
22 |     }
23 |     {
24 |         bjit::Proc      proc(0, "i");
25 |         proc.iret(proc.u32(proc.env[0]));
26 |         module.compile(proc);
27 |     }
28 |     // procedures 3-5: i8 / i16 / i32
29 |     {
30 |         bjit::Proc      proc(0, "i");
31 |         proc.iret(proc.i8(proc.env[0]));
32 |         module.compile(proc);
33 |     }
34 |     {
35 |         bjit::Proc      proc(0, "i");
36 |         proc.iret(proc.i16(proc.env[0]));
37 |         module.compile(proc);
38 |     }
39 |     {
40 |         bjit::Proc      proc(0, "i");
41 |         proc.iret(proc.i32(proc.env[0]));
42 |         module.compile(proc);
43 |     }
44 | 
45 |     // Dump the generated code for offline inspection. The dump is only a
46 |     // debugging aid, so a failed fopen() is reported and skipped instead
47 |     // of handing a null FILE* to fwrite()/fclose().
48 |     auto & codeOut = module.getBytes();
49 |     FILE * f = fopen("out.bin", "wb");
50 |     if(f)
51 |     {
52 |         fwrite(codeOut.data(), 1, codeOut.size(), f);
53 |         fclose(f);
54 |         printf(" - Wrote out.bin\n");
55 |     }
56 |     else
57 |     {
58 |         fprintf(stderr, " - Could not open out.bin for writing\n");
59 |     }
60 | 
61 |     BJIT_ASSERT(module.load());
62 | 
63 |     uint64_t v = 0xfedcba9876543210ull;
64 | 
65 |     // unsigned variants: only the low 8 / 16 / 32 bits of v are expected
66 |     BJIT_ASSERT(module.getPointer<uint64_t(uint64_t)>(0)(v) == 0x10);
67 |     BJIT_ASSERT(module.getPointer<uint64_t(uint64_t)>(1)(v) == 0x3210);
68 |     BJIT_ASSERT(module.getPointer<uint64_t(uint64_t)>(2)(v) == 0x76543210);
69 | 
70 |     // signed variants: low bits all set must give all ones in 64 bits
71 |     BJIT_ASSERT(module.getPointer<uint64_t(uint64_t)>(3)(0x2ff) == ~0ull);
72 |     BJIT_ASSERT(module.getPointer<uint64_t(uint64_t)>(4)(0x2ffff) == ~0ull);
73 |     BJIT_ASSERT(module.getPointer<uint64_t(uint64_t)>(5)(0x2ffffffff) == ~0ull);
74 | 
75 |     return 0;
76 | }
77 | 


--------------------------------------------------------------------------------
/win/README.md:
--------------------------------------------------------------------------------
1 | These files are used to support `make` on Windows.
2 | 


--------------------------------------------------------------------------------
/win/mkdir-p.bat:
--------------------------------------------------------------------------------
1 | @echo off
2 | rem Create the directory named by the first argument unless it already exists.
3 | rem The argument is stripped of any quotes and re-quoted so that paths
4 | rem containing spaces are handled as a single name.
5 | setlocal enableextensions
6 | if not exist "%~1" mkdir "%~1"
7 | endlocal
8 | 


--------------------------------------------------------------------------------
/win/rm-rf.bat:
--------------------------------------------------------------------------------
1 | @echo off
2 | rem Remove the directory named by the first argument, with everything in it,
3 | rem if it exists. The argument is stripped of any quotes and re-quoted so
4 | rem that paths containing spaces are handled as a single name.
5 | setlocal enableextensions
6 | if exist "%~1" rmdir /S /Q "%~1"
7 | endlocal
8 | 


--------------------------------------------------------------------------------