├── .gitignore ├── aha.pdf ├── test1.frag.c ├── abs.frag.c ├── bool.frag.c ├── invbool.frag.c ├── onescomplement.frag.c ├── bic3.frag.c ├── round4.frag.c ├── negbool.frag.c ├── bitblt.frag.c ├── artificial.frag.c ├── simulator.h ├── avg.frag.c ├── Makefile ├── example.frag.c ├── config.h ├── README.md ├── machine.h ├── verify └── test.c ├── simulator.c ├── aha.h ├── read.me └── aha.c /.gitignore: -------------------------------------------------------------------------------- 1 | *.d 2 | *.o 3 | -------------------------------------------------------------------------------- /aha.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dpt/Aha/HEAD/aha.pdf -------------------------------------------------------------------------------- /test1.frag.c: -------------------------------------------------------------------------------- 1 | /* test1.frag.c */ 2 | 3 | #include "aha.h" 4 | 5 | int userfun(int x) 6 | { 7 | return x * 7; 8 | } 9 | -------------------------------------------------------------------------------- /abs.frag.c: -------------------------------------------------------------------------------- 1 | /* abs.frag.c */ 2 | 3 | #include "aha.h" 4 | 5 | int userfun(int x) 6 | { 7 | return x >= 0 ? x : -x; 8 | } 9 | -------------------------------------------------------------------------------- /bool.frag.c: -------------------------------------------------------------------------------- 1 | /* bool.frag.c */ 2 | 3 | #include "aha.h" 4 | 5 | int userfun(int x) 6 | { 7 | return !!x; // x -> 1/0 8 | } 9 | -------------------------------------------------------------------------------- /invbool.frag.c: -------------------------------------------------------------------------------- 1 | /* invbool.frag.c */ 2 | 3 | #include "aha.h" 4 | 5 | int userfun(int x) 6 | { 7 | return !x; // x -> 1/0 8 | } 9 | -------------------------------------------------------------------------------- /onescomplement.frag.c: -------------------------------------------------------------------------------- 1 | /* onescomplement.frag.c */ 2 | 3 | #include "aha.h" 4 | 5 | int userfun(int x) 6 | { 7 | return ~x; 8 | } 9 | -------------------------------------------------------------------------------- /bic3.frag.c: -------------------------------------------------------------------------------- 1 | /* bic3.frag.c */ 2 | /* ops to yield x &~ 3 */ 3 | 4 | #include "aha.h" 5 | 6 | int userfun(int x) 7 | { 8 | return x & ~3; 9 | } 10 | -------------------------------------------------------------------------------- /round4.frag.c: -------------------------------------------------------------------------------- 1 | /* round4.frag.c */ 2 | /* ops to yield (x + 2) & ~3 */ 3 | 4 | #include "aha.h" 5 | 6 | int userfun(int x) 7 | { 8 | return (x + 2) & ~3; 9 | } 10 | -------------------------------------------------------------------------------- /negbool.frag.c: -------------------------------------------------------------------------------- 1 | /* negbool.frag.c -- zero or minus one type bools */ 2 | 3 | #include "aha.h" 4 | 5 | int userfun(int x) 6 | { 7 | return x ? -1 : 0; 8 | } 9 | 10 | -------------------------------------------------------------------------------- /bitblt.frag.c: -------------------------------------------------------------------------------- 1 | /* bitblt.frag.c */ 2 | /* bitmap plotting type ops: (dst & ~mask) | (src & mask) */ 3 | 4 | #define NARGS 3 5 | 6 | #include "aha.h" 7 | 8 | int userfun(int dst, int src, int mask) 9 | { 10 | return (dst & ~mask) | (src & mask); 11 | } 12 | -------------------------------------------------------------------------------- /artificial.frag.c: -------------------------------------------------------------------------------- 1 | // artificial.frag.c 2 | // 3 | // Artificial example input for Aha! from my blog post 4 | // http://www.davespace.co.uk/blog/20150131-branchless-sequences.html 5 | 6 | #include "aha.h" 7 | 8 | int userfun(int x) 9 | { 10 | if (x == 0) 11 | return 1; 12 | else if (x == 1) 13 | return 2; 14 | else 15 | return 0; 16 | } 17 | -------------------------------------------------------------------------------- /simulator.h: -------------------------------------------------------------------------------- 1 | /* simulator.h */ 2 | 3 | #ifndef SIMULATOR_H 4 | #define SIMULATOR_H 5 | 6 | typedef int (simproc)(int, int, int); 7 | 8 | simproc neg, _not, pop, nlz, rev, revb, add, sub, rsb, mul, _div, _divu, 9 | _mod, _modu, _and, _or, _xor, _bic, rotl, rotr, shl, shr, shrs, 10 | cmpeq, cmplt, cmpltu, seleq, sellt, selle; 11 | 12 | #endif /* SIMULATOR_H */ 13 | 14 | -------------------------------------------------------------------------------- /avg.frag.c: -------------------------------------------------------------------------------- 1 | /* avg.frag.c */ 2 | 3 | #define NARGS 2 // Number of args in userfun, 1 or 2. 4 | 5 | #include "aha.h" 6 | 7 | int userfun(int x, int y) { // To find Dietz's formula for 8 | // the floor-average of two 9 | // unsigned integers. 10 | return ((unsigned long long)x + (unsigned long long)y) >> 1; 11 | } 12 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .SUFFIXES: .o 2 | 3 | EXAMPLE = example 4 | 5 | CC = gcc 6 | CXX = g++ 7 | DEFINES = -DINC=\"$(EXAMPLE).frag.c\" -DOFILE=\"$(EXAMPLE).out\" 8 | CFLAGS = -O2 -Wall -Wextra -Wno-unused-variable -Wno-unused-parameter -MMD -I. $(DEFINES) 9 | CXXFLAGS = $(CFLAGS) 10 | 11 | SRCS = aha.c simulator.c 12 | OBJS = $(SRCS:.c=.o) 13 | 14 | .c.o: 15 | $(CC) -c $(CFLAGS) -o $@ $< 16 | 17 | .PHONY: all clean 18 | 19 | all: aha 20 | 21 | aha: $(OBJS) 22 | $(CC) $(CFLAGS) -o $@ $(OBJS) 23 | 24 | clean: 25 | $(RM) -f $(OBJS) aha core *~ *.bak *.d 26 | 27 | # Dependencies 28 | 29 | aha.o: $(EXAMPLE).frag.c 30 | 31 | -include $(SRCS:.c=.d) 32 | 33 | -------------------------------------------------------------------------------- /example.frag.c: -------------------------------------------------------------------------------- 1 | /* This is the function for which it is desired to find more efficient 2 | code. It must have either one or two arguments, both int, and must 3 | return a 32-bit int quantity. It is declared in aha.h. */ 4 | 5 | #include "aha.h" 6 | 7 | int userfun(int x) { 8 | // if (x > 0) return 1; // x > 0 predicate. 9 | // else return 0; // Turn off div & divu. 10 | // Found a new formula for HD. 11 | 12 | // if (x >= 32) return 0; 13 | // return 1 << (unsigned)x; 14 | 15 | // return 3*x + 1; 16 | 17 | if (x >= 0) return x; // Absolute value. 18 | else return -x; 19 | 20 | // return (x & 0xfffffffc) | ((x & 1) << 1) | ((x & 2) >> 1); 21 | // Swap rightmost 2 bits. 22 | } 23 | -------------------------------------------------------------------------------- /config.h: -------------------------------------------------------------------------------- 1 | /* config.h */ 2 | 3 | #ifndef CONFIG_H 4 | #define CONFIG_H 5 | 6 | #define MAXNEG 0x80000000 7 | #define MAXPOS 0x7FFFFFFF 8 | #define NBSM 31 // Shift mask. Use 63 for mod 64 9 | // shifts, or 31 for mod 32. 10 | #define TRIAL {1, 0, -1, \ 11 | MAXNEG, MAXPOS, MAXNEG + 1, MAXPOS - 1, \ 12 | 0x01234567, 0x89ABCDEF, -2, 2, -3, 3, -64, 64, -5, -31415, \ 13 | 0x0000FFFF, 0xFFFF0000, \ 14 | 0x000000FF, 0x0000FF00, 0x00FF0000, 0xFF000000, \ 15 | 0x0000000F, 0x000000F0, 0x00000F00, 0x0000F000, \ 16 | 0x000F0000, 0x00F00000, 0x0F000000, 0xF0000000} 17 | 18 | // First three values of IMMEDS must be 0, -1, and 1. 19 | #define IMMEDS 0, -1, 1, -2, 2, MAXNEG 20 | #define SHIMMEDS 1, 2, 3, 30, 31 21 | 22 | #endif /* CONFIG_H */ 23 | 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Aha! 2 | ==== 3 | 4 | This is Henry S. Warren's "Aha!" superoptimiser tool with a couple of mods make it more appropriate for targeting ARM and Thumb code. e.g. It knows about the `BIC` and `RSB` instructions. 5 | 6 | It also has various rejiggery of source the exact nature of which escapes me at the moment. The original source is available on the 'distrib' branch. 7 | 8 | Build a binary with: 9 | 10 | make EXAMPLE= 11 | 12 | Where `` is one of the xxx.frag.c files in the source directory. 13 | 14 | Then run the resultant 'aha' with the number of target instructions. 15 | 16 | ./aha 2 17 | 18 | Enjoy! 19 | 20 | 21 | Related Material 22 | ---------------- 23 | 24 | In January 2015 I wrote [an article about discovering branchless code sequences using Aha!](http://www.davespace.co.uk/blog/20150131-branchless-sequences.html). 25 | 26 | In September 2016 I gave [a presentation about superoptimisers](http://slides.com/dpt/aha#/) to my then colleagues at [Inside Secure](https://www.insidesecure.com/). 27 | 28 | In 2017 my friend Nick Gildea built a similar tool which [uses z3 for synthesis](https://github.com/nickgildea/z3_codegen). 29 | -------------------------------------------------------------------------------- /machine.h: -------------------------------------------------------------------------------- 1 | /* machine.h */ 2 | 3 | #ifndef MACHINE_H 4 | #define MACHINE_H 5 | 6 | // The machine's instruction set: 7 | static const isa_t isa[] = { 8 | {neg, 1, 0, {RX, 0, 0}, "neg", "-(", "" }, // Negate. 9 | // {_not, 1, 0, {RX, 0, 0}, "not", "~(", "" }, // One's-complement. 10 | // {pop, 1, 0, {RX, 0, 0}, "pop", "pop(", "" }, // Population count. 11 | // {nlz, 1, 0, {RX, 0, 0}, "nlz", "nlz(", "" }, // Num leading 0's. 12 | // {rev, 1, 0, {RX, 0, 0}, "rev", "rev(", "" }, // Bit reversal. 13 | {add, 2, 1, {RX, 2, 0}, "add", "(", " + " }, // Add. 14 | {sub, 2, 0, { 2, 2, 0}, "sub", "(", " - " }, // Subtract. 15 | // {rsb, 2, 0, { 2, 2, 0}, "rsb", "(", " -r " }, // Reverse subtract. 16 | // {mul, 2, 1, {RX, 3, 0}, "mul", "(", " * " }, // Multiply. 17 | // {_div, 2, 0, { 1, 3, 0}, "div", "(", " / " }, // Divide signed. 18 | // {_divu, 2, 0, { 1, 1, 0}, "divu", "(", " /u " }, // Divide unsigned. 19 | // {_mod, 2, 0, { 1, 3, 0}, "mod", "(", " % " }, // Modulus signed. 20 | // {_modu, 2, 0, { 1, 1, 0}, "modu", "(", " %u " }, // Modulus unsigned. 21 | {_and, 2, 1, {RX, 2, 0}, "and", "(", " & " }, // AND. 22 | {_or, 2, 1, {RX, 2, 0}, "or", "(", " | " }, // OR. 23 | {_xor, 2, 1, {RX, 2, 0}, "xor", "(", " ^ " }, // XOR. 24 | {_bic, 2, 1, {RX, 2, 0}, "bic", "(", " & ~" }, // AND-NOT / bitwise clear. 25 | // {rotl, 2, 0, { 1,NIM, 0}, "rotl", "(", " <>r "}, // Rotate shift right. 27 | {shl, 2, 0, { 1,NIM, 0}, "shl", "(", " << " }, // Shift left. 28 | {shr, 2, 0, { 1,NIM, 0}, "shr", "(", " >>u "}, // Shift right. 29 | {shrs, 2, 0, { 3,NIM, 0}, "shrs", "(", " >>s "}, // Shift right signed. 30 | // {cmpeq, 2, 1, {RX, 0, 0}, "cmpeq", "(", " == " }, // Compare equal. 31 | // {cmplt, 2, 0, { 0, 0, 0}, "cmplt", "(", " < " }, // Compare less than. 32 | // {cmpltu, 2, 0, { 1, 1, 0}, "cmpltu","(", " 4 | #include 5 | 6 | /* ----------------------------------------------------------------------- */ 7 | 8 | typedef unsigned int T; 9 | typedef T (testfn_t)(T); 10 | 11 | /* ----------------------------------------------------------------------- */ 12 | 13 | /* reference version of our original operation */ 14 | static T reference(T x) 15 | { 16 | if (x == 0) 17 | return 1; 18 | else if (x == 1) 19 | return 2; 20 | else 21 | return 0; 22 | } 23 | 24 | static T branchless1(T x) 25 | { 26 | return (((x + -2) & ~x) >> 31) << x; 27 | } 28 | 29 | static T branchless2(T x) 30 | { 31 | return (((x - 2) & ~x) >> 31) << x; 32 | } 33 | 34 | static T branchless3(T x) 35 | { 36 | return (((x >> 1) - 1) >> 31) << x; 37 | } 38 | 39 | static T branchless4(T x) 40 | { 41 | return ((0x80000000 - (x >> 1)) >> 31) << x; 42 | } 43 | 44 | /* ----------------------------------------------------------------------- */ 45 | 46 | static const struct 47 | { 48 | testfn_t *branchless, *reference; 49 | } 50 | tests[] = 51 | { 52 | { &branchless1, &reference }, 53 | { &branchless2, &reference }, 54 | { &branchless3, &reference }, 55 | { &branchless4, &reference }, 56 | }; 57 | 58 | /* ----------------------------------------------------------------------- */ 59 | 60 | #define NELEMS(a) (int)(sizeof(a) / sizeof(a[0])) 61 | 62 | int main(void) 63 | { 64 | int j; 65 | unsigned int i; 66 | 67 | for (j = 0; j < NELEMS(tests); j++) 68 | { 69 | testfn_t *fn, *ref; 70 | int nfailures; 71 | 72 | printf("starting test %d\n", j); 73 | 74 | fn = tests[j].branchless; 75 | ref = tests[j].reference; 76 | 77 | nfailures = 0; 78 | 79 | /* test all values from zero to UINT_MAX */ 80 | for (i = 0; ; i++) 81 | { 82 | if (fn(i) != ref(i)) 83 | if (++nfailures < 20) /* report first twenty failures only */ 84 | printf("failure at %d\n", i); 85 | 86 | if ((i & 0x03ffffff) == 0) /* draw 64 dots */ 87 | { 88 | putc('.', stdout); 89 | fflush(stdout); 90 | } 91 | 92 | if (i == UINT_MAX) /* test here to prevent infinite loop */ 93 | break; 94 | } 95 | printf("\n"); 96 | 97 | if (nfailures == 0) 98 | printf("all ok!\n"); 99 | else 100 | printf("saw %d failures\n", nfailures); 101 | } 102 | 103 | return 0; 104 | } 105 | -------------------------------------------------------------------------------- /simulator.c: -------------------------------------------------------------------------------- 1 | /* simulator.c */ 2 | 3 | /* Collection of simulator routines for the instructions in the isa. */ 4 | 5 | #include "config.h" 6 | 7 | #include "simulator.h" 8 | 9 | extern int unacceptable; 10 | 11 | int neg(int x, int y, int z) {return -x;} 12 | int _not(int x, int y, int z) {return ~x;} 13 | int pop(int xx, int y, int z) { 14 | unsigned x = xx; 15 | x = x - ((x >> 1) & 0x55555555); 16 | x = (x & 0x33333333) + ((x >> 2) & 0x33333333); 17 | x = (x + (x >> 4)) & 0x0F0F0F0F; 18 | x = x + (x << 8); 19 | x = x + (x << 16); 20 | return x >> 24; 21 | } 22 | 23 | int nlz(int xx, int y, int z) { 24 | unsigned x = xx; 25 | int n; 26 | 27 | if (x == 0) return(32); 28 | n = 0; 29 | if (x <= 0x0000FFFF) {n = n +16; x = x <<16;} 30 | if (x <= 0x00FFFFFF) {n = n + 8; x = x << 8;} 31 | if (x <= 0x0FFFFFFF) {n = n + 4; x = x << 4;} 32 | if (x <= 0x3FFFFFFF) {n = n + 2; x = x << 2;} 33 | if (x <= 0x7FFFFFFF) {n = n + 1;} 34 | return n; 35 | } 36 | 37 | int rev(int xi, int y, int z) { 38 | unsigned x = xi; 39 | x = ((x & 0x55555555) << 1) | ((x >> 1) & 0x55555555); 40 | x = ((x & 0x33333333) << 2) | ((x >> 2) & 0x33333333); 41 | x = ((x & 0x0F0F0F0F) << 4) | ((x >> 4) & 0x0F0F0F0F); 42 | x = (x << 24) | ((x & 0xFF00) << 8) | ((x >> 8) & 0xFF00) | (x >> 24); 43 | return x; 44 | } 45 | 46 | int revb(int xi, int y, int z) { 47 | unsigned x = xi; 48 | return ((x & 0x000000FF) << 24) | ((x >> 24) & 0x000000FF) | 49 | ((x & 0x0000FF00) << 8) | ((x >> 8) & 0x0000FF00); 50 | } 51 | 52 | 53 | int add (int x, int y, int z) {return x + y;} 54 | int sub (int x, int y, int z) {return x - y;} 55 | int rsb (int x, int y, int z) {return y - x;} 56 | int mul (int x, int y, int z) {return x * y;} 57 | /* For division overflow we return arbitrary values, hoping they fail 58 | to be part of a solution. (User must check solutions, in general.) */ 59 | int _div (int x, int y, int z) { 60 | if (y == 0 || (y == -1 && x == (int)0x80000000)) 61 | {unacceptable = 1; return 0;} 62 | else return x/y;} 63 | int _divu(int x, int y, int z) { 64 | if (y == 0) {unacceptable = 1; return 0;} 65 | else return (unsigned)x/(unsigned)y;} 66 | int _mod (int x, int y, int z) { 67 | if (y == 0 || (y == -1 && x == (int)0x80000000)) 68 | {unacceptable = 1; return 0;} 69 | else return x%y;} 70 | int _modu(int x, int y, int z) { 71 | if (y == 0) {unacceptable = 1; return 0;} 72 | else return (unsigned)x%(unsigned)y;} 73 | int _and(int x, int y, int z) {return x & y;} 74 | int _or (int x, int y, int z) {return x | y;} 75 | int _xor(int x, int y, int z) {return x ^ y;} 76 | int _bic(int x, int y, int z) {return x & ~y;} 77 | int rotl(int x, int y, int z) {int s = y & NBSM; 78 | return x << s | (unsigned)x >> (32 - s);} 79 | int rotr(int x, int y, int z) {int s = y & NBSM; 80 | return x << (32 - s) | (unsigned)x >> s;} 81 | int shl(int x, int y, int z) {int s = y & NBSM; 82 | if (s >= 32) return 0; else return x << s;} 83 | int shr(int x, int y, int z) {int s = y & NBSM; 84 | if (s >= 32) return 0; else return (unsigned)x >> s;} 85 | int shrs(int x, int y, int z) {int s = y & NBSM; 86 | if (s >= 32) return x >> 31; else return x >> s;} 87 | int cmpeq(int x, int y, int z) {return x == y;} 88 | int cmplt(int x, int y, int z) {return x < y;} 89 | int cmpltu(int x, int y, int z) {return (unsigned)(x) < (unsigned)(y);} 90 | int seleq(int x, int y, int z) {return x == 0 ? y : z;} 91 | int sellt(int x, int y, int z) {return x < 0 ? y : z;} 92 | int selle(int x, int y, int z) {return x <= 0 ? y : z;} 93 | 94 | -------------------------------------------------------------------------------- /aha.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2002 by Henry S. Warren, Jr. 2 | 3 | #include "config.h" 4 | #include "simulator.h" 5 | 6 | #ifndef NARGS 7 | #define NARGS 1 // Number of args in userfun, 1, 2 or 3. 8 | #endif 9 | 10 | const int debug = 0; // 0 or 1; debugging printouts if 1. 11 | const int counters = 1; // 0 or 1; count number of evaluations. 12 | 13 | /* A note about the registers: 14 | 15 | They are divided into four groups. The first group, starting with 16 | register 0, holds ordinary immediate values. The second group, starting 17 | with register NIM, holds the shift immediate values. The next 1 or 2 18 | regs are the arguments to the user-defined function. The last group 19 | holds the results of computations done by the trial programs. 20 | 21 | 0 Start of ordinary immediate values (those given by IMMEDS) 22 | NIM Start of shift immediate values (those given by SHIMMEDS) 23 | RX First (or only) user function argument 24 | RY Second user function argument 25 | RZ Third user function argument 26 | RI0 Result of instruction 0 goes here 27 | RI0 + i Result of instruction i goes here 28 | where: 29 | NIM = number of ordinary immediate values 30 | NSHIM = number of shift immediate values 31 | */ 32 | 33 | static const int trialx[] = TRIAL; 34 | #if NARGS >= 2 35 | static const int trialy[] = TRIAL; 36 | #endif 37 | #if NARGS >= 3 38 | static const int trialz[] = TRIAL; 39 | #endif 40 | 41 | int dummy1[] = {IMMEDS}; // These get optimized out of existence. 42 | int dummy2[] = {SHIMMEDS}; 43 | 44 | #define NELEMS(a) (int)(sizeof(a)/sizeof(a[0])) 45 | #define NIM NELEMS(dummy1) 46 | #define NSHIM NELEMS(dummy2) 47 | #define RX (NIM + NSHIM) // First (or only) user function argument 48 | #define RY (RX + 1) // Second user function argument 49 | #define RZ (RY + 1) // Third user function argument 50 | #define RI0 (RX + NARGS) // Result of instruction 0 goes here 51 | 52 | int unacceptable; // Code below sets this to 1 for an 53 | // unacceptable operation, such as 54 | // divide by 0. It is initially 0. 55 | 56 | // Note: Commutative ops are commutative in operands 0 and 1. 57 | typedef struct isa_t 58 | { 59 | simproc *proc; // Procedure for simulating the op. 60 | int numopnds; // Number of operands, 1 to 3. 61 | int commutative; // 1 if opnds 0 and 1 commutative. 62 | int opndstart[3]; // Starting reg no. for each operand. 63 | const char *mnemonic; // Name of op, for printing. 64 | const char *fun_name; // Function name, for printing. 65 | const char *op_name; // Operator name, for printing. 66 | } 67 | isa_t; 68 | 69 | #include "machine.h" 70 | 71 | /* ------------------- End of user-setup Portion -------------------- */ 72 | 73 | #define MAXNUMI 5 // Max num of insns that can be tried. 74 | #if NARGS == 1 75 | int userfun(int); 76 | #elif NARGS == 2 77 | int userfun(int, int); 78 | #elif NARGS == 3 79 | int userfun(int, int, int); 80 | #else 81 | #error Invalid NARGS 82 | #endif 83 | 84 | #define NTRIALX NELEMS(trialx) 85 | #define NTRIALY NELEMS(trialy) 86 | #define NTRIALZ NELEMS(trialz) 87 | 88 | #if NARGS == 1 89 | int correct_result[NTRIALX]; 90 | #elif NARGS == 2 91 | int correct_result[NTRIALX][NTRIALY]; 92 | #elif NARGS == 3 93 | int correct_result[NTRIALX][NTRIALY][NTRIALZ]; 94 | #endif 95 | 96 | int corr_result; // Correct result for current trial. 97 | 98 | #define NUM_INSNS_IN_ISA NELEMS(isa) 99 | 100 | struct { // The current program. 101 | int op; // Index into isa. 102 | int opnd[3]; // Operands of op. Register numbers 103 | // except if negative, it's the negative 104 | // of a shift amount. 105 | } pgm[MAXNUMI]; 106 | 107 | int numi; // Current size of the trial programs, 108 | // must be from 1 to MAXNUMI. 109 | 110 | /* GPR array: First NIM slots hold ordinary immediate values (IMMEDS), 111 | next NSHIM slots hold shift immediate values (SHIMMEDS), next NARGS 112 | slots hold the arguments x and, optionally, y, and the last numi slots 113 | hold the result of instructions 0 through numi - 1. */ 114 | 115 | int r[NIM + NSHIM + NARGS + MAXNUMI] = {IMMEDS, SHIMMEDS}; 116 | unsigned long long counter[MAXNUMI]; // Count num times insn at level i is evaluated. 117 | -------------------------------------------------------------------------------- /read.me: -------------------------------------------------------------------------------- 1 | Brief Usage Description 2 | 3 | Create a header file that decribes your problem and your machine. There 4 | are two examples provided: abs.h for a function of one variable, and 5 | avg.h for a function of two variables. The first solves the problem of 6 | how to compute the absolute value function on a machine that does not 7 | have that instruction. The second finds the Dietz formula for computing 8 | the "floor average" of two unsigned integers without causing overflow. 9 | 10 | Modify either abs.h or avg.h to fit your problem and save the file under 11 | a name of your choice (with file extension .h). Let us assume the file 12 | is named "mine.h". 13 | 14 | Make the executable file by entering, on Windows, "make mine". This 15 | creates file "mine.exe". 16 | 17 | Execute it by entering "mine n", where n is the number of instructions 18 | for which you want to find a solution, generally 1, 2, 3, or 4. 19 | 20 | The solutions found will be displayed and also placed in file 21 | "mine.out". 22 | 23 | See aha.pdf for a complete writeup. 24 | 25 | 26 | History of Improvements 27 | 28 | Changing from calculating the correct answer for each new program to 29 | calculating them in advance and storing in a table, reduced the 30 | execution time by about 2.7%. 31 | 32 | An 8% improvement resulted from adding the "commutative" bit to the five 33 | commutative operations (add, mul, and, or, xor). Perhaps more 34 | importantly, it reduced the printout of essentially duplicate solutions. 35 | 36 | An improvement by a factor of 2.58 (25.3/9.8) resulted from ensuring 37 | that the last register operand of the last instruction, when this 38 | instruction is created, refers to the result of the immediately 39 | preceding instruction. 40 | 41 | Continued the above idea for other register operands, i.e., ensured that 42 | SOME operand of the last instruction always refers to the result of the 43 | immediately preceding instruction. Got an improvement by a factor of 44 | 1.04 (9.8/9.4). 45 | 46 | 3/16/02: Got a factor of 1.85 by having it simulate the program only 47 | from the last changed instruction to the end, which means that usually 48 | only the last instruction is simulated. Also, changed the trial 49 | value(s) so they "stick" at the last failed one(s). When a trial value 50 | is changed, which happens after a success, the whole program must be 51 | simulated. 52 | 53 | 3/16/02: Got a factor of 1.010 by moving the assignment to 54 | computed_result inside the loop just ahead of where it was. (The loop 55 | is usually executed only once.) 56 | 57 | 3/16/02: Got a factor of 1.020 by computing corr_res only when sticky_i 58 | and/or sticky_j change. 59 | 60 | 3/17/02: Tried making "numi" a constant defined with #define. Got a 5% 61 | improvement. Decided not to do this. 62 | 63 | 3/19/02: Got a factor of 1.166 by inlining "increment." 64 | 65 | 3/23/02: Took 1614 secs (26.9 min) to search with numi = 4. 66 | 67 | 9/19/02: Got a factor of 1.131 by requiring that immediate values be in 68 | the order 0, -1, 1, ... and using isa.opndstart[3] to avoid certain 69 | silly cases like ADD of 0, ADD of -1 (we do a subtract of 1), AND of 0 70 | or -1, etc. This was made kind of necessary because the compare ops 71 | should have an immediate value of 0 as a possibility, whereas for most 72 | other ops, immediate 0 would never be used. 73 | 74 | 9/22/02: Changed shift immediate amounts to be given in an array 75 | (shimmed), so that fewer than 31 values can be specified. This gave no 76 | change to execution time if all 31 are specified (1.222 secs for 77 | absolute value problem on a basic RISC, running on my 1.8 mHz Thinkpad). 78 | If only 4 values are specified, e.g. 1, 2, 30, and 31, the execution 79 | dropped to 0.450 secs, a factor of 2.71 improvement. 80 | I don't quite understand this, because the number of evaluations of 81 | the third instruction reduced from 14.2 million to 2.74 million, a 82 | factor of 5.18. 83 | It's partly explained by the program load time. If you run aha with 84 | an argument of 1, it takes 0.140 secs. Thus the time to start and end 85 | the program is about that amount. So the ration of actual execution 86 | time is (1.222 - 0.140)/(0.450 - 0.140) = 3.49. Closer to 5.18, but not 87 | very close. 88 | 89 | 9/24/02: Put ALL immediates (both ordinary and shift amounts) in the 90 | registers. This did not affect the execution time (if compiled -O2), 91 | but it allowed deleting the operand type info in the isa table, and 92 | simplified the code a little (by 77 lines). 93 | Execution time for the standard run is now 0.591 secs on my 667 mHz 94 | machine (compiled -O2, which I guess I'll use from now on). 95 | 96 | 9/25/02: Made Aha! measure and print its own execution time, using 97 | clock(). I believe this is user + system time for the Aha! process, 98 | rather than wall clock time. Found that -O2 and -O3 make no difference; 99 | the assembly language files search.s and check.s are identical. Am 100 | using -O2. The standard job runs in from 0.520 to 0.540 seconds process 101 | time on my 667 mHz office machine. 102 | The number of instruction evaluations is 62248 + 82618 + 2743328 (for 103 | the first, second, and third instruction resp.), or 2888194 total. This 104 | corresponds to 122 cycles per evaluation. 105 | 106 | 9/30/02: Before today, the program consisted of three .c files: aha, 107 | search, and check. Made it all one file (aha.c), mainly because of 108 | problems with C in defining a preset array of values and not requiring 109 | the user to also set a variable equal to the number of values in the 110 | array. No change to execution time. Build (mainly compilation) time 111 | dropped from about 2.0 secs to 1.2 secs. This change also permits 112 | inlining fix_operands, but trying that did not change execution time 113 | measurably (so it is not inlined now). 114 | 115 | 10/14/02: Before today, the incrementing of instructions was done with 116 | the rightmost operand varying most rapidly. Today it was changed so 117 | that the leftmost operand varies most rapidly. This simplifies the 118 | handling of commutative ops, and permits a few other minor 119 | simplifications. 120 | This gave a factor of 1.05 improvement in execution time. Quite 121 | minor, but the program is a little simpler and I think it will simplify 122 | more complicated optimizations that may be done, such as (somehow) 123 | avoiding programs that have an instruction whose result is unused. 124 | A preliminary investigation of this shows that for a typical RISC 125 | instruction set, 39% of three-instruction programs have an unused 126 | result, and 70% of four-instruction programs have an unused result. 127 | This is compared to the present program, which ensures only that the 128 | second from last computed result does not go unused. Thus there is hay 129 | to be made here. 130 | An attempt to skip ALL these silly programs resulted in a net 131 | increase in execution time, because it was implemented inefficiently. 132 | It seems to be hard to devise an efficient way to do this. Some 133 | compromise might be practical, such as ensuring only that the second and 134 | third from last results are not both unused. 135 | 136 | 10/15/02: Changed the program as just mentioned, i.e., to ensure that 137 | instruction n (the last) uses the result of instruction n-1 and, if 138 | instruction n-1 does not use the result of instruction n-2, then the 139 | last instruction does. This improved execution time by a factor of 1.4 140 | for three-instruction programs, and a factor of 1.8 for four-instruction 141 | programs. 142 | 143 | 4/22/03: Ran Aha! on a two-input problem with n = 5 and 17 instructions 144 | enabled. Was searching for 5-instruction programs to compute the average 145 | of two signed integers (without overflowing). Shut it off after 144 146 | hours (6 days). I should make it display a "progress report" for such 147 | long jobs, such as printing out the first instruction in the list each 148 | time a new opcode is selected for it. Otherwise, you don't know if it 149 | somehow got into an infinite loop and you have no idea how long the run 150 | will take. 151 | 152 | 2/25/11: Incorporated a correction to the printb routine from Greg 153 | Parker, which makes it run on the 64-bit Mac OS X (and probably other 154 | machines). 155 | -------------------------------------------------------------------------------- /aha.c: -------------------------------------------------------------------------------- 1 | /* A Hacker's Assistant */ 2 | 3 | // Copyright (C) 2002 by Henry S. Warren, Jr. 4 | #include // Used by printb (print both). 5 | #include 6 | #include 7 | #include 8 | #include INC 9 | 10 | #define INLINE static inline 11 | 12 | FILE *ofile; 13 | 14 | // ----------------------------- printb -------------------------------- 15 | 16 | void 17 | printb(int pr, const char *format, ...) 18 | { 19 | /* Prints the given data on the display and on the current output 20 | file. Takes any number of printf-style args after "format". 21 | Thanks to Greg Parker for this version, which he verified works on 22 | 64-bit Mac OS X. The previous version did not, due to an error in the 23 | use of va_list. */ 24 | 25 | if (pr & 1) { 26 | va_list ap; 27 | va_start(ap, format); 28 | vprintf(format, ap); 29 | va_end(ap); 30 | } 31 | if (pr & 2) { 32 | va_list ap; 33 | va_start(ap, format); 34 | vfprintf(ofile, format, ap); 35 | va_end(ap); 36 | } 37 | } 38 | 39 | // --------------------------- print_expr ------------------------------ 40 | 41 | void 42 | print_expr(int pr, int opn) 43 | { 44 | int i, j, k; 45 | 46 | if (opn < RX) { // Immediate value. 47 | if (-31 <= r[opn] && r[opn] <= 31) printb(pr, "%d", r[opn]); 48 | else printb(pr, "0x%X", r[opn]); 49 | } 50 | else if (opn == RX) printb(pr, "x"); // First argument. 51 | #if NARGS >= 2 52 | else if (opn == RY) printb(pr, "y"); // Second argument. 53 | #endif 54 | #if NARGS >= 3 55 | else if (opn == RZ) printb(pr, "z"); // Third argument. 56 | #endif 57 | else { // opn is an instruction. 58 | i = opn - RI0; 59 | k = pgm[i].op; 60 | printb(pr, "%s", isa[k].fun_name); 61 | for (j = 0; j < isa[k].numopnds; j++) { 62 | print_expr(pr, pgm[i].opnd[j]); 63 | if (j < isa[k].numopnds - 1) printb(pr, "%s", isa[k].op_name); 64 | else printb(pr, ")"); 65 | } 66 | } 67 | } 68 | 69 | // --------------------------- print_pgm ------------------------------- 70 | 71 | void 72 | print_pgm(int pr) 73 | { 74 | int i, j, k, opndj; 75 | 76 | for (i = 0; i < numi; i++) { 77 | k = pgm[i].op; 78 | printb(pr, " %-5s r%d,", isa[k].mnemonic, i + 1); 79 | for (j = 0; j < isa[k].numopnds; j++) { 80 | opndj = pgm[i].opnd[j]; 81 | if (opndj < NSHIM) { 82 | opndj = r[opndj]; 83 | if (opndj >= -31 && opndj <= 31) printb(pr, "%d", opndj); 84 | else printb(pr, "0x%X", opndj); 85 | } 86 | else if (opndj < RX) { 87 | // shift immediates 88 | opndj = r[opndj]; 89 | printb(pr, "#%d", opndj); 90 | } 91 | else if (opndj == RX) printb(pr, "rx"); 92 | #if NARGS >= 2 93 | else if (opndj == RY) printb(pr, "ry"); 94 | #endif 95 | #if NARGS >= 3 96 | else if (opndj == RZ) printb(pr, "rz"); 97 | #endif 98 | else printb(pr, "r%d", opndj - RI0 + 1); 99 | if (j < isa[k].numopnds - 1) printb(pr, ","); 100 | } 101 | if (debug) 102 | printb(pr, " ==> %d (0x%X)\n", r[i+RI0], r[i+RI0]); 103 | else printb(pr, "\n"); 104 | } // end for i 105 | 106 | /* Now print the program as an expression. */ 107 | 108 | printb(pr, " Expr: "); 109 | print_expr(pr, numi - 1 + RI0); 110 | printb(pr, "\n"); 111 | } 112 | 113 | // -------------------- simulate_one_instruction ----------------------- 114 | 115 | INLINE void 116 | simulate_one_instruction(int i) 117 | { 118 | int arg0, arg1, arg2; 119 | 120 | arg0 = r[pgm[i].opnd[0]]; 121 | arg1 = r[pgm[i].opnd[1]]; 122 | arg2 = r[pgm[i].opnd[2]]; 123 | 124 | r[i + RI0] = (*isa[pgm[i].op].proc)(arg0, arg1, arg2); 125 | if (counters) counter[i]++; 126 | return; 127 | } 128 | 129 | // ----------------------------- check --------------------------------- 130 | 131 | int 132 | check(int i) 133 | { 134 | 135 | static int itrialx; // Init 0. 136 | int kx; 137 | #if NARGS >= 2 138 | static int itrialy; 139 | int ky; 140 | #endif 141 | #if NARGS >= 3 142 | static int itrialz; 143 | int kz; 144 | #endif 145 | 146 | if (debug) { 147 | #if NARGS == 1 148 | fprintf(ofile, "\nSimulating with trial arg x = %d (0x%X):\n", 149 | r[RX],r[RX]); 150 | #elif NARGS == 2 151 | fprintf(ofile, "\nSimulating with (x, y) = (%d, %d) ((0x%X, 0x%X)):\n", 152 | r[RX], r[RY], r[RX], r[RY]); 153 | #elif NARGS == 3 154 | fprintf(ofile, "\nSimulating with (x, y, z) = (%d, %d, %d) ((0x%X, 0x%X, 0x%X)):\n", 155 | r[RX], r[RY], r[RZ], r[RX], r[RY], r[RZ]); 156 | #endif 157 | } 158 | L: 159 | simulate_one_instruction(i); // Simulate i'th insn, 160 | if (i < numi - 1) {i = i + 1; goto L;} // and more if req'd 161 | if (unacceptable) { // E.g., if divide by 0: 162 | if (debug) printb(2, "Unacceptable program (invalid operation).\n"); 163 | unacceptable = 0; 164 | return 0; 165 | } 166 | 167 | if (debug) { 168 | print_pgm(2); 169 | fprintf(ofile, "Computed result = %d, correct result = %d, %s\n", 170 | r[numi-1+RI0], corr_result, r[numi-1+RI0] == corr_result ? "ok" : "fail"); 171 | } 172 | if (r[numi-1+RI0] != corr_result) // If not the correct 173 | return 0; // result, failure. 174 | 175 | // Got the correct result. Check this program using all trial values. 176 | 177 | for (kx = 0; kx < NTRIALX - 1; kx++) { 178 | itrialx += 1; 179 | if (itrialx >= NTRIALX) itrialx = 0; 180 | #if NARGS >= 2 181 | for (ky = 0; ky < NTRIALY - 1; ky++) { 182 | itrialy += 1; 183 | if (itrialy >= NTRIALY) itrialy = 0; 184 | #if NARGS >= 3 185 | for (kz = 0; kz < NTRIALZ - 1; kz++) { 186 | itrialz += 1; 187 | if (itrialz >= NTRIALZ) itrialz = 0; 188 | #endif 189 | #endif 190 | 191 | #if NARGS == 1 192 | r[RX] = trialx[itrialx]; 193 | corr_result = correct_result[itrialx]; 194 | #elif NARGS == 2 195 | r[RX] = trialx[itrialx]; 196 | r[RY] = trialy[itrialy]; 197 | corr_result = correct_result[itrialx][itrialy]; 198 | #elif NARGS == 3 199 | r[RX] = trialx[itrialx]; 200 | r[RY] = trialy[itrialy]; 201 | r[RZ] = trialz[itrialz]; 202 | corr_result = correct_result[itrialx][itrialy][itrialz]; 203 | #endif 204 | 205 | /* Now we simulate the current program, i.e., the instructions 206 | from 0 to numi-1. The result of instruction i goes in 207 | register i + RI0. */ 208 | 209 | if (debug) { 210 | #if NARGS == 1 211 | fprintf(ofile, "\nContinuing this pgm with arg x = %d (0x%X):\n", 212 | r[RX], r[RX]); 213 | #elif NARGS == 2 214 | fprintf(ofile, "\nContinuing this pgm with (x, y) = (%d, %d) ((0x%X, 0x%X)):\n", 215 | r[RX], r[RY], r[RX], r[RY]); 216 | #elif NARGS == 3 217 | fprintf(ofile, "\nContinuing this pgm with (x, y, z) = (%d, %d, %d) ((0x%X, 0x%X, 0x%X)):\n", 218 | r[RX], r[RY], r[RZ], r[RX], r[RY], r[RZ]); 219 | #endif 220 | } 221 | for (i = 0; i < numi; i++) { // Simulate program from 222 | simulate_one_instruction(i); // beginning to end. 223 | } 224 | if (unacceptable) {unacceptable = 0; return 0;} 225 | if (debug) { 226 | print_pgm(2); 227 | fprintf(ofile, "Computed result = %d, correct result = %d, %s\n", 228 | r[numi+RI0-1], corr_result, r[numi+RI0-1] == corr_result ? "ok" : "fail"); 229 | } 230 | if (r[numi+RI0-1] != corr_result) return 0; 231 | 232 | #if NARGS >= 3 233 | } // end kz 234 | #endif 235 | #if NARGS >= 2 236 | } // end ky 237 | #endif 238 | } // end kx 239 | return 1; // Passed all tests, found a 240 | // probably correct program. 241 | } 242 | 243 | // -------------------------- fix_operands ----------------------------- 244 | 245 | void 246 | fix_operands(int i) 247 | { 248 | 249 | /* This program fixes instruction i so that: 250 | 251 | (1) if it is the last instruction, at least one operand uses the 252 | result of the immediately preceding instruction, and furthermore if 253 | the second from last instruction does not use the result of its 254 | predecsssor, then the last instruction must use that result also. 255 | (2) not all operands are immediate values, and (We assume it would be 256 | a waste of time to process an instruction with all immediate 257 | operands). 258 | (3) if it is commutative, operand 0 >= operand 1, 259 | 260 | It does these fixes by "increasing" the instruction by a minimal 261 | amount, so that the incrementing of instructions is kept in order and no 262 | legitimate instructions are skipped. 263 | A hard part to understand is the logic of (1) above. Let us assume 264 | for illustration that the program has four instructions (numi = 4). 265 | Then when this subroutine is called to process the last instruction (i = 266 | numi - 1), the operands may be in any of the configurations shown below. 267 | The last instruction sets r4, the second from last instruction sets r3, 268 | and its predecessor sets r2. ii denotes a register containing an 269 | immediate value, or a register <= RY; in particular ii < r2. We assume 270 | the last instruction ("op") has three input operands, as that is the 271 | more difficult case, and that the second from last instruction does not 272 | use r2. Therefore the last instruction must be altered so that it uses 273 | both r2 and r3. 274 | 275 | operand: 0 1 2 0 1 2 276 | op r4,ii,ii,ii ==> op r4,r3,r2,ii Add r2 and r3. 277 | op r4,ii,r2,ii ==> op r4,r3,r2,ii Add r3. 278 | op r4,ii,r3,ii ==> op r4,r2,r3,ii Add r2. 279 | op r4,ii,ii,r2 ==> op r4,r3,ii,r2 Add r3. 280 | op r4,ii,r2,r2 ==> op r4,r3,r2,r2 Add r3. 281 | op r4,ii,r3,r2 ==> no change 282 | op r4,ii,ii,r3 ==> op r4,r2,ii,r3 Add r2. 283 | op r4,ii,r2,r3 ==> no change 284 | op r4,ii,r3,r3 ==> op r4,r2,r3,r3 285 | 286 | These are the only possibilities. The first input operand cannot be 287 | r2 or r3, because if it were, then it must have just been incremented 288 | from r1 or r2 resp., and in this case "increment" does not call 289 | "fix_operands." 290 | The first row above means that if none of the last instruction's 291 | operands are r2 or r3, then the change that adds r2 and r3 and that 292 | "minimizes" the resulting instruction is to change operand 0 to r3 and 293 | operand 1 to r2. The second row shows a case in which r2 is already 294 | present, but r3 is not. The minimal change is to change operand 0 to r3. 295 | Examination of all the possibilities reveals that a workable simple 296 | rule is: 297 | (1) If r3 is not used, then change operand 0 to be r3. 298 | (2) Then, if r2 is not used, change operand 0 to r2 unless that 299 | decreases the instruction, in which case change operand 1 to r2. 300 | These rules are coded in the block headed by "if (i == numi - 1)". 301 | It might seem that the program should test that pgm[i].opnd[0] is not 302 | equal to rs or rt; however, as noted above operand 0 is never equal 303 | to those registers at this point. 304 | This scheme is sufficient to ensure that if numi = 3, no trial 305 | program has an unused computed value. If numi = 4, a small percentage 306 | of trial programs will have an unused computed value. Incorporation 307 | of the r2 part of it improved the execution time by about a factor of 308 | 1.4 if numi = 3, and a factor of 1.8 if numi = 4. If numi = 5, there 309 | is probably a substantial percentage of trial programs with one or 310 | more unused computed values; it hasn't been tried. */ 311 | 312 | int rs, rt, k; 313 | 314 | k = pgm[i].op; 315 | 316 | if (i == numi - 1) { // If this is the last insn: 317 | rs = numi + RI0 - 2; // Second from last reg. 318 | if (pgm[i].opnd[1] != rs && pgm[i].opnd[2] != rs) { 319 | pgm[i].opnd[0] = rs; 320 | } 321 | rt = rs - 1; // Third from last reg. 322 | if (pgm[i-1].opnd[0] != rt && pgm[i-1].opnd[1] != rt && 323 | pgm[i-1].opnd[2] != rt && pgm[i].opnd[1] != rt && 324 | pgm[i].opnd[2] != rt && rt >= RI0) { 325 | 326 | // The last instruction needs to reference rt. 327 | 328 | if (pgm[i].opnd[0] < rt) pgm[i].opnd[0] = rt; 329 | else if (isa[k].numopnds > 1) pgm[i].opnd[1] = rt; 330 | 331 | // else (unary op), forget it. 332 | } 333 | } 334 | 335 | if (isa[k].commutative) { 336 | if (pgm[i].opnd[0] < pgm[i].opnd[1]) 337 | pgm[i].opnd[0] = pgm[i].opnd[1]; 338 | return; // No need to do next check, as opnd[0] 339 | } // is always a reg containing a variable. 340 | 341 | if (i != numi - 1) { 342 | if (pgm[i].opnd[0] < RX && pgm[i].opnd[1] < RX && 343 | pgm[i].opnd[2] < RX) { 344 | if (isa[k].commutative) abort(); 345 | pgm[i].opnd[0] = RX; 346 | } 347 | } 348 | } 349 | 350 | // --------------------------- increment ------------------------------- 351 | 352 | INLINE int 353 | increment(void) 354 | { 355 | 356 | /* This routine "increments" the instruction list, in a manner 357 | similar to counting. The instruction list changes basically 358 | like this: 359 | 360 | i0 r0,r0 i0 r0,r0 i0 r0,r0 i0 r0,r0 361 | i0 r0,r0 ==> i0 r0,r0 ==> i0 r0,r0 ==> i0 r0,r0 etc. 362 | i0 r0,r0 i0 r1,r0 i0 r2,r0 i0 r0,r1 363 | 364 | The bottom left operand is tested. If it has not reached its 365 | maximum value, it is incremented. If it has reached its maximum 366 | value, it is reset to its starting value and the operand to its right 367 | is incremented if possible. If all operands have reached their 368 | maxima, the last instruction is replaced with the next instruction 369 | in the isa list, if possible, etc. 370 | The returned value is the lowest index i of the instructions 371 | modified, or -1 if the instruction list cannot be incremented anymore 372 | ("done"). 373 | As far as incrementing goes, there are only three types of operands: 374 | 375 | 1. Goes through the ordinary immediate values, skips the shift 376 | immediate values, and then goes through the registers. 377 | 2. Goes through the shift immediate values followed by the registers. 378 | 3. Goes through the registers only. 379 | 380 | Which range an operand is in can be determined by its register number 381 | alone, so we don't need operand types in the ISA. However, opnd[0] 382 | of a commutative op is an exception in that it doesn't go through 383 | all the register values; it skips register values for which it is 384 | less than opnd[1]. 385 | There's no doubt a faster way to program this, maybe by using 386 | some fairly large tables. */ 387 | 388 | int i, j, k, opndj, nopnds; 389 | 390 | for (i = numi - 1; i >= 0; i--) { 391 | k = pgm[i].op; 392 | nopnds = isa[k].numopnds; 393 | for (j = 0; j < nopnds; j++) { 394 | opndj = pgm[i].opnd[j]; 395 | 396 | if (opndj < NIM - 1) { // If ordinary imm. and not last, 397 | pgm[i].opnd[j] += 1; // increment the operand. 398 | break; 399 | } 400 | else if (opndj == NIM - 1) { // If last ordinary imm. operand, 401 | pgm[i].opnd[j] = RX; // skip to first register. 402 | break; 403 | } 404 | else if (opndj < i + RI0 - 1) {// If shift imm. or reg and not 405 | pgm[i].opnd[j] += 1; // last, increment the operand. 406 | break; 407 | } 408 | // We're at the end for opnd j. 409 | pgm[i].opnd[j] = isa[k].opndstart[j]; // Reset it and 410 | // increment next operand to 411 | // its right. 412 | } // end for j 413 | 414 | if (j == 0) // If we just incremented the 415 | return i; // leftmost operand, return; the 416 | // following check is not necessary. 417 | if (j < nopnds) { 418 | 419 | /* We just incremented some operand other than the rightmost, 420 | which means we reset one or more operands. Must ensure that if 421 | the instruction is commutative then opnd[0] >= opnd[1], that 422 | the operands are not all immediate values, and if this is the 423 | last instruction, that at least one operand refers to the 424 | second from last instruction and possibly to the instruction 425 | before that. */ 426 | 427 | fix_operands(i); 428 | return i; 429 | } 430 | 431 | /* Have gone through all of insn i's opnds. 432 | Increment the instruction itself (if possible). */ 433 | 434 | if (k < NUM_INSNS_IN_ISA - 1) { 435 | k = k + 1; // Increment to next isa instruction. 436 | pgm[i].op = k; 437 | pgm[i].opnd[0] = isa[k].opndstart[0]; 438 | pgm[i].opnd[1] = isa[k].opndstart[1]; 439 | pgm[i].opnd[2] = isa[k].opndstart[2]; 440 | 441 | fix_operands(i); 442 | return i; 443 | } 444 | 445 | /* Cannot increment to next isa insn. Reset it to the first 446 | isa insn and look at next insn down in the program. Furthermore, 447 | if the insn being reset is the last insn in the program, make 448 | its first opnd pick up the previous insn's result. */ 449 | 450 | pgm[i].op = 0; // Index first insn in isa. 451 | pgm[i].opnd[0] = isa[0].opndstart[0]; 452 | pgm[i].opnd[1] = isa[0].opndstart[1]; 453 | pgm[i].opnd[2] = isa[0].opndstart[2]; 454 | 455 | fix_operands(i); 456 | } // end for i 457 | return -1; // Return "done" indication. 458 | } 459 | 460 | // ----------------------------- search -------------------------------- 461 | 462 | int 463 | search(void) 464 | { 465 | 466 | int ok, i, num_solutions; 467 | 468 | #if NARGS == 1 469 | r[RX] = trialx[0]; // Must initialize these for 470 | corr_result = correct_result[0]; // speed-up thing in "check." 471 | #elif NARGS == 2 472 | r[RX] = trialx[0]; 473 | r[RY] = trialy[0]; 474 | corr_result = correct_result[0][0]; 475 | #elif NARGS == 3 476 | r[RX] = trialx[0]; 477 | r[RY] = trialy[0]; 478 | r[RZ] = trialz[0]; 479 | corr_result = correct_result[0][0][0]; 480 | #endif 481 | num_solutions = 0; 482 | i = 0; 483 | do { 484 | ok = check(i); // Simulate the program from i on. 485 | if (ok) { 486 | num_solutions++; 487 | printb(3, "\nFound a %d-operation program:\n", numi); 488 | print_pgm(3); 489 | } 490 | i = increment(); // Increment to next program. 491 | } while (i >= 0); 492 | return num_solutions; 493 | } 494 | 495 | // -------------------------- Main Program ----------------------------- 496 | 497 | int 498 | main(int argc, char *argv[]) 499 | { 500 | int i, j, k, num_sol; 501 | clock_t t_start, t_finish; 502 | char *end_num; // End of number, set by strtol. 503 | 504 | /* Obtain parameter (number of instructions (actually operations) 505 | for the sought program) and check it. */ 506 | 507 | if (argc != 2 || *argv[1] == '?') goto tell; 508 | 509 | numi = strtol(argv[1], &end_num, 0); 510 | if (*end_num != '\0') { 511 | fprintf(stderr, "Invalid first argument, must be a decimal integer.\n"); 512 | return 1; 513 | } 514 | 515 | if (numi < 1 || numi > MAXNUMI) { 516 | fprintf(stderr, "Number of insns must be from 1 to %d.\n", MAXNUMI); 517 | return 1; 518 | } 519 | 520 | ofile = fopen(OFILE, "w"); 521 | if (ofile == NULL) { 522 | fprintf(stderr, "Could not open file %s for output.\n", OFILE); 523 | return 1; 524 | } 525 | 526 | printb(3, "Searching for programs with %d operations.\n", numi); 527 | t_start = clock(); 528 | 529 | // Compute all the correct answers and save them in an array. 530 | 531 | for (i = 0; i < NTRIALX; i++) { 532 | #if NARGS == 1 533 | correct_result[i] = userfun(trialx[i]); 534 | #elif NARGS == 2 535 | for (j = 0; j < NTRIALY; j++) 536 | correct_result[i][j] = userfun(trialx[i], trialy[j]); 537 | #elif NARGS == 3 538 | for (j = 0; j < NTRIALY; j++) 539 | for (k = 0; k < NTRIALZ; k++) 540 | correct_result[i][j][k] = userfun(trialx[i], trialy[j], trialz[k]); 541 | #endif 542 | } 543 | 544 | /* Preload the instruction array with the first instruction and 545 | the lowest register number, with copies of this instruction 546 | filling the whole array from 0 to numi - 1. */ 547 | 548 | for (i = 0; i < numi; i++) { 549 | pgm[i].op = 0; // Index first insn in isa. 550 | pgm[i].opnd[0] = isa[0].opndstart[0]; 551 | pgm[i].opnd[1] = isa[0].opndstart[1]; 552 | pgm[i].opnd[2] = isa[0].opndstart[2]; 553 | 554 | /* Ensure that the instruction does not have all immediate 555 | operands, etc. */ 556 | 557 | fix_operands(i); 558 | } 559 | 560 | num_sol = search(); // Check the above program, generate 561 | // the next, check it, etc. 562 | 563 | t_finish = clock(); 564 | printb(3, "Found %d solutions.\n", num_sol); 565 | if (counters) { 566 | unsigned long long total = 0; 567 | printb(3, "Counters = "); 568 | for (i = 0; i < numi; i++) { 569 | printb(3, "%llu, ", counter[i]); 570 | total += counter[i]; 571 | } 572 | printb(3, "total = %llu\n", total); 573 | } 574 | printb(3, "Process time = %.3f secs\n", (double)(t_finish - t_start)/CLOCKS_PER_SEC); 575 | return 0; 576 | 577 | tell: 578 | fprintf(stderr, "Format is: %s n, where n is the number of operations to try.\n", argv[0]); 579 | return 0; 580 | } 581 | --------------------------------------------------------------------------------