├── .gitignore
├── aha.pdf
├── test1.frag.c
├── abs.frag.c
├── bool.frag.c
├── invbool.frag.c
├── onescomplement.frag.c
├── bic3.frag.c
├── round4.frag.c
├── negbool.frag.c
├── bitblt.frag.c
├── artificial.frag.c
├── simulator.h
├── avg.frag.c
├── Makefile
├── example.frag.c
├── config.h
├── README.md
├── machine.h
├── verify
    └── test.c
├── simulator.c
├── aha.h
├── read.me
└── aha.c


/.gitignore:
--------------------------------------------------------------------------------
1 | *.d
2 | *.o
3 | 


--------------------------------------------------------------------------------
/aha.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dpt/Aha/HEAD/aha.pdf


--------------------------------------------------------------------------------
/test1.frag.c:
--------------------------------------------------------------------------------
1 | /* test1.frag.c */
2 | 
3 | #include "aha.h"
4 | 
/* Target function for the search: the argument multiplied by seven. */
int userfun(int x)
{
  const int factor = 7;

  return factor * x;
}
9 | 


--------------------------------------------------------------------------------
/abs.frag.c:
--------------------------------------------------------------------------------
1 | /* abs.frag.c */
2 | 
3 | #include "aha.h"
4 | 
/* Target function for the search: absolute value of x. */
int userfun(int x)
{
    if (x < 0)
        return -x;

    return x;
}
9 | 


--------------------------------------------------------------------------------
/bool.frag.c:
--------------------------------------------------------------------------------
1 | /* bool.frag.c */
2 | 
3 | #include "aha.h"
4 | 
/* Target function for the search: 1 when x is non-zero, otherwise 0. */
int userfun(int x)
{
  return x != 0;
}
9 | 


--------------------------------------------------------------------------------
/invbool.frag.c:
--------------------------------------------------------------------------------
1 | /* invbool.frag.c */
2 | 
3 | #include "aha.h"
4 | 
/* Target function for the search: 1 when x is zero, otherwise 0. */
int userfun(int x)
{
  return x == 0;
}
9 | 


--------------------------------------------------------------------------------
/onescomplement.frag.c:
--------------------------------------------------------------------------------
1 | /* onescomplement.frag.c */
2 | 
3 | #include "aha.h"
4 | 
/* Target function for the search: bitwise complement of x. */
int userfun(int x)
{
  const int all_ones = ~0;

  return x ^ all_ones;   /* flipping every bit is the same as ~x */
}
9 | 


--------------------------------------------------------------------------------
/bic3.frag.c:
--------------------------------------------------------------------------------
 1 | /* bic3.frag.c */
 2 | /* ops to yield x &~ 3 */
 3 | 
 4 | #include "aha.h"
 5 | 
 6 | int userfun(int x)
 7 | {
 8 |   return x & ~3;
 9 | }
10 | 


--------------------------------------------------------------------------------
/round4.frag.c:
--------------------------------------------------------------------------------
 1 | /* round4.frag.c */
 2 | /* ops to yield (x + 2) & ~3 */
 3 | 
 4 | #include "aha.h"
 5 | 
 6 | int userfun(int x)
 7 | {
 8 |   return (x + 2) & ~3;
 9 | }
10 | 


--------------------------------------------------------------------------------
/negbool.frag.c:
--------------------------------------------------------------------------------
 1 | /* negbool.frag.c -- zero or minus one type bools */
 2 | 
 3 | #include "aha.h"
 4 | 
 5 | int userfun(int x) 
 6 | {
 7 |     return x ? -1 : 0;
 8 | }
 9 | 
10 | 


--------------------------------------------------------------------------------
/bitblt.frag.c:
--------------------------------------------------------------------------------
 1 | /* bitblt.frag.c */
 2 | /* bitmap plotting type ops: (dst & ~mask) | (src & mask) */
 3 | 
 4 | #define NARGS 3
 5 | 
 6 | #include "aha.h"
 7 | 
 8 | int userfun(int dst, int src, int mask)
 9 | {
10 |   return (dst & ~mask) | (src & mask);
11 | }
12 | 


--------------------------------------------------------------------------------
/artificial.frag.c:
--------------------------------------------------------------------------------
 1 | // artificial.frag.c
 2 | //
 3 | // Artificial example input for Aha! from my blog post
 4 | // http://www.davespace.co.uk/blog/20150131-branchless-sequences.html
 5 | 
 6 | #include "aha.h"
 7 | 
 8 | int userfun(int x)
 9 | {
10 |   if (x == 0)
11 |       return 1;
12 |   else if (x == 1)
13 |       return 2;
14 |   else
15 |       return 0;
16 | }
17 | 


--------------------------------------------------------------------------------
/simulator.h:
--------------------------------------------------------------------------------
 1 | /* simulator.h */
 2 | 
 3 | #ifndef SIMULATOR_H
 4 | #define SIMULATOR_H
 5 | 
 6 | typedef int (simproc)(int, int, int);
 7 | 
 8 | simproc neg, _not, pop, nlz, rev, revb, add, sub, rsb, mul, _div, _divu,
 9 |         _mod, _modu, _and, _or, _xor, _bic, rotl, rotr, shl, shr, shrs,
10 |         cmpeq, cmplt, cmpltu, seleq, sellt, selle;
11 | 
12 | #endif /* SIMULATOR_H */
13 | 
14 | 


--------------------------------------------------------------------------------
/avg.frag.c:
--------------------------------------------------------------------------------
 1 | /* avg.frag.c */
 2 | 
 3 | #define NARGS 2                 // Number of args in userfun, 1, 2 or 3.
 4 | 
 5 | #include "aha.h"
 6 | 
 7 | int userfun(int x, int y) {     // To find Dietz's formula for
 8 |                                 // the floor-average of two
 9 |                                 // unsigned integers.
10 |    return ((unsigned long long)x + (unsigned long long)y) >> 1;
11 | }
12 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | .SUFFIXES:	.o
 2 | 
 3 | EXAMPLE = example
 4 | 
 5 | CC	= gcc
 6 | CXX	= g++
 7 | DEFINES = -DINC=\"$(EXAMPLE).frag.c\" -DOFILE=\"$(EXAMPLE).out\"
 8 | CFLAGS	=  -O2 -Wall -Wextra -Wno-unused-variable -Wno-unused-parameter -MMD -I. $(DEFINES)
 9 | CXXFLAGS = $(CFLAGS)
10 | 
11 | SRCS	= aha.c simulator.c
12 | OBJS	= $(SRCS:.c=.o)
13 | 
14 | .c.o:
15 | 	$(CC) -c $(CFLAGS) -o $@ $<
16 | 
17 | .PHONY:	all clean
18 | 
19 | all: aha
20 | 
21 | aha: $(OBJS)
22 | 	$(CC) $(CFLAGS) -o $@ $(OBJS)
23 | 
24 | clean:
25 | 	$(RM) -f $(OBJS) aha core *~ *.bak *.d
26 | 
27 | # Dependencies
28 | 
29 | aha.o:	$(EXAMPLE).frag.c
30 | 
31 | -include $(SRCS:.c=.d)
32 | 
33 | 


--------------------------------------------------------------------------------
/example.frag.c:
--------------------------------------------------------------------------------
 1 | /* This is the function for which it is desired to find more efficient
 2 | code.  It must have one, two or three arguments (see NARGS), all int, and
 3 | must return a 32-bit int quantity.  It is declared in aha.h. */
 4 | 
 5 | #include "aha.h"
 6 | 
 7 | int userfun(int x) {
 8 | // if (x > 0) return 1;                 // x > 0 predicate.
 9 | // else return 0;                       // Turn off div & divu.
10 |                                         // Found a new formula for HD.
11 | 
12 | // if (x >= 32) return 0;
13 | // return 1 << (unsigned)x;
14 | 
15 | // return 3*x + 1;
16 | 
17 |    if (x >= 0) return x;                // Absolute value.
18 |    else return -x;
19 | 
20 | // return (x & 0xfffffffc) | ((x & 1) << 1) | ((x & 2) >> 1);
21 |                                         // Swap rightmost 2 bits.
22 | }
23 | 


--------------------------------------------------------------------------------
/config.h:
--------------------------------------------------------------------------------
 1 | /* config.h */
 2 | 
 3 | #ifndef CONFIG_H
 4 | #define CONFIG_H
 5 | 
 6 | #define MAXNEG 0x80000000
 7 | #define MAXPOS 0x7FFFFFFF
 8 | #define NBSM 31                 // Shift mask.  Use 63 for mod 64
 9 |                                 // shifts, or 31 for mod 32.
10 | #define TRIAL {1, 0, -1, \
11 |                MAXNEG, MAXPOS, MAXNEG + 1, MAXPOS - 1, \
12 |                0x01234567, 0x89ABCDEF, -2, 2, -3, 3, -64, 64, -5, -31415, \
13 |                0x0000FFFF, 0xFFFF0000, \
14 |                0x000000FF, 0x0000FF00, 0x00FF0000, 0xFF000000, \
15 |                0x0000000F, 0x000000F0, 0x00000F00, 0x0000F000, \
16 |                0x000F0000, 0x00F00000, 0x0F000000, 0xF0000000}
17 | 
18 | // First three values of IMMEDS must be 0, -1, and 1.
19 | #define IMMEDS 0, -1, 1, -2, 2, MAXNEG
20 | #define SHIMMEDS 1, 2, 3, 30, 31
21 | 
22 | #endif /* CONFIG_H */
23 | 
24 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | Aha!
 2 | ====
 3 | 
 4 | This is Henry S. Warren's "Aha!" superoptimiser tool with a couple of mods to make it more appropriate for targeting ARM and Thumb code. e.g. It knows about the `BIC` and `RSB` instructions.
 5 | 
 6 | It also has various rejiggery of source the exact nature of which escapes me at the moment. The original source is available on the 'distrib' branch.
 7 | 
 8 | Build a binary with:
 9 | 
10 |     make EXAMPLE=<fragment>
11 | 
12 | Where `<fragment>` is one of the xxx.frag.c files in the source directory.
13 | 
14 | Then run the resultant 'aha' with the number of target instructions.
15 | 
16 |     ./aha 2
17 | 
18 | Enjoy!
19 | 
20 | 
21 | Related Material
22 | ----------------
23 | 
24 | In January 2015 I wrote [an article about discovering branchless code sequences using Aha!](http://www.davespace.co.uk/blog/20150131-branchless-sequences.html).
25 | 
26 | In September 2016 I gave [a presentation about superoptimisers](http://slides.com/dpt/aha#/) to my then colleagues at [Inside Secure](https://www.insidesecure.com/).
27 | 
28 | In 2017 my friend Nick Gildea built a similar tool which [uses z3 for synthesis](https://github.com/nickgildea/z3_codegen).
29 | 


--------------------------------------------------------------------------------
/machine.h:
--------------------------------------------------------------------------------
 1 | /* machine.h */
 2 | 
 3 | #ifndef MACHINE_H
 4 | #define MACHINE_H
 5 | 
 6 | // The machine's instruction set:
 7 | static const isa_t isa[] = {
 8 |    {neg,    1, 0, {RX,  0,  0}, "neg",   "-(",   ""     },  // Negate.
 9 | // {_not,   1, 0, {RX,  0,  0}, "not",   "~(",   ""     },  // One's-complement.
10 | // {pop,    1, 0, {RX,  0,  0}, "pop",   "pop(", ""     },  // Population count.
11 | // {nlz,    1, 0, {RX,  0,  0}, "nlz",   "nlz(", ""     },  // Num leading 0's.
12 | // {rev,    1, 0, {RX,  0,  0}, "rev",   "rev(", ""     },  // Bit reversal.
13 |    {add,    2, 1, {RX,  2,  0}, "add",   "(",    " + "  },  // Add.
14 |    {sub,    2, 0, { 2,  2,  0}, "sub",   "(",    " - "  },  // Subtract.
15 | // {rsb,    2, 0, { 2,  2,  0}, "rsb",   "(",    " -r "  },  // Reverse subtract.
16 | // {mul,    2, 1, {RX,  3,  0}, "mul",   "(",    " * "  },  // Multiply.
17 | // {_div,   2, 0, { 1,  3,  0}, "div",   "(",    " / "  },  // Divide signed.
18 | // {_divu,  2, 0, { 1,  1,  0}, "divu",  "(",    " /u " },  // Divide unsigned.
19 | // {_mod,   2, 0, { 1,  3,  0}, "mod",   "(",    " % "  },  // Modulus signed.
20 | // {_modu,  2, 0, { 1,  1,  0}, "modu",  "(",    " %u " },  // Modulus unsigned.
21 |    {_and,   2, 1, {RX,  2,  0}, "and",   "(",    " & "  },  // AND.
22 |    {_or,    2, 1, {RX,  2,  0}, "or",    "(",    " | "  },  // OR.
23 |    {_xor,   2, 1, {RX,  2,  0}, "xor",   "(",    " ^ "  },  // XOR.
24 |    {_bic,   2, 1, {RX,  2,  0}, "bic",   "(",    " & ~" },  // AND-NOT / bitwise clear.
25 | // {rotl,   2, 0, { 1,NIM,  0}, "rotl",  "(",    " <<r "},  // Rotate shift left.
26 | // {rotr,   2, 0, { 1,NIM,  0}, "rotr",  "(",    " >>r "},  // Rotate shift right.
27 |    {shl,    2, 0, { 1,NIM,  0}, "shl",   "(",    " << " },  // Shift left.
28 |    {shr,    2, 0, { 1,NIM,  0}, "shr",   "(",    " >>u "},  // Shift right.
29 |    {shrs,   2, 0, { 3,NIM,  0}, "shrs",  "(",    " >>s "},  // Shift right signed.
30 | // {cmpeq,  2, 1, {RX,  0,  0}, "cmpeq", "(",    " == " },  // Compare equal.
31 | // {cmplt,  2, 0, { 0,  0,  0}, "cmplt", "(",    " < "  },  // Compare less than.
32 | // {cmpltu, 2, 0, { 1,  1,  0}, "cmpltu","(",    " <u " },  // Compare less than unsigned.
33 | // {seleq,  3, 0, {RX,  0,  0}, "seleq", "seleq(", ", " },  // Select if = 0.
34 | // {sellt,  3, 0, {RX,  0,  0}, "sellt", "sellt(", ", " },  // Select if < 0.
35 | // {selle,  3, 0, {RX,  0,  0}, "selle", "selle(", ", " },  // Select if <= 0.
36 |     {revb,   1, 0, {RX,   0, 0}, "revb",   "revb(",  ""     }, // Byte reversal.
37 | };
38 | 
39 | #endif /* MACHINE_H */
40 | 
41 | 


--------------------------------------------------------------------------------
/verify/test.c:
--------------------------------------------------------------------------------
  1 | /* test.c -- test shell for Aha! suggested solutions */
  2 | 
  3 | #include <limits.h>
  4 | #include <stdio.h>
  5 | 
  6 | /* ----------------------------------------------------------------------- */
  7 | 
/* Value type used by the reference function and all candidates. */
typedef unsigned int T;
/* Signature shared by the reference function and every candidate. */
typedef T (testfn_t)(T);
 10 | 
 11 | /* ----------------------------------------------------------------------- */
 12 | 
 13 | /* reference version of our original operation */
 14 | static T reference(T x)
 15 | {
 16 |     if (x == 0)
 17 |         return 1;
 18 |     else if (x == 1)
 19 |         return 2;
 20 |     else
 21 |         return 0;
 22 | }
 23 | 
 24 | static T branchless1(T x)
 25 | {
 26 |     return (((x + -2) & ~x) >> 31) << x;
 27 | }
 28 | 
 29 | static T branchless2(T x)
 30 | {
 31 |     return (((x - 2) & ~x) >> 31) << x;
 32 | }
 33 | 
 34 | static T branchless3(T x)
 35 | {
 36 |     return (((x >> 1) - 1) >> 31) << x;
 37 | }
 38 | 
 39 | static T branchless4(T x)
 40 | {
 41 |     return ((0x80000000 - (x >> 1)) >> 31) << x;
 42 | }
 43 | 
 44 | /* ----------------------------------------------------------------------- */
 45 | 
 46 | static const struct
 47 | {
 48 |     testfn_t *branchless, *reference;
 49 | }
 50 | tests[] =
 51 | {
 52 |     { &branchless1, &reference },
 53 |     { &branchless2, &reference },
 54 |     { &branchless3, &reference },
 55 |     { &branchless4, &reference },
 56 | };
 57 | 
 58 | /* ----------------------------------------------------------------------- */
 59 | 
 60 | #define NELEMS(a) (int)(sizeof(a) / sizeof(a[0]))
 61 | 
 62 | int main(void)
 63 | {
 64 |     int          j;
 65 |     unsigned int i;
 66 | 
 67 |     for (j = 0; j < NELEMS(tests); j++)
 68 |     {
 69 |         testfn_t *fn, *ref;
 70 |         int       nfailures;
 71 | 
 72 |         printf("starting test %d\n", j);
 73 | 
 74 |         fn  = tests[j].branchless;
 75 |         ref = tests[j].reference;
 76 | 
 77 |         nfailures = 0;
 78 | 
 79 |         /* test all values from zero to UINT_MAX */
 80 |         for (i = 0; ; i++)
 81 |         {
 82 |             if (fn(i) != ref(i))
 83 |                 if (++nfailures < 20) /* report first twenty failures only */
 84 |                     printf("failure at %d\n", i);
 85 | 
 86 |             if ((i & 0x03ffffff) == 0) /* draw 64 dots */
 87 |             {
 88 |                 putc('.', stdout);
 89 |                 fflush(stdout);
 90 |             }
 91 | 
 92 |             if (i == UINT_MAX) /* test here to prevent infinite loop */
 93 |                 break;
 94 |         }
 95 |         printf("\n");
 96 | 
 97 |         if (nfailures == 0)
 98 |             printf("all ok!\n");
 99 |         else
100 |             printf("saw %d failures\n", nfailures);
101 |     }
102 | 
103 |     return 0;
104 | }
105 | 


--------------------------------------------------------------------------------
/simulator.c:
--------------------------------------------------------------------------------
 1 | /* simulator.c */
 2 | 
 3 | /* Collection of simulator routines for the instructions in the isa. */
 4 | 
 5 | #include "config.h"
 6 | 
 7 | #include "simulator.h"
 8 | 
 9 | extern int unacceptable;
10 | 
/* Negate x with 32-bit wraparound.  Done in unsigned arithmetic because
   -x on the most negative int is undefined behaviour in C. */
int neg(int x, int y, int z) {return (int)(0u - (unsigned)x);}
/* One's complement of x; y and z are unused. */
int _not(int x, int y, int z)
{
   /* On a two's-complement machine -1 - x equals ~x and cannot overflow. */
   return -1 - x;
}
/* Population count: number of 1 bits in xx; y and z are unused. */
int pop(int xx, int y, int z) {
   unsigned bits = (unsigned)xx;

   bits -= (bits >> 1) & 0x55555555;                        /* 2-bit sums */
   bits  = (bits & 0x33333333) + ((bits >> 2) & 0x33333333); /* 4-bit sums */
   bits  = (bits + (bits >> 4)) & 0x0F0F0F0F;               /* byte sums  */
   bits += bits << 8;            /* accumulate the byte sums ... */
   bits += bits << 16;           /* ... into the top byte        */
   return bits >> 24;
}
22 | 
/* Number of leading zero bits in xx (32 when xx is 0); y and z unused. */
int nlz(int xx, int y, int z) {
   unsigned v = (unsigned)xx;
   int zeros = 0;
   int step;

   if (v == 0)
      return 32;

   /* Binary search: if the top `step` bits are all clear, count them and
      shift them out. */
   for (step = 16; step >= 1; step >>= 1) {
      if ((v >> (32 - step)) == 0) {
         zeros += step;
         v <<= step;
      }
   }
   return zeros;
}
36 | 
/* Reverse the order of the 32 bits of xi; y and z are unused. */
int rev(int xi, int y, int z) {
   unsigned v = (unsigned)xi;
   unsigned outer, inner;

   /* Swap adjacent bits, then bit pairs, then nibbles. */
   v = ((v & 0x55555555) << 1) | ((v >> 1) & 0x55555555);
   v = ((v & 0x33333333) << 2) | ((v >> 2) & 0x33333333);
   v = ((v & 0x0F0F0F0F) << 4) | ((v >> 4) & 0x0F0F0F0F);

   /* Finally reverse the four bytes. */
   outer = (v << 24) | (v >> 24);
   inner = ((v & 0xFF00) << 8) | ((v >> 8) & 0xFF00);
   v = outer | inner;
   return v;
}
45 | 
/* Reverse the order of the four bytes of xi; y and z are unused. */
int revb(int xi, int y, int z) {
   const unsigned v = (unsigned)xi;
   const unsigned byte0_to_3 = (v & 0x000000FF) << 24;
   const unsigned byte3_to_0 = (v >> 24) & 0x000000FF;
   const unsigned byte1_to_2 = (v & 0x0000FF00) << 8;
   const unsigned byte2_to_1 = (v >> 8) & 0x0000FF00;

   return byte0_to_3 | byte3_to_0 | byte1_to_2 | byte2_to_1;
}
51 | 
52 | 
/* Two-operand arithmetic with 32-bit wraparound; z is unused.  The work is
   done in unsigned arithmetic because signed overflow is undefined
   behaviour in C. */
int add (int x, int y, int z) {return (int)((unsigned)x + (unsigned)y);}
int sub (int x, int y, int z) {return (int)((unsigned)x - (unsigned)y);}
/* Reverse subtract: y - x. */
int rsb (int x, int y, int z) {return (int)((unsigned)y - (unsigned)x);}
int mul (int x, int y, int z) {return (int)((unsigned)x * (unsigned)y);}
57 | /* For division overflow we return arbitrary values, hoping they fail
58 | to be part of a solution.  (User must check solutions, in general.) */
59 | int _div (int x, int y, int z) {
60 |    if (y == 0 || (y == -1 && x == (int)0x80000000))
61 |       {unacceptable = 1; return 0;}
62 |    else return x/y;}
63 | int _divu(int x, int y, int z) {
64 |    if (y == 0) {unacceptable = 1; return 0;}
65 |    else return (unsigned)x/(unsigned)y;}
66 | int _mod (int x, int y, int z) {
67 |    if (y == 0 || (y == -1 && x == (int)0x80000000))
68 |       {unacceptable = 1; return 0;}
69 |    else return x%y;}
70 | int _modu(int x, int y, int z) {
71 |    if (y == 0) {unacceptable = 1; return 0;}
72 |    else return (unsigned)x%(unsigned)y;}
/* Two-operand bitwise operations; z is unused. */
int _and(int x, int y, int z)
{
   return y & x;
}

int _or (int x, int y, int z)
{
   return y | x;
}

int _xor(int x, int y, int z)
{
   return y ^ x;
}

/* Bit clear: keep the bits of x that are not set in y. */
int _bic(int x, int y, int z)
{
   const int keep = ~y;

   return x & keep;
}
77 | int rotl(int x, int y, int z) {int s = y & NBSM;
78 |    return x << s | (unsigned)x >> (32 - s);}
79 | int rotr(int x, int y, int z) {int s = y & NBSM;
80 |    return x << (32 - s) | (unsigned)x >> s;}
81 | int shl(int x, int y, int z) {int s = y & NBSM;
82 |    if (s >= 32) return 0; else return x << s;}
83 | int shr(int x, int y, int z) {int s = y & NBSM;
84 |    if (s >= 32) return 0; else return (unsigned)x >> s;}
85 | int shrs(int x, int y, int z) {int s = y & NBSM;
86 |    if (s >= 32) return x >> 31; else return x >> s;}
/* Comparisons yielding 1 when the relation holds and 0 otherwise; z is
   unused. */
int cmpeq(int x, int y, int z)
{
   return (x == y) ? 1 : 0;
}

int cmplt(int x, int y, int z)
{
   return (x < y) ? 1 : 0;
}

/* Less-than with both operands treated as unsigned. */
int cmpltu(int x, int y, int z)
{
   const unsigned ux = (unsigned)x;
   const unsigned uy = (unsigned)y;

   return (ux < uy) ? 1 : 0;
}
/* Selects: return y when the condition on x holds, otherwise z. */
int seleq(int x, int y, int z)
{
   if (x == 0)
      return y;
   return z;
}

int sellt(int x, int y, int z)
{
   if (x < 0)
      return y;
   return z;
}

int selle(int x, int y, int z)
{
   if (x <= 0)
      return y;
   return z;
}
93 | 
94 | 


--------------------------------------------------------------------------------
/aha.h:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2002 by Henry S. Warren, Jr.
  2 | 
  3 | #include "config.h"
  4 | #include "simulator.h"
  5 | 
  6 | #ifndef NARGS
  7 | #define NARGS 1                 // Number of args in userfun, 1, 2 or 3.
  8 | #endif
  9 | 
 10 | const int debug = 0;            // 0 or 1; debugging printouts if 1.
 11 | const int counters = 1;         // 0 or 1; count number of evaluations.
 12 | 
 13 | /* A note about the registers:
 14 | 
 15 | They are divided into four groups.  The first group, starting with
 16 | register 0, holds ordinary immediate values.  The second group, starting
 17 | with register NIM, holds the shift immediate values.  The next NARGS
 18 | regs (1 to 3) are the arguments to the user-defined function.  The last group
 19 | holds the results of computations done by the trial programs.
 20 | 
 21 | 0        Start of ordinary immediate values (those given by IMMEDS)
 22 | NIM      Start of shift immediate values    (those given by SHIMMEDS)
 23 | RX       First (or only) user function argument
 24 | RY       Second user function argument
 25 | RZ       Third user function argument
 26 | RI0      Result of instruction 0 goes here
 27 | RI0 + i  Result of instruction i goes here
 28 | where:
 29 | NIM   = number of ordinary immediate values
 30 | NSHIM = number of shift immediate values
 31 | */
 32 | 
 33 | static const int trialx[] = TRIAL;
 34 | #if NARGS >= 2
 35 | static const int trialy[] = TRIAL;
 36 | #endif
 37 | #if NARGS >= 3
 38 | static const int trialz[] = TRIAL;
 39 | #endif
 40 | 
 41 | int dummy1[] = {IMMEDS};        // These get optimized out of existence.
 42 | int dummy2[] = {SHIMMEDS};
 43 | 
 44 | #define NELEMS(a) (int)(sizeof(a)/sizeof(a[0]))
 45 | #define NIM NELEMS(dummy1)
 46 | #define NSHIM NELEMS(dummy2)
 47 | #define RX (NIM + NSHIM)        // First (or only) user function argument
 48 | #define RY (RX + 1)             // Second user function argument
 49 | #define RZ (RY + 1)             // Third user function argument
 50 | #define RI0 (RX + NARGS)        // Result of instruction 0 goes here
 51 | 
 52 | int unacceptable;               // Code below sets this to 1 for an
 53 |                                 // unacceptable operation, such as
 54 |                                 // divide by 0.  It is initially 0.
 55 | 
 56 | // Note: Commutative ops are commutative in operands 0 and 1.
// One entry of the instruction-set table (the isa[] array in machine.h).
typedef struct isa_t
{
   simproc *proc;                // Procedure for simulating the op.
   int  numopnds;               // Number of operands, 1 to 3.
   int  commutative;            // 1 if opnds 0 and 1 commutative.
   int  opndstart[3];           // Starting reg no. for each operand.
   const char *mnemonic;              // Name of op, for printing.
   const char *fun_name;              // Function name, for printing
                                      // (text emitted before the operands,
                                      // e.g. "(" or "pop(" in machine.h).
   const char *op_name;               // Operator name, for printing
                                      // (e.g. " + " in machine.h).
}
isa_t;
 68 | 
 69 | #include "machine.h"
 70 | 
 71 | /* ------------------- End of user-setup Portion -------------------- */
 72 | 
 73 | #define MAXNUMI 5               // Max num of insns that can be tried.
 74 | #if NARGS == 1
 75 | int userfun(int);
 76 | #elif NARGS == 2
 77 | int userfun(int, int);
 78 | #elif NARGS == 3
 79 | int userfun(int, int, int);
 80 | #else
 81 | #error Invalid NARGS
 82 | #endif
 83 | 
 84 | #define NTRIALX NELEMS(trialx)
 85 | #define NTRIALY NELEMS(trialy)
 86 | #define NTRIALZ NELEMS(trialz)
 87 | 
 88 | #if NARGS == 1
 89 |    int correct_result[NTRIALX];
 90 | #elif NARGS == 2
 91 |    int correct_result[NTRIALX][NTRIALY];
 92 | #elif NARGS == 3
 93 |    int correct_result[NTRIALX][NTRIALY][NTRIALZ];
 94 | #endif
 95 | 
 96 | int corr_result;                // Correct result for current trial.
 97 | 
 98 | #define NUM_INSNS_IN_ISA NELEMS(isa)
 99 | 
struct {                        // The current program: one entry per
                                // instruction, up to MAXNUMI of them.
   int op;                      // Index into isa.
   int opnd[3];                 // Operands of op.  Register numbers
                                // except if negative, it's the negative
                                // of a shift amount.
                                // NOTE(review): read.me (9/24/02) says all
                                // immediates now live in registers, so the
                                // negative case may be obsolete - confirm.
} pgm[MAXNUMI];
106 | 
107 | int numi;                       // Current size of the trial programs,
108 |                                 // must be from 1 to MAXNUMI.
109 | 
110 | /* GPR array:  First NIM slots hold ordinary immediate values (IMMEDS),
111 | next NSHIM slots hold shift immediate values (SHIMMEDS), next NARGS
112 | slots hold the arguments x and, optionally, y and z, and the last numi slots
113 | hold the result of instructions 0 through numi - 1. */
114 | 
115 | int r[NIM + NSHIM + NARGS + MAXNUMI] = {IMMEDS, SHIMMEDS};
116 | unsigned long long counter[MAXNUMI];    // Count num times insn at level i is evaluated.
117 | 


--------------------------------------------------------------------------------
/read.me:
--------------------------------------------------------------------------------
  1 |                         Brief Usage Description
  2 | 
  3 | Create a header file that describes your problem and your machine. There
  4 | are two examples provided: abs.h for a function of one variable, and
  5 | avg.h for a function of two variables. The first solves the problem of
  6 | how to compute the absolute value function on a machine that does not
  7 | have that instruction. The second finds the Dietz formula for computing
  8 | the "floor average" of two unsigned integers without causing overflow.
  9 | 
 10 | Modify either abs.h or avg.h to fit your problem and save the file under
 11 | a name of your choice (with file extension .h). Let us assume the file
 12 | is named "mine.h".
 13 | 
 14 | Make the executable file by entering, on Windows, "make mine". This
 15 | creates file "mine.exe".
 16 | 
 17 | Execute it by entering "mine n", where n is the number of instructions
 18 | for which you want to find a solution, generally 1, 2, 3, or 4.
 19 | 
 20 | The solutions found will be displayed and also placed in file
 21 | "mine.out".
 22 | 
 23 | See aha.pdf for a complete writeup.
 24 | 
 25 | 
 26 |                         History of Improvements
 27 | 
 28 | Changing from calculating the correct answer for each new program to
 29 | calculating them in advance and storing in a table, reduced the
 30 | execution time by about 2.7%.
 31 | 
 32 | An 8% improvement resulted from adding the "commutative" bit to the five
 33 | commutative operations (add, mul, and, or, xor).  Perhaps more
 34 | importantly, it reduced the printout of essentially duplicate solutions.
 35 | 
 36 | An improvement by a factor of 2.58 (25.3/9.8) resulted from ensuring
 37 | that the last register operand of the last instruction, when this
 38 | instruction is created, refers to the result of the immediately
 39 | preceding instruction.
 40 | 
 41 | Continued the above idea for other register operands, i.e., ensured that
 42 | SOME operand of the last instruction always refers to the result of the
 43 | immediately preceding instruction.  Got an improvement by a factor of
 44 | 1.04 (9.8/9.4).
 45 | 
 46 | 3/16/02:  Got a factor of 1.85 by having it simulate the program only
 47 | from the last changed instruction to the end, which means that usually
 48 | only the last instruction is simulated.  Also, changed the trial
 49 | value(s) so they "stick" at the last failed one(s).  When a trial value
 50 | is changed, which happens after a success, the whole program must be
 51 | simulated.
 52 | 
 53 | 3/16/02:  Got a factor of 1.010 by moving the assignment to
 54 | computed_result inside the loop just ahead of where it was.  (The loop
 55 | is usually executed only once.)
 56 | 
 57 | 3/16/02:  Got a factor of 1.020 by computing corr_res only when sticky_i
 58 | and/or sticky_j change.
 59 | 
 60 | 3/17/02:  Tried making "numi" a constant defined with #define.  Got a 5%
 61 | improvement.  Decided not to do this.
 62 | 
 63 | 3/19/02:  Got a factor of 1.166 by inlining "increment."
 64 | 
 65 | 3/23/02:  Took 1614 secs (26.9 min) to search with numi = 4.
 66 | 
 67 | 9/19/02:  Got a factor of 1.131 by requiring that immediate values be in
 68 | the order 0, -1, 1, ... and using isa.opndstart[3] to avoid certain
 69 | silly cases like ADD of 0, ADD of -1 (we do a subtract of 1), AND of 0
 70 | or -1, etc.  This was made kind of necessary because the compare ops
 71 | should have an immediate value of 0 as a possibility, whereas for most
 72 | other ops, immediate 0 would never be used.
 73 | 
 74 | 9/22/02:  Changed shift immediate amounts to be given in an array
 75 | (shimmed), so that fewer than 31 values can be specified.  This gave no
 76 | change to execution time if all 31 are specified (1.222 secs for
 77 | absolute value problem on a basic RISC, running on my 1.8 GHz Thinkpad).
 78 | If only 4 values are specified, e.g. 1, 2, 30, and 31, the execution
 79 | dropped to 0.450 secs, a factor of 2.71 improvement.
 80 |    I don't quite understand this, because the number of evaluations of
 81 | the third instruction reduced from 14.2 million to 2.74 million, a
 82 | factor of 5.18.
 83 |    It's partly explained by the program load time.  If you run aha with
 84 | an argument of 1, it takes 0.140 secs.  Thus the time to start and end
 85 | the program is about that amount.  So the ratio of actual execution
 86 | time is (1.222 - 0.140)/(0.450 - 0.140) = 3.49.  Closer to 5.18, but not
 87 | very close.
 88 | 
 89 | 9/24/02:  Put ALL immediates (both ordinary and shift amounts) in the
 90 | registers.  This did not affect the execution time (if compiled -O2),
 91 | but it allowed deleting the operand type info in the isa table, and
 92 | simplified the code a little (by 77 lines).
 93 |    Execution time for the standard run is now 0.591 secs on my 667 MHz
 94 | machine (compiled -O2, which I guess I'll use from now on).
 95 | 
 96 | 9/25/02:  Made Aha! measure and print its own execution time, using
 97 | clock().  I believe this is user + system time for the Aha! process,
 98 | rather than wall clock time.  Found that -O2 and -O3 make no difference;
 99 | the assembly language files search.s and check.s are identical.  Am
100 | using -O2.  The standard job runs in from 0.520 to 0.540 seconds process
101 | time on my 667 MHz office machine.
102 |    The number of instruction evaluations is 62248 + 82618 + 2743328 (for
103 | the first, second, and third instruction resp.), or 2888194 total.  This
104 | corresponds to 122 cycles per evaluation.
105 | 
106 | 9/30/02:  Before today, the program consisted of three .c files:  aha,
107 | search, and check.  Made it all one file (aha.c), mainly because of
108 | problems with C in defining a preset array of values and not requiring
109 | the user to also set a variable equal to the number of values in the
110 | array.  No change to execution time.  Build (mainly compilation) time
111 | dropped from about 2.0 secs to 1.2 secs.  This change also permits
112 | inlining fix_operands, but trying that did not change execution time
113 | measurably (so it is not inlined now).
114 | 
115 | 10/14/02:  Before today, the incrementing of instructions was done with
116 | the rightmost operand varying most rapidly.  Today it was changed so
117 | that the leftmost operand varies most rapidly.  This simplifies the
118 | handling of commutative ops, and permits a few other minor
119 | simplifications.
120 |    This gave a factor of 1.05 improvement in execution time.  Quite
121 | minor, but the program is a little simpler and I think it will simplify
122 | more complicated optimizations that may be done, such as (somehow)
123 | avoiding programs that have an instruction whose result is unused.
124 |    A preliminary investigation of this shows that for a typical RISC
125 | instruction set, 39% of three-instruction programs have an unused
126 | result, and 70% of four-instruction programs have an unused result.
127 | This is compared to the present program, which ensures only that the
128 | second from last computed result does not go unused.  Thus there is hay
129 | to be made here.
130 |    An attempt to skip ALL these silly programs resulted in a net
131 | increase in execution time, because it was implemented inefficiently.
132 | It seems to be hard to devise an efficient way to do this.  Some
133 | compromise might be practical, such as ensuring only that the second and
134 | third from last results are not both unused.
135 | 
136 | 10/15/02:  Changed the program as just mentioned, i.e., to ensure that
137 | instruction n (the last) uses the result of instruction n-1 and, if
138 | instruction n-1 does not use the result of instruction n-2, then the
139 | last instruction does.  This improved execution time by a factor of 1.4
140 | for three-instruction programs, and a factor of 1.8 for four-instruction
141 | programs.
142 | 
143 | 4/22/03: Ran Aha! on a two-input problem with n = 5 and 17 instructions
144 | enabled. Was searching for 5-instruction programs to compute the average
145 | of two signed integers (without overflowing). Shut it off after 144
146 | hours (6 days). I should make it display a "progress report" for such
147 | long jobs, such as printing out the first instruction in the list each
148 | time a new opcode is selected for it. Otherwise, you don't know if it
149 | somehow got into an infinite loop and you have no idea how long the run
150 | will take.
151 | 
152 | 2/25/11: Incorporated a correction to the printb routine from Greg
153 | Parker, which makes it run on the 64-bit Mac OS X (and probably other
154 | machines).
155 | 


--------------------------------------------------------------------------------
/aha.c:
--------------------------------------------------------------------------------
  1 |                        /* A Hacker's Assistant */
  2 | 
  3 | // Copyright (C) 2002 by Henry S. Warren, Jr.
  4 | #include <stdarg.h>             // Used by printb (print both).
  5 | #include <stdlib.h>
  6 | #include <stdio.h>
  7 | #include <time.h>
  8 | #include INC
  9 | 
 10 | #define INLINE static inline    // Used on simulate_one_instruction and increment.
 11 | 
 12 | FILE *ofile;                    // Output file: opened in main, written via printb and check.
 13 | 
 14 | // ----------------------------- printb --------------------------------
 15 | 
 16 | void
 17 | printb(int pr, const char *format, ...)
 18 | {
 19 |    /* "Print both": sends the printf-style message to the display if
 20 |    bit 0 of pr is set, and to the current output file if bit 1 is set.
 21 |    Each destination gets its own va_start/va_end pair.
 22 |       Thanks to Greg Parker for this version, which he verified works on
 23 |    64-bit Mac OS X. The previous version misused va_list. */
 24 | 
 25 |    va_list args;
 26 | 
 27 |    if ((pr & 1) != 0) {                 // Display.
 28 |       va_start(args, format);
 29 |       vprintf(format, args);
 30 |       va_end(args);
 31 |    }
 32 |    if ((pr & 2) != 0) {                 // Output file.
 33 |       va_start(args, format);
 34 |       vfprintf(ofile, format, args);
 35 |       va_end(args);
 36 |    }
 37 | }
 38 | 
 39 | // --------------------------- print_expr ------------------------------
 40 | 
 41 | void
 42 | print_expr(int pr, int opn)
 43 | {
 44 |    /* Prints operand register opn as a (possibly nested) expression. */
 45 |    int insn, op, n, last;
 46 | 
 47 |    if (opn < RX) {              // Immediate: hex unless within +/-31.
 48 |       if (r[opn] < -31 || r[opn] > 31) printb(pr, "0x%X", r[opn]);
 49 |       else                             printb(pr, "%d", r[opn]);
 50 |       return;
 51 |    }
 52 |    if (opn == RX) {printb(pr, "x"); return;}    // First argument.
 53 | #if NARGS >= 2
 54 |    if (opn == RY) {printb(pr, "y"); return;}    // Second argument.
 55 | #endif
 56 | #if NARGS >= 3
 57 |    if (opn == RZ) {printb(pr, "z"); return;}    // Third argument.
 58 | #endif
 59 |    insn = opn - RI0;            // Otherwise opn is an insn's result:
 60 |    op = pgm[insn].op;           // print fun_name, then the operands
 61 |    printb(pr, "%s", isa[op].fun_name);  // separated by op_name, then ")".
 62 |    for (n = 0, last = isa[op].numopnds - 1; n <= last; n++) {
 63 |       print_expr(pr, pgm[insn].opnd[n]);
 64 |       if (n == last) printb(pr, ")");
 65 |       else           printb(pr, "%s", isa[op].op_name);
 66 |    }
 67 | }
 68 | 
 69 | // --------------------------- print_pgm -------------------------------
 70 | 
 71 | void
 72 | print_pgm(int pr)
 73 | {
 74 |    /* Prints the current program, one instruction per line, and then as
 75 |    an expression.  Registers 0 to NIM-1 hold the ordinary immediates and
 76 |    NIM to RX-1 the shift immediates, the split "increment" also uses. */
 77 |    int i, j, k, opndj;
 78 | 
 79 |    for (i = 0; i < numi; i++) {
 80 |       k = pgm[i].op;
 81 |       printb(pr, "   %-5s r%d,", isa[k].mnemonic, i + 1);
 82 |       for (j = 0; j < isa[k].numopnds; j++) {
 83 |          opndj = pgm[i].opnd[j];
 84 |          if (opndj < NIM) {             // Ordinary immediate.
 85 |             opndj = r[opndj];
 86 |             if (opndj >= -31 && opndj <= 31) printb(pr, "%d", opndj);
 87 |             else printb(pr, "0x%X", opndj);
 88 |          }
 89 |          else if (opndj < RX) {         // Shift immediate.
 90 |             opndj = r[opndj];
 91 |             printb(pr, "#%d", opndj);
 92 |          }
 93 |          else if (opndj == RX) printb(pr, "rx");
 94 | #if NARGS >= 2
 95 |          else if (opndj == RY) printb(pr, "ry");
 96 | #endif
 97 | #if NARGS >= 3
 98 |          else if (opndj == RZ) printb(pr, "rz");
 99 | #endif
100 |          else printb(pr, "r%d", opndj - RI0 + 1);       // Insn result.
101 |          if (j < isa[k].numopnds - 1) printb(pr, ",");
102 |       }
103 |       if (debug)                // Also show the value in the result reg.
104 |          printb(pr, "     ==> %d (0x%X)\n", r[i+RI0], r[i+RI0]);
105 |       else printb(pr, "\n");
106 |    } // end for i
107 | 
108 |    printb(pr, "   Expr: ");     // Now print the program as an expression.
109 |    print_expr(pr, numi - 1 + RI0);
110 |    printb(pr, "\n");
111 | }
112 | 
113 | // -------------------- simulate_one_instruction -----------------------
114 | 
115 | INLINE void
116 | simulate_one_instruction(int i)
117 | {
118 |    /* Executes instruction i of the current program: calls the routine
119 |    for its opcode on the contents of its three operand registers and
120 |    stores the result in register i + RI0.  All three operand slots are
121 |    read regardless of how many the opcode actually uses. */
122 | 
123 |    r[i + RI0] = (*isa[pgm[i].op].proc)(r[pgm[i].opnd[0]],
124 |                                        r[pgm[i].opnd[1]],
125 |                                        r[pgm[i].opnd[2]]);
126 |    if (counters) counter[i]++;  // Tally kept only when counters is set.
127 | }
128 | 
129 | // ----------------------------- check ---------------------------------
130 | 
131 | int
132 | check(int i)
133 | {
134 |    /* Tests the current program.  Returns 1 if it computes the correct
135 |    result for every combination of trial values, otherwise 0.
136 |       The first test reuses the trial values and corr_result left by
137 |    the previous call (the trial indexes are static), and simulates only
138 |    instructions i to numi-1, the earlier ones being unchanged.
139 |       The other combinations are then visited by stepping the trial
140 |    indexes like an odometer, each wrapping to 0.  (Nested loops of
141 |    NTRIALX-1, NTRIALY-1, ... steps would skip combinations when
142 |    NARGS >= 2.) */
143 | 
144 |    static int itrialx;          // Init 0.
145 | #if NARGS >= 2
146 |    static int itrialy;
147 | #endif
148 | #if NARGS >= 3
149 |    static int itrialz;
150 | #endif
151 |    int ncombos, n, carry;
152 | 
153 |    if (debug) {
154 | #if NARGS == 1
155 |       fprintf(ofile, "\nSimulating with trial arg x = %d (0x%X):\n",
156 |          r[RX],r[RX]);
157 | #elif NARGS == 2
158 |       fprintf(ofile, "\nSimulating with (x, y) = (%d, %d) ((0x%X, 0x%X)):\n",
159 |          r[RX], r[RY], r[RX], r[RY]);
160 | #elif NARGS == 3
161 |       fprintf(ofile, "\nSimulating with (x, y, z) = (%d, %d, %d) ((0x%X, 0x%X, 0x%X)):\n",
162 |          r[RX], r[RY], r[RZ], r[RX], r[RY], r[RZ]);
163 | #endif
164 |    }
165 |    do {                                 // Simulate i'th insn,
166 |       simulate_one_instruction(i);      // and more if req'd.
167 |    } while (++i < numi);
168 |    if (unacceptable) {          // E.g., if divide by 0:
169 |       if (debug) printb(2, "Unacceptable program (invalid operation).\n");
170 |       unacceptable = 0;
171 |       return 0;
172 |    }
173 |    if (debug) {
174 |       print_pgm(2);
175 |       fprintf(ofile, "Computed result = %d, correct result = %d, %s\n",
176 |       r[numi-1+RI0], corr_result, r[numi-1+RI0] == corr_result ? "ok" : "fail");
177 |    }
178 |    if (r[numi-1+RI0] != corr_result)    // If not the correct
179 |       return 0;                         // result, failure.
180 | 
181 |    // Got the correct result.  Now try every other combination.
182 |    ncombos = NTRIALX;
183 | #if NARGS >= 2
184 |    ncombos *= NTRIALY;
185 | #endif
186 | #if NARGS >= 3
187 |    ncombos *= NTRIALZ;
188 | #endif
189 |    for (n = 1; n < ncombos; n++) {
190 |       carry = 1;                // Step the indexes, last argument fastest.
191 | #if NARGS >= 3
192 |       itrialz += carry;
193 |       carry = (itrialz >= NTRIALZ);
194 |       if (carry) itrialz = 0;
195 | #endif
196 | #if NARGS >= 2
197 |       itrialy += carry;
198 |       carry = (itrialy >= NTRIALY);
199 |       if (carry) itrialy = 0;
200 | #endif
201 |       itrialx += carry;
202 |       if (itrialx >= NTRIALX) itrialx = 0;
203 | #if NARGS == 1
204 |       r[RX] = trialx[itrialx];
205 |       corr_result = correct_result[itrialx];
206 | #elif NARGS == 2
207 |       r[RX] = trialx[itrialx];
208 |       r[RY] = trialy[itrialy];
209 |       corr_result = correct_result[itrialx][itrialy];
210 | #elif NARGS == 3
211 |       r[RX] = trialx[itrialx];
212 |       r[RY] = trialy[itrialy];
213 |       r[RZ] = trialz[itrialz];
214 |       corr_result = correct_result[itrialx][itrialy][itrialz];
215 | #endif
216 |       // Simulate the whole program; insn i's result goes in r[i + RI0].
217 |       if (debug) {
218 | #if NARGS == 1
219 |          fprintf(ofile, "\nContinuing this pgm with arg x = %d (0x%X):\n",
220 |             r[RX], r[RX]);
221 | #elif NARGS == 2
222 |          fprintf(ofile, "\nContinuing this pgm with (x, y) = (%d, %d) ((0x%X, 0x%X)):\n",
223 |             r[RX], r[RY], r[RX], r[RY]);
224 | #elif NARGS == 3
225 |          fprintf(ofile, "\nContinuing this pgm with (x, y, z) = (%d, %d, %d) ((0x%X, 0x%X, 0x%X)):\n",
226 |             r[RX], r[RY], r[RZ], r[RX], r[RY], r[RZ]);
227 | #endif
228 |       }
229 |       for (i = 0; i < numi; i++) {      // Simulate program from
230 |          simulate_one_instruction(i);   // beginning to end.
231 |       }
232 |       if (unacceptable) {unacceptable = 0; return 0;}
233 |       if (debug) {
234 |          print_pgm(2);
235 |          fprintf(ofile, "Computed result = %d, correct result = %d, %s\n",
236 |          r[numi+RI0-1], corr_result, r[numi+RI0-1] == corr_result ? "ok" : "fail");
237 |       }
238 |       if (r[numi+RI0-1] != corr_result) return 0;
239 |    }  // end for n
240 |    return 1;                    // Passed all tests: probably correct.
241 | }
242 | 
243 | // -------------------------- fix_operands -----------------------------
244 | 
245 | void
246 | fix_operands(int i)
247 | {
248 | 
249 | /* This routine adjusts instruction i of pgm, in place, so that:
250 | 
251 | (1) if it is the last instruction, at least one operand uses the
252 |     result of the immediately preceding instruction, and furthermore if
253 |     the second from last instruction does not use the result of its
254 |     predecessor, then the last instruction must use that result also,
255 | (2) not all operands are immediate values (we assume it would be
256 |     a waste of time to process an instruction with all immediate
257 |     operands), and
258 | (3) if it is commutative, operand 0 >= operand 1.
259 | 
260 |    It does these fixes by "increasing" the instruction by a minimal
261 | amount, so that the incrementing of instructions is kept in order and no
262 | legitimate instructions are skipped.
263 |    A hard part to understand is the logic of (1) above.  Let us assume
264 | for illustration that the program has four instructions (numi = 4).
265 | Then when this subroutine is called to process the last instruction (i =
266 | numi - 1), the operands may be in any of the configurations shown below.
267 | The last instruction sets r4, the second from last instruction sets r3,
268 | and its predecessor sets r2.  ii denotes a register containing an
269 | immediate value, or a register <= RY; in particular ii < r2.  We assume
270 | the last instruction ("op") has three input operands, as that is the
271 | more difficult case, and that the second from last instruction does not
272 | use r2.  Therefore the last instruction must be altered so that it uses
273 | both r2 and r3.
274 | 
275 | operand:        0  1  2                0  1  2
276 |          op r4,ii,ii,ii   ==>   op r4,r3,r2,ii   Add r2 and r3.
277 |          op r4,ii,r2,ii   ==>   op r4,r3,r2,ii   Add r3.
278 |          op r4,ii,r3,ii   ==>   op r4,r2,r3,ii   Add r2.
279 |          op r4,ii,ii,r2   ==>   op r4,r3,ii,r2   Add r3.
280 |          op r4,ii,r2,r2   ==>   op r4,r3,r2,r2   Add r3.
281 |          op r4,ii,r3,r2   ==>     no change
282 |          op r4,ii,ii,r3   ==>   op r4,r2,ii,r3   Add r2.
283 |          op r4,ii,r2,r3   ==>     no change
284 |          op r4,ii,r3,r3   ==>   op r4,r2,r3,r3   Add r2.
285 | 
286 |    These are the only possibilities.  The first input operand cannot be
287 | r2 or r3, because if it were, then it must have just been incremented
288 | from r1 or r2 resp., and in this case "increment" does not call
289 | "fix_operands."
290 |    The first row above means that if none of the last instruction's
291 | operands are r2 or r3, then the change that adds r2 and r3 and that
292 | "minimizes" the resulting instruction is to change operand 0 to r3 and
293 | operand 1 to r2.  The second row shows a case in which r2 is already
294 | present, but r3 is not.  The minimal change is to change operand 0 to r3.
295 |    Examination of all the possibilities reveals that a workable simple
296 | rule is:
297 |    (1) If r3 is not used, then change operand 0 to be r3.
298 |    (2) Then, if r2 is not used, change operand 0 to r2 unless that
299 |        decreases the instruction, in which case change operand 1 to r2.
300 |    These rules are coded in the block headed by "if (i == numi - 1)".
301 | It might seem that the program should test that pgm[i].opnd[0] is not
302 | equal to rs or rt; however, as noted above operand 0 is never equal
303 | to those registers at this point.
304 |    This scheme is sufficient to ensure that if numi = 3, no trial
305 | program has an unused computed value.  If numi = 4, a small percentage
306 | of trial programs will have an unused computed value.  Incorporation
307 | of the r2 part of it improved the execution time by about a factor of
308 | 1.4 if numi = 3, and a factor of 1.8 if numi = 4.  If numi = 5, there
309 | is probably a substantial percentage of trial programs with one or
310 | more unused computed values; it hasn't been tried. */
311 | 
312 |    int rs, rt, k;
313 | 
314 |    k = pgm[i].op;
315 | 
316 |    if (i == numi - 1) {         // If this is the last insn:
317 |       rs = numi + RI0 - 2;      // Second from last reg.
318 |       if (pgm[i].opnd[1] != rs && pgm[i].opnd[2] != rs) {
319 |          pgm[i].opnd[0] = rs;
320 |       }
321 |       rt = rs - 1;              // Third from last reg.
322 |       if (rt >= RI0 && pgm[i-1].opnd[0] != rt && pgm[i-1].opnd[1] != rt &&
323 |          pgm[i-1].opnd[2] != rt && pgm[i].opnd[1] != rt &&
324 |          pgm[i].opnd[2] != rt) {
325 | 
326 |          // The last instruction needs to reference rt.  (rt >= RI0 is
327 |          // tested first so that pgm[i-1] is not read when numi is 1.)
328 |          if (pgm[i].opnd[0] < rt) pgm[i].opnd[0] = rt;
329 |          else if (isa[k].numopnds > 1) pgm[i].opnd[1] = rt;
330 | 
331 |          // else (unary op), forget it.
332 |       }
333 |    }
334 | 
335 |    if (isa[k].commutative) {
336 |       if (pgm[i].opnd[0] < pgm[i].opnd[1])
337 |          pgm[i].opnd[0] = pgm[i].opnd[1];
338 |       return;                   // No need to do next check, as opnd[0]
339 |    }                            // is always a reg containing a variable.
340 | 
341 |    if (i != numi - 1) {
342 |       if (pgm[i].opnd[0] < RX && pgm[i].opnd[1] < RX &&
343 |          pgm[i].opnd[2] < RX) {
344 |          if (isa[k].commutative) abort();  // Cannot happen: returned above.
345 |          pgm[i].opnd[0] = RX;
346 |       }
347 |    }
348 | }
349 | 
350 | // --------------------------- increment -------------------------------
351 | 
352 | INLINE int
353 | increment(void)
354 | {
355 | 
356 |    /* This routine "increments" the instruction list (pgm), in a manner
357 |    similar to counting.  The instruction list changes basically
358 |    like this:
359 | 
360 |       i0  r0,r0       i0  r0,r0       i0  r0,r0       i0  r0,r0
361 |       i0  r0,r0  ==>  i0  r0,r0  ==>  i0  r0,r0  ==>  i0  r0,r0  etc.
362 |       i0  r0,r0       i0  r1,r0       i0  r2,r0       i0  r0,r1
363 | 
364 |    The bottom left operand is tested.  If it has not reached its
365 |    maximum value, it is incremented.  If it has reached its maximum
366 |    value, it is reset to its starting value and the operand to its right
367 |    is incremented if possible.  If all operands have reached their
368 |    maxima, the last instruction is replaced with the next instruction
369 |    in the isa list, if possible, etc.
370 |       The returned value is the lowest index i of the instructions
371 |    modified, or -1 if the instruction list cannot be incremented anymore
372 |    ("done").
373 |       As far as incrementing goes, there are only three types of operands:
374 | 
375 |    1. Goes through the ordinary immediate values, skips the shift
376 |       immediate values, and then goes through the registers.
377 |    2. Goes through the shift immediate values followed by the registers.
378 |    3. Goes through the registers only.
379 | 
380 |    Which range an operand is in can be determined by its register number
381 |    alone, so we don't need operand types in the ISA.  However, opnd[0]
382 |    of a commutative op is an exception in that it doesn't go through
383 |    all the register values; it skips register values for which it is
384 |    less than opnd[1].
385 |       There's no doubt a faster way to program this, maybe by using
386 |    some fairly large tables. */
387 | 
388 |    int i, j, k, opndj, nopnds;
389 | 
390 |    for (i = numi - 1; i >= 0; i--) {    // Last insn varies most rapidly.
391 |       k = pgm[i].op;
392 |       nopnds = isa[k].numopnds;
393 |       for (j = 0; j < nopnds; j++) {    // Leftmost operand first.
394 |          opndj = pgm[i].opnd[j];
395 | 
396 |          if (opndj < NIM - 1) {         // If ordinary imm. and not last,
397 |             pgm[i].opnd[j] += 1;        // increment the operand.
398 |             break;
399 |          }
400 |          else if (opndj == NIM - 1) {   // If last ordinary imm. operand,
401 |             pgm[i].opnd[j] = RX;        // skip to first register.
402 |             break;
403 |          }
404 |          else if (opndj < i + RI0 - 1) {// If shift imm. or reg and not
405 |             pgm[i].opnd[j] += 1;        // last, increment the operand.
406 |             break;
407 |          }
408 |                                         // At the end for opnd j (it is
409 |          pgm[i].opnd[j] = isa[k].opndstart[j];       // >= i + RI0 - 1).
410 |                                         // Reset it and go on to increment
411 |                                         // the next operand to its right.
412 |       } // end for j
413 | 
414 |       if (j == 0)                       // If we just incremented the
415 |          return i;                      // leftmost operand, return; the
416 |                                         // following check is not necessary.
417 |       if (j < nopnds) {
418 | 
419 |          /* We just incremented some operand other than the leftmost,
420 |          which means we reset one or more operands.  Must ensure that if
421 |          the instruction is commutative then opnd[0] >= opnd[1], that
422 |          the operands are not all immediate values, and if this is the
423 |          last instruction, that at least one operand refers to the
424 |          second from last instruction and possibly to the instruction
425 |          before that.  */
426 | 
427 |          fix_operands(i);
428 |          return i;
429 |       }
430 | 
431 |       /* Have gone through all of insn i's opnds (j == nopnds here).
432 |       Increment the instruction itself (if possible). */
433 | 
434 |       if (k < NUM_INSNS_IN_ISA - 1) {
435 |          k = k + 1;             // Increment to next isa instruction.
436 |          pgm[i].op = k;
437 |          pgm[i].opnd[0] = isa[k].opndstart[0];
438 |          pgm[i].opnd[1] = isa[k].opndstart[1];
439 |          pgm[i].opnd[2] = isa[k].opndstart[2];
440 | 
441 |          fix_operands(i);
442 |          return i;
443 |       }
444 | 
445 |       /* Cannot increment to next isa insn.  Reset it to the first
446 |       isa insn and look at next insn down in the program.  Furthermore,
447 |       if the insn being reset is the last insn in the program, make
448 |       its first opnd pick up the previous insn's result (fix_operands). */
449 | 
450 |       pgm[i].op = 0;                    // Index first insn in isa.
451 |       pgm[i].opnd[0] = isa[0].opndstart[0];
452 |       pgm[i].opnd[1] = isa[0].opndstart[1];
453 |       pgm[i].opnd[2] = isa[0].opndstart[2];
454 | 
455 |       fix_operands(i);                  // Then carry into insn i - 1.
456 |    } // end for i
457 |    return -1;                   // Return "done" indication.
458 | }
459 | 
460 | // ----------------------------- search --------------------------------
461 | 
462 | int
463 | search(void)
464 | {
465 |    /* Checks the current program and each program "increment" produces
466 |    after it, printing every one that passes "check".  Returns the
467 |    number of programs that passed. */
468 |    int first, found = 0;
469 | 
470 |    // "check" begins with the trial values already in place: preset them.
471 | #if NARGS == 1
472 |    corr_result = correct_result[0];
473 |    r[RX] = trialx[0];
474 | #elif NARGS == 2
475 |    corr_result = correct_result[0][0];
476 |    r[RX] = trialx[0];
477 |    r[RY] = trialy[0];
478 | #elif NARGS == 3
479 |    corr_result = correct_result[0][0][0];
480 |    r[RX] = trialx[0];
481 |    r[RY] = trialy[0];
482 |    r[RZ] = trialz[0];
483 | #endif
484 | 
485 |    // "first" is the lowest insn changed by the latest increment.
486 |    for (first = 0; first >= 0; first = increment()) {
487 |       if (!check(first)) continue;      // Simulate from "first" on.
488 |       found++;
489 |       printb(3, "\nFound a %d-operation program:\n", numi);
490 |       print_pgm(3);
491 |    }
492 |    return found;
493 | }
494 | 
495 | // -------------------------- Main Program -----------------------------
496 | 
497 | int
498 | main(int argc, char *argv[])
499 | {
500 |    /* Usage: <program> n.  Searches for programs of n operations that
501 |    match userfun on the trial values, reporting each on the display and
502 |    in file OFILE.  Returns 0 normally (also after printing the usage
503 |    message), or 1 for a bad argument or an output file error. */
504 |    int i, j, k, num_sol;
505 |    long nreq;           // Requested number of operations.
506 |    clock_t t_start, t_finish;
507 |    char *end_num;       // End of number, set by strtol.
508 | 
509 |    if (argc != 2 || *argv[1] == '?') goto tell;
510 | 
511 |    /* Parse and range-check the count as a long before narrowing it to
512 |    numi, so that a huge value cannot wrap around to a valid one. */
513 |    nreq = strtol(argv[1], &end_num, 0);
514 |    if (*end_num != '\0') {
515 |       fprintf(stderr, "Invalid first argument, must be a decimal integer.\n");
516 |       return 1;
517 |    }
518 |    if (nreq < 1 || nreq > MAXNUMI) {
519 |       fprintf(stderr, "Number of insns must be from 1 to %d.\n", MAXNUMI);
520 |       return 1;
521 |    }
522 |    numi = (int)nreq;
523 | 
524 |    ofile = fopen(OFILE, "w");
525 |    if (ofile == NULL) {
526 |       fprintf(stderr, "Could not open file %s for output.\n", OFILE);
527 |       return 1;
528 |    }
529 |    printb(3, "Searching for programs with %d operations.\n", numi);
530 |    t_start = clock();
531 | 
532 |    // Compute all the correct answers and save them in an array.
533 |    for (i = 0; i < NTRIALX; i++) {
534 | #if NARGS == 1
535 |       correct_result[i] = userfun(trialx[i]);
536 | #elif NARGS == 2
537 |       for (j = 0; j < NTRIALY; j++)
538 |          correct_result[i][j] = userfun(trialx[i], trialy[j]);
539 | #elif NARGS == 3
540 |       for (j = 0; j < NTRIALY; j++)
541 |          for (k = 0; k < NTRIALZ; k++)
542 |             correct_result[i][j][k] = userfun(trialx[i], trialy[j], trialz[k]);
543 | #endif
544 |    }
545 | 
546 |    /* Preload the instruction array with the first instruction and
547 |    the lowest register number, with copies of this instruction
548 |    filling the whole array from 0 to numi - 1. */
549 |    for (i = 0; i < numi; i++) {
550 |       pgm[i].op = 0;                    // Index first insn in isa.
551 |       pgm[i].opnd[0] = isa[0].opndstart[0];
552 |       pgm[i].opnd[1] = isa[0].opndstart[1];
553 |       pgm[i].opnd[2] = isa[0].opndstart[2];
554 |       fix_operands(i);                  // Not all immediates, etc.
555 |    }
556 | 
557 |    num_sol = search();       // Check the above program, generate
558 |                              // the next, check it, etc.
559 |    t_finish = clock();
560 |    printb(3, "Found %d solutions.\n", num_sol);
561 |    if (counters) {
562 |       unsigned long long total = 0;
563 |       printb(3, "Counters = ");
564 |       for (i = 0; i < numi; i++) {
565 |          printb(3, "%llu, ", counter[i]);
566 |          total += counter[i];
567 |       }
568 |       printb(3, "total = %llu\n", total);
569 |    }
570 |    printb(3, "Process time = %.3f secs\n", (double)(t_finish - t_start)/CLOCKS_PER_SEC);
571 |    if (fclose(ofile) != 0) {    // Flushes the results; report a failure.
572 |       fprintf(stderr, "Error writing file %s.\n", OFILE);
573 |       return 1;
574 |    }
575 |    return 0;
576 | 
577 | tell:
578 |    fprintf(stderr, "Format is: %s n, where n is the number of operations to try.\n", argv[0]);
579 |    return 0;
580 | }
581 | 


--------------------------------------------------------------------------------