├── Doc
    ├── GettingStarted.md
    ├── heat.png
    ├── teapot.png
    └── teapot180.png
├── LICENSE
├── Lib
    ├── Common
    │   ├── Heap.h
    │   ├── Seq.h
    │   └── Stack.h
    ├── Kernel.cpp
    ├── Kernel.h
    ├── Params.h
    ├── QPULib.h
    ├── Source
    │   ├── Cond.h
    │   ├── Float.cpp
    │   ├── Float.h
    │   ├── Gen.cpp
    │   ├── Gen.h
    │   ├── Int.cpp
    │   ├── Int.h
    │   ├── Interpreter.cpp
    │   ├── Interpreter.h
    │   ├── Pretty.cpp
    │   ├── Pretty.h
    │   ├── Ptr.h
    │   ├── Stmt.cpp
    │   ├── Stmt.h
    │   ├── StmtExtra.h
    │   ├── Syntax.cpp
    │   ├── Syntax.h
    │   ├── Translate.cpp
    │   └── Translate.h
    ├── Target
    │   ├── CFG.cpp
    │   ├── CFG.h
    │   ├── Emulator.cpp
    │   ├── Emulator.h
    │   ├── Encode.cpp
    │   ├── Encode.h
    │   ├── LiveRangeSplit.cpp
    │   ├── LiveRangeSplit.h
    │   ├── Liveness.cpp
    │   ├── Liveness.h
    │   ├── LoadStore.cpp
    │   ├── LoadStore.h
    │   ├── Pretty.cpp
    │   ├── Pretty.h
    │   ├── ReachingDefs.cpp
    │   ├── ReachingDefs.h
    │   ├── RegAlloc.cpp
    │   ├── RegAlloc.h
    │   ├── RemoveLabels.cpp
    │   ├── RemoveLabels.h
    │   ├── Satisfy.cpp
    │   ├── Satisfy.h
    │   ├── SmallLiteral.cpp
    │   ├── SmallLiteral.h
    │   ├── Subst.cpp
    │   ├── Subst.h
    │   ├── Syntax.cpp
    │   └── Syntax.h
    └── VideoCore
    │   ├── Invoke.cpp
    │   ├── Invoke.h
    │   ├── Mailbox.cpp
    │   ├── Mailbox.h
    │   ├── SharedArray.h
    │   ├── VideoCore.cpp
    │   └── VideoCore.h
├── README.md
└── Tests
    ├── AutoTest.cpp
    ├── GCD.cpp
    ├── HeatMap.cpp
    ├── HeatMapScalar.cpp
    ├── Hello.cpp
    ├── ID.cpp
    ├── Makefile
    ├── MultiTri.cpp
    ├── OET.cpp
    ├── Print.cpp
    ├── ReqRecv.cpp
    ├── Rot3D.cpp
    ├── Sort.cpp
    ├── Tri.cpp
    └── TriFloat.cpp


/Doc/GettingStarted.md:
--------------------------------------------------------------------------------
 1 | ### Getting started
 2 | 
 3 | On recent Raspbian distributions, QPULib should work out-of-the-box.
 4 | It's been tested on the Pi 1 Model B and the Pi 2, but not yet on the
 5 | Pi 3.
 6 | 
 7 | ##### Building and running the GCD example
 8 | 
 9 | Try the commands below to build and run the `GCD` example.
10 | 
11 | ```
12 | sudo apt-get install git
13 | git clone https://github.com/mn416/QPULib
14 | cd QPULib/Tests
15 | make QPU=1 GCD
16 | sudo ./GCD
17 | ```
18 | 
19 | The `QPU=1` flag to `make` indicates that the physical QPUs on the
20 | Raspberry Pi should be used to run QPULib kernels.  Simply using
21 | `make` without `QPU=1`, or setting `QPU` to any value other than 1,
22 | will lead to **emulation mode** being used.  As the name suggests,
23 | this means that QPU code will be emulated in software.  This is useful
24 | for debugging, and also allows you to run QPULib programs on a PC if
25 | your Pi is not to hand.  If you want to recompile with a different
26 | flag then do a `make clean` first.
27 | 
28 | Strictly speaking, any program that works in emulation mode but not on
29 | the Pi's physical QPUs is probably a bug in QPULib and should be
30 | reported, although there may be valid explanations for such
31 | differences.
32 | 
33 | ##### Other examples
34 | 
35 | Here is a quick overview of some of the other examples, which can be
36 | built in the same way as GCD:
37 | 
38 |   Example   | Description | Output
39 |   --------- | ----------- | ------
40 |   GCD       | [Euclid's algorithm](https://en.wikipedia.org/wiki/Euclidean_algorithm) | The GCDs of some random pairs of integers
41 |   Tri       | Computes [triangular numbers](https://en.wikipedia.org/wiki/Triangular_number) | The first 16 triangular numbers
42 |   MultiTri  | Like Tri, but uses 4 QPUs instead of 1 | The first 64 triangular numbers
43 |   TriFloat  | Like Tri but casts floats to integers and back | The first 16 triangular numbers
44 |   OET       | [Odd-even transposition sorter](https://en.wikipedia.org/wiki/Odd%E2%80%93even_sort) | 32 integers, sorted
45 |   HeatMap   | Modelling heat flow across a 2D surface | An image in [pgm](http://netpbm.sourceforge.net/doc/pgm.html) format, and the time taken
46 |   Rot3D     | 3D rotation of a random object | The time taken
47 | 
48 | ##### AutoTest: specification-based testing of the compiler
49 | 
50 | Another program in the `Tests` directory worth mentioning is
51 | `AutoTest`: it generates random QPULib programs, runs them on both the
52 | source language interpreter and the target language emulator, and
53 | checks for equivalence.  Currently, it only works in emulation mode.
54 | 
55 | ##### CPU/GPU memory split
56 | 
57 | Depending on your plans, it may be useful to ensure that plenty of
58 | memory is available to the GPU.  This can be done by using
59 | `raspi-config`, selecting `Advanced Options` and then `Memory Split`.
60 | (On a Raspberry Pi 1 Model B, 32M seems to be the minimum that works
61 | for me.)
62 | 


--------------------------------------------------------------------------------
/Doc/heat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mn416/QPULib/c2658a55f2b21de6f3270c86c227011b10d447c2/Doc/heat.png


--------------------------------------------------------------------------------
/Doc/teapot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mn416/QPULib/c2658a55f2b21de6f3270c86c227011b10d447c2/Doc/teapot.png


--------------------------------------------------------------------------------
/Doc/teapot180.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mn416/QPULib/c2658a55f2b21de6f3270c86c227011b10d447c2/Doc/teapot180.png


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | QPULib is released under the MIT License below.  This covers all files
 2 | in the QPULib repository, except for:
 3 | 
 4 |   * Doc/teapot.png
 5 |   * Doc/teapot180.png
 6 |   * Lib/VideoCore/Mailbox.cpp
 7 |   * Lib/VideoCore/Mailbox.h
 8 | 
 9 | MIT License
10 | 
11 | Copyright (c) 2016 Matthew Naylor
12 | 
13 | Permission is hereby granted, free of charge, to any person obtaining
14 | a copy of this software and associated documentation files (the
15 | "Software"), to deal in the Software without restriction, including
16 | without limitation the rights to use, copy, modify, merge, publish,
17 | distribute, sublicense, and/or sell copies of the Software, and to
18 | permit persons to whom the Software is furnished to do so, subject to
19 | the following conditions:
20 | 
21 | The above copyright notice and this permission notice shall be
22 | included in all copies or substantial portions of the Software.
23 | 
24 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
28 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
29 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
30 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 | 


--------------------------------------------------------------------------------
/Lib/Common/Heap.h:
--------------------------------------------------------------------------------
 1 | #ifndef _HEAP_H_
 2 | #define _HEAP_H_
 3 | 
 4 | #include <stdio.h>
 5 | #include <stdlib.h>
 6 | #include <stdint.h>
 7 | 
 8 | class Heap
 9 | {
10 |   public:
11 |     uint8_t *base;
12 |     unsigned long size, capacity;
13 |     const char* heapName;
14 | 
15 |     // Construct an empty heap
16 |     Heap()
17 |     {
18 |       base     = NULL;
19 |       capacity = 0;
20 |       size     = 0;
21 |       heapName = "unnamed";
22 |     }
23 | 
24 |     // Create a heap of a given number of bytes
25 |     void create(unsigned int heapCapacityInBytes)
26 |     {
27 |       if (base != NULL) free(base);
28 |       capacity = heapCapacityInBytes;
29 |       base     = (uint8_t*) malloc(heapCapacityInBytes);
30 |       size     = 0;
31 |     }
32 | 
33 |     Heap(unsigned int heapCapacityInBytes)
34 |     {
35 |       heapName = "";
36 |       create(heapCapacityInBytes);
37 |     }
38 | 
39 |     Heap(const char* name, unsigned int heapCapacityInBytes)
40 |     {
41 |       heapName = name;
42 |       create(heapCapacityInBytes);
43 |     }
44 | 
45 |     // Allocate 'n' elements of type T on the heap
46 |     template <class T> T* alloc(unsigned long n)
47 |     {
48 |       unsigned long nbytes = sizeof(T) * n;
49 |       if (size + nbytes >= capacity) {
50 |         printf("QPULib error: heap '%s' is full.\n", heapName);
51 |         exit(-1);
52 |         return (T*) NULL;
53 |       }
54 |       else {
55 |         uint8_t* p = base + size;
56 |         size += nbytes;
57 |         return (T*) p;
58 |       }
59 |     }
60 | 
61 |     // Allocate one element of type T on the heap
62 |     template <class T> T* alloc()
63 |     {
64 |       return alloc<T>(1);
65 |     }
66 | 
67 |     // Free all the structures allocated on the heap
68 |     void clear()
69 |     {
70 |       size = 0;
71 |     }
72 | 
73 |     // Destroy the heap
74 |     void destroy()
75 |     {
76 |       free(base);
77 |       base     = NULL;
78 |       capacity = 0;
79 |       size     = 0;
80 |     }
81 | 
82 |     // Destructor
83 |     ~Heap()
84 |     {
85 |       destroy();
86 |     }
87 | };
88 | 
89 | #endif
90 | 


--------------------------------------------------------------------------------
/Lib/Common/Seq.h:
--------------------------------------------------------------------------------
  1 | // Sequence data type
  2 | 
  3 | #ifndef _SEQ_H_
  4 | #define _SEQ_H_
  5 | 
  6 | #define INITIAL_MAX_ELEMS 1024
  7 | 
  8 | #include <stdlib.h>
  9 | #include <assert.h>
 10 | 
 11 | template <class T> class Seq
 12 | {
 13 |   private:
 14 |     // Initialisation
 15 |     void init(int initialSize)
 16 |     {
 17 |       maxElems = initialSize;
 18 |       numElems = 0;
 19 |       elems    = new T[initialSize];
 20 |     }
 21 | 
 22 |   public:
 23 |     int maxElems;
 24 |     int numElems;
 25 |     T* elems;
 26 | 
 27 |     // Constructors
 28 |     Seq() { init(INITIAL_MAX_ELEMS); }
 29 |     Seq(int initialSize) { init(initialSize); }
 30 | 
 31 |     // Copy constructor
 32 |     Seq(const Seq<T>& seq) {
 33 |       init(seq.maxElems);
 34 |       numElems = seq.numElems;
 35 |       for (int i = 0; i < seq.numElems; i++)
 36 |         elems[i] = seq.elems[i];
 37 |     }
 38 | 
 39 |     // Set capacity of sequence
 40 |     void setCapacity(int n) {
 41 |       maxElems = n;
 42 |       T* newElems = new T[maxElems];
 43 |       for (int i = 0; i < numElems-1; i++)
 44 |         newElems[i] = elems[i];
 45 |       delete [] elems;
 46 |       elems = newElems;
 47 |     }
 48 | 
 49 |     // Extend size of sequence by one
 50 |     void extend()
 51 |     {
 52 |       numElems++;
 53 |       if (numElems > maxElems)
 54 |         setCapacity(maxElems*2);
 55 |     }
 56 | 
 57 |     // Append
 58 |     void append(T x)
 59 |     {
 60 |       extend();
 61 |       elems[numElems-1] = x;
 62 |     }
 63 | 
 64 |     // Delete last element
 65 |     void deleteLast()
 66 |     {
 67 |       numElems--;
 68 |     }
 69 | 
 70 |     // Push
 71 |     void push(T x) { append(x); }
 72 | 
 73 |     // Pop
 74 |     T pop() {
 75 |       numElems--;
 76 |       return elems[numElems];
 77 |     }
 78 | 
 79 |     // Clear the sequence
 80 |     void clear()
 81 |     {
 82 |       numElems = 0;
 83 |     }
 84 | 
 85 |     // Is given value already in sequence?
 86 |     bool member(T x) {
 87 |       for (int i = 0; i < numElems; i++)
 88 |         if (elems[i] == x) return true;
 89 |       return false;
 90 |     }
 91 | 
 92 |     // Insert element into sequence if not already present
 93 |     bool insert(T x) {
 94 |       bool alreadyPresent = member(x);
 95 |       if (!alreadyPresent) append(x);
 96 |       return !alreadyPresent;
 97 |     }
 98 | 
 99 |     // Remove element at index
100 |     T remove(int index) {
101 |       assert(index < numElems);
102 |       T x = elems[index];
103 |       for (int j = index; j < numElems-1; j++)
104 |         elems[j] = elems[j+1];
105 |       numElems--;
106 |       return x;
107 |     }
108 | 
109 |     // Destructor
110 |     ~Seq()
111 |     {
112 |       delete [] elems;
113 |     }
114 | };
115 | 
116 | // A small sequence is just a sequence with a small initial size
117 | template <class T> class SmallSeq : public Seq<T> {
118 |   public:
119 |     SmallSeq() : Seq<T>(8) {};
120 | };
121 | 
122 | #endif
123 | 


--------------------------------------------------------------------------------
/Lib/Common/Stack.h:
--------------------------------------------------------------------------------
 1 | #ifndef _STACK_H_
 2 | #define _STACK_H_
 3 | 
 4 | #include <stdlib.h>
 5 | #include <assert.h>
 6 | 
 7 | template <class T> class StackItem
 8 | {
 9 |   public:
10 |     T* head;
11 |     StackItem<T>* tail;
12 | };
13 | 
14 | template <class T> class Stack
15 | {
16 |   public:
17 |     unsigned int size;
18 |     StackItem<T>* topItem;
19 | 
20 |     // Constructor
21 |     Stack()
22 |     {
23 |       topItem = NULL;
24 |       size    = 0;
25 |     }
26 | 
27 |     // Push
28 |     void push(T* x)
29 |     {
30 |       StackItem<T>* oldTop = topItem;
31 |       topItem       = new StackItem<T>;
32 |       topItem->head = x;
33 |       topItem->tail = oldTop;
34 |       size++;
35 |     }
36 | 
37 |     // Pop
38 |     void pop()
39 |     {
40 |       assert(size > 0);
41 |       StackItem<T>* oldTop = topItem;
42 |       topItem = topItem->tail;
43 |       delete oldTop;
44 |       size--;
45 |     }
46 | 
47 |     // Top
48 |     T* top()
49 |     {
50 |       assert(size > 0);
51 |       return topItem->head;
52 |     }
53 | 
54 |     // Replace the top element
55 |     void replace(T* x)
56 |     {
57 |       topItem->head = x;
58 |     }
59 | 
60 |     // Clear the stack
61 |     void clear()
62 |     {
63 |       StackItem<T>* p;
64 |       for (int i = 0; i < size; i++) {
65 |         p = topItem->tail;
66 |         delete topItem;
67 |         topItem = p;
68 |       }
69 |       size = 0;
70 |     }
71 | 
72 |     // Obtain Nth element from the top
73 |     T* index(unsigned int n)
74 |     {
75 |       assert(n < size);
76 |       StackItem<T>* p = topItem;
77 |       for (int i = 0; i < n; i++)
78 |         p = p->tail;
79 |       return p->head;
80 |     }
81 | 
82 |     // Destructor
83 |     ~Stack()
84 |     {
85 |       clear();
86 |     }
87 | };
88 | 
89 | #endif
90 | 


--------------------------------------------------------------------------------
/Lib/Kernel.cpp:
--------------------------------------------------------------------------------
 1 | #include "Source/Pretty.h"
 2 | #include "Source/Translate.h"
 3 | #include "Target/Pretty.h"
 4 | #include "Target/Emulator.h"
 5 | #include "Target/RemoveLabels.h"
 6 | #include "Target/CFG.h"
 7 | #include "Target/Liveness.h"
 8 | #include "Target/ReachingDefs.h"
 9 | #include "Target/LiveRangeSplit.h"
10 | #include "Target/RegAlloc.h"
11 | #include "Target/Satisfy.h"
12 | #include "Target/LoadStore.h"
13 | #include "Target/Encode.h"
14 | 
15 | // ============================================================================
16 | // Compile kernel
17 | // ============================================================================
18 | 
19 | void compileKernel(Seq<Instr>* targetCode, Stmt* body)
20 | {
21 |   #ifdef DEBUG
22 |     printf("Source code\n");
23 |     printf("===========\n\n");
24 |     pretty(body);
25 |     printf("\n");
26 |   #endif
27 | 
28 |   // Translate to target code
29 |   translateStmt(targetCode, body);
30 | 
31 |   // Load/store pass
32 |   loadStorePass(targetCode);
33 | 
34 |   // Construct control-flow graph
35 |   CFG cfg;
36 |   buildCFG(targetCode, &cfg);
37 | 
38 |   // Apply live-range splitter
39 |   //liveRangeSplit(targetCode, &cfg);
40 | 
41 |   // Perform register allocation
42 |   regAlloc(&cfg, targetCode);
43 | 
44 |   // Satisfy target code constraints
45 |   satisfy(targetCode);
46 | 
47 |   #ifdef DEBUG
48 |     printf("Target code\n");
49 |     printf("===========\n\n");
50 |     for (int i = 0; i < targetCode->numElems; i++) {
51 |       printf("%i: ", i);
52 |       pretty(targetCode->elems[i]);
53 |     }
54 |     printf("\n");
55 |   #endif
56 | 
57 |   // Translate branch-to-labels to relative branches
58 |   removeLabels(targetCode);
59 | }
60 | 


--------------------------------------------------------------------------------
/Lib/Kernel.h:
--------------------------------------------------------------------------------
  1 | #ifndef _KERNEL_H_
  2 | #define _KERNEL_H_
  3 | 
#include <string.h>

#include "Source/Interpreter.h"
#include "Target/Emulator.h"
#include "Target/Encode.h"
#include "VideoCore/SharedArray.h"
#include "VideoCore/Invoke.h"
#include "VideoCore/VideoCore.h"
 10 | 
 11 | // ============================================================================
 12 | // Modes of operation
 13 | // ============================================================================
 14 | 
 15 | // Two important compile-time macros are EMULATION_MODE and QPU_MODE.
 16 | // With -D EMULATION_MODE, QPULib can be compiled for any architecture.
 17 | // With -D QPU_MODE, QPULib can be compiled only for the Raspberry Pi.
 18 | // At least one of these macros must be defined.
 19 | 
 20 | // In EMULATION_MODE a memory pool is used for allocating data that
 21 | // can be read by kernels.  Otherwise, a mailbox interface to the
 22 | // VideoCore is used to allocate memory.  In both cases, see
 23 | // 'VideoCore/SharedArray.h'.
 24 | 
 25 | // The 'Kernel' class provides various ways to invoke a kernel:
 26 | //
 27 | //   * qpu(...)        invoke kernel on physical QPUs
 28 | //                     (only available in QPU_MODE)
 29 | //   * emu(...)        invoke kernel using target code emulator
 30 | //                     (only available in EMULATION_MODE)
 31 | //   * interpret(...)  invoke kernel using source code interpreter
 32 | //                     (only available in EMULATION_MODE)
 33 | //   * call(...)       in EMULATION_MODE, same as emu(...)
 34 | //                     in QPU_MODE, same as qpu(...)
 35 | //                     in EMULATION_MODE *and* QPU_MODE, same as emu(...)
 36 | 
 37 | // Notice it is OK to compile with both -D EMULATION_MODE *and*
 38 | // -D QPU_MODE.  This feature is provided for doing equivalence
 39 | // testing between the physical QPU and the QPU emulator.  However,
 40 | // EMULATION_MODE introduces a performance penalty and should be used
 41 | // only for testing and debugging purposes.
 42 | 
 43 | // Maximum number of kernel parameters allowed
 44 | #define MAX_KERNEL_PARAMS 128
 45 | 
 46 | // ============================================================================
 47 | // Kernel arguments
 48 | // ============================================================================
 49 | 
 50 | // Construct an argument of QPU type 't'.
 51 | 
 52 | template <typename t> inline t mkArg();
 53 | 
 54 | template <> inline Int mkArg<Int>() {
 55 |   Int x;
 56 |   x = getUniformInt();
 57 |   return x;
 58 | }
 59 | 
 60 | template <> inline Float mkArg<Float>() {
 61 |   Float x;
 62 |   x = getUniformFloat();
 63 |   return x;
 64 | }
 65 | 
 66 | template <> inline Ptr<Int> mkArg< Ptr<Int> >() {
 67 |   Ptr<Int> x;
 68 |   x = getUniformPtr<Int>();
 69 |   return x;
 70 | }
 71 | 
 72 | template <> inline Ptr<Float> mkArg< Ptr<Float> >() {
 73 |   Ptr<Float> x;
 74 |   x = getUniformPtr<Float>();
 75 |   return x;
 76 | }
 77 | 
 78 | // ============================================================================
 79 | // Parameter passing
 80 | // ============================================================================
 81 | 
 82 | template <typename... ts> inline void nothing(ts... args) {}
 83 | 
 84 | // Pass argument of ARM type 'u' as parameter of QPU type 't'.
 85 | 
 86 | template <typename t, typename u> inline bool
 87 |   passParam(Seq<int32_t>* uniforms, u x);
 88 | 
 89 | // Pass an int
 90 | template <> inline bool passParam<Int, int>
 91 |   (Seq<int32_t>* uniforms, int x)
 92 | {
 93 |   uniforms->append((int32_t) x);
 94 |   return true;
 95 | }
 96 | 
 97 | // Pass a float
 98 | template <> inline bool passParam<Float, float>
 99 |   (Seq<int32_t>* uniforms, float x)
100 | {
101 |   int32_t* bits = (int32_t*) &x;
102 |   uniforms->append(*bits);
103 |   return true;
104 | }
105 | 
106 | // Pass a SharedArray<int>*
107 | template <> inline bool passParam< Ptr<Int>, SharedArray<int>* >
108 |   (Seq<int32_t>* uniforms, SharedArray<int>* p)
109 | {
110 |   uniforms->append(p->getAddress());
111 |   return true;
112 | }
113 | 
114 | // Pass a SharedArray<int*>*
115 | template <> inline bool passParam< Ptr<Ptr<Int>>, SharedArray<int*>* >
116 |   (Seq<int32_t>* uniforms, SharedArray<int*>* p)
117 | {
118 |   uniforms->append(p->getAddress());
119 |   return true;
120 | }
121 | 
122 | // Pass a SharedArray<float>*
123 | template <> inline bool passParam< Ptr<Float>, SharedArray<float>* >
124 |   (Seq<int32_t>* uniforms, SharedArray<float>* p)
125 | {
126 |   uniforms->append(p->getAddress());
127 |   return true;
128 | }
129 | 
130 | // Pass a SharedArray<float*>*
131 | template <> inline bool passParam< Ptr<Ptr<Float>>, SharedArray<float*>* >
132 |   (Seq<int32_t>* uniforms, SharedArray<float*>* p)
133 | {
134 |   uniforms->append(p->getAddress());
135 |   return true;
136 | }
137 | 
138 | // ============================================================================
139 | // Functions on kernels
140 | // ============================================================================
141 | 
142 | // Compile a kernel
143 | void compileKernel(Seq<Instr>* targetCode, Stmt* s);
144 | 
145 | // ============================================================================
146 | // Kernels
147 | // ============================================================================
148 | 
149 | // A kernel is parameterised by a list of QPU types 'ts' representing
150 | // the types of the parameters that the kernel takes.
151 | 
152 | // The kernel constructor takes a function with parameters of QPU
153 | // types 'ts'.  It applies the function to construct an AST.
154 | 
155 | template <typename... ts> struct Kernel {
156 |   // AST representing the source code
157 |   Stmt* sourceCode;
158 | 
159 |   // AST representing the target code
160 |   Seq<Instr> targetCode;
161 | 
162 |   // Parameters to be passed to kernel
163 |   Seq<int32_t> uniforms;
164 | 
165 |   // The number of variables in the source code
166 |   int numVars;
167 | 
168 |   // Number of QPUs to run on
169 |   int numQPUs;
170 | 
171 |   // Memory region for QPU code and parameters
172 |   #ifdef QPU_MODE
173 |   SharedArray<uint32_t>* qpuCodeMem;
174 |   int qpuCodeMemOffset;
175 |   #endif
176 | 
177 |   // Construct kernel out of C++ function
178 |   Kernel(void (*f)(ts... params)) {
179 |     numQPUs = 1;
180 | 
181 |     // Initialise AST constructors
182 |     #ifndef EMULATION_MODE
183 |     astHeap.clear();
184 |     #endif
185 |     controlStack.clear();
186 |     stmtStack.clear();
187 |     stmtStack.push(mkSkip());
188 |     resetFreshVarGen();
189 |     resetFreshLabelGen();
190 | 
191 |     // Reserved general-purpose variables
192 |     Int qpuId, qpuCount, readStride, writeStride;
193 |     qpuId = getUniformInt();
194 |     qpuCount = getUniformInt();
195 | 
196 |     // Construct the AST
197 |     f(mkArg<ts>()...);
198 | 
199 |     // QPU code to cleanly exit
200 |     kernelFinish();
201 | 
202 |     // Obtain the AST
203 |     Stmt* body = stmtStack.top();
204 |     stmtStack.pop();
205 | 
206 |     // Save pointer to source program for interpreter
207 |     #ifdef EMULATION_MODE
208 |     sourceCode = body;
209 |     #else
210 |     sourceCode = NULL;
211 |     #endif
212 | 
213 |     // Compile
214 |     compileKernel(&targetCode, body);
215 | 
216 |     // Remember the number of variables used
217 |     numVars = getFreshVarCount();
218 | 
219 |     #ifdef QPU_MODE
220 |     enableQPUs();
221 | 
222 |     // Allocate code mem
223 |     qpuCodeMem = new SharedArray<uint32_t>;
224 | 
225 |     // Encode target instrs into array of 32-bit ints
226 |     Seq<uint32_t> code;
227 |     encode(&targetCode, &code);
228 | 
229 |     // Allocate memory for QPU code and parameters
230 |     int numWords = code.numElems + 12*MAX_KERNEL_PARAMS + 12*2;
231 |     qpuCodeMem->alloc(numWords);
232 | 
233 |     // Copy kernel to code memory
234 |     int offset = 0;
235 |     for (int i = 0; i < code.numElems; i++) {
236 |       (*qpuCodeMem)[offset++] = code.elems[i];
237 |     }
238 |     qpuCodeMemOffset = offset;
239 |     #endif
240 |   }
241 | 
242 |   #ifdef EMULATION_MODE
243 |   template <typename... us> void emu(us... args) {
244 |     // Pass params, checking arguments types us against parameter types ts
245 |     uniforms.clear();
246 |     nothing(passParam<ts, us>(&uniforms, args)...);
247 | 
248 |     emulate
249 |       ( numQPUs          // Number of QPUs active
250 |       , &targetCode      // Instruction sequence
251 |       , numVars          // Number of vars in source
252 |       , &uniforms        // Kernel parameters
253 |       , NULL             // Use stdout
254 |       );
255 |   }
256 |   #endif
257 | 
258 |   // Invoke the interpreter
259 |   #ifdef EMULATION_MODE
260 |   template <typename... us> void interpret(us... args) {
261 |     // Pass params, checking arguments types us against parameter types ts
262 |     uniforms.clear();
263 |     nothing(passParam<ts, us>(&uniforms, args)...);
264 | 
265 |     interpreter
266 |       ( numQPUs          // Number of QPUs active
267 |       , sourceCode       // Source program
268 |       , numVars          // Number of vars in source
269 |       , &uniforms        // Kernel parameters
270 |       , NULL             // Use stdout
271 |       );
272 |   }
273 |   #endif
274 | 
275 |   // Invoke kernel on physical QPU hardware
276 |   #ifdef QPU_MODE
277 |   template <typename... us> void qpu(us... args) {
278 |     // Pass params, checking arguments types us against parameter types ts
279 |     uniforms.clear();
280 |     nothing(passParam<ts, us>(&uniforms, args)...);
281 | 
282 |     // Invoke kernel on QPUs
283 |     invoke(numQPUs, *qpuCodeMem, qpuCodeMemOffset, &uniforms);
284 |   }
285 |   #endif
286 |  
287 |   // Invoke the kernel
288 |   template <typename... us> void call(us... args) {
289 |     #ifdef EMULATION_MODE
290 |       emu(args...);
291 |     #else
292 |       #ifdef QPU_MODE
293 |         qpu(args...);
294 |       #endif
295 |     #endif
296 |   };
297 | 
298 |   // Overload function application operator
299 |   template <typename... us> void operator()(us... args) {
300 |     call(args...);
301 |   }
302 | 
303 |   // Set number of QPUs to use
304 |   void setNumQPUs(int n) {
305 |     numQPUs = n;
306 |   }
307 | 
308 |   // Deconstructor
309 |   ~Kernel() {
310 |     #ifdef QPU_MODE
311 |       delete qpuCodeMem;
312 |       disableQPUs();
313 |     #endif
314 |   }
315 | };
316 | 
317 | // Initialiser
318 | 
319 | template <typename... ts> Kernel<ts...> compile(void (*f)(ts... params))
320 | {
321 |   Kernel<ts...> k(f);
322 |   return k;
323 | }
324 | 
325 | #endif
326 | 


--------------------------------------------------------------------------------
/Lib/Params.h:
--------------------------------------------------------------------------------
1 | #ifndef _PARAMS_H_
2 | #define _PARAMS_H_
3 | 
4 | // Memory available for constructing abstract syntax trees
5 | #define AST_HEAP_SIZE 131072
6 | 
7 | #endif
8 | 


--------------------------------------------------------------------------------
/Lib/QPULib.h:
--------------------------------------------------------------------------------
 1 | #ifndef _QPULIB_H_
 2 | #define _QPULIB_H_
 3 | 
 4 | #include "Source/Int.h"
 5 | #include "Source/Float.h"
 6 | #include "Source/Ptr.h"
 7 | #include "Source/Cond.h"
 8 | #include "Source/Stmt.h"
 9 | #include "Kernel.h"
10 | 
11 | #endif
12 | 


--------------------------------------------------------------------------------
/Lib/Source/Cond.h:
--------------------------------------------------------------------------------
  1 | #ifndef _SOURCE_COND_H_
  2 | #define _SOURCE_COND_H_
  3 | 
  4 | #include "Source/Syntax.h"
  5 | #include "Source/Int.h"
  6 | 
  7 | // ============================================================================
  8 | // Types                   
  9 | // ============================================================================
 10 | 
 11 | struct Cond
 12 | {
 13 |   // Abstract syntax tree
 14 |   CExpr* cexpr;
 15 |   // Constructor
 16 |   Cond(CExpr* c) { cexpr = c; }
 17 | };
 18 | 
 19 | struct BoolExpr
 20 | {
 21 |   // Abstract syntax tree
 22 |   BExpr* bexpr;
 23 |   // Constructor
 24 |   BoolExpr(BExpr* b) { bexpr = b; }
 25 |   // Cast to Cond
 26 |   //operator Cond();
 27 | };
 28 | 
 29 | // ============================================================================
 30 | // Generic 'Int' comparison
 31 | // ============================================================================
 32 | 
 33 | inline BoolExpr mkIntCmp(IntExpr a, CmpOp op, IntExpr b)
 34 |   { return BoolExpr(mkCmp(a.expr, op, b.expr)); }
 35 | 
 36 | // ============================================================================
 37 | // Specific 'Int' comparisons
 38 | // ============================================================================
 39 | 
 40 | inline BoolExpr operator==(IntExpr a, IntExpr b)
 41 |   { return mkIntCmp(a, mkCmpOp(EQ, INT32), b); }
 42 | 
 43 | inline BoolExpr operator!=(IntExpr a, IntExpr b)
 44 |   { return mkIntCmp(a, mkCmpOp(NEQ, INT32), b); }
 45 | 
 46 | inline BoolExpr operator<(IntExpr a, IntExpr b)
 47 |   { return mkIntCmp(a, mkCmpOp(LT, INT32), b); }
 48 | 
 49 | inline BoolExpr operator<=(IntExpr a, IntExpr b)
 50 |   { return mkIntCmp(a, mkCmpOp(LE, INT32), b); }
 51 | 
 52 | inline BoolExpr operator>(IntExpr a, IntExpr b)
 53 |   { return mkIntCmp(a, mkCmpOp(GT, INT32), b); }
 54 | 
 55 | inline BoolExpr operator>=(IntExpr a, IntExpr b)
 56 |   { return mkIntCmp(a, mkCmpOp(GE, INT32), b); }
 57 | 
 58 | // ============================================================================
 59 | // Generic 'Float' comparison
 60 | // ============================================================================
 61 | 
 62 | inline BoolExpr mkFloatCmp(FloatExpr a, CmpOp op, FloatExpr b)
 63 |   { return BoolExpr(mkCmp(a.expr, op, b.expr)); }
 64 | 
 65 | // ============================================================================
 66 | // Specific 'Float' comparisons
 67 | // ============================================================================
 68 | 
 69 | // Float '==': forwards to mkFloatCmp with the comparison operator (EQ, FLOAT).
 70 | inline BoolExpr operator==(FloatExpr a, FloatExpr b) { return mkFloatCmp(a, mkCmpOp(EQ, FLOAT), b); }
 71 | 
 72 | // Float '!=': forwards to mkFloatCmp with the comparison operator (NEQ, FLOAT).
 73 | inline BoolExpr operator!=(FloatExpr a, FloatExpr b) { return mkFloatCmp(a, mkCmpOp(NEQ, FLOAT), b); }
 74 | 
 75 | // Float '<': forwards to mkFloatCmp with the comparison operator (LT, FLOAT).
 76 | inline BoolExpr operator<(FloatExpr a, FloatExpr b) { return mkFloatCmp(a, mkCmpOp(LT, FLOAT), b); }
 77 | 
 78 | // Float '<=': forwards to mkFloatCmp with the comparison operator (LE, FLOAT).
 79 | inline BoolExpr operator<=(FloatExpr a, FloatExpr b) { return mkFloatCmp(a, mkCmpOp(LE, FLOAT), b); }
 80 | 
 81 | // Float '>': forwards to mkFloatCmp with the comparison operator (GT, FLOAT).
 82 | inline BoolExpr operator>(FloatExpr a, FloatExpr b) { return mkFloatCmp(a, mkCmpOp(GT, FLOAT), b); }
 83 | 
 84 | // Float '>=': forwards to mkFloatCmp with the comparison operator (GE, FLOAT).
 85 | inline BoolExpr operator>=(FloatExpr a, FloatExpr b) { return mkFloatCmp(a, mkCmpOp(GE, FLOAT), b); }
 86 | 
 87 | // ============================================================================
 88 | // Boolean operators
 89 | // ============================================================================
 90 | 
 91 | // Logical NOT: wraps a.bexpr in the node built by mkNot.
 92 | inline BoolExpr operator!(BoolExpr a) { return BoolExpr(mkNot(a.bexpr)); }
 93 | 
 94 | // Logical AND (mkAnd).  Being an overloaded operator, both operands are always evaluated.
 95 | inline BoolExpr operator&&(BoolExpr a, BoolExpr b) { return BoolExpr(mkAnd(a.bexpr, b.bexpr)); }
 96 | 
 97 | // Logical OR (mkOr).  Being an overloaded operator, both operands are always evaluated.
 98 | inline BoolExpr operator||(BoolExpr a, BoolExpr b) { return BoolExpr(mkOr(a.bexpr, b.bexpr)); }
 99 | 
100 | // Turn a boolean expression into a Cond using the 'any' reduction (mkAny).
101 | inline Cond any(BoolExpr a) { return Cond(mkAny(a.bexpr)); }
102 | 
103 | // Turn a boolean expression into a Cond using the 'all' reduction (mkAll).
104 | inline Cond all(BoolExpr a) { return Cond(mkAll(a.bexpr)); }
105 | 
106 | #endif
107 | 


--------------------------------------------------------------------------------
/Lib/Source/Float.cpp:
--------------------------------------------------------------------------------
  1 | #include "Source/Float.h"
  2 | #include "Source/Stmt.h"
  3 | 
  4 | // ============================================================================
  5 | // Type 'FloatExpr'
  6 | // ============================================================================
  7 | 
  8 | // Constructors
  9 | 
 10 | FloatExpr::FloatExpr() : expr(NULL) {}  // no AST attached yet
 11 | 
 12 | FloatExpr::FloatExpr(float x) : expr(mkFloatLit(x)) {}  // float literal node
 13 | 
 14 | // Helper constructor: wraps an AST node as a FloatExpr.  Int.cpp carries an
 15 | // identical inline definition, so keep the two token-for-token the same.
 16 | inline FloatExpr mkFloatExpr(Expr* e) { FloatExpr x; x.expr = e; return x; }
 17 | 
 18 | // ============================================================================
 19 | // Type 'Float'
 20 | // ============================================================================
 21 | 
 22 | // Constructors
 23 | 
 24 | // Default: bind to a new variable from freshVar(); no assignment is emitted.
 25 | Float::Float() {
 26 |   expr = mkVar(freshVar());
 27 | }
 28 | 
 29 | // From a literal: new variable, then an assignment of the float literal to it.
 30 | Float::Float(float x) {
 31 |   expr = mkVar(freshVar());
 32 |   assign(expr, mkFloatLit(x));
 33 | }
 34 | 
 35 | // From an expression: new variable, then an assignment of e's AST to it.
 36 | Float::Float(FloatExpr e) {
 37 |   expr = mkVar(freshVar());
 38 |   assign(expr, e.expr);
 39 | }
 40 | 
 41 | // Copy constructors
 42 | 
 43 | // Copying never aliases: the copy gets its own variable plus an assignment from x.
 44 | Float::Float(Float& x) {
 45 |   expr = mkVar(freshVar());
 46 |   assign(expr, x.expr);
 47 | }
 48 | 
 49 | // Same as above for const sources: own variable, then an assignment from x.
 50 | Float::Float(const Float& x) {
 51 |   expr = mkVar(freshVar());
 52 |   assign(expr, x.expr);
 53 | }
 54 | 
 55 | // Cast to a FloatExpr: the variable's AST node is reused, nothing is emitted.
 56 | 
 57 | Float::operator FloatExpr() { FloatExpr asExpr; asExpr.expr = expr; return asExpr; }
 58 | 
 59 | // Assignment from another Float: emits 'this = rhs'; the reference returned is rhs.
 60 | Float& Float::operator=(Float& rhs)
 61 | { assign(expr, rhs.expr); return rhs; }
 62 | 
 63 | // Assignment from an expression: emits 'this = rhs' and returns rhs by value.
 64 | FloatExpr Float::operator=(FloatExpr rhs)
 65 | { assign(expr, rhs.expr); return rhs; }
 66 | 
 67 | // ============================================================================
 68 | // Generic operations
 69 | // ============================================================================
 70 | 
 71 | // Apply binary operator 'op' to the ASTs of 'a' and 'b'; result wrapped as a FloatExpr.
 72 | inline FloatExpr mkFloatApply(FloatExpr a, Op op, FloatExpr b)
 73 | {
 74 |   return mkFloatExpr(mkApply(a.expr, op, b.expr));
 75 | }
 76 | 
 77 | // ============================================================================
 78 | // Specific operations
 79 | // ============================================================================
 80 | 
 81 | // Read a Float from the UNIFORM FIFO: yields a VAR node whose var tag is UNIFORM.
 82 | FloatExpr getUniformFloat()
 83 | {
 84 |   Expr* node    = mkExpr();
 85 |   node->tag     = VAR;
 86 |   node->var.tag = UNIFORM;
 87 |   return mkFloatExpr(node);
 88 | }
 89 | 
 90 | // Add.
 91 | // Yields the Float expression 'a + b' via mkFloatApply with mkOp(ADD, FLOAT).
 92 | FloatExpr operator+(FloatExpr a, FloatExpr b) { return mkFloatApply(a, mkOp(ADD, FLOAT), b); }
 93 | 
 94 | // Subtract.
 95 | // Yields the Float expression 'a - b' via mkFloatApply with mkOp(SUB, FLOAT).
 96 | FloatExpr operator-(FloatExpr a, FloatExpr b) { return mkFloatApply(a, mkOp(SUB, FLOAT), b); }
 97 | 
 98 | // Multiply.
 99 | // Yields the Float expression 'a * b' via mkFloatApply with mkOp(MUL, FLOAT).
100 | FloatExpr operator*(FloatExpr a, FloatExpr b) { return mkFloatApply(a, mkOp(MUL, FLOAT), b); }
101 | 
102 | // Min.
103 | // Yields the Float expression 'a min b' via mkFloatApply with mkOp(MIN, FLOAT).
104 | FloatExpr min(FloatExpr a, FloatExpr b) { return mkFloatApply(a, mkOp(MIN, FLOAT), b); }
105 | 
106 | // Max.
107 | // Yields the Float expression 'a max b' via mkFloatApply with mkOp(MAX, FLOAT).
108 | FloatExpr max(FloatExpr a, FloatExpr b) { return mkFloatApply(a, mkOp(MAX, FLOAT), b); }
109 | 


--------------------------------------------------------------------------------
/Lib/Source/Float.h:
--------------------------------------------------------------------------------
 1 | // This module defines type 'Float' for a vector of 16 x 32-bit floats.
 2 | 
 3 | #ifndef _SOURCE_FLOAT_H_
 4 | #define _SOURCE_FLOAT_H_
 5 | 
 6 | #include <assert.h>
 7 | #include "Source/Syntax.h"
 8 | 
 9 | // ============================================================================
10 | // Types                   
11 | // ============================================================================
12 | 
13 | // A 'FloatExpr' defines a float vector expression which can
14 | // only be used on the RHS of assignment statements.
15 | 
16 | struct FloatExpr {
17 |   // Abstract syntax tree of the expression (NULL after default construction)
18 |   Expr* expr;
19 |   // Constructors: an empty expression, or a float literal
20 |   FloatExpr();
21 |   FloatExpr(float x);
22 | };
23 | 
24 | // A 'Float' defines a float vector variable which can be used in
25 | // both the LHS and RHS of an assignment.
26 | 
27 | struct Float {
28 |   // Abstract syntax tree (the variable node this object stands for)
29 |   Expr* expr;
30 | 
31 |   // Constructors: each binds the object to a variable obtained from freshVar();
32 |   Float();             // no initial assignment
33 |   Float(float x);      // followed by an assignment of the literal x
34 |   Float(FloatExpr e);  // followed by an assignment of the expression e
35 | 
36 |   // Copy constructors: new variable plus an assignment from x (no aliasing)
37 |   Float(Float& x);
38 |   Float(const Float& x);
39 | 
40 |   // Cast to a FloatExpr
41 |   operator FloatExpr();
42 | 
43 |   // Assignment: emits an assignment statement; both overloads return rhs
44 |   Float& operator=(Float& rhs);
45 |   FloatExpr operator=(FloatExpr rhs);
46 | };
47 | 
48 | // ============================================================================
49 | // Operations
50 | // ============================================================================
51 | 
52 | FloatExpr getUniformFloat();                    // read a Float from the UNIFORM FIFO
53 | 
54 | FloatExpr operator+(FloatExpr a, FloatExpr b);  // add
55 | FloatExpr operator-(FloatExpr a, FloatExpr b);  // subtract
56 | FloatExpr operator*(FloatExpr a, FloatExpr b);  // multiply
57 | FloatExpr min(FloatExpr a, FloatExpr b);        // min
58 | FloatExpr max(FloatExpr a, FloatExpr b);        // max
59 | 
60 | #endif
61 | 


--------------------------------------------------------------------------------
/Lib/Source/Gen.h:
--------------------------------------------------------------------------------
 1 | // A random source-program generator
 2 | 
 3 | #ifndef _GEN_H_
 4 | #define _GEN_H_
 5 | 
 6 | #include "Common/Seq.h"
 7 | #include "Source/Syntax.h"
 8 | 
 9 | // Options for the random source-program generator (passed to progGen)
10 | struct GenOptions {
11 |   // Limit on depth of nesting of statements and expressions
12 |   int depth;
13 | 
14 |   // Limit on length of statement sequences
15 |   int length;
16 | 
17 |   // Number of arguments of each kind to use when generating kernels
18 |   int numIntArgs;
19 |   int numFloatArgs;
20 |   int numPtrArgs;
21 |   int numPtr2Args;  // NOTE(review): presumably pointer-to-pointer args -- confirm in Gen.cpp
22 | 
23 |   // Number of variables of each type to use when generating kernels
24 |   int numIntVars;
25 |   int numFloatVars;
26 | 
27 |   // Loop bound (max iterations of any loop)
28 |   int loopBound;
29 | 
30 |   // Generate float operations?
31 |   bool genFloat;
32 | 
33 |   // Generate rotate operations?
34 |   bool genRotate;
35 | 
36 |   // Generate pointer-dereferencing operations?
37 |   bool genDeref;
38 |   bool genDeref2;   // NOTE(review): presumably the pointer-to-pointer variant -- confirm
39 | 
40 |   // We can dereference expressions of the form p+e where p is a
41 |   // pointer variable and e is an integer expression that is bitwise
42 |   // ANDed with derefOffsetMask.
43 |   int derefOffsetMask;
44 | 
45 |   // Allow loads and stores to be strided?
46 |   bool genStrided;
47 | };
48 | 
49 | // Generate random literals (one int, one float per call)
50 | int genIntLit();
51 | float genFloatLit();
52 | 
53 | // Generate a random source program (numVars: presumably an output count -- TODO confirm)
54 | Stmt* progGen(GenOptions* opts, int* numVars);
55 | 
56 | #endif
57 | 


--------------------------------------------------------------------------------
/Lib/Source/Int.cpp:
--------------------------------------------------------------------------------
  1 | #include "Source/Int.h"
  2 | #include "Source/Stmt.h"
  3 | #include "Source/Float.h"
  4 | 
  5 | // ============================================================================
  6 | // Type 'IntExpr'
  7 | // ============================================================================
  8 | 
  9 | // Constructors
 10 | 
 11 | IntExpr::IntExpr() : expr(NULL) {}  // no AST attached yet
 12 | 
 13 | IntExpr::IntExpr(int x) : expr(mkIntLit(x)) {}  // integer literal node
 14 | 
 15 | // Helper constructors: wrap an AST node as an expression of the given type.
 16 | // mkFloatExpr is also defined in Float.cpp; the inline copies must stay token-identical.
 17 | inline IntExpr mkIntExpr(Expr* e) { IntExpr wrapped; wrapped.expr = e; return wrapped; }
 18 | inline FloatExpr mkFloatExpr(Expr* e) { FloatExpr x; x.expr = e; return x; }
 19 | 
 20 | // ============================================================================
 21 | // Type 'Int'
 22 | // ============================================================================
 23 | 
 24 | // Constructors
 25 | 
 26 | // Default: bind to a new variable from freshVar(); no assignment is emitted.
 27 | Int::Int() {
 28 |   expr = mkVar(freshVar());
 29 | }
 30 | 
 31 | // From a literal: new variable, then an assignment of the integer literal to it.
 32 | Int::Int(int x) {
 33 |   expr = mkVar(freshVar());
 34 |   assign(expr, mkIntLit(x));
 35 | }
 36 | 
 37 | // From an expression: new variable, then an assignment of e's AST to it.
 38 | Int::Int(IntExpr e) {
 39 |   expr = mkVar(freshVar());
 40 |   assign(expr, e.expr);
 41 | }
 42 | 
 43 | // Copy constructors
 44 | 
 45 | // Copying never aliases: the copy gets its own variable plus an assignment from x.
 46 | Int::Int(Int& x) {
 47 |   expr = mkVar(freshVar());
 48 |   assign(expr, x.expr);
 49 | }
 50 | 
 51 | // Same as above for const sources: own variable, then an assignment from x.
 52 | Int::Int(const Int& x) {
 53 |   expr = mkVar(freshVar());
 54 |   assign(expr, x.expr);
 55 | }
 56 | 
 57 | // Cast to an IntExpr: the variable's AST node is reused, nothing is emitted.
 58 | 
 59 | Int::operator IntExpr() { IntExpr asExpr; asExpr.expr = expr; return asExpr; }
 60 | 
 61 | // Assignment from another Int: emits 'this = rhs'; the reference returned is rhs.
 62 | Int& Int::operator=(Int& rhs)
 63 | { assign(expr, rhs.expr); return rhs; }
 64 | 
 65 | // Assignment from an expression: emits 'this = rhs' and returns rhs by value.
 66 | IntExpr Int::operator=(IntExpr rhs)
 67 | { assign(expr, rhs.expr); return rhs; }
 68 | 
 69 | // ============================================================================
 70 | // Generic operations
 71 | // ============================================================================
 72 | 
 73 | // Apply binary operator 'op' to the ASTs of 'a' and 'b'; result wrapped as an IntExpr.
 74 | inline IntExpr mkIntApply(IntExpr a, Op op, IntExpr b)
 75 | {
 76 |   return mkIntExpr(mkApply(a.expr, op, b.expr));
 77 | }
 78 | 
 79 | // ============================================================================
 80 | // Specific operations
 81 | // ============================================================================
 82 | 
 83 | // Read an Int from the UNIFORM FIFO: yields a VAR node whose var tag is UNIFORM.
 84 | IntExpr getUniformInt()
 85 | {
 86 |   Expr* node    = mkExpr();
 87 |   node->tag     = VAR;
 88 |   node->var.tag = UNIFORM;
 89 |   return mkIntExpr(node);
 90 | }
 91 | 
 92 | // A vector containing integers 0..15: yields a VAR node whose var tag is ELEM_NUM.
 93 | IntExpr index()
 94 | {
 95 |   Expr* node    = mkExpr();
 96 |   node->tag     = VAR;
 97 |   node->var.tag = ELEM_NUM;
 98 |   return mkIntExpr(node);
 99 | }
100 | 
101 | // A vector containing the QPU id
102 | IntExpr me()
103 | {
104 |   // The QPU id lives in a reserved standard variable (RSV_QPU_ID).
105 |   Expr* node    = mkExpr();
106 |   node->tag     = VAR;
107 |   node->var.tag = STANDARD;
108 |   node->var.id  = RSV_QPU_ID;
109 |   return mkIntExpr(node);
110 | }
111 | 
112 | // A vector containing the QPU count
113 | IntExpr numQPUs()
114 | {
115 |   // The QPU count lives in a reserved standard variable (RSV_NUM_QPUS).
116 |   Expr* node    = mkExpr();
117 |   node->tag     = VAR;
118 |   node->var.tag = STANDARD;
119 |   node->var.id  = RSV_NUM_QPUS;
120 |   return mkIntExpr(node);
121 | }
122 | 
123 | // Vector rotation of an Int vector 'a' by 'b': application of mkOp(ROTATE, INT32).
124 | IntExpr rotate(IntExpr a, IntExpr b) { return mkIntApply(a, mkOp(ROTATE, INT32), b); }
125 | 
126 | // Vector rotation of a Float vector; the amount 'b' is still an Int expression.
127 | FloatExpr rotate(FloatExpr a, IntExpr b)
128 | {
129 |   const Op rotateOp = mkOp(ROTATE, FLOAT);
130 |   return mkFloatExpr(mkApply(a.expr, rotateOp, b.expr));
131 | }
132 | 
133 | // Add.
134 | // Yields the Int expression 'a + b' via mkIntApply with mkOp(ADD, INT32).
135 | IntExpr operator+(IntExpr a, IntExpr b) { return mkIntApply(a, mkOp(ADD, INT32), b); }
136 | 
137 | // Increment (postfix form only): emits the assignment 'this = this + 1'.
138 | // The operator returns void, so 'i++' cannot be used as a value.
139 | void Int::operator++(int) { *this = *this + 1; }
140 | 
141 | // Subtract.
142 | // Yields the Int expression 'a - b' via mkIntApply with mkOp(SUB, INT32).
143 | IntExpr operator-(IntExpr a, IntExpr b) { return mkIntApply(a, mkOp(SUB, INT32), b); }
144 | 
145 | // Multiply.
146 | // Yields the Int expression 'a * b' via mkIntApply with mkOp(MUL, INT32).
147 | IntExpr operator*(IntExpr a, IntExpr b) { return mkIntApply(a, mkOp(MUL, INT32), b); }
148 | 
149 | // Min.
150 | // Yields the Int expression 'a min b' via mkIntApply with mkOp(MIN, INT32).
151 | IntExpr min(IntExpr a, IntExpr b) { return mkIntApply(a, mkOp(MIN, INT32), b); }
152 | 
153 | // Max.
154 | // Yields the Int expression 'a max b' via mkIntApply with mkOp(MAX, INT32).
155 | IntExpr max(IntExpr a, IntExpr b) { return mkIntApply(a, mkOp(MAX, INT32), b); }
156 | 
157 | // Shift left.
158 | // Yields 'a << b' via mkIntApply with mkOp(SHL, INT32).
159 | IntExpr operator<<(IntExpr a, IntExpr b) { return mkIntApply(a, mkOp(SHL, INT32), b); }
160 | 
161 | // Shift right.
162 | // Yields 'a >> b' via mkIntApply with mkOp(SHR, INT32).
163 | IntExpr operator>>(IntExpr a, IntExpr b) { return mkIntApply(a, mkOp(SHR, INT32), b); }
164 | 
165 | // Bitwise AND.
166 | // Yields 'a & b' via mkIntApply with mkOp(BAND, INT32).
167 | IntExpr operator&(IntExpr a, IntExpr b) { return mkIntApply(a, mkOp(BAND, INT32), b); }
168 | 
169 | // Bitwise OR.
170 | // Yields 'a | b' via mkIntApply with mkOp(BOR, INT32).
171 | IntExpr operator|(IntExpr a, IntExpr b) { return mkIntApply(a, mkOp(BOR, INT32), b); }
172 | 
173 | // Bitwise XOR.
174 | // Yields 'a ^ b' via mkIntApply with mkOp(BXOR, INT32).
175 | IntExpr operator^(IntExpr a, IntExpr b) { return mkIntApply(a, mkOp(BXOR, INT32), b); }
176 | 
177 | // Bitwise NOT.
178 | // Unary, yet built with the binary helper: 'a' is passed for both operand slots.
179 | IntExpr operator~(IntExpr a) { return mkIntApply(a, mkOp(BNOT, INT32), a); }
180 | 
181 | // Unsigned shift-right.
182 | // Yields the application of mkOp(USHR, INT32) to 'a' and 'b'.
183 | IntExpr shr(IntExpr a, IntExpr b) { return mkIntApply(a, mkOp(USHR, INT32), b); }
184 | 
185 | // Bitwise rotate-right.
186 | // Yields the application of mkOp(ROR, INT32) to 'a' and 'b'.
187 | IntExpr ror(IntExpr a, IntExpr b) { return mkIntApply(a, mkOp(ROR, INT32), b); }
188 | 
189 | // Conversion to Int: applies FtoI to 'a'; the literal 0 is presumably just an rhs filler.
190 | IntExpr toInt(FloatExpr a)
191 | {
192 |   Expr* zero = mkIntLit(0);
193 |   return mkIntExpr(mkApply(a.expr, mkOp(FtoI, INT32), zero));
194 | }
195 | 
196 | // Conversion to Float: applies ItoF to 'a'; the literal 0 is presumably just an rhs filler.
197 | FloatExpr toFloat(IntExpr a)
198 | {
199 |   Expr* zero = mkIntLit(0);
200 |   return mkFloatExpr(mkApply(a.expr, mkOp(ItoF, FLOAT), zero));
201 | }
202 | 


--------------------------------------------------------------------------------
/Lib/Source/Int.h:
--------------------------------------------------------------------------------
 1 | // This module defines type 'Int' for a vector of 16 x 32-bit integers.
 2 | 
 3 | #ifndef _SOURCE_INT_H_
 4 | #define _SOURCE_INT_H_
 5 | 
 6 | #include <assert.h>
 7 | #include "Source/Syntax.h"
 8 | #include "Source/Float.h"
 9 | 
10 | // ============================================================================
11 | // Types                   
12 | // ============================================================================
13 | 
14 | // An 'IntExpr' defines an integer vector expression which can
15 | // only be used on the RHS of assignment statements.
16 | 
17 | struct IntExpr {
18 |   // Abstract syntax tree of the expression (NULL after default construction)
19 |   Expr* expr;
20 |   // Constructors: an empty expression, or an integer literal
21 |   IntExpr();
22 |   IntExpr(int x);
23 | };
24 | 
25 | // An 'Int' defines an integer vector variable which can be used in
26 | // both the LHS and RHS of an assignment.
27 | 
28 | struct Int {
29 |   // Abstract syntax tree (the variable node this object stands for)
30 |   Expr* expr;
31 | 
32 |   // Constructors: each binds the object to a variable obtained from freshVar();
33 |   Int();           // no initial assignment
34 |   Int(int x);      // followed by an assignment of the literal x
35 |   Int(IntExpr e);  // followed by an assignment of the expression e
36 | 
37 |   // Copy constructors: new variable plus an assignment from x (no aliasing)
38 |   Int(Int& x);
39 |   Int(const Int& x);
40 | 
41 |   // Cast to an IntExpr
42 |   operator IntExpr();
43 | 
44 |   // Assignment: emits an assignment statement; both overloads return rhs
45 |   Int& operator=(Int& rhs);
46 |   IntExpr operator=(IntExpr rhs);
47 | 
48 |   // Increment (postfix only; returns void)
49 |   void operator++(int);
50 | };
51 | 
52 | // ============================================================================
53 | // Operations
54 | // ============================================================================
55 | 
56 | IntExpr getUniformInt();                     // read an Int from the UNIFORM FIFO
57 | IntExpr index();                             // vector containing integers 0..15
58 | IntExpr me();                                // vector containing the QPU id
59 | IntExpr numQPUs();                           // vector containing the QPU count
60 | 
61 | IntExpr rotate(IntExpr a, IntExpr b);        // vector rotation (Int)
62 | FloatExpr rotate(FloatExpr a, IntExpr b);    // vector rotation (Float)
63 | 
64 | IntExpr operator+(IntExpr a, IntExpr b);     // add
65 | IntExpr operator-(IntExpr a, IntExpr b);     // subtract
66 | IntExpr operator*(IntExpr a, IntExpr b);     // multiply
67 | IntExpr min(IntExpr a, IntExpr b);           // min
68 | IntExpr max(IntExpr a, IntExpr b);           // max
69 | IntExpr operator<<(IntExpr a, IntExpr b);    // shift left
70 | IntExpr operator>>(IntExpr a, IntExpr b);    // shift right
71 | IntExpr operator&(IntExpr a, IntExpr b);     // bitwise AND
72 | IntExpr operator|(IntExpr a, IntExpr b);     // bitwise OR
73 | IntExpr operator^(IntExpr a, IntExpr b);     // bitwise XOR
74 | IntExpr operator~(IntExpr a);                // bitwise NOT
75 | IntExpr shr(IntExpr a, IntExpr b);           // unsigned shift-right
76 | IntExpr ror(IntExpr a, IntExpr b);           // bitwise rotate-right
77 | IntExpr toInt(FloatExpr a);                  // conversion to Int
78 | FloatExpr toFloat(IntExpr a);                // conversion to Float
79 | 
80 | #endif
81 | 


--------------------------------------------------------------------------------
/Lib/Source/Interpreter.h:
--------------------------------------------------------------------------------
 1 | #ifndef _INTERPRETER_H_
 2 | #define _INTERPRETER_H_
 3 | 
 4 | #include <stdint.h>
 5 | #include "Common/Seq.h"
 6 | #include "Source/Syntax.h"
 7 | 
 8 | // The interpreter works in a similar way to the emulator.  The
 9 | // difference is that the former operates on source code and the
10 | // latter on target code.  We reuse a number of concepts of the
11 | // emulator in the interpreter.
12 | 
13 | #include "Target/Emulator.h"
14 | 
15 | // State of a single core (one element of InterpreterState::core).
16 | struct CoreState {
17 |   int id;                    // Core id
18 |   int numCores;              // Core count
19 |   Seq<int32_t>* uniforms;    // Arguments to kernel
20 |   int nextUniform;           // Index of the next uniform to read
21 |   int readStride;            // Read stride
22 |   int writeStride;           // Write stride
23 |   Vec* env;                  // Environment mapping vars to values
24 |   int sizeEnv;               // Size of the environment
25 |   Seq<char>* output;         // Output from print statements
26 |   Seq<Stmt*>* stack;         // Control stack
27 |   Seq<Vec>* loadBuffer;      // Load buffer
28 | };
29 | 
30 | // State of the Interpreter: one CoreState per possible QPU plus shared state.
31 | struct InterpreterState {
32 |   CoreState core[MAX_QPUS];  // State of each core
33 |   Word vpm[VPM_SIZE];        // Shared VPM memory
34 |   int sema[16];              // Semaphores (16 of them)
35 | };
36 | 
37 | // Interpreter entry point for the source language; see the parameter notes below.
38 | void interpreter
39 |   ( int numCores           // Number of cores active
40 |   , Stmt* s                // Source code
41 |   , int numVars            // Max var id used in source
42 |   , Seq<int32_t>* uniforms // Kernel parameters
43 |   , Seq<char>* output      // Output from print statements
44 |                            // (if NULL, stdout is used)
45 |   );
46 | 
47 | #endif
48 | 


--------------------------------------------------------------------------------
/Lib/Source/Pretty.cpp:
--------------------------------------------------------------------------------
  1 | #include "Source/Pretty.h"
  2 | 
  3 | // ============================================================================
  4 | // Operators
  5 | // ============================================================================
  6 | 
  7 | // Text used by the pretty printer for operator 'op' (selected by op.op).
  8 | const char* opToString(Op op) {
  9 |   switch (op.op) {
 10 |     case ADD:    return "+";
 11 |     case SUB:    return "-";
 12 |     case MUL:    return "*";
 13 |     case MIN:    return " min ";
 14 |     case MAX:    return " max ";
 15 |     case ROTATE: return " rotate ";
 16 |     case SHL:    return " << ";
 17 |     case SHR:    return " >> ";
 18 |     case USHR:   return " _>> ";
 19 |     case ROR:    return " ror ";
 20 |     case BOR:    return " | ";
 21 |     case BAND:   return " & ";
 22 |     case BXOR:   return " ^ ";
 23 |     case BNOT:   return "~";
 24 |     case ItoF:   return "(Float) ";
 25 |     case FtoI:   return "(Int) ";
 26 |   }
 27 |   // Not reachable; the return covers NDEBUG builds, where assert() compiles away.
 28 |   assert(false);
 29 |   return "?";
 30 | }
 31 | 
 32 | // Text used by the pretty printer for comparison operator 'op' (selected by op.op).
 33 | const char* cmpOpToString(CmpOp op) {
 34 |   switch (op.op) {
 35 |     case EQ : return "==";
 36 |     case NEQ: return "!=";
 37 |     case LT : return "<";
 38 |     case LE : return "<=";
 39 |     case GT : return ">";
 40 |     case GE : return ">=";
 41 |   }
 42 |   // Not reachable; the return covers NDEBUG builds, where assert() compiles away.
 43 |   assert(false);
 44 |   return "?";
 45 | }
 46 | 
 47 | // ============================================================================
 48 | // Expressions
 49 | // ============================================================================
 50 | 
 51 | // Print expression 'e' to stdout; a NULL expression prints nothing.
 52 | void pretty(Expr* e) {
 53 |   if (!e) return;
 54 | 
 55 |   switch (e->tag) {
 56 |     // Integer literals are printed with %i
 57 |     case INT_LIT:
 58 |       printf("%i", e->intLit);
 59 |       break;
 60 | 
 61 |     // Float literals are printed with %f
 62 |     case FLOAT_LIT:
 63 |       printf("%f", e->floatLit);
 64 |       break;
 65 | 
 66 |     // Variables: standard variables print as v<id>, special ones by tag name;
 67 |     // any tag not listed below prints nothing.
 68 |     case VAR:
 69 |       switch (e->var.tag) {
 70 |         case STANDARD:  printf("v%i", e->var.id); break;
 71 |         case UNIFORM:   printf("UNIFORM");        break;
 72 |         case QPU_NUM:   printf("QPU_NUM");        break;
 73 |         case ELEM_NUM:  printf("ELEM_NUM");       break;
 74 |         case TMU0_ADDR: printf("TMU0_ADDR");      break;
 75 |         default:        break;
 76 |       }
 77 |       break;
 78 | 
 79 |     // Applications are always parenthesised; unary operators print in prefix form
 80 |     case APPLY: {
 81 |       const bool unary = isUnary(e->apply.op);
 82 |       printf("(");
 83 |       if (unary) {
 84 |         printf("%s", opToString(e->apply.op));
 85 |         pretty(e->apply.lhs);
 86 |       }
 87 |       else {
 88 |         pretty(e->apply.lhs);
 89 |         printf("%s", opToString(e->apply.op));
 90 |         pretty(e->apply.rhs);
 91 |       }
 92 |       printf(")");
 93 |       break;
 94 |     }
 95 | 
 96 |     // Dereference: '*' followed by the pointer expression
 97 |     case DEREF:
 98 |       printf("*");
 99 |       pretty(e->deref.ptr);
100 |       break;
101 | 
102 |     // Any other tag prints nothing.
103 |   }
104 | }
105 | 
106 | // ============================================================================
107 | // Boolean expressions
108 | // ============================================================================
109 | 
110 | // Print boolean expression 'b' to stdout; a NULL expression prints nothing.
111 | void pretty(BExpr* b) {
112 |   if (!b) return;
113 | 
114 |   switch (b->tag) {
115 |     // Negation: '!' prefix, operand printed without extra parentheses
116 |     case NOT:
117 |       printf("!");
118 |       pretty(b->neg);
119 |       return;
120 | 
121 |     // Conjunction: parenthesised 'lhs && rhs'
122 |     case AND:
123 |       printf("(");
124 |       pretty(b->conj.lhs);
125 |       printf(" && ");
126 |       pretty(b->conj.rhs);
127 |       printf(")");
128 |       return;
129 | 
130 |     // Disjunction: parenthesised 'lhs || rhs'
131 |     case OR:
132 |       printf("(");
133 |       pretty(b->disj.lhs);
134 |       printf(" || ");
135 |       pretty(b->disj.rhs);
136 |       printf(")");
137 |       return;
138 | 
139 |     // Comparison: operands around the operator text, no parentheses
140 |     case CMP:
141 |       pretty(b->cmp.lhs);
142 |       printf("%s", cmpOpToString(b->cmp.op));
143 |       pretty(b->cmp.rhs);
144 |       return;
145 |   }
146 | }
147 | 
148 | // ============================================================================
149 | // Conditional expressions
150 | // ============================================================================
151 | 
152 | // Print condition 'c' to stdout as any(<bexpr>) or all(<bexpr>).
153 | // A NULL condition prints nothing.
154 | void pretty(CExpr* c) {
155 |   if (!c) return;
156 | 
157 |   // Opening text names the reduction; any other tag prints no prefix at all.
158 |   if (c->tag == ANY)
159 |     printf("any(");
160 |   else if (c->tag == ALL)
161 |     printf("all(");
162 | 
163 |   // The boolean expression and the closing parenthesis are printed regardless.
164 |   pretty(c->bexpr);
165 |   printf(")");
166 | }
167 | 
168 | // ============================================================================
169 | // Statements
170 | // ============================================================================
171 | 
172 | void indentBy(int indent) {  // print 'indent' spaces; nothing when indent <= 0
173 |   for (int remaining = indent; remaining > 0; --remaining) printf(" ");
174 | }
175 | 
176 | // Print statement 's' to stdout, indenting each emitted line by 'indent' spaces.
177 | void pretty(int indent, Stmt* s) {
178 |   if (s == NULL) return;
179 | 
180 |   switch (s->tag) {
181 |     // Skip prints nothing
182 |     case SKIP: break;
183 | 
184 |     // Assignment
185 |     case ASSIGN:
186 |       indentBy(indent);
187 |       pretty(s->assign.lhs);
188 |       printf(" = ");
189 |       pretty(s->assign.rhs);
190 |       printf(";\n");
191 |       break;
192 | 
193 |     // Sequential composition: both parts at the same indentation
194 |     case SEQ:
195 |       pretty(indent, s->seq.s0);
196 |       pretty(indent, s->seq.s1);
197 |       break;
198 | 
199 |     // Where statement (branches indented by two; Else printed only if present)
200 |     case WHERE:
201 |       indentBy(indent);
202 |       printf("Where (");
203 |       pretty(s->where.cond);
204 |       printf(")\n");
205 |       pretty(indent+2, s->where.thenStmt);
206 |       if (s->where.elseStmt != NULL) {
207 |         indentBy(indent);
208 |         printf("Else\n");
209 |         pretty(indent+2, s->where.elseStmt);
210 |       }
211 |       indentBy(indent);
212 |       printf("End\n");
213 |       break;
214 | 
215 |     // If statement (every field is read through 'ifElse', including the else test)
216 |     case IF:
217 |       indentBy(indent);
218 |       printf("If (");
219 |       pretty(s->ifElse.cond);
220 |       printf(")\n");
221 |       pretty(indent+2, s->ifElse.thenStmt);
222 |       if (s->ifElse.elseStmt != NULL) {
223 |         indentBy(indent);
224 |         printf("Else\n");
225 |         pretty(indent+2, s->ifElse.elseStmt);
226 |       }
227 |       indentBy(indent);
228 |       printf("End\n");
229 |       break;
230 | 
231 |     // While statement
232 |     case WHILE:
233 |       indentBy(indent);
234 |       printf("While (");
235 |       pretty(s->loop.cond);
236 |       printf(")\n");
237 |       pretty(indent+2, s->loop.body);
238 |       indentBy(indent);
239 |       printf("End\n");
240 |       break;
241 | 
242 |     // Print statement
243 |     case PRINT:
244 |       indentBy(indent);
245 |       printf("Print (");
246 |       if (s->print.tag == PRINT_STR) {
247 |         // Ideally would print escaped string here
248 |         printf("\"%s\"", s->print.str);
249 |       }
250 |       else
251 |         pretty(s->print.expr);
252 |       printf(")\n");
253 |       break;
254 | 
255 |     // Set read stride
256 |     case SET_READ_STRIDE:
257 |       indentBy(indent);
258 |       printf("setReadStride(");
259 |       pretty(s->stride);
260 |       printf(")\n");
261 |       break;
262 | 
263 |     // Set write stride
264 |     case SET_WRITE_STRIDE:
265 |       indentBy(indent);
266 |       printf("setWriteStride(");
267 |       pretty(s->stride);
268 |       printf(")\n");
269 |       break;
270 | 
271 |     // Load receive
272 |     case LOAD_RECEIVE:
273 |       indentBy(indent);
274 |       printf("receive(");
275 |       pretty(s->loadDest);
276 |       printf(")\n");
277 |       break;
278 | 
279 |     // Store request
280 |     case STORE_REQUEST:
281 |       indentBy(indent);
282 |       printf("store(");
283 |       pretty(s->storeReq.data);
284 |       printf(", ");
285 |       pretty(s->storeReq.addr);
286 |       printf(")\n");
287 |       break;
288 | 
289 |     // Flush outstanding stores
290 |     case FLUSH:
291 |       indentBy(indent);
292 |       printf("flush()\n");
293 |       break;
294 | 
295 |     // Increment semaphore
296 |     case SEMA_INC:
297 |       indentBy(indent);
298 |       printf("semaInc(%i)\n", s->semaId);
299 |       break;
300 | 
301 |     // Decrement semaphore
302 |     case SEMA_DEC:
303 |       indentBy(indent);
304 |       printf("semaDec(%i)\n", s->semaId);
305 |       break;
306 | 
307 |     // Host IRQ
308 |     case SEND_IRQ_TO_HOST:
309 |       indentBy(indent);
310 |       printf("hostIRQ()\n");
311 |       break;
312 | 
313 |     // Not reachable: every statement tag is expected to have a case above
314 |     default:
315 |       assert(false);
316 |   }
317 | }
318 | 
319 | // Print statement 's' to stdout starting at indentation level zero.
320 | void pretty(Stmt* s) {
321 |   pretty(0, s);
322 | }
323 | 


--------------------------------------------------------------------------------
/Lib/Source/Pretty.h:
--------------------------------------------------------------------------------
 1 | #ifndef _SOURCE_PRETTY_H_
 2 | #define _SOURCE_PRETTY_H_
 3 | 
 4 | #include "Source/Syntax.h"
 5 | 
 6 | // Pretty printer for the QPULib source language (all overloads print to stdout)
 7 | void pretty(Expr* e);   // expressions
 8 | void pretty(BExpr* b);  // boolean expressions
 9 | void pretty(CExpr* c);  // conditions: any(...) / all(...)
10 | void pretty(Stmt* s);   // statements, starting at indentation zero
11 | 
12 | #endif
13 | 


--------------------------------------------------------------------------------
/Lib/Source/Ptr.h:
--------------------------------------------------------------------------------
  1 | // This module defines the type 'Ptr<T>', denoting a pointer to a
  2 | // value of type 'T'.
  3 | 
  4 | #ifndef _SOURCE_PTR_H_
  5 | #define _SOURCE_PTR_H_
  6 | 
  7 | #include <assert.h>
  8 | #include "Source/Syntax.h"
  9 | 
 10 | // ============================================================================
 11 | // Types                   
 12 | // ============================================================================
 13 | 
 14 | // A 'PtrExpr<T>' defines a pointer expression which can only be used on the
 15 | // RHS of assignment statements.
 16 | template <typename T> struct PtrExpr {
 17 |   // Abstract syntax tree
 18 |   Expr* expr;
 19 |   // Default constructor: no expression attached.  Declared with the plain class
 20 |   PtrExpr() { this->expr = NULL; }  // name; 'PtrExpr<T>()' is rejected by C++20.
 21 | 
 22 |   // Dereference
 23 |   T& operator*() {
 24 |     // This operation must return a reference to T, so we allocate the
 25 |     // AST node on the heap and return a reference to it.
 26 |     T* p = astHeap.alloc<T>(1);
 27 |     p->expr = mkDeref(expr);
 28 |     return *p;
 29 |   }
 30 | 
 31 |   // Array index: dereferences expr + (index << 2), i.e. the index times 4
 32 |   T& operator[](IntExpr index) {
 33 |     T* p = astHeap.alloc<T>(1);
 34 |     p->expr = mkDeref(mkApply(expr, mkOp(ADD, INT32),
 35 |                 mkApply(index.expr, mkOp(SHL, INT32), mkIntLit(2))));
 36 |     return *p;
 37 |   }
 38 | };
 39 | 
 40 | // A 'Ptr<T>' defines a pointer variable which can be used in both the LHS and
 41 | // RHS of an assignment.
 42 | 
 43 | template <typename T> struct Ptr {
 44 |   // Abstract syntax tree
 45 |   Expr* expr;
 46 | 
 47 |   // Constructors (declared with the plain class name; 'Ptr<T>(...)' is rejected by C++20)
 48 |   Ptr() {
 49 |     Var v    = freshVar();
 50 |     this->expr = mkVar(v);
 51 |   }
 52 | 
 53 |   Ptr(PtrExpr<T> rhs) {
 54 |     Var v    = freshVar();
 55 |     this->expr = mkVar(v);
 56 |     assign(this->expr, rhs.expr);
 57 |   }
 58 | 
 59 |   // Copy constructors: a new variable plus an assignment from x (no aliasing)
 60 |   Ptr(Ptr<T>& x) {
 61 |     Var v    = freshVar();
 62 |     this->expr = mkVar(v);
 63 |     assign(this->expr, x.expr);
 64 |   }
 65 |   Ptr(const Ptr<T>& x) {
 66 |     Var v    = freshVar();
 67 |     this->expr = mkVar(v);
 68 |     assign(this->expr, x.expr);
 69 |   }
 70 | 
 71 |   // Assignment: emits an assignment statement; both overloads return rhs
 72 |   Ptr<T>& operator=(Ptr<T>& rhs) {
 73 |     assign(this->expr, rhs.expr);
 74 |     return rhs;
 75 |   }
 76 | 
 77 |   PtrExpr<T> operator=(PtrExpr<T> rhs) {
 78 |     assign(this->expr, rhs.expr);
 79 |     return rhs;
 80 |   }
 81 | 
 82 |   // Dereference
 83 |   T& operator*() {
 84 |     // This operation must return a reference to T, so we allocate the
 85 |     // AST node on the heap and return a reference to it.
 86 |     T* p = astHeap.alloc<T>(1);
 87 |     p->expr = mkDeref(expr);
 88 |     return *p;
 89 |   }
 90 | 
 91 |   // Array index: dereferences expr + (index << 2), i.e. the index times 4
 92 |   T& operator[](IntExpr index) {
 93 |     T* p = astHeap.alloc<T>(1);
 94 |     p->expr = mkDeref(mkApply(expr, mkOp(ADD, INT32),
 95 |                 mkApply(index.expr, mkOp(SHL, INT32), mkIntLit(2))));
 96 |     return *p;
 97 |   }
 98 | };
 99 | 
100 | // ============================================================================
101 | // Specific operations
102 | // ============================================================================
103 | 
104 | template <typename T> inline PtrExpr<T> getUniformPtr() {
105 |   Expr* node    = mkExpr();   // pointer expression backed by a VAR node ...
106 |   node->tag     = VAR;
107 |   node->var.tag = UNIFORM;    // ... whose var tag is UNIFORM
108 |   PtrExpr<T> result; result.expr = node; return result;
109 | }
110 | 
111 | template <typename T> inline PtrExpr<T> operator+(PtrExpr<T> a, int b) {
112 |   Expr* offset = mkIntLit(4*b);  // constant offset: b scaled by 4
113 |   PtrExpr<T> sum; sum.expr = mkApply(a.expr, mkOp(ADD, INT32), offset); return sum;
114 | }
115 | 
116 | template <typename T> inline PtrExpr<T> operator+(Ptr<T> &a, int b) {
117 |   Expr* offset = mkIntLit(4*b);  // constant offset: b scaled by 4
118 |   PtrExpr<T> sum; sum.expr = mkApply(a.expr, mkOp(ADD, INT32), offset); return sum;
119 | }
120 | 
121 | template <typename T> inline PtrExpr<T> operator+(PtrExpr<T> a, IntExpr b) {
122 |   Expr* offset = (b << 2).expr;  // expression offset: b shifted left by 2
123 |   PtrExpr<T> sum; sum.expr = mkApply(a.expr, mkOp(ADD, INT32), offset); return sum;
124 | }
125 | 
126 | template <typename T> inline PtrExpr<T> operator+(Ptr<T> &a, IntExpr b) {
127 |   Expr* offset = (b << 2).expr;  // expression offset: b shifted left by 2
128 |   PtrExpr<T> sum; sum.expr = mkApply(a.expr, mkOp(ADD, INT32), offset); return sum;
129 | }
130 | 
131 | 
132 | #endif
133 | 


--------------------------------------------------------------------------------
/Lib/Source/Stmt.cpp:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include "Common/Stack.h"
  3 | #include "Source/Stmt.h"
  4 | #include "Source/Int.h"
  5 | 
  6 | // Interface to the embedded language.
  7 | 
  8 | //=============================================================================
  9 | // Assignment token
 10 | //=============================================================================
 11 | 
 12 | void assign(Expr* lhs, Expr* rhs) {
 13 |   Stmt* s = mkAssign(lhs, rhs);
 14 |   stmtStack.replace(mkSeq(stmtStack.top(), s));
 15 | }
 16 | 
 17 | //=============================================================================
 18 | // 'If' token
 19 | //=============================================================================
 20 | 
 21 | void If_(Cond c)
 22 | {
 23 |   Stmt* s = mkIf(c.cexpr, NULL, NULL);
 24 |   controlStack.push(s);
 25 |   stmtStack.push(mkSkip());
 26 | }
 27 | 
 28 | void If_(BoolExpr b)
 29 | {
 30 |   If_(any(b));
 31 | }
 32 | 
 33 | //=============================================================================
 34 | // 'Else' token
 35 | //=============================================================================
 36 | 
 37 | void Else_()
 38 | {
 39 |   int ok = 0;
 40 |   if (controlStack.size > 0) {
 41 |     Stmt* s = controlStack.top();
 42 |     if (s->tag == IF && s->ifElse.thenStmt == NULL) {
 43 |       s->ifElse.thenStmt = stmtStack.top();
 44 |       stmtStack.replace(mkSkip());
 45 |       ok = 1;
 46 |     }
 47 |     if (s->tag == WHERE && s->where.thenStmt == NULL) {
 48 |       s->where.thenStmt = stmtStack.top();
 49 |       stmtStack.replace(mkSkip());
 50 |       ok = 1;
 51 |     }
 52 |   }
 53 | 
 54 |   if (!ok) {
 55 |     printf("Syntax error: 'Else' without preceeding 'If' or 'Where'\n");
 56 |     exit(-1);
 57 |   }
 58 | }
 59 | 
 60 | //=============================================================================
 61 | // 'End' token
 62 | //=============================================================================
 63 | 
 64 | void End_()
 65 | {
 66 |   int ok = 0;
 67 |   if (controlStack.size > 0) {
 68 |     Stmt* s = controlStack.top();
 69 |     if (s->tag == IF && s->ifElse.thenStmt == NULL) {
 70 |       s->ifElse.thenStmt = stmtStack.top();
 71 |       ok = 1;
 72 |     }
 73 |     else if (s->tag == IF && s->ifElse.elseStmt == NULL) {
 74 |       s->ifElse.elseStmt = stmtStack.top();
 75 |       ok = 1;
 76 |     }
 77 |     if (s->tag == WHERE && s->where.thenStmt == NULL) {
 78 |       s->where.thenStmt = stmtStack.top();
 79 |       ok = 1;
 80 |     }
 81 |     else if (s->tag == WHERE && s->where.elseStmt == NULL) {
 82 |       s->where.elseStmt = stmtStack.top();
 83 |       ok = 1;
 84 |     }
 85 |     if (s->tag == WHILE && s->loop.body == NULL) {
 86 |       s->loop.body = stmtStack.top();
 87 |       ok = 1;
 88 |     }
 89 |     if (s->tag == FOR && s->forLoop.body == NULL) {
 90 |       // Convert 'for' loop to 'while' loop
 91 |       CExpr* whileCond = s->forLoop.cond;
 92 |       Stmt* whileBody = mkSeq(stmtStack.top(), s->forLoop.inc);
 93 |       s->tag = WHILE;
 94 |       s->loop.body = whileBody;
 95 |       s->loop.cond = whileCond;
 96 |       ok = 1;
 97 |     }
 98 | 
 99 |     if (ok) {
100 |       stmtStack.pop();
101 |       stmtStack.replace(mkSeq(stmtStack.top(), s));
102 |       controlStack.pop();
103 |     }
104 |   }
105 | 
106 |   if (!ok) {
107 |     printf("Syntax error: unexpected 'End'\n");
108 |     exit(-1);
109 |   }
110 | }
111 | 
112 | //=============================================================================
113 | // 'While' token
114 | //=============================================================================
115 | 
116 | void While_(Cond c)
117 | {
118 |   Stmt* s = mkWhile(c.cexpr, NULL);
119 |   controlStack.push(s);
120 |   stmtStack.push(mkSkip());
121 | }
122 | 
123 | void While_(BoolExpr b)
124 | {
125 |   While_(any(b));
126 | }
127 | 
128 | //=============================================================================
129 | // 'Where' token
130 | //=============================================================================
131 | 
132 | void Where__(BExpr* b)
133 | {
134 |   Stmt* s = mkWhere(b, NULL, NULL);
135 |   controlStack.push(s);
136 |   stmtStack.push(mkSkip());
137 | }
138 | 
139 | //=============================================================================
140 | // 'For' token
141 | //=============================================================================
142 | 
143 | void For_(Cond c)
144 | {
145 |   Stmt* s = mkFor(c.cexpr, NULL, NULL);
146 |   controlStack.push(s);
147 |   stmtStack.push(mkSkip());
148 | }
149 | 
150 | void For_(BoolExpr b)
151 | {
152 |   For_(any(b));
153 | }
154 | 
155 | void ForBody_()
156 | {
157 |   Stmt* s = controlStack.top();
158 |   s->forLoop.inc = stmtStack.top();
159 |   stmtStack.pop();
160 |   stmtStack.push(mkSkip());
161 | }
162 | 
163 | //=============================================================================
164 | // 'Print' token
165 | //=============================================================================
166 | 
167 | void Print(const char* str)
168 | {
169 |   Stmt* s = mkStmt();
170 |   s->tag = PRINT;
171 |   s->print.tag = PRINT_STR;
172 |   s->print.str = str;
173 |   stmtStack.replace(mkSeq(stmtStack.top(), s));
174 | }
175 | 
176 | void Print(IntExpr x)
177 | {
178 |   Stmt* s = mkStmt();
179 |   s->tag = PRINT;
180 |   s->print.tag = PRINT_INT;
181 |   s->print.expr = x.expr;
182 |   stmtStack.replace(mkSeq(stmtStack.top(), s));
183 | }
184 | 
185 | //=============================================================================
186 | // Set stride
187 | //=============================================================================
188 | 
189 | void setReadStride(IntExpr stride)
190 | {
191 |   Stmt* s = mkStmt();
192 |   s->tag = SET_READ_STRIDE;
193 |   s->stride = stride.expr;
194 |   stmtStack.replace(mkSeq(stmtStack.top(), s));
195 | }
196 | 
197 | void setWriteStride(IntExpr stride)
198 | {
199 |   Stmt* s = mkStmt();
200 |   s->tag = SET_WRITE_STRIDE;
201 |   s->stride = stride.expr;
202 |   stmtStack.replace(mkSeq(stmtStack.top(), s));
203 | }
204 | 
205 | // ============================================================================
206 | // QPU code for clean exit
207 | // ============================================================================
208 | 
209 | void kernelFinish()
210 | {
211 |   // Ensure outstanding stores have completed
212 |   flush();
213 | 
214 |   // QPU 0 waits until all other QPUs have finished
215 |   // before sending a host IRQ.
216 |   If (me() == 0)
217 |     Int n = numQPUs()-1;
218 |     For (Int i = 0, i < n, i++)
219 |       semaDec(15);
220 |     End
221 |     hostIRQ();
222 |   Else
223 |     semaInc(15);
224 |   End
225 | }
226 | 


--------------------------------------------------------------------------------
/Lib/Source/Stmt.h:
--------------------------------------------------------------------------------
 1 | #ifndef _SOURCE_STMT_H_
 2 | #define _SOURCE_STMT_H_
 3 | 
 4 | #include "Source/Cond.h"
 5 | #include "Source/Syntax.h"
 6 | #include "Source/Ptr.h"
 7 | #include "Source/StmtExtra.h"
 8 | 
 9 | //=============================================================================
10 | // Statement macros
11 | //=============================================================================
12 | 
// Each macro pairs a call to the matching statement token with a C++ brace,
// so that the statements of a construct sit in their own C++ scope.
// 'If', 'While' and 'Where' open a scope, 'Else' closes one and opens the
// next, and 'End' closes the scope.
#define If(c)    If_(c); {
#define Else     } Else_(); {
#define End      } End_();
#define While(c) While_(c); {
#define Where(b) Where_(b); {

// 'For' opens a scope holding 'init', opens the loop with For_(cond), runs
// 'inc' so that its statements are collected, and then calls ForBody_(),
// which stores them as the loop increment.  The scope is closed by 'End'.
#define For(init, cond, inc) \
  { init;                    \
    For_(cond);              \
      inc;                   \
    ForBody_();
23 | 
24 | //=============================================================================
25 | // Statement tokens
26 | //=============================================================================
27 | 
28 | void assign(Expr* lhs, Expr* rhs);
29 | void If_(Cond c);
30 | void If_(BoolExpr c);
31 | void Else_();
32 | void End_();
33 | void While_(Cond c);
34 | void While_(BoolExpr b);
35 | void Where__(BExpr* b);
36 | inline void Where_(BoolExpr b) { Where__(b.bexpr); }
37 | void For_(Cond c);
38 | void For_(BoolExpr b);
39 | void ForBody_();
40 | void Print(const char *);
41 | void Print(IntExpr x);
42 | void setReadStride(IntExpr n);
43 | void setWriteStride(IntExpr n);
44 | void kernelFinish();
45 | 
46 | #endif
47 | 


--------------------------------------------------------------------------------
/Lib/Source/StmtExtra.h:
--------------------------------------------------------------------------------
 1 | #ifndef _SOURCE_STMTEXTRA_H_
 2 | #define _SOURCE_STMTEXTRA_H_
 3 | 
 4 | //=============================================================================
 5 | // Host IRQ
 6 | //=============================================================================
 7 | 
 8 | inline void hostIRQ()
 9 | {
10 |   Stmt* s = mkStmt();
11 |   s->tag = SEND_IRQ_TO_HOST;
12 |   stmtStack.replace(mkSeq(stmtStack.top(), s));
13 | }
14 | 
15 | //=============================================================================
16 | // Semaphore access
17 | //=============================================================================
18 | 
19 | inline void semaInc(int semaId)
20 | {
21 |   Stmt* s = mkStmt();
22 |   s->tag = SEMA_INC;
23 |   s->semaId = semaId;
24 |   stmtStack.replace(mkSeq(stmtStack.top(), s));
25 | }
26 | 
27 | inline void semaDec(int semaId)
28 | {
29 |   Stmt* s = mkStmt();
30 |   s->tag = SEMA_DEC;
31 |   s->semaId = semaId;
32 |   stmtStack.replace(mkSeq(stmtStack.top(), s));
33 | }
34 | 
35 | //=============================================================================
36 | // Receive, request, store operations
37 | //=============================================================================
38 | 
39 | inline void gatherExpr(Expr* e)
40 | {
41 |   Var v; v.tag = TMU0_ADDR;
42 |   Stmt* s = mkAssign(mkVar(v), e);
43 |   stmtStack.replace(mkSeq(stmtStack.top(), s));
44 | }
45 | 
46 | template <typename T> inline void gather(PtrExpr<T> addr)
47 |   { gatherExpr(addr.expr); }
48 | 
49 | template <typename T> inline void gather(Ptr<T>& addr)
50 |   { gatherExpr(addr.expr); }
51 | 
52 | inline void receiveExpr(Expr* e)
53 | {
54 |   Stmt* s = mkStmt();
55 |   s->tag = LOAD_RECEIVE;
56 |   s->loadDest = e;
57 |   stmtStack.replace(mkSeq(stmtStack.top(), s));
58 | }
59 | 
60 | inline void receive(Int& dest)
61 |   { receiveExpr(dest.expr); }
62 | 
63 | inline void receive(Float& dest)
64 |   { receiveExpr(dest.expr); }
65 | 
66 | template <typename T> inline void receive(Ptr<T>& dest)
67 |   { receiveExpr(dest.expr); }
68 | 
69 | inline void storeExpr(Expr* e0, Expr* e1)
70 | {
71 |   Stmt* s = mkStmt();
72 |   s->tag = STORE_REQUEST;
73 |   s->storeReq.data = e0;
74 |   s->storeReq.addr = e1;
75 |   stmtStack.replace(mkSeq(stmtStack.top(), s));
76 | }
77 | 
78 | inline void store(IntExpr data, PtrExpr<Int> addr)
79 |   { storeExpr(data.expr, addr.expr); }
80 | 
81 | inline void store(FloatExpr data, PtrExpr<Float> addr)
82 |   { storeExpr(data.expr, addr.expr); }
83 | 
84 | inline void store(IntExpr data, Ptr<Int> &addr)
85 |   { storeExpr(data.expr, addr.expr); }
86 | 
87 | inline void store(FloatExpr data, Ptr<Float> &addr)
88 |   { storeExpr(data.expr, addr.expr); }
89 | 
90 | inline void flush()
91 | {
92 |   Stmt* s = mkStmt();
93 |   s->tag = FLUSH;
94 |   stmtStack.replace(mkSeq(stmtStack.top(), s));
95 | }
96 | 
97 | #endif
98 | 


--------------------------------------------------------------------------------
/Lib/Source/Syntax.cpp:
--------------------------------------------------------------------------------
  1 | #include "Source/Syntax.h"
  2 | #include "Common/Heap.h"
  3 | #include "Common/Stack.h"
  4 | #include "Params.h"
  5 | 
  6 | // ============================================================================
  7 | // Globals
  8 | // ============================================================================
  9 | 
 10 | // Used for fresh variable generation
 11 | static int globalVarId = 0;
 12 | 
 13 | // Used for constructing abstract syntax trees
 14 | Heap        astHeap("abstract syntax tree", AST_HEAP_SIZE);
 15 | Stack<Stmt> stmtStack;
 16 | Stack<Stmt> controlStack;
 17 | 
 18 | // ============================================================================
 19 | // Functions on global variables
 20 | // ============================================================================
 21 | 
 22 | // Obtain a fresh variable
 23 | Var freshVar()
 24 | {
 25 |   // Return a fresh standard variable
 26 |   Var v;
 27 |   v.tag = STANDARD;
 28 |   v.id  = globalVarId++;
 29 |   return v;
 30 | }
 31 | 
 32 | // Number of fresh vars
 33 | int getFreshVarCount()
 34 | {
 35 |   return globalVarId;
 36 | }
 37 | 
 38 | // Reset fresh variable generator
 39 | void resetFreshVarGen()
 40 | {
 41 |   globalVarId = 0;
 42 | }
 43 | 
 44 | // Reset fresh variable generator to specified value
 45 | void resetFreshVarGen(int val)
 46 | {
 47 |   globalVarId = val;
 48 | }
 49 | 
 50 | // ============================================================================
 51 | // Functions on operators
 52 | // ============================================================================
 53 | 
 54 | Op mkOp(OpId op, BaseType type) {
 55 |   Op o;
 56 |   o.op   = op;
 57 |   o.type = type;
 58 |   return o;
 59 | }
 60 | 
 61 | CmpOp mkCmpOp(CmpOpId op, BaseType type) {
 62 |   CmpOp o;
 63 |   o.op   = op;
 64 |   o.type = type;
 65 |   return o;
 66 | }
 67 | 
 68 | // Is operator unary?
 69 | bool isUnary(Op op)
 70 | {
 71 |   return (op.op == BNOT || op.op == ItoF || op.op == FtoI);
 72 | }
 73 | 
 74 | // Is given operator commutative?
 75 | bool isCommutative(Op op)
 76 | {
 77 |   if (op.type != FLOAT) {
 78 |     return op.op == ADD
 79 |         || op.op == MUL
 80 |         || op.op == BOR
 81 |         || op.op == BAND
 82 |         || op.op == BXOR
 83 |         || op.op == MIN
 84 |         || op.op == MAX;
 85 |   }
 86 |   return false;
 87 | }
 88 | 
 89 | // ============================================================================
 90 | // Functions on expressions
 91 | // ============================================================================
 92 | 
 93 | // Function to allocate an expression
 94 | Expr* mkExpr()
 95 | {
 96 |   return astHeap.alloc<Expr>();
 97 | }
 98 | 
 99 | // Make an integer literal
100 | Expr* mkIntLit(int lit)
101 | {
102 |   Expr* e   = mkExpr();
103 |   e->tag    = INT_LIT;
104 |   e->intLit = lit;
105 |   return e;
106 | }
107 | 
108 | // Make a float literal
109 | Expr* mkFloatLit(float lit)
110 | {
111 |   Expr* e     = mkExpr();
112 |   e->tag      = FLOAT_LIT;
113 |   e->floatLit = lit;
114 |   return e;
115 | }
116 | 
117 | // Make a variable
118 | Expr* mkVar(Var var)
119 | {
120 |   Expr* e = mkExpr();
121 |   e->tag  = VAR;
122 |   e->var  = var;
123 |   return e;
124 | }
125 | 
126 | // Make an operator application
127 | Expr* mkApply(Expr* lhs, Op op, Expr* rhs)
128 | {
129 |   Expr* e      = mkExpr();
130 |   e->tag       = APPLY;
131 |   e->apply.lhs = lhs;
132 |   e->apply.op  = op;
133 |   e->apply.rhs = rhs;
134 |   return e;
135 | }
136 | 
137 | // Make a pointer dereference
138 | Expr* mkDeref(Expr* ptr)
139 | {
140 |   Expr* e      = mkExpr();
141 |   e->tag       = DEREF;
142 |   e->deref.ptr = ptr;
143 |   return e;
144 | }
145 | 
146 | // Is an expression a literal?
147 | bool isLit(Expr* e)
148 | {
149 |   return (e->tag == INT_LIT) || (e->tag == FLOAT_LIT);
150 | }
151 | 
152 | // ============================================================================
153 | // Functions on boolean expressions
154 | // ============================================================================
155 | 
156 | // Allocate a boolean expression
157 | BExpr* mkBExpr()
158 | {
159 |   return astHeap.alloc<BExpr>();
160 | }
161 | 
162 | BExpr* mkNot(BExpr* neg)
163 | {
164 |   BExpr *b    = mkBExpr();
165 |   b->tag      = NOT;
166 |   b->neg      = neg;
167 |   return b;
168 | }
169 | 
170 | BExpr* mkAnd(BExpr* lhs, BExpr* rhs)
171 | {
172 |   BExpr *b    = mkBExpr();
173 |   b->tag      = AND;
174 |   b->conj.lhs = lhs;
175 |   b->conj.rhs = rhs;
176 |   return b;
177 | }
178 | 
179 | BExpr* mkOr(BExpr* lhs, BExpr* rhs)
180 | {
181 |   BExpr *b    = mkBExpr();
182 |   b->tag      = OR;
183 |   b->disj.lhs = lhs;
184 |   b->disj.rhs = rhs;
185 |   return b;
186 | }
187 | 
188 | BExpr* mkCmp(Expr* lhs, CmpOp op, Expr*  rhs)
189 | {
190 |   BExpr *b    = mkBExpr();
191 |   b->tag      = CMP;
192 |   b->cmp.lhs  = lhs;
193 |   b->cmp.op   = op;
194 |   b->cmp.rhs  = rhs;
195 |   return b;
196 | }
197 | 
198 | // ============================================================================
199 | // Functions on conditionals
200 | // ============================================================================
201 | 
202 | CExpr* mkCExpr()
203 | {
204 |   return astHeap.alloc<CExpr>();
205 | }
206 | 
207 | CExpr* mkAll(BExpr* bexpr)
208 | {
209 |   CExpr* c = mkCExpr();
210 |   c->tag   = ALL;
211 |   c->bexpr = bexpr;
212 |   return c;
213 | }
214 | 
215 | CExpr* mkAny(BExpr* bexpr)
216 | {
217 |   CExpr* c = mkCExpr();
218 |   c->tag   = ANY;
219 |   c->bexpr = bexpr;
220 |   return c;
221 | }
222 | 
223 | // ============================================================================
224 | // Functions on statements
225 | // ============================================================================
226 | 
227 | // Functions to allocate a statement
228 | Stmt* mkStmt()
229 | {
230 |   return astHeap.alloc<Stmt>();
231 | }
232 | 
233 | // Make a skip statement
234 | Stmt* mkSkip()
235 | {
236 |   Stmt* s = mkStmt();
237 |   s->tag = SKIP;
238 |   return s;
239 | }
240 | 
241 | // Make an assignment statement
242 | Stmt* mkAssign(Expr* lhs, Expr* rhs)
243 | {
244 |   Stmt* s       = mkStmt();
245 |   s->tag        = ASSIGN;
246 |   s->assign.lhs = lhs;
247 |   s->assign.rhs = rhs;
248 |   return s;
249 | }
250 | 
251 | // Make a sequential composition
252 | Stmt* mkSeq(Stmt *s0, Stmt* s1)
253 | {
254 |   Stmt* s   = mkStmt();
255 |   s->tag    = SEQ;
256 |   s->seq.s0 = s0;
257 |   s->seq.s1 = s1;
258 |   return s;
259 | }
260 | 
261 | Stmt* mkWhere(BExpr* cond, Stmt* thenStmt, Stmt* elseStmt)
262 | {
263 |   Stmt* s           = mkStmt();
264 |   s->tag            = WHERE;
265 |   s->where.cond     = cond;
266 |   s->where.thenStmt = thenStmt;
267 |   s->where.elseStmt = elseStmt;
268 |   return s;
269 | }
270 | 
271 | Stmt* mkIf(CExpr* cond, Stmt* thenStmt, Stmt* elseStmt)
272 | {
273 |   Stmt* s            = mkStmt();
274 |   s->tag             = IF;
275 |   s->ifElse.cond     = cond;
276 |   s->ifElse.thenStmt = thenStmt;
277 |   s->ifElse.elseStmt = elseStmt;
278 |   return s;
279 | }
280 | 
281 | Stmt* mkWhile(CExpr* cond, Stmt* body)
282 | {
283 |   Stmt* s      = mkStmt();
284 |   s->tag       = WHILE;
285 |   s->loop.cond = cond;
286 |   s->loop.body = body;
287 |   return s;
288 | }
289 | 
290 | Stmt* mkFor(CExpr* cond, Stmt* inc, Stmt* body)
291 | {
292 |   Stmt* s         = mkStmt();
293 |   s->tag          = FOR;
294 |   s->forLoop.cond = cond;
295 |   s->forLoop.inc  = inc;
296 |   s->forLoop.body = body;
297 |   return s;
298 | }
299 | 
300 | Stmt* mkPrint(PrintTag t, Expr* e)
301 | {
302 |   Stmt* s       = mkStmt();
303 |   s->tag        = PRINT;
304 |   s->print.tag  = t;
305 |   s->print.expr = e;
306 |   return s;
307 | }
308 | 


--------------------------------------------------------------------------------
/Lib/Source/Syntax.h:
--------------------------------------------------------------------------------
  1 | // This module defines the abstract syntax of the QPU language.
  2 | 
  3 | #ifndef _SOURCE_SYNTAX_H_
  4 | #define _SOURCE_SYNTAX_H_
  5 | 
  6 | #include "Common/Heap.h"
  7 | #include "Common/Stack.h"
  8 | 
  9 | // ============================================================================
 10 | // Operators
 11 | // ============================================================================
 12 | 
 13 | // Operator id
 14 | // (Note: order of operators is important to the random generator.)
 15 | enum OpId {
 16 |   // Int & Float operators:
 17 |   ROTATE, ADD, SUB, MUL, MIN, MAX,
 18 | 
 19 |   // Int only operators:
 20 |   SHL, SHR, USHR, BOR, BAND, BXOR, BNOT, ROR,
 21 | 
 22 |   // Conversion operators:
 23 |   ItoF, FtoI
 24 | };
 25 | 
 26 | // Every operator has a type associated with it
 27 | enum BaseType { UINT8, INT16, INT32, FLOAT };
 28 | 
 29 | // Pair containing operator and base type
 30 | struct Op { OpId op; BaseType type; };
 31 | 
 32 | // Construct an 'Op'
 33 | Op mkOp(OpId op, BaseType type);
 34 | 
 35 | // Is operator unary?
 36 | bool isUnary(Op op);
 37 | 
 38 | // Is operator commutative?
 39 | bool isCommutative(Op op);
 40 | 
 41 | // ============================================================================
 42 | // Variables
 43 | // ============================================================================
 44 | 
 45 | // What kind of variable is it
 46 | enum VarTag {
 47 |     STANDARD     // A standard variable that can be stored
 48 |                  // in a general-purpose register on a QPU
 49 |   , UNIFORM      // (Read-only.)  Reading this variable will consume a value
 50 |                  // (replicated 16 times) from the QPU's UNIFORM FIFO
 51 |                  // (this is how parameters are passed to kernels).
 52 |   , QPU_NUM      // (Read-only.) Reading this variable will yield the
 53 |                  // QPU's unique id (replicated 16 times).
 54 |   , ELEM_NUM     // (Read-only.) Reading this variable will yield a vector
 55 |                  // containing the integers from 0 to 15.
 56 |   , TMU0_ADDR    // (Write-only.) Initiate load via TMU
 57 | };
 58 | 
 59 | typedef int VarId;
 60 | 
 61 | struct Var {
 62 |   VarTag tag;
 63 | 
 64 |   // A unique identifier for a standard variable
 65 |   VarId id;
 66 | };
 67 | 
 68 | // Reserved general-purpose vars
 69 | enum ReservedVarId {
 70 |   RSV_QPU_ID       = 0,
 71 |   RSV_NUM_QPUS     = 1,
 72 |   RSV_READ_STRIDE  = 2,
 73 |   RSV_WRITE_STRIDE = 3
 74 | };
 75 | 
 76 | // ============================================================================
 77 | // Expressions    
 78 | // ============================================================================
 79 | 
 80 | // What kind of expression is it?
 81 | enum ExprTag { INT_LIT, FLOAT_LIT, VAR, APPLY, DEREF };
 82 | 
 83 | struct Expr {
 84 |   // What kind of expression is it?
 85 |   ExprTag tag;
 86 | 
 87 |   union {
 88 |     // Integer literal
 89 |     int intLit;
 90 | 
 91 |     // Float literal
 92 |     float floatLit;
 93 | 
 94 |     // Variable identifier
 95 |     Var var;
 96 | 
 97 |     // Application of a binary operator
 98 |     struct { Expr* lhs; Op op; Expr* rhs; } apply;
 99 | 
100 |     // Dereference a pointer
101 |     struct { Expr* ptr; } deref;
102 |   };
103 | };
104 | 
105 | // Functions to construct expressions
106 | Expr* mkExpr();
107 | Expr* mkIntLit(int lit);
108 | Expr* mkFloatLit(float lit);
109 | Expr* mkVar(Var var);
110 | Expr* mkApply(Expr* lhs, Op op, Expr* rhs);
111 | Expr* mkDeref(Expr* ptr);
112 | 
113 | // Is an expression a literal?
114 | bool isLit(Expr* e);
115 | 
116 | // ============================================================================
117 | // Comparison operators
118 | // ============================================================================
119 | 
120 | // Comparison operators
121 | enum CmpOpId { EQ, NEQ, LT, GT, LE, GE };
122 | 
123 | // Pair containing comparison operator and base type
124 | struct CmpOp { CmpOpId op; BaseType type; };
125 | 
126 | // Construct an 'Op'
127 | CmpOp mkCmpOp(CmpOpId op, BaseType type);
128 | 
129 | // ============================================================================
130 | // Boolean expressions
131 | // ============================================================================
132 | 
133 | // Kinds of boolean expressions
134 | enum BExprTag { NOT, AND, OR, CMP };
135 | 
136 | struct BExpr {
137 |   // What kind of boolean expression is it?
138 |   BExprTag tag;
139 | 
140 |   union {
141 |     // Negation
142 |     BExpr* neg;
143 | 
144 |     // Conjunction
145 |     struct { BExpr* lhs; BExpr* rhs; } conj;
146 | 
147 |     // Disjunction
148 |     struct { BExpr* lhs; BExpr* rhs; } disj;
149 | 
150 |     // Comparison
151 |     struct { Expr* lhs; CmpOp op; Expr* rhs; } cmp;
152 |   };
153 | };
154 | 
155 | // Functions to construct boolean expressions
156 | BExpr* mkBExpr();
157 | BExpr* mkNot(BExpr* neg);
158 | BExpr* mkAnd(BExpr* lhs, BExpr* rhs);
159 | BExpr* mkOr (BExpr* lhs, BExpr* rhs);
160 | BExpr* mkCmp(Expr*  lhs, CmpOp op, Expr*  rhs);
161 | 
162 | // ============================================================================
163 | // Conditional expressions
164 | // ============================================================================
165 | 
166 | // Kinds of conditional expressions
167 | enum CExprTag { ALL, ANY };
168 | 
169 | struct CExpr {
170 |   // What kind of boolean expression is it?
171 |   CExprTag tag;
172 | 
173 |   // This is either a scalar boolean expression, or a reduction of a vector
174 |   // boolean expressions using 'any' or 'all' operators.
175 |   BExpr* bexpr;
176 | };
177 | 
178 | // Functions to construct conditional expressions
179 | CExpr* mkCExpr();
180 | CExpr* mkAll(BExpr* bexpr);
181 | CExpr* mkAny(BExpr* bexpr);
182 | 
183 | // ============================================================================
184 | // 'print' statements
185 | // ============================================================================
186 | 
187 | // For displaying values in emulation
188 | enum PrintTag { PRINT_INT, PRINT_FLOAT, PRINT_STR };
189 | 
190 | struct PrintStmt {
191 |   PrintTag tag;
192 |   union {
193 |     const char* str;
194 |     Expr* expr;
195 |   };
196 | };
197 | 
198 | // ============================================================================
199 | // Statements
200 | // ============================================================================
201 | 
202 | // What kind of statement is it?
203 | enum StmtTag {
204 |   SKIP, ASSIGN, SEQ, WHERE,
205 |   IF, WHILE, PRINT, FOR,
206 |   SET_READ_STRIDE, SET_WRITE_STRIDE,
207 |   LOAD_RECEIVE, STORE_REQUEST, FLUSH,
208 |   SEND_IRQ_TO_HOST, SEMA_INC, SEMA_DEC };
209 | 
210 | struct Stmt {
211 |   // What kind of statement is it?
212 |   StmtTag tag;
213 | 
214 |   union {
215 |     // Assignment
216 |     struct { Expr* lhs; Expr* rhs; } assign;
217 | 
218 |     // Sequential composition
219 |     struct { Stmt* s0; Stmt* s1; } seq;
220 | 
221 |     // Where
222 |     struct { BExpr* cond; Stmt* thenStmt; Stmt* elseStmt; } where;
223 | 
224 |     // If
225 |     struct { CExpr* cond; Stmt* thenStmt; Stmt* elseStmt; } ifElse;
226 | 
227 |     // While
228 |     struct { CExpr* cond; Stmt* body; } loop;
229 | 
230 |     // For (only used intermediately during AST construction)
231 |     struct { CExpr* cond; Stmt* inc; Stmt* body; } forLoop;
232 | 
233 |     // Print
234 |     PrintStmt print;
235 | 
236 |     // Set stride
237 |     Expr* stride;
238 | 
239 |     // Load receive destination
240 |     Expr* loadDest;
241 | 
242 |     // Store request
243 |     struct { Expr* data; Expr* addr; } storeReq;
244 | 
245 |     // Semaphore id for increment / decrement
246 |     int semaId;
247 |   };
248 | };
249 | 
250 | // Functions to construct statements
251 | Stmt* mkStmt();
252 | Stmt* mkSkip();
253 | Stmt* mkAssign(Expr* lhs, Expr* rhs);
254 | Stmt* mkSeq(Stmt* s0, Stmt* s1);
255 | Stmt* mkWhere(BExpr* cond, Stmt* thenStmt, Stmt* elseStmt);
256 | Stmt* mkIf(CExpr* cond, Stmt* thenStmt, Stmt* elseStmt);
257 | Stmt* mkWhile(CExpr* cond, Stmt* body);
258 | Stmt* mkFor(CExpr* cond, Stmt* inc, Stmt* body);
259 | Stmt* mkPrint(PrintTag t, Expr* e);
260 | 
261 | // ============================================================================
262 | // Global variables
263 | // ============================================================================
264 | 
265 | // Obtain a fresh variable
266 | Var freshVar();
267 | 
268 | // Number of fresh vars used
269 | int getFreshVarCount();
270 | 
271 | // Reset fresh variable generator
272 | void resetFreshVarGen();
273 | void resetFreshVarGen(int val);
274 | 
275 | // Used for constructing abstract syntax trees
276 | extern Heap        astHeap;
277 | extern Stack<Stmt> stmtStack;
278 | extern Stack<Stmt> controlStack;
279 | 
280 | #endif
281 | 


--------------------------------------------------------------------------------
/Lib/Source/Translate.h:
--------------------------------------------------------------------------------
 1 | #ifndef _TRANSLATE_H_
 2 | #define _TRANSLATE_H_
 3 | 
 4 | #include "Common/Seq.h"
 5 | #include "Source/Syntax.h"
 6 | #include "Target/Syntax.h"
 7 | 
 8 | void insertEndCode(Seq<Instr>* seq);
 9 | void translateStmt(Seq<Instr>* seq, Stmt* s);
10 | 
11 | #endif
12 | 


--------------------------------------------------------------------------------
/Lib/Target/CFG.cpp:
--------------------------------------------------------------------------------
 1 | // Control-flow graphs (CFGs)
 2 | 
 3 | #include <assert.h>
 4 | #include "Target/CFG.h"
 5 | #include "Target/Syntax.h"
 6 | 
 7 | // ============================================================================
 8 | // Build control-flow graph
 9 | // ============================================================================
10 | 
11 | // Build a CFG for a given instruction sequence.
12 | 
13 | void buildCFG(Seq<Instr>* instrs, CFG* cfg)
14 | {
15 |   // ----------
16 |   // First pass
17 |   // ----------
18 |   //
19 |   // 1. Each instruction is a successor of the previous
20 |   //    instruction, unless the previous instruction
21 |   //    is an unconditional jump or halt instruction.
22 |   //
23 |   // 2. Compute a mapping from labels to instruction ids.
24 | 
25 |   // Number of labels in program
26 |   int numLabels = getFreshLabelCount();
27 | 
28 |   // Mapping from labels to instruction ids
29 |   InstrId* labelMap = new InstrId [numLabels];
30 | 
31 |   // Initialise label mapping
32 |   for (int i = 0; i < numLabels; i++)
33 |     labelMap[i] = -1;
34 | 
35 |   for (int i = 0; i < instrs->numElems; i++) {
36 |     // Get instruction
37 |     Instr instr = instrs->elems[i];
38 | 
39 |     // Is it an unconditional jump?
40 |     bool uncond = instr.tag == BRL && instr.BRL.cond.tag == COND_ALWAYS;
41 | 
42 |     // Is it a final instruction?
43 |     bool end = instr.tag == END || i+1 == instrs->numElems;
44 | 
45 |     // Add successor
46 |     cfg->extend();
47 |     if (! (uncond || end))
48 |       cfg->elems[i].insert(i+1);
49 | 
50 |     // Remember location of each label
51 |     if (instr.tag == LAB) {
52 |       assert(instr.label >= 0 && instr.label < numLabels);
53 |       labelMap[instr.label] = i;
54 |     }
55 |   }
56 | 
57 |   // -----------
58 |   // Second pass
59 |   // -----------
60 |   //
61 |   // Add a successor for each conditional jump.
62 | 
63 |   for (int i = 0; i < instrs->numElems; i++) {
64 |     Instr instr = instrs->elems[i];
65 |     if (instr.tag == BRL) {
66 |       assert(labelMap[instr.BRL.label] >= 0);
67 |       cfg->elems[i].insert(labelMap[instr.BRL.label]);
68 |     }
69 |   }
70 | 
71 |   // Free memory
72 |   delete [] labelMap;
73 | }
74 | 
75 | // ============================================================================
76 | // Reverse the arrows in a CFG
77 | // ============================================================================
78 | 
79 | // Given a mapping from instruction ids to successors, produce a
80 | // mapping from instruction ids to predecessors.
81 | 
82 | void reverseCFG(CFG* succs, CFG* preds)
83 | {
84 |   int n = succs->numElems;
85 | 
86 |   // Make preds the same size as succs
87 |   preds->setCapacity(n);
88 |   preds->numElems = n;
89 | 
90 |   for (int i = 0; i < n; i++) {
91 |     Succs* s = &succs->elems[i];
92 |     for (int j = 0; j < s->numElems; j++) {
93 |       InstrId succ = s->elems[j];
94 |       preds->elems[succ].insert(i);
95 |     }
96 |   }
97 | }
98 | 


--------------------------------------------------------------------------------
/Lib/Target/CFG.h:
--------------------------------------------------------------------------------
 1 | // Control-flow graphs (CFGs)
 2 | 
 3 | #ifndef _CFG_H_
 4 | #define _CFG_H_
 5 | 
 6 | #include "Common/Seq.h"
 7 | #include "Target/Syntax.h"
 8 | 
 9 | // A set of successors.
10 | 
11 | typedef SmallSeq<InstrId> Succs;
12 | 
13 | // A CFG is simply a set of successors
14 | // for each instruction.
15 | 
16 | typedef Seq<Succs> CFG;
17 | 
18 | // Function to construct a CFG.
19 | 
20 | void buildCFG(Seq<Instr>* instrs, CFG* cfg);
21 | 
22 | // Function to reverse the arrows in a CFG.
23 | 
24 | void reverseCFG(CFG* succs, CFG* preds);
25 | 
26 | #endif
27 | 


--------------------------------------------------------------------------------
/Lib/Target/Emulator.h:
--------------------------------------------------------------------------------
 1 | #ifndef _EMULATOR_H_
 2 | #define _EMULATOR_H_
 3 | 
 4 | #include <stdint.h>
 5 | #include "Common/Seq.h"
 6 | #include "Target/Syntax.h"
 7 | 
 8 | #define VPM_SIZE 2048
 9 | #define NUM_LANES 16
10 | #define MAX_QPUS 12
11 | #define EMULATOR_HEAP_SIZE 65536
12 | 
// This is a type for representing the values in a vector.
// It is a union, so both members share the same storage: an element can
// be viewed either as an integer or as a float.
union Word {
  int32_t intVal;   // Element viewed as a signed 32-bit integer
  float floatVal;   // Element viewed as a float
};
18 | 
// Vector values: one Word per lane (NUM_LANES elements in total)
struct Vec {
  Word elems[NUM_LANES];
};
23 | 
// In-flight DMA requests
// NOTE(review): field meanings below are inferred from the names only;
// confirm against the emulator implementation.
struct DMAReq {
  bool active;        // Presumably: is a request currently in flight?
  Word addr;          // Presumably: address associated with the request
  BufferAorB buffer;  // Presumably: which buffer (A or B) the request targets
};
30 | 
// VPM load queue (max 2 elements)
// NOTE(review): the array has 3 slots for a stated maximum of 2 elements,
// which suggests a circular buffer that keeps one slot unused to tell
// "full" from "empty" -- confirm against the queue code in the emulator.
struct VPMLoadQueue {
  int addrs[3];
  int front, back;
};
36 | 
// State of a single QPU.
// The register files and the load buffer are held through pointers;
// this header does not show who allocates or frees them.
struct QPUState {
  int id;                    // QPU id
  int numQPUs;               // QPU count
  bool running;              // Is QPU active, or has it halted?
  int pc;                    // Program counter
  Vec* regFileA;             // Register file A
  int sizeRegFileA;          // (and size)
  Vec* regFileB;             // Register file B
  int sizeRegFileB;          // (and size)
  Vec accum[6];              // Accumulator registers
  bool negFlags[NUM_LANES];  // Negative flags
  bool zeroFlags[NUM_LANES]; // Zero flags
  int nextUniform;           // Pointer to next uniform to read
  DMAReq dmaLoad;            // In-flight DMA load
  DMAReq dmaStore;           // In-flight DMA store
  VPMLoadQueue vpmLoadQueue; // VPM load queue
  int readStride;            // Read stride
  int writeStride;           // Write stride
  SmallSeq<Vec>* loadBuffer; // Load buffer for loads via TMU
};
58 | 
// State of the VideoCore.
// Holds one QPUState slot for each of the MAX_QPUS possible QPUs, plus
// the resources declared once for all of them.
struct State {
  QPUState qpu[MAX_QPUS]; // State of each QPU
  Word vpm[VPM_SIZE];     // Shared VPM memory
  Seq<char>* output;      // Output for print statements
  int sema[16];           // Semaphores
};
66 | 
67 | // Emulator
68 | void emulate
69 |   ( int numQPUs            // Number of QPUs active
70 |   , Seq<Instr>* instrs     // Instruction sequence
71 |   , int maxReg             // Max reg id used
72 |   , Seq<int32_t>* uniforms // Kernel parameters
73 |   , Seq<char>* output      // Output from print statements
74 |                            // (if NULL, stdout is used)
75 |   );
76 | 
77 | // Heap used in emulation mode.
78 | extern uint32_t emuHeapEnd;
79 | extern int32_t* emuHeap;
80 | 
81 | // Rotate a vector
82 | Vec rotate(Vec v, int n);
83 | 
84 | // Printing routines
85 | void emitChar(Seq<char>* out, char c);
86 | void emitStr(Seq<char>* out, const char* s);
87 | void printIntVec(Seq<char>* out, Vec x);
88 | void printFloatVec(Seq<char>* out, Vec x);
89 | 
90 | #endif
91 | 


--------------------------------------------------------------------------------
/Lib/Target/Encode.h:
--------------------------------------------------------------------------------
 1 | #ifndef _ENCODE_H_
 2 | #define _ENCODE_H_
 3 | 
 4 | #include <stdint.h>
 5 | #include "Target/Syntax.h"
 6 | #include "Common/Seq.h"
 7 | 
 8 | void encode(Seq<Instr>* instrs, Seq<uint32_t>* code);
 9 | 
10 | #endif
11 | 


--------------------------------------------------------------------------------
/Lib/Target/LiveRangeSplit.cpp:
--------------------------------------------------------------------------------
 1 | #include <assert.h>
 2 | #include "Source/Syntax.h"
 3 | #include "Target/ReachingDefs.h"
 4 | #include "Target/Subst.h"
 5 | #include "Target/Liveness.h"
 6 | 
 7 | // ============================================================================
 8 | // Live-range splitter
 9 | // ============================================================================
10 | 
11 | // First, a helper function that renames the variable v defined by an
12 | // instruction to w, along with all uses of that variable reached-by
13 | // the instruction, and recursively all definitions of that variable
14 | // that reach one of these uses.
15 | 
16 | void renameDef(Seq<Instr>* instrs,
17 |                InstrId i,
18 |                RegId v,
19 |                RegId w,
20 |                bool* visited,
21 |                ReachingDefs* reachedBy,
22 |                DefsOf* defsOf)
23 | {
24 |   // If we haven't previously visited instruction i then proceeed
25 |   if (visited[i]) return;
26 |   visited[i] = true;
27 |   
28 |   // Rename destination register from v to w
29 |   Instr* instr = &instrs->elems[i];
30 |   renameDest(instr, REG_A, v, REG_B, w);
31 | 
32 |   ReachSet* reached = &reachedBy->elems[i];
33 |   // For each instruction reached by i
34 |   for (int j = 0; j < reached->numElems; j++) {
35 |     InstrId rid = reached->elems[j];
36 |     Instr* r = &instrs->elems[rid];
37 | 
38 |     // Rename uses of v to w
39 |     renameUses(r, REG_A, v, REG_B, w);
40 | 
41 |     // For each instruction d defining v
42 |     SmallSeq<InstrId>* ds = &defsOf->elems[v];
43 |     for (int k = 0; k < ds->numElems; k++) {
44 |       InstrId d = ds->elems[k];
45 |       // If r is reached-by d
46 |       if (reachedBy->elems[d].member(rid)
47 |             || (d == rid && isCondAssign(r)))
48 |         // Recursively modify definition d to define w
49 |         renameDef(instrs, d, v, w, visited, reachedBy, defsOf);
50 |     }
51 |   }
52 | }
53 | 
54 | // Now for the top-level routine.
55 | 
56 | void liveRangeSplit(Seq<Instr>* instrs, CFG* cfg)
57 | {
58 |   // Determine for each variable, the instructions that assign to it
59 |   DefsOf defsOf;
60 |   computeDefsOf(instrs, &defsOf);
61 | 
62 |   // Determine instructions reached by each definition
63 |   ReachingDefs reachedBy;
64 |   computeReachedBy(instrs, cfg, &reachedBy);
65 | 
66 |   // Keep track of which instructions we've visisted
67 |   bool* visited = new bool [instrs->numElems];
68 | 
69 |   // Initialise visited array
70 |   for (int i = 0; i < instrs->numElems; i++)
71 |     visited[i] = false;
72 | 
73 |   // Unique register id
74 |   RegId next = 0;
75 | 
76 |   for (int i = 0; i < instrs->numElems; i++)
77 |     if (!visited[i]) {
78 |       // Compute vars defined by instruction
79 |       UseDef set;
80 |       useDef(instrs->elems[i], &set);
81 | 
82 |       // For each var defined by instruction
83 |       for (int j = 0; j < set.def.numElems; j++)
84 |         renameDef(instrs, i, set.def.elems[j], next++,
85 |                   visited, &reachedBy, &defsOf);
86 |     }
87 | 
88 |   // Every instruction should now soley use register file B.
89 |   // Go through and make them use register file A instead.
90 |   for (int i = 0; i < instrs->numElems; i++)
91 |     substRegTag(&instrs->elems[i], REG_B, REG_A);
92 | 
93 |   // Update fresh var counter
94 |   resetFreshVarGen(next);
95 | 
96 |   // Free memory
97 |   delete [] visited;
98 | }
99 | 


--------------------------------------------------------------------------------
/Lib/Target/LiveRangeSplit.h:
--------------------------------------------------------------------------------
 1 | #ifndef _LIVERANGESPLIT_H_
 2 | #define _LIVERANGESPLIT_H_
 3 | 
 4 | #include "Common/Seq.h"
 5 | #include "Target/CFG.h"
 6 | #include "Target/Syntax.h"
 7 | 
 8 | void liveRangeSplit(Seq<Instr>* instrs, CFG* cfg);
 9 | 
10 | #endif
11 | 


--------------------------------------------------------------------------------
/Lib/Target/Liveness.cpp:
--------------------------------------------------------------------------------
  1 | // Liveness analysis
  2 | 
  3 | #include "Target/Liveness.h"
  4 | 
  5 | // ============================================================================
  6 | // Compute 'use' and 'def' sets
  7 | // ============================================================================
  8 | 
  9 | // 'use' set: the variables read by an instruction
 10 | // 'def' set: the variables modified by an instruction
 11 | 
 12 | // Compute 'use' and 'def' sets for a given instruction
 13 | 
 14 | void useDefReg(Instr instr, UseDefReg* useDef)
 15 | {
 16 |   // Make the 'use' and 'def' sets empty
 17 |   useDef->use.clear();
 18 |   useDef->def.clear();
 19 | 
 20 |   switch (instr.tag) {
 21 |     // Load immediate
 22 |     case LI:
 23 |       // Add destination reg to 'def' set
 24 |       useDef->def.insert(instr.LI.dest);
 25 | 
 26 |       // Add destination reg to 'use' set if conditional assigment
 27 |       if (instr.LI.cond.tag != ALWAYS)
 28 |         useDef->use.insert(instr.LI.dest);
 29 |       return;
 30 | 
 31 |     // ALU operation
 32 |     case ALU:
 33 |       // Add destination reg to 'def' set
 34 |       useDef->def.insert(instr.ALU.dest);
 35 | 
 36 |       // Add destination reg to 'use' set if conditional assigment
 37 |       if (instr.ALU.cond.tag != ALWAYS)
 38 |         useDef->use.insert(instr.ALU.dest);
 39 | 
 40 |       // Add source reg A to 'use' set
 41 |       if (instr.ALU.srcA.tag == REG)
 42 |         useDef->use.insert(instr.ALU.srcA.reg);
 43 | 
 44 |       // Add source reg B to 'use' set
 45 |       if (instr.ALU.srcB.tag == REG)
 46 |         useDef->use.insert(instr.ALU.srcB.reg);
 47 |       return;
 48 | 
 49 |     // LD1 instruction
 50 |     case LD1:
 51 |       // Add source reg to 'use' set
 52 |       useDef->use.insert(instr.LD1.addr);
 53 |       return;
 54 | 
 55 |     // LD4 instruction
 56 |     case LD4:
 57 |       // Add dest reg to 'def' set
 58 |       useDef->def.insert(instr.LD4.dest);
 59 |       return;
 60 | 
 61 |     // ST1 instruction
 62 |     case ST1:
 63 |       // Add source reg to 'use' set
 64 |       useDef->use.insert(instr.ST1.data);
 65 |       return;
 66 | 
 67 |     // ST2 instruction
 68 |     case ST2:
 69 |       // Add source reg to 'use' set
 70 |       useDef->use.insert(instr.ST2.addr);
 71 |       return;
 72 | 
 73 |     // Print integer instruction
 74 |     case PRI:
 75 |       // Add source reg to 'use' set
 76 |       useDef->use.insert(instr.PRI);
 77 |       return;
 78 | 
 79 |     // Print float instruction
 80 |     case PRF:
 81 |       // Add source reg to 'use' set
 82 |       useDef->use.insert(instr.PRF);
 83 |       return;
 84 | 
 85 |     // Load receive instruction
 86 |     case RECV:
 87 |       // Add dest reg to 'def' set
 88 |       useDef->def.insert(instr.RECV.dest);
 89 |       return;
 90 |   }
 91 | }
 92 | 
 93 | // Same function as above, except only yeilds ids of registers in
 94 | // register file A.
 95 | 
 96 | void useDef(Instr instr, UseDef* out)
 97 | {
 98 |   UseDefReg set;
 99 |   useDefReg(instr, &set);
100 |   out->use.clear();
101 |   out->def.clear();
102 |   for (int i = 0; i < set.use.numElems; i++) {
103 |     Reg r = set.use.elems[i];
104 |     if (r.tag == REG_A) out->use.append(r.regId);
105 |   }
106 |   for (int i = 0; i < set.def.numElems; i++) {
107 |     Reg r = set.def.elems[i];
108 |     if (r.tag == REG_A) out->def.append(r.regId);
109 |   }
110 | }
111 | 
112 | // Compute the union of the 'use' sets of the successors of a given
113 | // instruction.
114 | 
115 | void useSetOfSuccs(Seq<Instr>* instrs, CFG* cfg,
116 |                    InstrId i, SmallSeq<RegId>* use)
117 | {
118 |   use->clear();
119 |   Succs* s = &cfg->elems[i];
120 |   for (int j = 0; j < s->numElems; j++) {
121 |     UseDef set;
122 |     useDef(instrs->elems[s->elems[j]], &set);
123 |     for (int k = 0; k < set.use.numElems; k++)
124 |       use->insert(set.use.elems[k]);
125 |   }
126 | }
127 | 
128 | // Return true if given instruction has two register operands.
129 | 
130 | bool getTwoUses(Instr instr, Reg* r1, Reg* r2)
131 | {
132 |   if (instr.tag == ALU && instr.ALU.srcA.tag == REG
133 |                        && instr.ALU.srcB.tag == REG) {
134 |     *r1 = instr.ALU.srcA.reg;
135 |     *r2 = instr.ALU.srcB.reg;
136 |     return true;
137 |   }
138 |   return false;
139 | }
140 | 
141 | // ============================================================================
142 | // Compute live sets for each instruction
143 | // ============================================================================
144 | 
145 | // Compute the live-out variables of an instruction, given the live-in
146 | // variables of all instructions and the CFG.
147 | 
148 | void computeLiveOut(CFG* cfg, Liveness* live, InstrId i, LiveSet* liveOut)
149 | {
150 |   liveOut->clear();
151 |   Succs* s = &cfg->elems[i];
152 |   for (int j = 0; j < s->numElems; j++) {
153 |     LiveSet* set = &live->elems[s->elems[j]];
154 |     for (int k = 0; k < set->numElems; k++)
155 |       liveOut->insert(set->elems[k]);
156 |   }
157 | }
158 | 
159 | void liveness(Seq<Instr>* instrs, CFG* cfg, Liveness* live)
160 | {
161 |   // Initialise live mapping to have one entry per instruction
162 |   live->setCapacity(instrs->numElems);
163 |   live->numElems = instrs->numElems;
164 | 
165 |   // For storing the 'use' and 'def' sets of each instruction
166 |   UseDef useDefSets;
167 | 
168 |   // For temporarily storing live-in and live-out variables
169 |   LiveSet liveIn;
170 |   LiveSet liveOut;
171 | 
172 |   // Has a change been made to the liveness mapping?
173 |   bool changed = true;
174 | 
175 |   // Iterate until no change, i.e. fixed point
176 |   while (changed) {
177 |     changed = false;
178 | 
179 |     // Propagate live variables backwards
180 |     for (int i = instrs->numElems-1; i >= 0; i--) {
181 |       // Compute 'use' and 'def' sets
182 |       Instr instr = instrs->elems[i];
183 |       useDef(instr, &useDefSets);
184 | 
185 |       // Compute live-out variables
186 |       computeLiveOut(cfg, live, i, &liveOut);
187 | 
188 |       // Remove the 'def' set from the live-out set to give live-in set
189 |       liveIn.clear();
190 |       for (int j = 0; j < liveOut.numElems; j++) {
191 |         if (! useDefSets.def.member(liveOut.elems[j]))
192 |           liveIn.insert(liveOut.elems[j]);
193 |       }
194 | 
195 |       // Add the 'use' set to the live-in set
196 |       for (int j = 0; j < useDefSets.use.numElems; j++)
197 |         liveIn.insert(useDefSets.use.elems[j]);
198 | 
199 |       // Insert the live-in variables into the map
200 |       for (int j = 0; j < liveIn.numElems; j++) {
201 |         bool inserted = live->elems[i].insert(liveIn.elems[j]);
202 |         changed = changed || inserted;
203 |       }
204 |     }
205 |   }
206 | }
207 | 


--------------------------------------------------------------------------------
/Lib/Target/Liveness.h:
--------------------------------------------------------------------------------
 1 | // Liveness analysis
 2 | 
 3 | #ifndef _LIVENESS_H_
 4 | #define _LIVENESS_H_
 5 | 
 6 | #include "Common/Seq.h"
 7 | #include "Target/Syntax.h"
 8 | #include "Target/CFG.h"
 9 | 
10 | // 'use' and 'def' sets:
11 | //   * 'use' set: the variables read by an instruction
12 | //   * 'def' set: the variables modified by an instruction
13 | 
// 'use' and 'def' sets holding full registers (Reg values).
struct UseDefReg {
  SmallSeq<Reg> use;  // Registers read by the instruction
  SmallSeq<Reg> def;  // Registers modified by the instruction
};   
18 |      
// 'use' and 'def' sets holding bare register ids (RegId values).
struct UseDef {
  SmallSeq<RegId> use;  // Ids of the variables read by the instruction
  SmallSeq<RegId> def;  // Ids of the variables modified by the instruction
};   
23 | 
24 | // Compute 'use' and 'def' sets for a given instruction
25 | 
26 | void useDefReg(Instr instr, UseDefReg* out);
27 | void useDef(Instr instr, UseDef* out);
28 | bool getTwoUses(Instr instr, Reg* r1, Reg* r2);
29 | 
// A live set contains the variables
// that are live-in to an instruction.

typedef SmallSeq<RegId> LiveSet;
34 | 
35 | // The result of liveness analysis is a set
36 | // of live variables for each instruction.
37 | 
38 | typedef Seq<LiveSet> Liveness;
39 | 
40 | // Determine the liveness sets for each instruction.
41 | 
42 | void liveness(Seq<Instr>* instrs, CFG* cfg, Liveness* liveness);
43 | void computeLiveOut(CFG* cfg, Liveness* live, InstrId i, LiveSet* liveOut);
44 | 
45 | #endif
46 | 


--------------------------------------------------------------------------------
/Lib/Target/LoadStore.cpp:
--------------------------------------------------------------------------------
  1 | #include <assert.h>
  2 | #include "Source/Syntax.h"
  3 | #include "Target/Syntax.h"
  4 | #include "Target/LoadStore.h"
  5 | 
  6 | // =============================================================================
  7 | // Stride setup
  8 | // =============================================================================
  9 | 
 10 | // Generate instructions to set the read stride.
 11 | 
 12 | void genSetReadStride(Seq<Instr>* instrs, int stride)
 13 | {
 14 |   int pitch = (stride+1)*4;
 15 |   assert(pitch < 8192);
 16 |   int setup = 0x90000000 | pitch;
 17 |   Reg dst; dst.tag = REG_A; dst.regId = RSV_READ_STRIDE;
 18 |   Instr instr = genLI(dst, setup);
 19 |   instrs->append(instr);
 20 | }
 21 | 
 22 | void genSetReadStride(Seq<Instr>* instrs, Reg stride)
 23 | {
 24 |   Reg pitch = freshReg();
 25 |   Reg tmp = freshReg();
 26 |   instrs->append(genIncr(pitch, stride, 1));
 27 |   instrs->append(genLI(tmp, 0x90000000));
 28 |   instrs->append(genLShift(pitch, pitch, 2));
 29 | 
 30 |   Reg dst; dst.tag = REG_A; dst.regId = RSV_READ_STRIDE;
 31 |   instrs->append(genOR(dst, tmp, pitch));
 32 | }
 33 | 
 34 | // Generate instructions to set the write stride.
 35 | 
 36 | void genSetWriteStride(Seq<Instr>* instrs, int stride)
 37 | {
 38 |   int strideBytes = stride*4;
 39 |   assert(strideBytes < 8192);
 40 |   int setup = 0xc0010000 | strideBytes;
 41 |   Reg dst; dst.tag = REG_A; dst.regId = RSV_WRITE_STRIDE;
 42 |   Instr instr = genLI(dst, setup);
 43 |   instrs->append(instr);
 44 | }
 45 | 
 46 | void genSetWriteStride(Seq<Instr>* instrs, Reg stride)
 47 | {
 48 |   Reg tmp0 = freshReg();
 49 |   Reg tmp1 = freshReg();
 50 |   instrs->append(genLShift(tmp0, stride, 2));
 51 |   instrs->append(genLI(tmp1, 0xc0010000));
 52 | 
 53 |   Reg dst; dst.tag = REG_A; dst.regId = RSV_WRITE_STRIDE;
 54 |   instrs->append(genOR(dst, tmp0, tmp1));
 55 | }
 56 | 
 57 | // =============================================================================
 58 | // DMA setup
 59 | // =============================================================================
 60 | 
 61 | // Generate instructions to setup DMA load.
 62 | 
 63 | void assignDMALoadSetup(Seq<Instr>* instrs, Reg dst, BufferAorB b, Reg qpuId)
 64 | {
 65 |   int setup = 0x80101800;
 66 |   int buffIdx = (16 * (b == A ? 0 : 1)) << 4;
 67 |   setup |= buffIdx;
 68 | 
 69 |   Reg tmp = freshReg();
 70 |   instrs->append(genLI(tmp, setup));
 71 |   instrs->append(genOR(dst, qpuId, tmp));
 72 | }
 73 | 
 74 | // Generate instructions to setup DMA store.
 75 | 
 76 | void assignDMAStoreSetup(Seq<Instr>* instrs, Reg dst, BufferAorB b, Reg qpuId)
 77 | {
 78 |   int setup = 0x88014000;
 79 |   int buffIdx = (16 * (b == A ? 2 : 3)) << 7;
 80 |   setup |= buffIdx;
 81 | 
 82 |   Reg tmp0 = freshReg();
 83 |   instrs->append(genLI(tmp0, setup));
 84 | 
 85 |   Reg tmp1 = freshReg();
 86 |   instrs->append(genLShift(tmp1, qpuId, 3));
 87 | 
 88 |   instrs->append(genOR(dst, tmp0, tmp1));
 89 | }
 90 | 
 91 | // =============================================================================
 92 | // VPM setup
 93 | // =============================================================================
 94 | 
 95 | // Generate instructions to setup VPM load.
 96 | 
 97 | void assignVPMLoadSetup(Seq<Instr>* instrs, Reg dst, BufferAorB b, Reg qpuId)
 98 | {
 99 |   int setup = 0x00100200;
100 |   int buffIdx = (b == A ? 0 : 1) << 4;
101 |   setup |= buffIdx;
102 | 
103 |   Reg tmp = freshReg();
104 |   instrs->append(genLI(tmp, setup));
105 |   instrs->append(genOR(dst, qpuId, tmp));
106 | }
107 | 
108 | // Generate instructions to setup VPM store.
109 | 
110 | void genSetupVPMStore(Seq<Instr>* instrs, BufferAorB b, Reg qpuId)
111 | {
112 |   int setup = 0x00100200;
113 |   int buffIdx = (b == A ? 2 : 3) << 4;
114 |   setup |= buffIdx;
115 | 
116 |   Reg tmp = freshReg();
117 |   instrs->append(genLI(tmp, setup));
118 | 
119 |   Reg dst;
120 |   dst.tag   = SPECIAL;
121 |   dst.regId = SPECIAL_WR_SETUP;
122 |   instrs->append(genOR(dst, qpuId, tmp));
123 | }
124 | 
125 | // ============================================================================
126 | // Load/Store pass
127 | // ============================================================================
128 | 
129 | void loadStorePass(Seq<Instr>* instrs)
130 | {
131 |   Seq<Instr> newInstrs(instrs->numElems*2);
132 | 
133 |   // Put QPU number in a register
134 |   Reg qpuId = freshReg();
135 |   Reg qpuNum; qpuNum.tag = SPECIAL; qpuNum.regId = SPECIAL_QPU_NUM;
136 |   newInstrs.append(genMove(qpuId, qpuNum));
137 | 
138 |   // Initialise strides
139 |   genSetReadStride(&newInstrs, 0);
140 |   genSetWriteStride(&newInstrs, 0);
141 | 
142 |   // Initialise load/store setup registers
143 |   Reg vpmLoadSetup  = freshReg();
144 |   Reg dmaLoadSetup  = freshReg();
145 |   Reg dmaStoreSetup = freshReg();
146 | 
147 |   assignDMALoadSetup(&newInstrs, dmaLoadSetup, A, qpuId);
148 |   assignDMAStoreSetup(&newInstrs, dmaStoreSetup, A, qpuId);
149 |   assignVPMLoadSetup(&newInstrs, vpmLoadSetup, A, qpuId);
150 | 
151 |   genSetupVPMStore(&newInstrs, A, qpuId);
152 | 
153 |   // Elaborate LD1, LD3 and ST2 intermediate instructions
154 |   Reg sp; sp.tag = SPECIAL;
155 |   Reg src; src.tag = REG_A;
156 |   for (int i = 0; i < instrs->numElems; i++) {
157 |     Instr instr = instrs->elems[i];
158 |     switch (instr.tag) {
159 |       case LD1:
160 |         sp.regId = SPECIAL_RD_SETUP;
161 |         src.regId = RSV_READ_STRIDE;
162 |         newInstrs.append(genMove(sp, src));
163 |         newInstrs.append(genMove(sp, dmaLoadSetup));
164 |         sp.regId = SPECIAL_DMA_LD_ADDR;
165 |         newInstrs.append(genMove(sp, instr.LD1.addr));
166 |         break;
167 |       case LD3:
168 |         sp.regId = SPECIAL_RD_SETUP;
169 |         newInstrs.append(genMove(sp, vpmLoadSetup));
170 |         for (int j = 0; j < 3; j++)
171 |           newInstrs.append(nop());
172 |         break;
173 |       case ST2:
174 |         sp.regId = SPECIAL_WR_SETUP;
175 |         src.regId = RSV_WRITE_STRIDE;
176 |         newInstrs.append(genMove(sp, src));
177 |         newInstrs.append(genMove(sp, dmaStoreSetup));
178 |         sp.regId = SPECIAL_DMA_ST_ADDR;
179 |         newInstrs.append(genMove(sp, instr.ST2.addr));
180 |         break;
181 |       case RECV: {
182 |         instr.tag = TMU0_TO_ACC4;
183 |         newInstrs.append(instr);
184 | 
185 |         Instr move;
186 |         move.tag                = ALU;
187 |         move.ALU.setFlags       = false;
188 |         move.ALU.cond.tag       = ALWAYS;
189 |         move.ALU.dest           = instr.RECV.dest;
190 |         move.ALU.srcA.tag       = REG;
191 |         move.ALU.srcA.reg.tag   = ACC;
192 |         move.ALU.srcA.reg.regId = 4;
193 |         move.ALU.op             = A_BOR;
194 |         move.ALU.srcB.tag       = REG;
195 |         move.ALU.srcB.reg.tag   = ACC;
196 |         move.ALU.srcB.reg.regId = 4;
197 |         newInstrs.append(move);
198 |         break;
199 |       }
200 |       default:
201 |         newInstrs.append(instr);
202 |         break;
203 |     }
204 |   }
205 | 
206 |   // Update original instruction sequence
207 |   instrs->clear();
208 |   for (int i = 0; i < newInstrs.numElems; i++)
209 |     instrs->append(newInstrs.elems[i]);
210 | }
211 | 


--------------------------------------------------------------------------------
/Lib/Target/LoadStore.h:
--------------------------------------------------------------------------------
 1 | #ifndef _LOADSTORE_H_
 2 | #define _LOADSTORE_H_
 3 | 
 4 | #include "Common/Seq.h"
 5 | #include "Target/Syntax.h"
 6 | 
 7 | void genSetReadStride(Seq<Instr>* instrs, int stride);
 8 | void genSetReadStride(Seq<Instr>* instrs, Reg stride);
 9 | void genSetWriteStride(Seq<Instr>* instrs, int stride);
10 | void genSetWriteStride(Seq<Instr>* instrs, Reg stride);
11 | void loadStorePass(Seq<Instr>* instrs);
12 | 
13 | #endif
14 | 


--------------------------------------------------------------------------------
/Lib/Target/Pretty.cpp:
--------------------------------------------------------------------------------
  1 | #include "Target/Syntax.h"
  2 | #include "Target/SmallLiteral.h"
  3 | 
  4 | void pretty(SubWord sw)
  5 | {
  6 |   switch (sw) {
  7 |     case A8:  printf("[7:0]"); return;
  8 |     case B8:  printf("[15:8]"); return;
  9 |     case C8:  printf("[23:16]"); return;
 10 |     case D8:  printf("[31:24]"); return;
 11 |     case A16: printf("[15:0]"); return;
 12 |     case B16: printf("[31:16]"); return;
 13 |   }
 14 | }
 15 | 
 16 | const char* specialStr(RegId rid)
 17 | {
 18 |   Special s = (Special) rid;
 19 |   switch (s) {
 20 |     case SPECIAL_UNIFORM:      return "UNIFORM";
 21 |     case SPECIAL_ELEM_NUM:     return "ELEM_NUM";
 22 |     case SPECIAL_QPU_NUM:      return "QPU_NUM";
 23 |     case SPECIAL_RD_SETUP:     return "RD_SETUP";
 24 |     case SPECIAL_WR_SETUP:     return "WR_SETUP";
 25 |     case SPECIAL_DMA_ST_ADDR:  return "DMA_ST_ADDR";
 26 |     case SPECIAL_DMA_LD_ADDR:  return "DMA_LD_ADDR";
 27 |     case SPECIAL_VPM_READ:     return "VPM_READ";
 28 |     case SPECIAL_VPM_WRITE:    return "VPM_WRITE";
 29 |     case SPECIAL_HOST_INT:     return "HOST_INT";
 30 |     case SPECIAL_TMU0_S:       return "TMU0_S";
 31 |   }
 32 | 
 33 |   // Unreachable
 34 |   assert(false);
 35 | }
 36 | 
 37 | void pretty(Reg r)
 38 | {
 39 |   switch (r.tag) {
 40 |     case REG_A:
 41 |       printf("A%i", r.regId);
 42 |       return;
 43 |     case REG_B: printf("B%i", r.regId); return;
 44 |     case ACC: printf("ACC%i", r.regId); return;
 45 |     case SPECIAL: printf("S[%s]", specialStr(r.regId)); return;
 46 |     case NONE: printf("_"); return;
 47 |   }
 48 | }
 49 | 
 50 | void pretty(Flag flag)
 51 | {
 52 |   switch (flag) {
 53 |     case ZS: printf("ZS"); return;
 54 |     case ZC: printf("ZC"); return;
 55 |     case NS: printf("NS"); return;
 56 |     case NC: printf("NC"); return;
 57 |   }
 58 | }
 59 | 
 60 | void pretty(BranchCond cond)
 61 | {
 62 |   switch (cond.tag) {
 63 |     case COND_ALL:
 64 |       printf("all(");
 65 |       pretty(cond.flag);
 66 |       printf(")");
 67 |       return;
 68 |     case COND_ANY:
 69 |       printf("any(");
 70 |       pretty(cond.flag);
 71 |       printf(")");
 72 |       return;
 73 |     case COND_ALWAYS:
 74 |       printf("always");
 75 |       return;
 76 |     case COND_NEVER:
 77 |       printf("never");
 78 |       return;
 79 |   }
 80 | }
 81 | 
 82 | void pretty(AssignCond cond)
 83 | {
 84 |   switch (cond.tag) {
 85 |     case ALWAYS: printf("always"); return;
 86 |     case NEVER: printf("never"); return;
 87 |     case FLAG: pretty(cond.flag); return;
 88 |   }
 89 | }
 90 | 
 91 | void pretty(Imm imm) {
 92 |   switch (imm.tag) {
 93 |     case IMM_INT32:
 94 |       printf("%d", imm.intVal);
 95 |       return;
 96 |     case IMM_FLOAT32:
 97 |       printf("%f", imm.floatVal);
 98 |       return;
 99 |     case IMM_MASK:
100 |       int b = imm.intVal;
101 |       for (int i = 0; i < 16; i++) {
102 |         printf("%i", b&1 ? 1 : 0);
103 |         b >>= 1;
104 |       }
105 |       return;
106 |   }
107 | }
108 | 
109 | void pretty(SmallImm imm)
110 | {
111 |   switch (imm.tag) {
112 |     case SMALL_IMM: printSmallLit(imm.val); return;
113 |     case ROT_ACC: printf("ROT(ACC5)"); return;
114 |     case ROT_IMM: printf("ROT(%i)", imm.val); return;
115 |   }
116 | }
117 | 
118 | void pretty(RegOrImm r)
119 | {
120 |   switch (r.tag) {
121 |     case REG: pretty(r.reg); return;
122 |     case IMM: pretty(r.smallImm); return;
123 |   }
124 | }
125 | 
126 | void pretty(ALUOp op)
127 | {
128 |   switch (op) {
129 |     case NOP:       printf("nop"); return;
130 |     case A_FADD:    printf("addf"); return;
131 |     case A_FSUB:    printf("subf"); return;
132 |     case A_FMIN:    printf("minf"); return;
133 |     case A_FMAX:    printf("maxf"); return;
134 |     case A_FMINABS: printf("minabsf"); return;
135 |     case A_FMAXABS: printf("maxabsf"); return;
136 |     case A_FtoI:    printf("ftoi"); return;
137 |     case A_ItoF:    printf("itof"); return;
138 |     case A_ADD:     printf("add"); return;
139 |     case A_SUB:     printf("sub"); return;
140 |     case A_SHR:     printf("shr"); return;
141 |     case A_ASR:     printf("asr"); return;
142 |     case A_ROR:     printf("ror"); return;
143 |     case A_SHL:     printf("shl"); return;
144 |     case A_MIN:     printf("min"); return;
145 |     case A_MAX:     printf("max"); return;
146 |     case A_BAND:    printf("and"); return;
147 |     case A_BOR:     printf("or"); return;
148 |     case A_BXOR:    printf("xor"); return;
149 |     case A_BNOT:    printf("not"); return;
150 |     case A_CLZ:     printf("clz"); return;
151 |     case A_V8ADDS:  printf("addsatb"); return;
152 |     case A_V8SUBS:  printf("subsatb"); return;
153 |     case M_FMUL:    printf("mulf"); return;
154 |     case M_MUL24:   printf("mul24"); return;
155 |     case M_V8MUL:   printf("mulb"); return;
156 |     case M_V8MIN:   printf("minb"); return;
157 |     case M_V8MAX:   printf("maxb"); return;
158 |     case M_V8ADDS:  printf("m_addsatb"); return;
159 |     case M_V8SUBS:  printf("m_subsatb"); return;
160 |     case M_ROTATE:  printf("rotate"); return;
161 |   }
162 | }
163 | 
164 | void pretty(BranchTarget target)
165 | {
166 |   if (target.relative)
167 |     printf("PC+1+");
168 |   if (target.useRegOffset)
169 |     printf("A%i+", target.regOffset);
170 |   printf("%i", target.immOffset);
171 | }
172 | 
173 | void pretty(BufferAorB buffer)
174 | {
175 |   if (buffer == A) printf("A");
176 |   if (buffer == B) printf("B");
177 | }
178 | 
179 | void pretty(Instr instr)
180 | {
181 |   switch (instr.tag) {
182 |     case LI:
183 |       if (instr.LI.cond.tag != ALWAYS) {
184 |         printf("where ");
185 |         pretty(instr.LI.cond);
186 |         printf(": ");
187 |       }
188 |       pretty(instr.LI.dest);
189 |       printf(" <-%s ", instr.LI.setFlags ? "{sf}" : "");
190 |       pretty(instr.LI.imm);
191 |       printf("\n");
192 |       return;
193 |     case ALU:
194 |       if (instr.ALU.cond.tag != ALWAYS) {
195 |         printf("where ");
196 |         pretty(instr.ALU.cond);
197 |         printf(": ");
198 |       }
199 |       pretty(instr.ALU.dest);
200 |       printf(" <-%s ", instr.ALU.setFlags ? "{sf}" : "");
201 |       pretty(instr.ALU.op);
202 |       printf("(");
203 |       pretty(instr.ALU.srcA);
204 |       printf(", ");
205 |       pretty(instr.ALU.srcB);
206 |       printf(")\n");
207 |       return;
208 |     case END:
209 |       printf("END\n");
210 |       return;
211 |     case BR:
212 |       printf("if ");
213 |       pretty(instr.BR.cond);
214 |       printf(" goto ");
215 |       pretty(instr.BR.target);
216 |       printf("\n");
217 |       return;
218 |     case BRL:
219 |       printf("if ");
220 |       pretty(instr.BRL.cond);
221 |       printf(" goto L%i\n", instr.BRL.label);
222 |       return;
223 |     case LAB:
224 |       printf("L%i:\n", instr.label);
225 |       return;
226 |     case NO_OP:
227 |       printf("NOP\n");
228 |       return;
229 |     case LD1:
230 |       pretty(instr.LD1.buffer);
231 |       printf(" <- LD1(");
232 |       pretty(instr.LD1.addr);
233 |       printf(")\n");
234 |       return;
235 |     case LD2:
236 |       printf("LD2\n");
237 |       return;
238 |     case LD3:
239 |       printf("LD3(");
240 |       pretty(instr.LD3.buffer);
241 |       printf(")\n");
242 |       return;
243 |     case LD4:
244 |       pretty(instr.LD4.dest);
245 |       printf(" <- LD4\n");
246 |       return;
247 |     case ST1:
248 |       printf("ST1(");
249 |       pretty(instr.ST1.buffer);
250 |       printf(") <- ");
251 |       pretty(instr.ST1.data);
252 |       printf("\n");
253 |       return;
254 |     case ST2:
255 |       printf("ST2(");
256 |       pretty(instr.ST2.buffer);
257 |       printf(", ");
258 |       pretty(instr.ST2.addr);
259 |       printf(")\n");
260 |       return;
261 |     case ST3:
262 |       printf("ST3\n");
263 |       return;
264 |     case PRS:
265 |       printf("PRS(\"%s\")", instr.PRS);
266 |       return;
267 |     case PRI:
268 |       printf("PRI(");
269 |       pretty(instr.PRI);
270 |       printf(")\n");
271 |       return;
272 |     case PRF:
273 |       printf("PRF(");
274 |       pretty(instr.PRF);
275 |       printf(")\n");
276 |       return;
277 |     case RECV:
278 |       printf("RECV(");
279 |       pretty(instr.RECV.dest);
280 |       printf(")\n");
281 |       return;
282 |     case TMU0_TO_ACC4:
283 |       printf("TMU0_TO_ACC4\n");
284 |       return;
285 |     case SINC:
286 |       printf("SINC %i\n", instr.semaId);
287 |       return;
288 |     case SDEC:
289 |       printf("SDEC %i\n", instr.semaId);
290 |       return;
291 |     case IRQ:
292 |       printf("IRQ\n");
293 |       return;
294 |   }
295 | }
296 | 


--------------------------------------------------------------------------------
/Lib/Target/Pretty.h:
--------------------------------------------------------------------------------
 1 | #ifndef _TARGET_PRETTY_H_
 2 | #define _TARGET_PRETTY_H_
 3 | 
 4 | #include "Target/Syntax.h"
 5 | 
 6 | // Pretty printer for the QPULib target language
 7 | void pretty(Instr instr);
 8 | 
 9 | #endif
10 | 


--------------------------------------------------------------------------------
/Lib/Target/ReachingDefs.cpp:
--------------------------------------------------------------------------------
  1 | // Reaching definitions analysis
  2 | 
  3 | #include "Source/Syntax.h"
  4 | #include "Target/ReachingDefs.h"
  5 | #include "Target/Liveness.h"
  6 | 
  7 | // ============================================================================
  8 | // Compute 'defsOf' mapping
  9 | // ============================================================================
 10 | 
 11 | // Compute a mapping from each register id to a set of instruction ids
 12 | // that assign to that register.
 13 | 
 14 | typedef Seq<SmallSeq<InstrId>> DefsOf;
 15 | 
 16 | void computeDefsOf(Seq<Instr>* instrs, DefsOf* defsOf)
 17 | {
 18 |   int numVars = getFreshVarCount();
 19 |   defsOf->setCapacity(numVars);
 20 |   defsOf->numElems = numVars;
 21 | 
 22 |   for (int i = 0; i < instrs->numElems; i++) {
 23 |     UseDef set;
 24 |     useDef(instrs->elems[i], &set);
 25 |     for (int j = 0; j < set.def.numElems; j++) {
 26 |       RegId r = set.def.elems[j];
 27 |       defsOf->elems[r].insert(i);
 28 |     }
 29 |   }
 30 | }
 31 | 
 32 | // ============================================================================
 33 | // Compute 'usesOf' mapping
 34 | // ============================================================================
 35 | 
 36 | // Compute a mapping from each register id to a set of instruction ids
 37 | // that use that register.
 38 | 
 39 | typedef Seq<SmallSeq<InstrId>> UsesOf;
 40 | 
 41 | void computeUsesOf(Seq<Instr>* instrs, UsesOf* usesOf)
 42 | {
 43 |   int numVars = getFreshVarCount();
 44 |   usesOf->setCapacity(numVars);
 45 |   usesOf->numElems = numVars;
 46 | 
 47 |   for (int i = 0; i < instrs->numElems; i++) {
 48 |     UseDef set;
 49 |     useDef(instrs->elems[i], &set);
 50 |     for (int j = 0; j < set.use.numElems; j++) {
 51 |       RegId r = set.use.elems[j];
 52 |       usesOf->elems[r].insert(i);
 53 |     }
 54 |   }
 55 | }
 56 | 
 57 | // ============================================================================
 58 | // Compute 'gen' and 'kill' sets
 59 | // ============================================================================
 60 | 
 61 | // 'gen' set:  an instruction labelled x 'generates' x if it
 62 | //             modifies any variable.
 63 | // 'kill' set: an instruction labelled x that modifies a register y
 64 | //             kills all instructions that modify y, except x.
 65 | 
 66 | struct GenKill {
 67 |   SmallSeq<InstrId> gen;
 68 |   SmallSeq<InstrId> kill;
 69 | };
 70 | 
 71 | // Compute 'gen' and 'kill' sets for a given instruction
 72 | 
 73 | void computeGenKill(InstrId id, Instr instr, DefsOf* defsOf, GenKill* genKill)
 74 | {
 75 |   // Make the 'gen' and 'kill' sets empty
 76 |   genKill->gen.clear();
 77 |   genKill->kill.clear();
 78 | 
 79 |   // Does instruction modify a reg?
 80 |   bool isDef = false;
 81 | 
 82 |   // If so, which reg?
 83 |   RegId defReg;
 84 |   
 85 |   switch (instr.tag) {
 86 |     // Load immediate
 87 |     case LI:
 88 |       // Add destination reg to 'def' set
 89 |       if (instr.LI.dest.tag == REG_A) {
 90 |         isDef  = true;
 91 |         defReg = instr.LI.dest.regId;
 92 |       }
 93 |       break;
 94 | 
 95 |     // ALU operation
 96 |     case ALU:
 97 |       // Add destination reg to 'def' set
 98 |       if (instr.ALU.dest.tag == REG_A) {
 99 |         isDef  = true;
100 |         defReg = instr.ALU.dest.regId;
101 |       }
102 |       break;
103 | 
104 |     // LD4 instruction
105 |     case LD4:
106 |       // Add dest reg to 'def' set
107 |       if (instr.LD4.dest.tag == REG_A) {
108 |         isDef  = true;
109 |         defReg = instr.LD4.dest.regId;
110 |       }
111 |       break;
112 |   }
113 | 
114 |   if (isDef) {
115 |     genKill->gen.insert(id);
116 |     SmallSeq<InstrId>* defs = &defsOf->elems[defReg];
117 |     for (int i = 0; i < defs->numElems; i++)
118 |       if (defs->elems[i] != id)
119 |         genKill->kill.insert(defs->elems[i]);
120 |   }
121 | }
122 | 
123 | // ============================================================================
124 | // Compute live reaching definitions for each instruction
125 | // ============================================================================
126 | 
127 | // Helper function: given the reaching-out definitions, compute the
128 | // reaching-in set for a given instruction.  For efficiency reasons,
129 | // we only return live definitions that reach-in, but on the down-side
130 | // this means we have to perform liveness analysis first.
131 | 
132 | void computeReachIn(Seq<Instr>* instrs, CFG* preds, Liveness* live,
133 |                     ReachingDefs* defs, InstrId i, ReachSet* reachIn)
134 | {
135 |   LiveSet* liveIn = &live->elems[i];
136 |   reachIn->clear();
137 |   Succs* p = &preds->elems[i];
138 |   for (int j = 0; j < p->numElems; j++) {
139 |     ReachSet* set = &defs->elems[p->elems[j]];
140 |     for (int k = 0; k < set->numElems; k++) {
141 |       InstrId d = set->elems[k];
142 |       // Compute vars defined by instruction
143 |       UseDef useDefSet;
144 |       useDef(instrs->elems[d], &useDefSet);
145 |       // Only add live definitions to the set
146 |       for (int n = 0; n < useDefSet.def.numElems; n++) {
147 |         if (liveIn->member(useDefSet.def.elems[n])) {
148 |           reachIn->insert(d);
149 |           break;
150 |         }
151 |       }
152 |     }
153 |   }
154 | }
155 | 
156 | void reachingOutDefs(Seq<Instr>* instrs, Liveness* live,
157 |                      CFG* preds, ReachingDefs* defs)
158 | {
159 |   // Make sure defs is large enough
160 |   defs->setCapacity(instrs->numElems);
161 |   defs->numElems = instrs->numElems;
162 | 
163 |   // Find all definitions of each register
164 |   DefsOf defsOf;
165 |   computeDefsOf(instrs, &defsOf);
166 | 
167 |   // For storing the 'gen' and 'kill' sets of each instruction
168 |   GenKill genKillSets;
169 | 
170 |   // For temporarily storing reaching-in and reaching-out definitions
171 |   ReachSet reachIn;
172 |   ReachSet reachOut;
173 | 
174 |   // Has a change been made to the reaching-definitions mapping?
175 |   bool changed = true;
176 | 
177 |   // Iterate until no change, i.e. fixed point
178 |   while (changed) {
179 |     changed = false;
180 | 
181 |     // Propagate reaching definitions forward
182 |     for (int i = 0; i < instrs->numElems; i++) {
183 |       // Compute 'gen' and 'kill' sets
184 |       Instr instr = instrs->elems[i];
185 |       computeGenKill(i, instr, &defsOf, &genKillSets);
186 | 
187 |       // Compute reaching-in definitions
188 |       computeReachIn(instrs, preds, live, defs, i, &reachIn);
189 | 
190 |       // Remove the 'kill' set from the reach-in set to give reach-out set
191 |       reachOut.clear();
192 |       for (int j = 0; j < reachIn.numElems; j++) {
193 |         if (! genKillSets.kill.member(reachIn.elems[j]))
194 |           reachOut.insert(reachIn.elems[j]);
195 |       }
196 | 
197 |       // Add the 'gen' set to the reach-out set
198 |       for (int j = 0; j < genKillSets.gen.numElems; j++)
199 |         reachOut.insert(genKillSets.gen.elems[j]);
200 | 
201 |       // Insert the reach-out variables into the map
202 |       for (int j = 0; j < reachOut.numElems; j++) {
203 |         bool inserted = defs->elems[i].insert(reachOut.elems[j]);
204 |         changed = changed || inserted;
205 |       }
206 |     }
207 |   }
208 | }
209 | 
210 | void reachingDefs(Seq<Instr>* instrs, CFG* cfg, ReachingDefs* defs)
211 | {
212 |   // For efficiency, perform liveness analysis first
213 |   Liveness live;
214 |   liveness(instrs, cfg, &live);
215 | 
216 |   // Reverse the arrows in the CFG
217 |   CFG preds;
218 |   reverseCFG(cfg, &preds);
219 | 
220 |   // Make sure defs is large enough
221 |   defs->setCapacity(instrs->numElems);
222 |   defs->numElems = instrs->numElems;
223 | 
224 |   // Compute defs reaching-out of each instruction
225 |   ReachingDefs out;
226 |   reachingOutDefs(instrs, &live, &preds, &out);
227 |  
228 |   // Compute defs reaching-in to each instruction
229 |   for (int i = 0; i < defs->numElems; i++)
230 |     computeReachIn(instrs, &preds, &live, &out, i, &defs->elems[i]);
231 | }
232 | 
233 | // ============================================================================
234 | // Compute instructions reached-by each definition
235 | // ============================================================================
236 | 
237 | void computeReachedBy(Seq<Instr>* instrs, CFG* cfg, ReachingDefs* reachedBy)
238 | {
239 |   // Make sure reachedBy is large enough
240 |   reachedBy->setCapacity(instrs->numElems);
241 |   reachedBy->numElems = instrs->numElems;
242 | 
243 |   // Find all uses of each register
244 |   UsesOf usesOf;
245 |   computeUsesOf(instrs, &usesOf);
246 | 
247 |   // Compute definitions reaching each instruction
248 |   ReachingDefs defs;
249 |   reachingDefs(instrs, cfg, &defs);
250 | 
251 |   for (int i = 0; i < instrs->numElems; i++) {
252 |     // Compute def set
253 |     UseDef useDefSet;
254 |     useDef(instrs->elems[i], &useDefSet);
255 | 
256 |     for (int j = 0; j < useDefSet.def.numElems; j++) {
257 |       RegId r = useDefSet.def.elems[j];
258 |       SmallSeq<InstrId>* uses = &usesOf.elems[r];
259 |       for (int k = 0; k < uses->numElems; k++) {
260 |         InstrId u = uses->elems[k];
261 |         if (defs.elems[u].member(i))
262 |           reachedBy->elems[i].insert(u);
263 |       }
264 |     }
265 |   }
266 | }
267 | 


--------------------------------------------------------------------------------
/Lib/Target/ReachingDefs.h:
--------------------------------------------------------------------------------
 1 | // Reaching definitions analysis
 2 | 
 3 | #ifndef _REACHINGDEFS_H_
 4 | #define _REACHINGDEFS_H_
 5 | 
 6 | #include "Common/Seq.h"
 7 | #include "Target/Syntax.h"
 8 | #include "Target/CFG.h"
 9 | 
 10 | // A reach set contains the instruction ids
11 | // that reach an instruction.
12 | 
13 | typedef SmallSeq<InstrId> ReachSet;
14 | 
15 | // The result of the analysis is a set of
16 | // instruction ids that reach each instruction.
17 | 
18 | typedef Seq<ReachSet> ReachingDefs;
19 | 
20 | // Determine the live definitions reaching each instruction.
21 | 
22 | void reachingDefs(Seq<Instr>* instrs, CFG* cfg, ReachingDefs* defs);
23 | 
24 | // Determine the instructions reached-by each definition.
25 | 
26 | void computeReachedBy(Seq<Instr>* instrs, CFG* cfg, ReachingDefs* reachedBy);
27 | 
28 | // Compute a mapping from each register id to a set of instruction ids
29 | // that assign to that register.
30 | typedef Seq<ReachSet> DefsOf;
31 | void computeDefsOf(Seq<Instr>* instrs, DefsOf* defsOf);
32 | 
33 | #endif
34 | 


--------------------------------------------------------------------------------
/Lib/Target/RegAlloc.cpp:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include "Source/Syntax.h"
  3 | #include "Target/Syntax.h"
  4 | #include "Target/RegAlloc.h"
  5 | #include "Target/Subst.h"
  6 | #include "Target/Liveness.h"
  7 | 
  8 | // ============================================================================
  9 | // Accumulator allocation
 10 | // ============================================================================
 11 | 
 12 | // This is a simple peephole optimisation, captured by the following
 13 | // rewrite rule:
 14 | //
 15 | //   i:  x <- f(...)
 16 | //   j:  g(..., x, ...)
 17 | // 
 18 | // ===> if x not live-out of j
 19 | // 
 20 | //   i:  acc <- f(...)
 21 | //   j:  g(..., acc, ...)
 22 | 
 23 | void introduceAccum(CFG* cfg, Liveness* live, Seq<Instr>* instrs)
 24 | {
 25 |   UseDef useDefPrev, useDefCurrent;
 26 |   LiveSet liveOut;
 27 | 
 28 |   Reg acc;
 29 |   acc.tag = ACC;
 30 |   acc.regId = 1;
 31 | 
 32 |   for (int i = 1; i < instrs->numElems; i++) {
 33 |     Instr prev  = instrs->elems[i-1];
 34 |     Instr instr = instrs->elems[i];
 35 | 
 36 |     // Compute vars defined by prev
 37 |     useDef(prev, &useDefPrev);
 38 | 
 39 |     if (useDefPrev.def.numElems > 0) {
 40 |       RegId def = useDefPrev.def.elems[0];
 41 | 
 42 |       // Compute vars used by instr
 43 |       useDef(instr, &useDefCurrent);
 44 | 
 45 |       // Compute vars live-out of instr
 46 |       computeLiveOut(cfg, live, i, &liveOut);
 47 | 
 48 |       // Check that write is non-conditional
 49 |       bool always = (prev.tag == LI && prev.LI.cond.tag == ALWAYS)
 50 |                  || (prev.tag == ALU && prev.ALU.cond.tag == ALWAYS);
 51 | 
 52 |       if (always &&
 53 |           useDefCurrent.use.member(def)  &&
 54 |           !liveOut.member(def)) {
 55 |         renameDest(&prev, REG_A, def, ACC, 1);
 56 |         renameUses(&instr, REG_A, def, ACC, 1);
 57 |         instrs->elems[i-1] = prev;
 58 |         instrs->elems[i]   = instr;
 59 |       }
 60 |     }
 61 |   }
 62 | }
 63 | 
 64 | // ============================================================================
 65 | // Register allocation
 66 | // ============================================================================
 67 | 
 68 | void regAlloc(CFG* cfg, Seq<Instr>* instrs)
 69 | {
 70 |   // Step 0
 71 |   // Perform liveness analysis
 72 |   Liveness live;
 73 |   liveness(instrs, cfg, &live);
 74 | 
 75 |   // Optimisation pass that introduces accumulators
 76 |   introduceAccum(cfg, &live, instrs);
 77 | 
 78 |   // Step 1
 79 |   // For each variable, determine a preference for register file A or B.
 80 |   int n = getFreshVarCount();
 81 |   int* prefA = new int [n];
 82 |   int* prefB = new int [n];
 83 |   UseDef useDefSet;
 84 |   for (int i = 0; i < n; i++) prefA[i] = prefB[i] = 0;
 85 | 
 86 |   for (int i = 0; i < instrs->numElems; i++) {
 87 |     Instr instr = instrs->elems[i];
 88 |     Reg ra, rb;
 89 |     if (getTwoUses(instr, &ra, &rb) && ra.tag == REG_A && rb.tag == REG_A) {
 90 |       RegId x = ra.regId;
 91 |       RegId y = rb.regId;
 92 |       if (prefA[x] > prefA[y] || prefB[y] > prefB[x])
 93 |         { prefA[x]++; prefB[y]++; }
 94 |       else
 95 |         { prefA[y]++; prefB[x]++; }
 96 |     }
 97 |     else if (instr.tag == ALU &&
 98 |              instr.ALU.srcA.tag == REG &&
 99 |              instr.ALU.srcA.reg.tag == REG_A &&
100 |              instr.ALU.srcB.tag == IMM) {
101 |       prefA[instr.ALU.srcA.reg.regId]++;
102 |     }
103 |     else if (instr.tag == ALU &&
104 |              instr.ALU.srcB.tag == REG &&
105 |              instr.ALU.srcB.reg.tag == REG_A &&
106 |              instr.ALU.srcA.tag == IMM) {
107 |       prefA[instr.ALU.srcB.reg.regId]++;
108 |     }
109 |   }
110 | 
111 |   // Step 2
112 |   // For each variable, determine all variables ever live at same time
113 |   LiveSet* liveWith = new LiveSet [n];
114 |   LiveSet liveOut;
115 |   for (int i = 0; i < instrs->numElems; i++) {
116 |     computeLiveOut(cfg, &live, i, &liveOut);
117 |     useDef(instrs->elems[i], &useDefSet);
118 |     for (int j = 0; j < liveOut.numElems; j++) {
119 |       RegId rx = liveOut.elems[j];
120 |       for (int k = 0; k < liveOut.numElems; k++) {
121 |         RegId ry = liveOut.elems[k];
122 |         if (rx != ry) liveWith[rx].insert(ry);
123 |       }
124 |       for (int k = 0; k < useDefSet.def.numElems; k++) {
125 |         RegId rd = useDefSet.def.elems[k];
126 |         if (rd != rx) {
127 |           liveWith[rx].insert(rd);
128 |           liveWith[rd].insert(rx);
129 |         }
130 |       }
131 |     }
132 |   }
133 | 
134 |   // Step 3
135 |   // Allocate a register to each variable
136 |   RegTag prevChosenRegFile = REG_B;
137 |   Reg* alloc = new Reg [n];
138 |   for (int i = 0; i < n; i++) alloc[i].tag = NONE;
139 | 
140 |   const int NUM_REGS = 32;
141 |   bool possibleA[NUM_REGS];
142 |   bool possibleB[NUM_REGS];
143 | 
144 |   for (int i = 0; i < n; i++) {
145 |     for (int j = 0; j < NUM_REGS; j++)
146 |       possibleA[j] = possibleB[j] = true;
147 | 
148 |     // Eliminate impossible choices of register for this variable
149 |     LiveSet* set = &liveWith[i];
150 |     for (int j = 0; j < set->numElems; j++) {
151 |       Reg neighbour = alloc[set->elems[j]];
152 |       if (neighbour.tag == REG_A) possibleA[neighbour.regId] = false;
153 |       if (neighbour.tag == REG_B) possibleB[neighbour.regId] = false;
154 |     }
155 | 
156 |     // Find possible register in each register file
157 |     RegId chosenA = -1;
158 |     RegId chosenB = -1;
159 |     for (int j = 0; j < NUM_REGS; j++)
160 |       if (possibleA[j]) { chosenA = j; break; }
161 |     for (int j = 0; j < NUM_REGS; j++)
162 |       if (possibleB[j]) { chosenB = j; break; }
163 | 
164 |     // Choose a register file
165 |     RegTag chosenRegFile;
166 |     if (chosenA < 0 && chosenB < 0) {
167 |       printf("QPULib: register allocation failed, insufficient capacity\n");
168 |       exit(EXIT_FAILURE);
169 |     }
170 |     else if (chosenA < 0) chosenRegFile = REG_B;
171 |     else if (chosenB < 0) chosenRegFile = REG_A;
172 |     else {
173 |       if (prefA[i] > prefB[i]) chosenRegFile = REG_A;
174 |       else if (prefA[i] < prefB[i]) chosenRegFile = REG_B;
175 |       else chosenRegFile = prevChosenRegFile == REG_A ? REG_B : REG_A;
176 |     }
177 |     prevChosenRegFile = chosenRegFile;
178 | 
179 |     // Finally, allocate a register to the variable
180 |     alloc[i].tag = chosenRegFile;
181 |     alloc[i].regId = chosenRegFile == REG_A ? chosenA : chosenB;
182 |   }
183 | 
184 |   // Step 4
185 |   // Apply the allocation to the code
186 |   for (int i = 0; i < instrs->numElems; i++) {
187 |     useDef(instrs->elems[i], &useDefSet);
188 |     Instr* instr = &instrs->elems[i];
189 |     for (int j = 0; j < useDefSet.def.numElems; j++) {
190 |       RegId r = useDefSet.def.elems[j];
191 |       RegTag tmp = alloc[r].tag == REG_A ? TMP_A : TMP_B;
192 |       renameDest(instr, REG_A, r, tmp, alloc[r].regId);
193 |     }
194 |     for (int j = 0; j < useDefSet.use.numElems; j++) {
195 |       RegId r = useDefSet.use.elems[j];
196 |       RegTag tmp = alloc[r].tag == REG_A ? TMP_A : TMP_B;
197 |       renameUses(instr, REG_A, r, tmp, alloc[r].regId);
198 |     }
199 |     substRegTag(instr, TMP_A, REG_A);
200 |     substRegTag(instr, TMP_B, REG_B);
201 |   }
202 | 
203 |   // Free memory
204 |   delete [] prefA;
205 |   delete [] prefB;
206 |   delete [] liveWith;
207 | }
208 | 
209 | 
210 | 
211 | 


--------------------------------------------------------------------------------
/Lib/Target/RegAlloc.h:
--------------------------------------------------------------------------------
 1 | #ifndef _REGALLOC_H_
 2 | #define _REGALLOC_H_
 3 | 
 4 | #include "Target/CFG.h"
 5 | #include "Target/Liveness.h"
 6 | #include "Target/Syntax.h"
 7 | #include "Common/Seq.h"
 8 | 
 9 | void regAlloc(CFG* cfg, Seq<Instr>* instrs);
10 | 
11 | #endif
12 | 


--------------------------------------------------------------------------------
/Lib/Target/RemoveLabels.cpp:
--------------------------------------------------------------------------------
 1 | #include "Target/RemoveLabels.h"
 2 | 
 3 | // ============================================================================
 4 | // Remove labels
 5 | // ============================================================================
 6 | 
 7 | // Remove all labels, replacing absolute branch-label instructions
 8 | // with relative branch-target instructions.
 9 | 
10 | void removeLabels(Seq<Instr>* instrs)
11 | {
12 |   Seq<Instr> newInstrs;
13 | 
14 |   // The number of labels in the instruction sequence
15 |   int numLabels = getFreshLabelCount();
16 | 
17 |   // A mapping from labels to instruction ids
18 |   InstrId* labels = new InstrId [numLabels];
19 | 
20 |   // Initialise label mapping
21 |   for (int i = 0; i < numLabels; i++)
22 |     labels[i] = -1;
23 | 
24 |   // First, remove labels, remembering the index of the instruction
25 |   // pointed to by each label.
26 |   for (int i = 0, j = 0; i < instrs->numElems; i++) {
27 |     Instr instr = instrs->elems[i];
28 |     if (instr.tag == LAB) {
29 |       labels[instr.label] = j;
30 |     }
31 |     else {
32 |       newInstrs.append(instr);
33 |       j++;
34 |     }
35 |   }
36 | 
37 |   // Second, remove branch-label instructions.
38 |   instrs->numElems = newInstrs.numElems;
39 |   for (int i = 0; i < newInstrs.numElems; i++) {
40 |     Instr instr = newInstrs.elems[i];
41 |     if (instr.tag == BRL) {
42 |       assert(instr.BRL.label >= 0 && instr.BRL.label < numLabels);
43 |       int dest = labels[instr.BRL.label];
44 |       assert (dest >= 0);
45 |       BranchTarget t;
46 |       t.relative       = true;
47 |       t.useRegOffset   = false;
48 |       t.immOffset      = dest - 4 - i;
49 |       instr.tag        = BR;
50 |       instr.BR.target  = t;
51 |       instrs->elems[i] = instr;
52 |     }
53 |     else {
54 |       instrs->elems[i] = instr;
55 |     }
56 |   }
57 | 
58 |   delete [] labels;
59 | }
60 | 


--------------------------------------------------------------------------------
/Lib/Target/RemoveLabels.h:
--------------------------------------------------------------------------------
 1 | #ifndef _REMOVELABELS_H_
 2 | #define _REMOVELABELS_H_
 3 | 
 4 | #include "Target/Syntax.h"
 5 | #include "Target/CFG.h"
 6 | #include "Target/Liveness.h"
 7 | #include "Common/Seq.h"
 8 | 
 9 | // Remove all labels, replacing absolute branch-label instructions
10 | // with relative branch-target instructions.
11 | void removeLabels(Seq<Instr>* instrs);
12 | 
13 | #endif
14 | 


--------------------------------------------------------------------------------
/Lib/Target/Satisfy.cpp:
--------------------------------------------------------------------------------
  1 | #include "Target/Satisfy.h"
  2 | #include "Target/Liveness.h"
  3 | #include "Target/RegAlloc.h"
  4 | #include <assert.h>
  5 | #include <stdio.h>
  6 | 
  7 | // =============================
  8 | // Remap register to accumulator
  9 | // =============================
 10 | 
 11 | // Return an instruction to move the contents of a register to an
 12 | // accumulator, and change the use of that register in the given
 13 | // instruction to the given accumulator.
 14 | 
 15 | Instr remapAToAccum(Instr* instr, RegId acc)
 16 | {
 17 |   assert(instr->ALU.srcA.tag == REG);
 18 | 
 19 |   AssignCond always;
 20 |   always.tag = ALWAYS;
 21 | 
 22 |   Instr move;
 23 |   move.tag                   = ALU;
 24 |   move.ALU.setFlags          = false;
 25 |   move.ALU.cond              = always;
 26 |   move.ALU.dest.tag          = ACC;
 27 |   move.ALU.dest.regId        = acc;
 28 |   move.ALU.srcA.tag          = REG;
 29 |   move.ALU.srcA.reg          = instr->ALU.srcA.reg;
 30 |   move.ALU.op                = A_BOR;
 31 |   move.ALU.srcB.tag          = REG;
 32 |   move.ALU.srcB.reg          = instr->ALU.srcA.reg;
 33 | 
 34 |   instr->ALU.srcA.reg.tag    = ACC;
 35 |   instr->ALU.srcA.reg.regId  = acc;
 36 | 
 37 |   return move;
 38 | }
 39 | 
 40 | Instr remapBToAccum(Instr* instr, RegId acc)
 41 | {
 42 |   assert(instr->ALU.srcB.tag == REG);
 43 | 
 44 |   AssignCond always;
 45 |   always.tag = ALWAYS;
 46 | 
 47 |   Instr move;
 48 |   move.tag                   = ALU;
 49 |   move.ALU.setFlags          = false;
 50 |   move.ALU.cond              = always;
 51 |   move.ALU.dest.tag          = ACC;
 52 |   move.ALU.dest.regId        = acc;
 53 |   move.ALU.srcA.tag          = REG;
 54 |   move.ALU.srcA.reg          = instr->ALU.srcB.reg;
 55 |   move.ALU.op                = A_BOR;
 56 |   move.ALU.srcB.tag          = REG;
 57 |   move.ALU.srcB.reg          = instr->ALU.srcB.reg;
 58 | 
 59 |   instr->ALU.srcB.reg.tag   = ACC;
 60 |   instr->ALU.srcB.reg.regId = acc;
 61 | 
 62 |   return move;
 63 | }
 64 | 
 65 | // ==============================
 66 | // Resolve register file conflict
 67 | // ==============================
 68 | 
 69 | // Determine reg file of given register.
 70 | 
 71 | RegTag regFileOf(Reg r)
 72 | {
 73 |   if (r.tag == REG_A) return REG_A;
 74 |   if (r.tag == REG_B) return REG_B;
 75 |   if (r.tag == SPECIAL) {
 76 |     if (r.regId == SPECIAL_ELEM_NUM) return REG_A;
 77 |     if (r.regId == SPECIAL_QPU_NUM) return REG_B;
 78 |     if (r.regId == SPECIAL_DMA_LD_WAIT) return REG_A;
 79 |     if (r.regId == SPECIAL_DMA_ST_WAIT) return REG_B;
 80 |   }
 81 |   return NONE;
 82 | }
 83 | 
 84 | // When an instruction uses two (different) registers that are mapped
 85 | // to the same register file, then remap one of them to an
 86 | // accumulator.
 87 | 
 88 | bool resolveRegFileConflict(Instr* instr, Instr* newInstr)
 89 | {
 90 |   if (instr->tag == ALU && instr->ALU.srcA.tag == REG
 91 |                         && instr->ALU.srcB.tag == REG) {
 92 |     int rfa = regFileOf(instr->ALU.srcA.reg);
 93 |     int rfb = regFileOf(instr->ALU.srcB.reg);
 94 |     if (rfa != NONE && rfb != NONE) {
 95 |       bool conflict = rfa == rfb && instr->ALU.srcA.reg.regId !=
 96 |                                     instr->ALU.srcB.reg.regId;
 97 |       if (conflict) {
 98 |         *newInstr = remapAToAccum(instr, 0);
 99 |         return true;
100 |       }
101 |     }
102 |   }
103 |   return false;
104 | }
105 | 
106 | // =============================
107 | // Satisfy VideoCore constraints
108 | // =============================
109 | 
110 | // Transform an instruction sequence to satisfy various VideoCore
111 | // constraints, including:
112 | //
113 | //   1. fill branch delay slots with NOPs;
114 | //
115 | //   2. introduce accumulators for operands mapped to the same
116 | //      register file;
117 | //
118 | //   3. introduce accumulators for horizontal rotation operands;
119 | //
120 | //   4. insert NOPs to account for data hazards: a destination
121 | //      register (assuming it's not an accumulator) cannot be read by
122 | //      the next instruction.
123 | 
124 | // First pass: insert move-to-accumulator instructions.
125 | 
126 | static void insertMoves(Seq<Instr>* instrs, Seq<Instr>* newInstrs)
127 | {
128 |   for (int i = 0; i < instrs->numElems; i++) {
129 |     Instr instr = instrs->elems[i];
130 |     RegId r; RegTag rt;
131 | 
132 |     if (instr.tag == ALU && instr.ALU.op == M_ROTATE) {
133 |       // Insert moves for horizontal rotate operations
134 |       newInstrs->append(remapAToAccum(&instr, 0));
135 |       if (instr.ALU.srcB.tag == REG)
136 |         newInstrs->append(remapBToAccum(&instr, 5));
137 |       newInstrs->append(nop());
138 |     }
139 |     else if (instr.tag == ALU && instr.ALU.srcA.tag == IMM &&
140 |              instr.ALU.srcB.tag == REG &&
141 |              regFileOf(instr.ALU.srcB.reg) == REG_B) {
142 |       // Insert moves for an operation with a small immediate whose
143 |       // register operand must reside in reg file B.
144 |       newInstrs->append(remapBToAccum(&instr, 0));
145 |     }
146 |     else if (instr.tag == ALU && instr.ALU.srcB.tag == IMM &&
147 |              instr.ALU.srcA.tag == REG &&
148 |              regFileOf(instr.ALU.srcA.reg) == REG_B) {
149 |       // Insert moves for an operation with a small immediate whose
150 |       // register operand must reside in reg file B.
151 |       newInstrs->append(remapAToAccum(&instr, 0));
152 |     }
153 |     else {
154 |       // Insert moves for operands that are mapped to the same reg file
155 |       Instr move;
156 |       if (resolveRegFileConflict(&instr, &move))
157 |         newInstrs->append(move);
158 |     }
159 |     
160 |     // Put current instruction into the new sequence
161 |     newInstrs->append(instr);
162 |   }
163 | }
164 | 
165 | // Second pass: insert NOPs
166 | static void insertNops(Seq<Instr>* instrs, Seq<Instr>* newInstrs)
167 | {
168 |   // Use/def sets
169 |   UseDefReg mySet, prevSet;
170 | 
171 |   // Previous instruction
172 |   Instr prev = nop();
173 | 
174 |   for (int i = 0; i < instrs->numElems; i++) {
175 |     Instr instr = instrs->elems[i];
176 |     RegId r; RegTag rt;
177 | 
178 |     // Insert NOPs to avoid data hazards
179 |     useDefReg(prev, &prevSet);
180 |     useDefReg(instr, &mySet);
181 |     for (int j = 0; j < prevSet.def.numElems; j++) {
182 |       Reg defReg = prevSet.def.elems[j];
183 |       bool needNop = defReg.tag == REG_A || defReg.tag == REG_B;
184 |       if (needNop && mySet.use.member(defReg)) {
185 |         newInstrs->append(nop());
186 |         break;
187 |       }
188 |     }
189 | 
190 |     // Put current instruction into the new sequence
191 |     newInstrs->append(instr);
192 | 
193 |     // Insert NOPs in branch delay slots
194 |     if (instr.tag == BRL || instr.tag == END) {
195 |       for (int j = 0; j < 3; j++)
196 |         newInstrs->append(nop());
197 |       prev = nop();
198 |     }
199 | 
200 |     // Update previous instruction
201 |     if (instr.tag != LAB) prev = instr;
202 |   }
203 | 
204 | }
205 | 
206 | // Combine passes
207 | 
208 | void satisfy(Seq<Instr>* instrs)
209 | {
210 |   // New instruction sequence
211 |   Seq<Instr> newInstrs(instrs->numElems * 2);
212 | 
213 |   // Apply passes
214 |   insertMoves(instrs, &newInstrs);
215 |   instrs->clear();
216 |   insertNops(&newInstrs, instrs);
217 | }
218 | 


--------------------------------------------------------------------------------
/Lib/Target/Satisfy.h:
--------------------------------------------------------------------------------
 1 | #ifndef _SATISFY_H_
 2 | #define _SATISFY_H_
 3 | 
 4 | #include "Target/Syntax.h"
 5 | #include "Target/CFG.h"
 6 | 
// Register file tag of the given register (the move-insertion pass
// compares the result against REG_B).
// NOTE(review): the result for registers outside files A and B is not
// visible from this header -- confirm in Satisfy.cpp.
RegTag regFileOf(Reg r);

// Rewrite 'instrs' in place by running the move-insertion pass
// followed by the NOP-insertion pass.
void satisfy(Seq<Instr>* instrs);
 9 | 
10 | #endif
11 | 


--------------------------------------------------------------------------------
/Lib/Target/SmallLiteral.cpp:
--------------------------------------------------------------------------------
 1 | #include "Target/SmallLiteral.h"
 2 | #include <stdio.h>
 3 | 
 4 | // Small literals are literals that fit in the small immediate field
 5 | // of the VideoCore-IV instruction set.
 6 | 
 7 | const int NUM_SMALL_FLOATS = 16;
 8 | const float smallFloats[NUM_SMALL_FLOATS] = {
 9 |     1.0
10 |   , 2.0
11 |   , 4.0
12 |   , 8.0
13 |   , 16.0
14 |   , 32.0
15 |   , 64.0
16 |   , 128.0
17 |   , 0.00390625
18 |   , 0.0078125
19 |   , 0.015625
20 |   , 0.03125
21 |   , 0.0625
22 |   , 0.125
23 |   , 0.25
24 |   , 0.5
25 | };
26 | 
27 | // Encode a small literal according to Table 5 of the VideoCore-IV
28 | // manual. Returns -1 if expression cannot be encoded as a small
29 | // literal.
30 | 
31 | int encodeSmallLit(Expr* e)
32 | {
33 |   if (e->tag == INT_LIT) { 
34 |     if (e->intLit >= 0 && e->intLit <= 15)
35 |       return e->intLit;
36 |     else if (e->intLit >= -16 && e->intLit <= -1)
37 |       return 32 + e->intLit;
38 |   } 
39 |   else if (e->tag == FLOAT_LIT) {
40 |     if (e->floatLit == 0.0)
41 |       return 0;
42 |     else {
43 |       int index = -1;
44 |       for (int i = 0; i < NUM_SMALL_FLOATS; i++)
45 |         if (smallFloats[i] == e->floatLit) {
46 |           index = i;
47 |           break;
48 |         }
49 |       if (index != -1)
50 |         return 32 + index;
51 |     }
52 |   }
53 |   return -1;
54 | }
55 | 
56 | // Determine if a given expression (source language) can be stored in
57 | // a small immediate.
58 | 
59 | bool isSmallLit(Expr* e)
60 | {
61 |   return encodeSmallLit(e) >= 0;
62 | }
63 | 
64 | // Decode a small literal.
65 | 
66 | Word decodeSmallLit(int x)
67 | {
68 |   Word w;
69 |   if (x >= 32) {
70 |     w.floatVal = smallFloats[x-32];
71 |     return w;
72 |   }
73 |   else if (x >= 16) {
74 |     w.intVal = x-32;
75 |     return w;
76 |   }
77 |   else if (x >= 0) {
78 |     w.intVal = x;
79 |     return w;
80 |   }
81 | 
82 |   // Unreachable
83 |   assert(false);
84 | }
85 | 
86 | // Display a small literal.
87 | 
88 | void printSmallLit(int x)
89 | {
90 |   if (x >= 32)
91 |     printf("%f", smallFloats[x-32]);
92 |   else if (x >= 16)
93 |     printf("%i", x-32);
94 |   else if (x >= 0)
95 |     printf("%i", x);
96 | }
97 | 


--------------------------------------------------------------------------------
/Lib/Target/SmallLiteral.h:
--------------------------------------------------------------------------------
 1 | #ifndef _SMALL_LITERAL_H_
 2 | #define _SMALL_LITERAL_H_
 3 | 
 4 | #include "Source/Syntax.h"
 5 | #include "Target/Emulator.h"
 6 | 
// Encode a source-language literal as a small immediate.  Returns -1
// if the expression cannot be encoded.
int encodeSmallLit(Expr* e);

// True if the expression can be encoded as a small immediate.
bool isSmallLit(Expr* e);

// Print the value denoted by small-immediate encoding 'x'.
void printSmallLit(int x);

// Value denoted by small-immediate encoding 'x'.
Word decodeSmallLit(int x);
11 | 
12 | #endif
13 | 


--------------------------------------------------------------------------------
/Lib/Target/Subst.cpp:
--------------------------------------------------------------------------------
  1 | #include "Target/Subst.h"
  2 | 
  3 | // Rename a destination register in an instruction
  4 | void renameDest(Instr* instr, RegTag vt, RegId v,
  5 |                               RegTag wt, RegId w)
  6 | {
  7 |   switch (instr->tag) {
  8 |     // Load immediate
  9 |     case LI:
 10 |       if (instr->LI.dest.tag == vt && instr->LI.dest.regId == v) {
 11 |         instr->LI.dest.tag = wt;
 12 |         instr->LI.dest.regId = w;
 13 |       }
 14 |       return;
 15 | 
 16 |     // ALU operation
 17 |     case ALU:
 18 |       if (instr->ALU.dest.tag == vt && instr->ALU.dest.regId == v) {
 19 |         instr->ALU.dest.tag = wt;
 20 |         instr->ALU.dest.regId = w;
 21 |       }
 22 |       return;
 23 | 
 24 |     // LD4 instruction
 25 |     case LD4:
 26 |       if (instr->LD4.dest.tag == vt && instr->LD4.dest.regId == v) {
 27 |         instr->LD4.dest.tag = wt;
 28 |         instr->LD4.dest.regId = w;
 29 |       }
 30 |       return;
 31 | 
 32 |     // RECV instruction
 33 |     case RECV:
 34 |       if (instr->RECV.dest.tag == vt && instr->RECV.dest.regId == v) {
 35 |         instr->RECV.dest.tag = wt;
 36 |         instr->RECV.dest.regId = w;
 37 |       }
 38 |       return;
 39 |   }
 40 | }
 41 | 
 42 | // Renamed a used register in an instruction
 43 | void renameUses(Instr* instr, RegTag vt, RegId v,
 44 |                               RegTag wt, RegId w)
 45 | {
 46 |   switch (instr->tag) {
 47 |     // ALU operation
 48 |     case ALU:
 49 |       if (instr->ALU.srcA.tag == REG && instr->ALU.srcA.reg.tag == vt &&
 50 |           instr->ALU.srcA.reg.regId == v) {
 51 |         instr->ALU.srcA.reg.tag = wt;
 52 |         instr->ALU.srcA.reg.regId = w;
 53 |       }
 54 | 
 55 |       if (instr->ALU.srcB.tag == REG && instr->ALU.srcB.reg.tag == vt &&
 56 |           instr->ALU.srcB.reg.regId == v) {
 57 |         instr->ALU.srcB.reg.tag = wt;
 58 |         instr->ALU.srcB.reg.regId = w;
 59 |       }
 60 |       return;
 61 | 
 62 |     // LD1 instruction
 63 |     case LD1:
 64 |       if (instr->LD1.addr.tag == vt && instr->LD1.addr.regId == v) {
 65 |         instr->LD1.addr.tag = wt;
 66 |         instr->LD1.addr.regId = w;
 67 |       }
 68 |       return;
 69 | 
 70 |     // ST1 instruction
 71 |     case ST1:
 72 |       if (instr->ST1.data.tag == vt && instr->ST1.data.regId == v) {
 73 |         instr->ST1.data.tag = wt;
 74 |         instr->ST1.data.regId = w;
 75 |       }
 76 |       return;
 77 | 
 78 |     // ST2 instruction
 79 |     case ST2:
 80 |       if (instr->ST2.addr.tag == vt && instr->ST2.addr.regId == v) {
 81 |         instr->ST2.addr.tag = wt;
 82 |         instr->ST2.addr.regId = w;
 83 |       }
 84 |       return;
 85 | 
 86 |     // Print integer instruction
 87 |     case PRI:
 88 |       if (instr->PRI.tag == vt && instr->PRI.regId == v) {
 89 |         instr->PRI.tag = wt;
 90 |         instr->PRI.regId = w;
 91 |       }
 92 |       return;
 93 | 
 94 |     // Print float instruction
 95 |     case PRF:
 96 |       if (instr->PRF.tag == vt && instr->PRF.regId == v) {
 97 |         instr->PRF.tag = wt;
 98 |         instr->PRF.regId = w;
 99 |       }
100 |       return;
101 |   }
102 | }
103 | 
104 | // Globally change register tag vt to wt in given instruction
105 | void substRegTag(Instr* instr, RegTag vt, RegTag wt)
106 | {
107 |   switch (instr->tag) {
108 |     // Load immediate
109 |     case LI:
110 |       if (instr->LI.dest.tag == vt)
111 |         instr->LI.dest.tag = wt;
112 |       return;
113 | 
114 |     // ALU operation
115 |     case ALU:
116 |       if (instr->ALU.dest.tag == vt)
117 |         instr->ALU.dest.tag = wt;
118 |       if (instr->ALU.srcA.tag == REG && instr->ALU.srcA.reg.tag == vt)
119 |         instr->ALU.srcA.reg.tag = wt;
120 |       if (instr->ALU.srcB.tag == REG && instr->ALU.srcB.reg.tag == vt)
121 |         instr->ALU.srcB.reg.tag = wt;
122 |       return;
123 | 
124 |     // LD1 instruction
125 |     case LD1:
126 |       if (instr->LD1.addr.tag == vt)
127 |         instr->LD1.addr.tag = wt;
128 |       return;
129 | 
130 |     // LD4 instruction
131 |     case LD4:
132 |       if (instr->LD4.dest.tag == vt)
133 |         instr->LD4.dest.tag = wt;
134 |       return;
135 | 
136 |     // ST1 instruction
137 |     case ST1:
138 |       if (instr->ST1.data.tag == vt)
139 |         instr->ST1.data.tag = wt;
140 |       return;
141 | 
142 |     // ST2 instruction
143 |     case ST2:
144 |       if (instr->ST2.addr.tag == vt)
145 |         instr->ST2.addr.tag = wt;
146 |       return;
147 | 
148 |     // Print integer instruction
149 |     case PRI:
150 |       if (instr->PRI.tag == vt)
151 |         instr->PRI.tag = wt;
152 |       return;
153 | 
154 |     // Print float instruction
155 |     case PRF:
156 |       if (instr->PRF.tag == vt)
157 |         instr->PRF.tag = wt;
158 |       return;
159 | 
160 |     // RECV instruction
161 |     case RECV:
162 |       if (instr->RECV.dest.tag == vt)
163 |         instr->RECV.dest.tag = wt;
164 |       return;
165 | 
166 |   }
167 | }
168 | 


--------------------------------------------------------------------------------
/Lib/Target/Subst.h:
--------------------------------------------------------------------------------
 1 | #ifndef _SUBST_H_
 2 | #define _SUBST_H_
 3 | 
 4 | #include "Target/Syntax.h"
 5 | 
// Rename a destination register in an instruction: a destination
// equal to register (vt, v) becomes register (wt, w)
void renameDest(Instr* instr, RegTag vt, RegId v,
                              RegTag wt, RegId w);

// Rename a used register in an instruction: every use of register
// (vt, v) becomes a use of register (wt, w)
void renameUses(Instr* instr, RegTag vt, RegId v,
                              RegTag wt, RegId w);

// Globally change register tag vt to wt in given instruction
// (register identifiers are left as they are)
void substRegTag(Instr* instr, RegTag vt, RegTag wt);
16 | 
17 | #endif
18 | 


--------------------------------------------------------------------------------
/Lib/Target/Syntax.cpp:
--------------------------------------------------------------------------------
  1 | #include <assert.h>
  2 | #include "Source/Syntax.h"
  3 | #include "Target/Syntax.h"
  4 | 
  5 | // =======
  6 | // Globals
  7 | // =======
  8 | 
// Used for fresh label generation: the identifier that the next call
// to freshLabel() will return
static int globalLabelId = 0;
 11 | 
 12 | // ======================
 13 | // Handy syntax functions
 14 | // ======================
 15 | 
 16 | // Determine if instruction is a conditional assignment
 17 | bool isCondAssign(Instr* instr)
 18 | {
 19 |   if (instr->tag == LI && instr->LI.cond.tag != ALWAYS)
 20 |     return true;
 21 |   if (instr->tag == ALU && instr->ALU.cond.tag != ALWAYS)
 22 |     return true;
 23 |   return false;
 24 | }
 25 | 
 26 | // Generate load-immediate instruction.
 27 | 
 28 | Instr genLI(Reg dst, int i)
 29 | {
 30 |   AssignCond always;
 31 |   always.tag = ALWAYS;
 32 | 
 33 |   Instr instr;
 34 |   instr.tag           = LI;
 35 |   instr.LI.setFlags   = false;
 36 |   instr.LI.cond       = always;
 37 |   instr.LI.dest       = dst;
 38 |   instr.LI.imm.tag    = IMM_INT32;
 39 |   instr.LI.imm.intVal = i;
 40 |  
 41 |   return instr;
 42 | }
 43 | 
 44 | // Generate move instruction.
 45 | 
 46 | Instr genMove(Reg dst, Reg src)
 47 | {
 48 |   AssignCond always;
 49 |   always.tag = ALWAYS;
 50 | 
 51 |   Instr instr;
 52 |   instr.tag           = ALU;
 53 |   instr.ALU.setFlags  = false;
 54 |   instr.ALU.cond      = always;
 55 |   instr.ALU.dest      = dst;
 56 |   instr.ALU.srcA.tag  = REG;
 57 |   instr.ALU.srcA.reg  = src;
 58 |   instr.ALU.op        = A_BOR;
 59 |   instr.ALU.srcB.tag  = REG;
 60 |   instr.ALU.srcB.reg  = src;
 61 | 
 62 |   return instr;
 63 | }
 64 | 
 65 | // Generate bitwise-or instruction.
 66 | 
 67 | Instr genOR(Reg dst, Reg srcA, Reg srcB)
 68 | {
 69 |   AssignCond always;
 70 |   always.tag = ALWAYS;
 71 | 
 72 |   Instr instr;
 73 |   instr.tag           = ALU;
 74 |   instr.ALU.setFlags  = false;
 75 |   instr.ALU.cond      = always;
 76 |   instr.ALU.dest      = dst;
 77 |   instr.ALU.srcA.tag  = REG;
 78 |   instr.ALU.srcA.reg  = srcA;
 79 |   instr.ALU.op        = A_BOR;
 80 |   instr.ALU.srcB.tag  = REG;
 81 |   instr.ALU.srcB.reg  = srcB;
 82 | 
 83 |   return instr;
 84 | }
 85 | 
 86 | // Generate left-shift instruction.
 87 | 
 88 | Instr genLShift(Reg dst, Reg srcA, int n)
 89 | {
 90 |   assert(n >= 0 && n <= 15);
 91 | 
 92 |   AssignCond always;
 93 |   always.tag = ALWAYS;
 94 | 
 95 |   Instr instr;
 96 |   instr.tag                   = ALU;
 97 |   instr.ALU.setFlags          = false;
 98 |   instr.ALU.cond              = always;
 99 |   instr.ALU.dest              = dst;
100 |   instr.ALU.srcA.tag          = REG;
101 |   instr.ALU.srcA.reg          = srcA;
102 |   instr.ALU.op                = A_SHL;
103 |   instr.ALU.srcB.tag          = IMM;
104 |   instr.ALU.srcB.smallImm.tag = SMALL_IMM;
105 |   instr.ALU.srcB.smallImm.val = n;
106 | 
107 |   return instr;
108 | }
109 | 
110 | // Generate increment instruction.
111 | 
112 | Instr genIncr(Reg dst, Reg srcA, int n)
113 | {
114 |   assert(n >= 0 && n <= 15);
115 | 
116 |   AssignCond always;
117 |   always.tag = ALWAYS;
118 | 
119 |   Instr instr;
120 |   instr.tag                   = ALU;
121 |   instr.ALU.setFlags          = false;
122 |   instr.ALU.cond              = always;
123 |   instr.ALU.dest              = dst;
124 |   instr.ALU.srcA.tag          = REG;
125 |   instr.ALU.srcA.reg          = srcA;
126 |   instr.ALU.op                = A_ADD;
127 |   instr.ALU.srcB.tag          = IMM;
128 |   instr.ALU.srcB.smallImm.tag = SMALL_IMM;
129 |   instr.ALU.srcB.smallImm.val = n;
130 | 
131 |   return instr;
132 | }
133 | 
134 | // Is last instruction in a basic block?
135 | bool isLast(Instr instr)
136 | {
137 |   return instr.tag == BRL || instr.tag == BR || instr.tag == END;
138 | }
139 | 
140 | // =========================
141 | // Fresh variable generation
142 | // =========================
143 | 
144 | // Obtain a fresh variable
145 | Reg freshReg()
146 | {
147 |   Var v = freshVar();
148 |   Reg r;
149 |   r.tag = REG_A;
150 |   r.regId = v.id;
151 |   return r;
152 | }
153 | 
154 | // Obtain a fresh label
155 | Label freshLabel()
156 | {
157 |   return globalLabelId++;
158 | }
159 | 
160 | // Number of fresh labels
161 | int getFreshLabelCount()
162 | {
163 |   return globalLabelId;
164 | }
165 | 
166 | // Reset fresh label generator
167 | void resetFreshLabelGen()
168 | {
169 |   globalLabelId = 0;
170 | }
171 | 
172 | // Reset fresh label generator to specified value
173 | void resetFreshLabelGen(int val)
174 | {
175 |   globalLabelId = val;
176 | }
177 | 


--------------------------------------------------------------------------------
/Lib/Target/Syntax.h:
--------------------------------------------------------------------------------
  1 | #ifndef _TARGET_SYNTAX_H_
  2 | #define _TARGET_SYNTAX_H_
  3 | 
  4 | #include <stdint.h>
  5 | 
  6 | // Syntax of the QPU target language.
  7 | 
  8 | // This abstract syntax is a balance between a strict and relaxed
  9 | // definition of the target language:
 10 | // 
 11 | //   a "strict" definition would allow only instructions that can run on
 12 | //   the target machine to be expressed, whereas a "relaxed" one allows
 13 | //   instructions that have no direct mapping to machine instructions.
 14 | // 
 15 | // A relaxed definition allows the compilation process to be incremental:
 16 | // after each pass, the target code gets closer to being executable, by
 17 | // transforming away constructs that do not have a direct mapping to
 18 | // hardware.  However, we do not want to be too relaxed, otherwise we
 19 | // lose scope for the type checker to help us.
 20 | // 
 21 | // For example, the definition below allows an instruction to read two
 22 | // operands from the *same* register file.  In fact, two operands must be
 23 | // taken from different register files in the target language.  It is the
 24 | // job of a compiler pass to enforce such a constraint.
 25 | 
 26 | // ============================================================================
 27 | // Sub-word selectors
 28 | // ============================================================================
 29 | 
 30 | // A sub-word selector allows a 32, 16, or 8-bit portion of each vector
 31 | // word to be selected.
 32 | 
 33 | enum SubWord {
 34 |     A8     // Bits 7..0
 35 |   , B8     // Bits 15..8
 36 |   , C8     // Bits 23..16
 37 |   , D8     // Bits 31..24
 38 |   , A16    // Bits 15..0
 39 |   , B16    // Bits 31..16
 40 |   , A32    // Bits 31..0
 41 | };
 42 | 
 43 | // ============================================================================
 44 | // Registers
 45 | // ============================================================================
 46 | 
// Numeric identifier of a register; interpreted together with a RegTag
typedef int RegId;
 48 | 
 49 | // Different kinds of registers
 50 | enum RegTag {
 51 |     REG_A           // In register file A (0..31)
 52 |   , REG_B           // In register file B (0..31)
 53 |   , ACC             // Accumulator register
 54 |   , SPECIAL         // Special register
 55 |   , NONE            // No read/write
 56 |   , TMP_A           // Used in intermediate code
 57 |   , TMP_B           // Used in intermediate code
 58 | };
 59 | 
 60 | inline bool isRegAorB(RegTag rt)
 61 |   { return rt == REG_A || rt == REG_B; }
 62 | 
 63 | // Special registers
 64 | enum Special {
 65 |     // Read-only
 66 |     SPECIAL_UNIFORM
 67 |   , SPECIAL_ELEM_NUM
 68 |   , SPECIAL_QPU_NUM
 69 |   , SPECIAL_VPM_READ
 70 | 
 71 |     // Write-only
 72 |   , SPECIAL_RD_SETUP
 73 |   , SPECIAL_WR_SETUP
 74 |   , SPECIAL_DMA_ST_ADDR
 75 |   , SPECIAL_DMA_ST_WAIT
 76 |   , SPECIAL_DMA_LD_ADDR
 77 |   , SPECIAL_DMA_LD_WAIT
 78 |   , SPECIAL_VPM_WRITE
 79 |   , SPECIAL_HOST_INT
 80 |   , SPECIAL_TMU0_S
 81 | };
 82 | 
 83 | struct Reg {
 84 |   // What kind of register is it?
 85 |   RegTag tag;
 86 | 
 87 |   // Register identifier
 88 |   RegId regId;
 89 | };
 90 | 
 91 | inline bool operator==(Reg ra, Reg rb)
 92 |   { return ra.tag == rb.tag && ra.regId == rb.regId; }
 93 | 
 94 | // ============================================================================
 95 | // Conditions
 96 | // ============================================================================
 97 | 
 98 | enum Flag {
 99 |     ZS              // Zero set
100 |   , ZC              // Zero clear
101 |   , NS              // Negative set
102 |   , NC              // Negative clear
103 | };
104 | 
// Branch conditions

enum BranchCondTag {
  // Reduce vector of bits to a single bit ...
  COND_ALL,       // ... using AND reduction
  COND_ANY,       // ... using OR reduction

  // Unconditional
  COND_ALWAYS,
  COND_NEVER
};
113 | 
114 | struct BranchCond {
115 |   // ALL or ANY reduction?
116 |   BranchCondTag tag;
117 | 
118 |   // Condition flag
119 |   Flag flag;
120 | };
121 | 
// Assignment conditions

enum AssignCondTag {
  NEVER,    // Assignment never happens
  ALWAYS,   // Assignment always happens
  FLAG      // Assignment depends on a condition flag
};
129 | 
130 | struct AssignCond {
131 |   // Kind of assignment condition
132 |   AssignCondTag tag;
133 | 
134 |   // Condition flag
135 |   Flag flag;
136 | };
137 | 
138 | // ============================================================================
139 | // Immediates
140 | // ============================================================================
141 | 
// The kinds of immediate a load-immediate instruction can carry
enum ImmTag {
  IMM_INT32,     // 32-bit word
  IMM_FLOAT32,   // 32-bit float
  IMM_MASK       // 1 bit per vector element (0 to 0xffff)
};
148 | 
149 | struct Imm {
150 |   ImmTag tag;
151 | 
152 |   union {
153 |     int intVal;
154 |     float floatVal;
155 |   };
156 | };
157 | 
// The kinds of small immediate
enum SmallImmTag {
  SMALL_IMM,   // Small immediate
  ROT_ACC,     // Rotation amount taken from accumulator 5
  ROT_IMM      // Rotation amount 1..15
};
164 | 
165 | struct SmallImm {
166 |   // What kind of small immediate is it?
167 |   SmallImmTag tag;
168 |   
169 |   // Immediate value
170 |   int val;
171 | };
172 | 
// Is an operand a register or a small immediate?
enum RegOrImmTag {
  REG,   // Operand is a register
  IMM    // Operand is a small immediate
};
175 | 
176 | struct RegOrImm {
177 |   // Register id or small immediate?
178 |   RegOrImmTag tag;
179 | 
180 |   union {
181 |     // A register
182 |     Reg reg;
183 |     
184 |     // A small immediate
185 |     SmallImm smallImm;
186 |   };
187 | };
188 | 
189 | // ============================================================================
190 | // ALU operations
191 | // ============================================================================
192 | 
// ALU operators
enum ALUOp {
  NOP,            // No op

  // Opcodes for the 'add' ALU
  A_FADD,         // Floating-point add
  A_FSUB,         // Floating-point subtract
  A_FMIN,         // Floating-point min
  A_FMAX,         // Floating-point max
  A_FMINABS,      // Floating-point min of absolute values
  A_FMAXABS,      // Floating-point max of absolute values
  A_FtoI,         // Float to signed integer
  A_ItoF,         // Signed integer to float
  A_ADD,          // Integer add
  A_SUB,          // Integer subtract
  A_SHR,          // Integer shift right
  A_ASR,          // Integer arithmetic shift right
  A_ROR,          // Integer rotate right
  A_SHL,          // Integer shift left
  A_MIN,          // Integer min
  A_MAX,          // Integer max
  A_BAND,         // Bitwise and
  A_BOR,          // Bitwise or
  A_BXOR,         // Bitwise xor
  A_BNOT,         // Bitwise not
  A_CLZ,          // Count leading zeros
  A_V8ADDS,       // Add with saturation per 8-bit element
  A_V8SUBS,       // Subtract with saturation per 8-bit element

  // Opcodes for the 'mul' ALU
  M_FMUL,         // Floating-point multiply
  M_MUL24,        // 24-bit integer multiply
  M_V8MUL,        // Multiply per 8-bit element
  M_V8MIN,        // Min per 8-bit element
  M_V8MAX,        // Max per 8-bit element
  M_V8ADDS,       // Add with saturation per 8-bit element
  M_V8SUBS,       // Subtract with saturation per 8-bit element
  M_ROTATE        // Rotation (intermediate op-code)
};
233 | 
234 | inline bool isMulOp(ALUOp op)
235 | {
236 |   return op == M_FMUL   || op == M_MUL24 || op == M_V8MUL  ||
237 |          op == M_V8MIN  || op == M_V8MAX || op == M_V8ADDS ||
238 |          op == M_V8SUBS || op == M_ROTATE;
239 | }
240 | 
241 | // ============================================================================
242 | // Branch targets
243 | // ============================================================================
244 | 
245 | struct BranchTarget {
246 |   // Branch is absolute or relative to PC+4
247 |   bool relative;
248 | 
249 |   // Plus value from register file A (optional)
250 |   bool useRegOffset;
251 |   RegId regOffset;
252 | 
253 |   // Plus 32-bit immediate value
254 |   int immOffset;
255 | };
256 | 
// We allow labels for branching, represented by integer identifiers.  These
// will be translated to actual branch targets in a linking phase.
// Fresh identifiers are obtained with freshLabel().

typedef int Label;
261 | 
262 | // ============================================================================
263 | // Loads/store buffering
264 | // ============================================================================
265 | 
// Each QPU has two load buffers and two store buffers reserved in the
// VPM (shared local) memory.  Having two of each allows double
// buffering; the two halves of a double buffer are called A and B.

enum BufferAorB {
  A,   // The A buffer
  B    // The B buffer
};
271 | 
272 | // ============================================================================
273 | // Instructions
274 | // ============================================================================
275 | 
// QPU instruction tags
enum InstrTag {
  LI,             // Load immediate
  ALU,            // ALU operation
  BR,             // Conditional branch to target
  END,            // Program end (halt)

  // ==================================================
  // The remainder are intermediate-language constructs
  // ==================================================

  BRL,            // Conditional branch to label
  LAB,            // Label
  NO_OP,          // No-op

  // Load instructions
  // -----------------
  //
  // A memory load is implemented by four instructions.

  LD1,            // First, DMA vector in DRAM into VPM (local) memory
  LD2,            // Second, wait for DMA completion
  LD3,            // Third, setup a read from VPM memory
  LD4,            // Fourth, transfer from VPM into given register

  // Rules for loads:
  //   * An LD1 must be followed (eventually) by a corresponding LD2
  //   * Ditto for LD3 and LD4
  //   * There must be at least 3 instructions between an LD3 and an LD4
  //   * An LD1/LD2 need not be followed by a corresponding LD3/LD4,
  //     thus can be issued speculatively
  //   * A new LD1 can be issued after an LD2, allowing double buffering

  // Store instructions
  // ------------------
  //
  // A memory store is performed by three instructions.

  ST1,            // First, write the vector to VPM (local) memory.
  ST2,            // Second, DMA from the VPM out to DRAM.
  ST3,            // Third, wait for DMA to complete.

  // Semaphores
  // ----------

  SINC,           // Increment semaphore
  SDEC,           // Decrement semaphore

  // Send IRQ to host
  // ----------------

  IRQ,

  // Load receive via TMU
  // --------------------

  RECV,
  TMU0_TO_ACC4,

  // Print instructions
  // ------------------

  PRS,            // Print string
  PRI,            // Print integer
  PRF             // Print float
};
342 | 
343 | // QPU instructions
344 | struct Instr {
345 |   // What kind of instruction is it?
346 |   InstrTag tag;
347 | 
348 |   union {
349 |     // Load immediate
350 |     struct { bool setFlags; AssignCond cond; Reg dest; Imm imm; } LI;
351 | 
352 |     // ALU operation
353 |     struct { bool setFlags; AssignCond cond; Reg dest;
354 |              RegOrImm srcA; ALUOp op; RegOrImm srcB; } ALU;
355 | 
356 |     // Conditional branch (to target)
357 |     struct { BranchCond cond; BranchTarget target; } BR;
358 | 
359 |     // ==================================================
360 |     // The remainder are intermediate-language constructs
361 |     // ==================================================
362 | 
363 |     // Conditional branch (to label)
364 |     struct { BranchCond cond; Label label; } BRL;
365 | 
366 |     // Labels, denoting branch targets
367 |     Label label;
368 | 
369 |     // Load instructions
370 |     // -----------------
371 | 
372 |     // DMA vector at address specifed by register from DRAM into VPM
373 |     // (local) memory.  To allow double buffering, i.e. the VPM to be
374 |     // filled by DMA while also being read by a QPU, a flag is used to
375 |     // indicate which one of two buffers in the VPM to use for the load
376 |     struct { Reg addr; BufferAorB buffer; } LD1;
377 | 
378 |     // LD2 (wait for DMA read completion) has no parameters
379 | 
380 |     // Setup a read from VPM memory.  A flag indicates which one of
381 |     // two buffers in the VPM is being used for the load
382 |     struct { BufferAorB buffer; } LD3;
383 |     
384 |     // Transfer from VPM into given register
385 |     struct { Reg dest; } LD4;
386 | 
387 |     // Store instructions
388 |     // ------------------
389 | 
390 |     // Write the vector to VPM (local) memory using specified buffer
391 |     struct { Reg data; BufferAorB buffer; } ST1;
392 | 
393 |     // DMA from the VPM out to DRAM at the address in given register.
394 |     struct { Reg addr; BufferAorB buffer; } ST2;
395 | 
396 |     // ST3 (wait for DMA write completion) has no parameters
397 | 
398 |     // Semaphores
399 |     // ----------
400 | 
401 |     // Semaphore id (range 0..15)
402 |     int semaId;
403 | 
404 |     // Load receive via TMU
405 |     // --------------------
406 | 
407 |     // Destination register for load receive
408 |     struct { Reg dest; } RECV;
409 | 
410 |     // Print instructions
411 |     // ------------------
412 | 
413 |     // Print string
414 |     const char* PRS;
415 | 
416 |     // Print integer
417 |     Reg PRI;
418 | 
419 |     // Print float
420 |     Reg PRF;
421 |   };
422 | };
423 | 
// Instruction id: also the index of an instruction
// in the main instruction sequence
typedef int InstrId;
427 | 
428 | // ============================================================================
429 | // Handy functions
430 | // ============================================================================
431 | 
// Determine if instruction is a conditional assignment, i.e. an LI or
// ALU instruction whose assignment condition is not ALWAYS
bool isCondAssign(Instr* instr);
434 | 
435 | // Make a no-op
436 | inline Instr nop()
437 |   { Instr instr; instr.tag = NO_OP; return instr; }
438 | 
// Instruction constructors.  Each one returns an instruction whose
// assignment condition is ALWAYS and which does not set flags:
//
//   genLI      dst := 32-bit integer immediate i
//   genMove    dst := src            (as bitwise-or of src with itself)
//   genOR      dst := srcA | srcB
//   genLShift  dst := srcA << n      (n must be in 0..15)
//   genIncr    dst := srcA + n       (n must be in 0..15)
Instr genLI(Reg dst, int i);
Instr genMove(Reg dst, Reg src);
Instr genOR(Reg dst, Reg srcA, Reg srcB);
Instr genLShift(Reg dst, Reg srcA, int n);
Instr genIncr(Reg dst, Reg srcA, int n);

// Is last instruction in a basic block?  (True for BRL, BR and END.)
bool isLast(Instr instr);
448 | 
449 | // =========================
450 | // Fresh variable generation
451 | // =========================
452 | 
// Obtain a fresh register, tagged REG_A
Reg freshReg();
454 | 
455 | // ======================
456 | // Fresh label generation
457 | // ======================
458 | 
// Obtain a fresh label
Label freshLabel();

// Number of fresh labels used (the current value of the label counter)
int getFreshLabelCount();

// Reset fresh label generator: restart numbering from zero, or from
// the given value
void resetFreshLabelGen();
void resetFreshLabelGen(int val);
468 | 
469 | #endif
470 | 


--------------------------------------------------------------------------------
/Lib/VideoCore/Invoke.cpp:
--------------------------------------------------------------------------------
 1 | #ifdef QPU_MODE
 2 | 
 3 | #include "VideoCore/Invoke.h"
 4 | #include "VideoCore/Mailbox.h"
 5 | #include "VideoCore/VideoCore.h"
 6 | 
// Timeout value handed to execute_qpu() when launching a kernel.
// NOTE(review): presumably milliseconds -- confirm against the mailbox
// interface.
#define QPU_TIMEOUT 10000
 8 | 
 9 | void invoke(
10 |   int numQPUs,
11 |   SharedArray<uint32_t> &codeMem,
12 |   int qpuCodeMemOffset,
13 |   Seq<int32_t>* params)
14 | {
15 |   // Open mailbox for talking to VideoCore
16 |   int mb = getMailbox();
17 | 
18 |   // Number of 32-bit words needed for kernel code & parameters
19 |   int numWords = qpuCodeMemOffset + (params->numElems+2)*numQPUs + 2*numQPUs;
20 |   assert(numWords < codeMem.size);
21 | 
22 |   // Pointer to start of code
23 |   uint32_t* qpuCodePtr = codeMem.getPointer();
24 | 
25 |   // Copy parameters to instruction memory
26 |   int offset = qpuCodeMemOffset;
27 |   uint32_t** paramsPtr = new uint32_t* [numQPUs];
28 |   for (int i = 0; i < numQPUs; i++) {
29 |     paramsPtr[i] = qpuCodePtr + offset;
30 |     codeMem[offset++] = (uint32_t) i; // Unique QPU ID
31 |     codeMem[offset++] = (uint32_t) numQPUs; // QPU count
32 |     for (int j = 0; j < params->numElems; j++)
33 |       codeMem[offset++] = params->elems[j];
34 |   }
35 | 
36 |   // Copy launch messages
37 |   uint32_t* launchMsgsPtr = qpuCodePtr + offset;
38 |   for (int i = 0; i < numQPUs; i++) {
39 |     codeMem[offset++] = (uint32_t) paramsPtr[i];
40 |     codeMem[offset++] = (uint32_t) qpuCodePtr;
41 |   }
42 | 
43 |   // Launch QPUs
44 |   unsigned result = 
45 |     execute_qpu(mb, numQPUs, (uint32_t) launchMsgsPtr, 1, QPU_TIMEOUT);
46 | 
47 |   if (result != 0) {
48 |     printf("Failed to invoke kernel on QPUs\n");
49 |   }
50 | }
51 | 
52 | #endif
53 | 


--------------------------------------------------------------------------------
/Lib/VideoCore/Invoke.h:
--------------------------------------------------------------------------------
 1 | #ifdef QPU_MODE
 2 | 
 3 | #ifndef _INVOKE_H_
 4 | #define _INVOKE_H_
 5 | 
 6 | #include "Common/Seq.h"
 7 | #include "VideoCore/SharedArray.h"
 8 | #include <stdint.h>
 9 | 
10 | void invoke(
11 |   int numQPUs,
12 |   SharedArray<uint32_t> &codeMem,
13 |   int qpuCodeMemOffset,
14 |   Seq<int32_t>* params);
15 | 
16 | #endif
17 | #endif
18 | 


--------------------------------------------------------------------------------
/Lib/VideoCore/Mailbox.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright (c) 2012, Broadcom Europe Ltd.
  3 | All rights reserved.
  4 | 
  5 | Redistribution and use in source and binary forms, with or without
  6 | modification, are permitted provided that the following conditions are met:
  7 |     * Redistributions of source code must retain the above copyright
  8 |       notice, this list of conditions and the following disclaimer.
  9 |     * Redistributions in binary form must reproduce the above copyright
 10 |       notice, this list of conditions and the following disclaimer in the
 11 |       documentation and/or other materials provided with the distribution.
 12 |     * Neither the name of the copyright holder nor the
 13 |       names of its contributors may be used to endorse or promote products
 14 |       derived from this software without specific prior written permission.
 15 | 
 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 17 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 18 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
 20 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 21 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 22 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 23 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 24 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 25 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 26 | */
 27 | 
 28 | #include <stdio.h>
 29 | #include <string.h>
 30 | #include <stdlib.h>
 31 | #include <fcntl.h>
 32 | #include <unistd.h>
 33 | #include <assert.h>
 34 | #include <stdint.h>
 35 | #include <sys/mman.h>
 36 | #include <sys/ioctl.h>
 37 | 
 38 | #include "Mailbox.h"
 39 | 
 40 | #define PAGE_SIZE (4*1024)
 41 | 
 42 | void *mapmem(unsigned base, unsigned size)
 43 | {
 44 |    int mem_fd;
 45 |    unsigned offset = base % PAGE_SIZE;
 46 |    base = base - offset;
 47 |    /* open /dev/mem */
 48 |    if ((mem_fd = open("/dev/mem", O_RDWR|O_SYNC) ) < 0) {
 49 |       printf("can't open /dev/mem\nThis program should be run as root. Try prefixing command with: sudo\n");
 50 |       exit (-1);
 51 |    }
 52 |    void *mem = mmap(
 53 |       0,
 54 |       size,
 55 |       PROT_READ|PROT_WRITE,
 56 |       MAP_SHARED/*|MAP_FIXED*/,
 57 |       mem_fd,
 58 |       base);
 59 | #ifdef DEBUG
 60 |    printf("base=0x%x, mem=%p\n", base, mem);
 61 | #endif
 62 |    if (mem == MAP_FAILED) {
 63 |       printf("mmap error %p\n", mem);
 64 |       exit (-1);
 65 |    }
 66 |    close(mem_fd);
 67 |    return (char *)mem + offset;
 68 | }
 69 | 
 70 | void unmapmem(void *addr, unsigned size)
 71 | {
 72 |    int s = munmap(addr, size);
 73 |    if (s != 0) {
 74 |       printf("munmap error %d\n", s);
 75 |       exit (-1);
 76 |    }
 77 | }
 78 | 
 79 | /*
 80 |  * use ioctl to send mbox property message
 81 |  */
 82 | 
 83 | static int mbox_property(int file_desc, void *buf)
 84 | {
 85 |    int ret_val = ioctl(file_desc, IOCTL_MBOX_PROPERTY, buf);
 86 | 
 87 |    if (ret_val < 0) {
 88 |       printf("ioctl_set_msg failed:%d\n", ret_val);
 89 |    }
 90 | 
 91 | #ifdef DEBUG
 92 |    unsigned *p = (unsigned*) buf; int i; unsigned size = *(unsigned *)buf;
 93 |    for (i=0; i<size/4; i++)
 94 |       printf("%04x: 0x%08x\n", i * (unsigned) sizeof(*p), p[i]);
 95 | #endif
 96 |    return ret_val;
 97 | }
 98 | 
 99 | unsigned mem_alloc(int file_desc, unsigned size, unsigned align, unsigned flags)
100 | {
101 |    unsigned i=0;
102 |    unsigned p[32];
103 |    p[i++] = 0; // size
104 |    p[i++] = 0x00000000; // process request
105 | 
106 |    p[i++] = 0x3000c; // (the tag id)
107 |    p[i++] = 12; // (size of the buffer)
108 |    p[i++] = 12; // (size of the data)
109 |    p[i++] = size; // (num bytes? or pages?)
110 |    p[i++] = align; // (alignment)
111 |    p[i++] = flags; // (MEM_FLAG_L1_NONALLOCATING)
112 | 
113 |    p[i++] = 0x00000000; // end tag
114 |    p[0] = i * (unsigned) sizeof(*p); // actual size
115 | 
116 |    mbox_property(file_desc, p);
117 |    return p[5];
118 | }
119 | 
120 | unsigned mem_free(int file_desc, unsigned handle)
121 | {
122 |    unsigned i=0;
123 |    unsigned p[32];
124 |    p[i++] = 0; // size
125 |    p[i++] = 0x00000000; // process request
126 | 
127 |    p[i++] = 0x3000f; // (the tag id)
128 |    p[i++] = 4; // (size of the buffer)
129 |    p[i++] = 4; // (size of the data)
130 |    p[i++] = handle;
131 | 
132 |    p[i++] = 0x00000000; // end tag
133 |    p[0] = i * (unsigned) sizeof(*p); // actual size
134 | 
135 |    mbox_property(file_desc, p);
136 |    return p[5];
137 | }
138 | 
139 | unsigned mem_lock(int file_desc, unsigned handle)
140 | {
141 |    unsigned i=0;
142 |    unsigned p[32];
143 |    p[i++] = 0; // size
144 |    p[i++] = 0x00000000; // process request
145 | 
146 |    p[i++] = 0x3000d; // (the tag id)
147 |    p[i++] = 4; // (size of the buffer)
148 |    p[i++] = 4; // (size of the data)
149 |    p[i++] = handle;
150 | 
151 |    p[i++] = 0x00000000; // end tag
152 |    p[0] = i * (unsigned) sizeof(*p); // actual size
153 | 
154 |    mbox_property(file_desc, p);
155 |    return p[5];
156 | }
157 | 
158 | unsigned mem_unlock(int file_desc, unsigned handle)
159 | {
160 |    unsigned i=0;
161 |    unsigned p[32];
162 |    p[i++] = 0; // size
163 |    p[i++] = 0x00000000; // process request
164 | 
165 |    p[i++] = 0x3000e; // (the tag id)
166 |    p[i++] = 4; // (size of the buffer)
167 |    p[i++] = 4; // (size of the data)
168 |    p[i++] = handle;
169 | 
170 |    p[i++] = 0x00000000; // end tag
171 |    p[0] = i * (unsigned) sizeof(*p); // actual size
172 | 
173 |    mbox_property(file_desc, p);
174 |    return p[5];
175 | }
176 | 
177 | unsigned execute_code(int file_desc, unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5)
178 | {
179 |    unsigned int i=0;
180 |    unsigned p[32];
181 |    p[i++] = 0; // size
182 |    p[i++] = 0x00000000; // process request
183 | 
184 |    p[i++] = 0x30010; // (the tag id)
185 |    p[i++] = 28; // (size of the buffer)
186 |    p[i++] = 28; // (size of the data)
187 |    p[i++] = code;
188 |    p[i++] = r0;
189 |    p[i++] = r1;
190 |    p[i++] = r2;
191 |    p[i++] = r3;
192 |    p[i++] = r4;
193 |    p[i++] = r5;
194 | 
195 |    p[i++] = 0x00000000; // end tag
196 |    p[0] = i * (unsigned) sizeof(*p); // actual size
197 | 
198 |    mbox_property(file_desc, p);
199 |    return p[5];
200 | }
201 | 
202 | unsigned qpu_enable(int file_desc, unsigned enable)
203 | {
204 |    unsigned i=0;
205 |    unsigned p[32];
206 | 
207 |    p[i++] = 0; // size
208 |    p[i++] = 0x00000000; // process request
209 | 
210 |    p[i++] = 0x30012; // (the tag id)
211 |    p[i++] = 4; // (size of the buffer)
212 |    p[i++] = 4; // (size of the data)
213 |    p[i++] = enable;
214 | 
215 |    p[i++] = 0x00000000; // end tag
216 |    p[0] = i * (unsigned) sizeof(*p); // actual size
217 | 
218 |    mbox_property(file_desc, p);
219 |    return p[5];
220 | }
221 | 
222 | unsigned execute_qpu(int file_desc, unsigned num_qpus, unsigned control, unsigned noflush, unsigned timeout) {
223 |    unsigned i=0;
224 |    unsigned p[32];
225 | 
226 |    p[i++] = 0; // size
227 |    p[i++] = 0x00000000; // process request
228 |    p[i++] = 0x30011; // (the tag id)
229 |    p[i++] = 16; // (size of the buffer)
230 |    p[i++] = 16; // (size of the data)
231 |    p[i++] = num_qpus;
232 |    p[i++] = control;
233 |    p[i++] = noflush;
234 |    p[i++] = timeout; // ms
235 | 
236 |    p[i++] = 0x00000000; // end tag
237 |    p[0] = i * (unsigned) sizeof(*p); // actual size
238 | 
239 |    mbox_property(file_desc, p);
240 |    return p[5];
241 | }
242 | 
243 | int mbox_open() {
244 |    int file_desc;
245 | 
246 |    // open a char device file used for communicating with kernel mbox driver
247 |    file_desc = open(DEVICE_FILE_NAME, 0);
248 |    if (file_desc < 0) {
249 |       printf("Can't open device file: %s\n", DEVICE_FILE_NAME);
250 |       printf("Try creating a device file with: sudo mknod %s c %d 0\n", DEVICE_FILE_NAME, MAJOR_NUM);
251 |       exit(-1);
252 |    }
253 |    return file_desc;
254 | }
255 | 
// Close a descriptor obtained from mbox_open().
void mbox_close(int file_desc)
{
  close(file_desc);
}
259 | 


--------------------------------------------------------------------------------
/Lib/VideoCore/Mailbox.h:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright (c) 2012, Broadcom Europe Ltd.
 3 | All rights reserved.
 4 | 
 5 | Redistribution and use in source and binary forms, with or without
 6 | modification, are permitted provided that the following conditions are met:
 7 |     * Redistributions of source code must retain the above copyright
 8 |       notice, this list of conditions and the following disclaimer.
 9 |     * Redistributions in binary form must reproduce the above copyright
10 |       notice, this list of conditions and the following disclaimer in the
11 |       documentation and/or other materials provided with the distribution.
12 |     * Neither the name of the copyright holder nor the
13 |       names of its contributors may be used to endorse or promote products
14 |       derived from this software without specific prior written permission.
15 | 
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 | */
28 | 
29 | #ifndef _MAILBOX_H_
30 | #define _MAILBOX_H_
31 | 
32 | #include <linux/ioctl.h>
33 | 
34 | #define MAJOR_NUM 100
35 | #define IOCTL_MBOX_PROPERTY _IOWR(MAJOR_NUM, 0, char *)
36 | #define DEVICE_FILE_NAME "/dev/vcio"
37 | 
38 | int mbox_open();
39 | void mbox_close(int file_desc);
40 | 
41 | unsigned get_version(int file_desc);
42 | unsigned mem_alloc(int file_desc, unsigned size, unsigned align, unsigned flags);
43 | unsigned mem_free(int file_desc, unsigned handle);
44 | unsigned mem_lock(int file_desc, unsigned handle);
45 | unsigned mem_unlock(int file_desc, unsigned handle);
46 | void *mapmem(unsigned base, unsigned size);
47 | void unmapmem(void *addr, unsigned size);
48 | 
49 | unsigned execute_code(int file_desc, unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5);
50 | unsigned execute_qpu(int file_desc, unsigned num_qpus, unsigned control, unsigned noflush, unsigned timeout);
51 | unsigned qpu_enable(int file_desc, unsigned enable);
52 | 
53 | #define BUS_TO_PHYS(addr) (((addr)) & ~0xC0000000)
54 | 
55 | #endif
56 | 


--------------------------------------------------------------------------------
/Lib/VideoCore/SharedArray.h:
--------------------------------------------------------------------------------
  1 | #ifndef _SHAREDARRAY_H_
  2 | #define _SHAREDARRAY_H_
  3 | 
  4 | #include <stdint.h>
  5 | #include <stdio.h>
  6 | #include <assert.h>
  7 | #include "VideoCore/Mailbox.h"
  8 | #include "VideoCore/VideoCore.h"
  9 | 
 10 | #ifdef EMULATION_MODE
 11 | 
 12 | // ============================================================================
 13 | // Emulation mode
 14 | // ============================================================================
 15 | 
 16 | // When in EMULATION_MODE allocate memory from a pre-allocated pool.
 17 | 
 18 | #include "Target/Emulator.h"
 19 | 
 20 | // Implementation
 21 | template <typename T> class SharedArray {
 22 |  private:
 23 |    // Disallow assignment
 24 |    void operator=(SharedArray<T> a);
 25 |    void operator=(SharedArray<T>& a);
 26 | 
 27 |  public:
 28 | 
 29 |   uint32_t address;
 30 |   uint32_t size;
 31 | 
 32 |   // Allocation
 33 |   void alloc(uint32_t n) {
 34 |     if (emuHeap == NULL) {
 35 |       emuHeapEnd = 0;
 36 |       emuHeap = new int32_t [EMULATOR_HEAP_SIZE];
 37 |     }
 38 |     if (emuHeapEnd+n >= EMULATOR_HEAP_SIZE) {
 39 |       printf("QPULib: heap overflow (increase EMULATOR_HEAP_SIZE)\n");
 40 |       abort();
 41 |     }
 42 |     else {
 43 |       address = emuHeapEnd;
 44 |       emuHeapEnd += n;
 45 |       size = n;
 46 |     }
 47 |   }
 48 | 
 49 |   // Constructor
 50 |   SharedArray(uint32_t n) {
 51 |     alloc(n);
 52 |   }
 53 | 
 54 |   uint32_t getAddress() {
 55 |     return address*4;
 56 |   }
 57 | 
 58 |   T* getPointer() {
 59 |     return (T*) &emuHeap[address];
 60 |   }
 61 | 
 62 |   // Deallocation (does nothing in emulation mode)
 63 |   void dealloc() {}
 64 | 
 65 |   // Subscript
 66 |   T& operator[] (int i) {
 67 |     if (address+i >= EMULATOR_HEAP_SIZE) {
 68 |       printf("QPULib: accessing off end of heap\n");
 69 |       exit(EXIT_FAILURE);
 70 |     }
 71 |     else
 72 |       return (T&) emuHeap[address+i];
 73 |   }
 74 | };
 75 | 
 76 | #else
 77 | 
 78 | // ============================================================================
 79 | // Not emulation mode
 80 | // ============================================================================
 81 | 
 82 | #define GPU_MEM_FLG 0xC // cached=0xC; direct=0x4
 83 | #define GPU_MEM_MAP 0x0 // cached=0x0; direct=0x20000000
 84 | 
 85 | template <typename T> class SharedArray {
 86 |  private:
 87 |   // Disallow assignment & copying
 88 |   void operator=(SharedArray<T> a);
 89 |   void operator=(SharedArray<T>& a);
 90 |   SharedArray(const SharedArray<T>& a);
 91 | 
 92 |   uint32_t handle;
 93 |   void* arm_base;
 94 |   void* gpu_base;
 95 | 
 96 |  public:
 97 |   uint32_t size;
 98 | 
 99 |   /* Allocate GPU memory and map it into ARM address space */
100 |   void alloc(uint32_t n) {
101 |     // Mailbox, for talking to VideoCore
102 |     int mb = getMailbox();
103 | 
104 |     // Allocate memory
105 |     handle = mem_alloc(mb, n*4, 4096, GPU_MEM_FLG);
106 |     if (!handle) {
107 |       fprintf(stderr, "Failed to allocate GPU memory.");
108 |       exit(EXIT_FAILURE);
109 |     }
110 |     size = n;
111 |     gpu_base = (void*) mem_lock(mb, handle);
112 |     arm_base = mapmem(BUS_TO_PHYS((uint32_t) gpu_base+GPU_MEM_MAP), n*4);
113 |   }
114 | 
115 |   // Constructor
116 |   SharedArray() {
117 |     size = handle = 0;
118 |     arm_base = gpu_base = NULL;
119 |   }
120 | 
121 |   // Constructor
122 |   SharedArray(uint32_t n) {
123 |     size = handle = 0;
124 |     alloc(n);
125 |   }  
126 | 
127 |   uint32_t getAddress() {
128 |     return (uint32_t) gpu_base;
129 |   }
130 | 
131 |   T* getPointer() {
132 |     return (T*) gpu_base;
133 |   }
134 | 
135 |   // Deallocation
136 |   void dealloc() {
137 |     // Mailbox, for talking to VideoCore
138 |     int mb = getMailbox();
139 | 
140 |     // Free memory
141 |     if (arm_base) unmapmem(arm_base, size);
142 |     if (handle) {
143 |       mem_unlock(mb, handle);
144 |       mem_free(mb, handle);
145 |     }
146 |     size = handle = 0;
147 |     gpu_base = NULL;
148 |     arm_base = NULL;
149 |   }
150 | 
151 |   // Subscript
152 |   inline T& operator[] (int i) {
153 |     uint32_t* base = (uint32_t*) arm_base;
154 |     return (T&) base[i];
155 |   }
156 | 
157 |   // Destructor
158 |   ~SharedArray() {
159 |     if (arm_base != NULL) dealloc();
160 |   }
161 | };
162 | 
163 | #endif
164 | 
165 | #endif
166 | 


--------------------------------------------------------------------------------
/Lib/VideoCore/VideoCore.cpp:
--------------------------------------------------------------------------------
 1 | #ifdef QPU_MODE
 2 | 
 3 | #include <assert.h>
 4 | #include <stdio.h>
 5 | #include <stdlib.h>
 6 | #include "VideoCore/VideoCore.h"
 7 | #include "VideoCore/Mailbox.h"
 8 | 
 9 | // Globals
10 | int mailbox = -1;
11 | int numQPUUsers = 0;
12 | 
13 | // Get mailbox (open if not already opened)
14 | int getMailbox()
15 | {
16 |   if (mailbox < 0) mailbox = mbox_open();
17 |   return mailbox;
18 | }
19 | 
20 | // Enable QPUs (if not already enabled)
21 | void enableQPUs()
22 | {
23 |   int mb = getMailbox();
24 |   if (numQPUUsers == 0) {
25 |     int qpu_enabled = !qpu_enable(mb, 1);
26 |     if (!qpu_enabled) {
27 |       printf("Unable to enable QPUs. Check your firmware is latest.");
28 |       exit(EXIT_FAILURE);
29 |     }
30 |   }
31 |   numQPUUsers++;
32 | }
33 | 
34 | // Disable QPUs
35 | void disableQPUs()
36 | {
37 |   assert(numQPUUsers > 0);
38 |   int mb = getMailbox();
39 |   numQPUUsers--;
40 |   if (numQPUUsers == 0) {
41 |     qpu_enable(mb, 0);
42 |   }
43 | }
44 | 
45 | 
46 | #endif
47 | 


--------------------------------------------------------------------------------
/Lib/VideoCore/VideoCore.h:
--------------------------------------------------------------------------------
 1 | #ifdef QPU_MODE
 2 | 
 3 | #ifndef _VIDEOCORE_H_
 4 | #define _VIDEOCORE_H_
 5 | 
 6 | // Globals
 7 | extern int mailbox;
 8 | extern int numQPUUsers;
 9 | 
10 | // Operations
11 | int getMailbox();
12 | void enableQPUs();
13 | void disableQPUs();
14 | 
15 | #endif
16 | #endif
17 | 


--------------------------------------------------------------------------------
/Tests/AutoTest.cpp:
--------------------------------------------------------------------------------
  1 | #include "QPULib.h"
  2 | #include "Common/Seq.h"
  3 | #include "Source/Gen.h"
  4 | #include "Source/Pretty.h"
  5 | 
  6 | // ============================================================================
  7 | // Program-generator options
  8 | // ============================================================================
  9 | 
 10 | GenOptions basicGenOpts()
 11 | {
 12 |   GenOptions opts;
 13 |   opts.depth           = 3;
 14 |   opts.length          = 4;
 15 |   opts.numIntArgs      = 4;
 16 |   opts.numFloatArgs    = 0;
 17 |   opts.numPtrArgs      = 0;
 18 |   opts.numPtr2Args     = 0;
 19 |   opts.numIntVars      = 4;
 20 |   opts.numFloatVars    = 0;
 21 |   opts.loopBound       = 5;
 22 |   opts.genFloat        = false;
 23 |   opts.genRotate       = false;
 24 |   opts.genDeref        = false;
 25 |   opts.genDeref2       = false;
 26 |   opts.derefOffsetMask = 0;
 27 |   opts.genStrided      = false;
 28 |   return opts;
 29 | }
 30 | 
 31 | // ============================================================================
 32 | // Helpers
 33 | // ============================================================================
 34 | 
 35 | void printCharSeq(Seq<char>* s)
 36 | {
 37 |   for (int i = 0; i < s->numElems; i++)
 38 |     printf("%c", s->elems[i]);
 39 | }
 40 | 
 41 | // ============================================================================
 42 | // Main
 43 | // ============================================================================
 44 | 
 45 | int main()
 46 | {
 47 |   // Seed random generator
 48 |   srand(0);
 49 | 
 50 |   // Basic options
 51 |   GenOptions opts = basicGenOpts();
 52 | 
 53 |   const int numTests = 10000;
 54 |   for (int test = 0; test < numTests; test++) {
 55 |     astHeap.clear();
 56 |     resetFreshLabelGen();
 57 | 
 58 |     int numVars, numEmuVars;
 59 |     Stmt* s = progGen(&opts, &numVars);
 60 |     //pretty(s);
 61 | 
 62 |     Seq<Instr> targetCode;
 63 |     resetFreshVarGen(numVars);
 64 |     compileKernel(&targetCode, s);
 65 |     numEmuVars = getFreshVarCount();
 66 |     Seq<int32_t> params;
 67 |     params.clear();
 68 |     for (int i = 0; i < opts.numIntArgs; i++) {
 69 |       params.append(genIntLit());
 70 |     }
 71 | 
 72 |     Seq<char> interpOut, emuOut;
 73 |     interpreter(1, s, numVars, &params, &interpOut);
 74 |     emulate(1, &targetCode, numEmuVars, &params, &emuOut);
 75 | 
 76 |     bool differs = false;
 77 |     if (interpOut.numElems != emuOut.numElems)
 78 |       differs = true;
 79 |     else {
 80 |       for (int i = 0; i < interpOut.numElems; i++)
 81 |         if (interpOut.elems[i] != emuOut.elems[i]) { differs = true; break; }
 82 |     }
 83 | 
 84 |     if (differs) {
 85 |       printf("Failed test %i.\n", test);
 86 |       pretty(s);
 87 |       printf("Params: ");
 88 |       for (int i = 0; i < params.numElems; i++) {
 89 |         printf("%i ", params.elems[i]);
 90 |       }
 91 |       printf("\nTarget emulator says:\n");
 92 |       printCharSeq(&emuOut);
 93 |       printf("\nSource interpreter says:\n");
 94 |       printCharSeq(&interpOut);
 95 |       printf("\n");
 96 |       return 0;
 97 |     }
 98 |     else
 99 |       printf("%i\r", test);
100 |   }
101 |   printf("OK, passed %i tests\n", numTests);
102 | 
103 |   return 0;
104 | }
105 | 


--------------------------------------------------------------------------------
/Tests/GCD.cpp:
--------------------------------------------------------------------------------
 1 | #include <stdlib.h>
 2 | #include "QPULib.h"
 3 | 
 4 | void gcd(Ptr<Int> p, Ptr<Int> q, Ptr<Int> r)
 5 | {
 6 |   Int a = *p;
 7 |   Int b = *q;
 8 |   While (any(a != b))
 9 |     Where (a > b)
10 |       a = a-b;
11 |     End
12 |     Where (a < b)
13 |       b = b-a;
14 |     End
15 |   End
16 |   *r = a;
17 | }
18 | 
19 | int main()
20 | {
21 |   // Construct kernel
22 |   auto k = compile(gcd);
23 | 
24 |   // Allocate and initialise arrays shared between ARM and GPU
25 |   SharedArray<int> a(16), b(16), r(16);
26 |   srand(0);
27 |   for (int i = 0; i < 16; i++) {
28 |     a[i] = 100 + (rand() % 100);
29 |     b[i] = 100 + (rand() % 100);
30 |   }
31 | 
32 |   // Invoke the kernel and display the result
33 |   k(&a, &b, &r);
34 |   for (int i = 0; i < 16; i++)
35 |     printf("gcd(%i, %i) = %i\n", a[i], b[i], r[i]);
36 |   
37 |   return 0;
38 | }
39 | 


--------------------------------------------------------------------------------
/Tests/HeatMap.cpp:
--------------------------------------------------------------------------------
  1 | #include <QPULib.h>
  2 | #include <stdio.h>
  3 | #include <stdlib.h>
  4 | #include <sys/time.h>
  5 | 
  6 | // Heat dissipation constant
  7 | #define K 0.25
  8 | 
  9 | // ============================================================================
 10 | // Vector version
 11 | // ============================================================================
 12 | 
 13 | struct Cursor {
 14 |   Ptr<Float> addr;
 15 |   Float prev, current, next;
 16 | 
 17 |   void init(Ptr<Float> p) {
 18 |     gather(p);
 19 |     current = 0;
 20 |     addr = p+16;
 21 |   }
 22 | 
 23 |   void prime() {
 24 |     receive(next);
 25 |     gather(addr);
 26 |   }
 27 | 
 28 |   void advance() {
 29 |     addr = addr+16;
 30 |     prev = current;
 31 |     gather(addr);
 32 |     current = next;
 33 |     receive(next);
 34 |   }
 35 | 
 36 |   void finish() {
 37 |     receive(next);
 38 |   }
 39 | 
 40 |   void shiftLeft(Float& result) {
 41 |     result = rotate(current, 15);
 42 |     Float nextRot = rotate(next, 15);
 43 |     Where (index() == 15)
 44 |       result = nextRot;
 45 |     End
 46 |   }
 47 | 
 48 |   void shiftRight(Float& result) {
 49 |     result = rotate(current, 1);
 50 |     Float prevRot = rotate(prev, 1);
 51 |     Where (index() == 0)
 52 |       result = prevRot;
 53 |     End
 54 |   }
 55 | };
 56 | 
 57 | void step(Ptr<Float> map, Ptr<Float> mapOut, Int pitch, Int width, Int height)
 58 | {
 59 |   Cursor row[3];
 60 |   map = map + pitch*me() + index();
 61 | 
 62 |   // Skip first row of output map
 63 |   mapOut = mapOut + pitch;
 64 | 
 65 |   For (Int y = me(), y < height, y=y+numQPUs())
 66 | 
 67 |     // Point p to the output row
 68 |     Ptr<Float> p = mapOut + y*pitch;
 69 | 
 70 |     // Initilaise three cursors for the three input rows
 71 |     for (int i = 0; i < 3; i++) row[i].init(map + i*pitch);
 72 |     for (int i = 0; i < 3; i++) row[i].prime();
 73 | 
 74 |     // Compute one output row
 75 |     For (Int x = 0, x < width, x=x+16)
 76 | 
 77 |       for (int i = 0; i < 3; i++) row[i].advance();
 78 | 
 79 |       Float left[3], right[3];
 80 |       for (int i = 0; i < 3; i++) {
 81 |         row[i].shiftLeft(right[i]);
 82 |         row[i].shiftRight(left[i]);
 83 |       }
 84 | 
 85 |       Float sum = left[0] + row[0].current + right[0] +
 86 |                   left[1] +                  right[1] +
 87 |                   left[2] + row[2].current + right[2];
 88 | 
 89 |       store(row[1].current - K * (row[1].current - sum * 0.125), p);
 90 |       p = p + 16;
 91 | 
 92 |     End
 93 | 
 94 |     // Cursors are finished for this row
 95 |     for (int i = 0; i < 3; i++) row[i].finish();
 96 | 
 97 |     // Move to the next input rows
 98 |     map = map + pitch*numQPUs();
 99 | 
100 |   End
101 | }
102 | 
103 | // ============================================================================
104 | // Main
105 | // ============================================================================
106 | 
107 | int main()
108 | {
109 |   // Size of 2D heat map is WIDTH*HEIGHT:
110 |   //   * with zero padding, it is NROWS*NCOLS
111 |   //   * i.e. there is constant cold at the edges
112 |   //   * NCOLs should be a multiple of 16
113 |   //   * HEIGHT should be a multiple of NQPUS
114 |   const int NQPUS  = 1;
115 |   const int WIDTH  = 512-16;
116 |   const int NCOLS  = WIDTH+16;
117 |   const int HEIGHT = 504;
118 |   const int NROWS  = HEIGHT+2;
119 |   const int NSPOTS = 10;
120 |   const int NSTEPS = 1500;
121 | 
122 |   // Timestamps
123 |   timeval tvStart, tvEnd, tvDiff;
124 | 
125 |   // Allocate and initialise input and output maps
126 |   SharedArray<float> mapA(NROWS*NCOLS), mapB(NROWS*NCOLS);
127 |   for (int y = 0; y < NROWS; y++)
128 |     for (int x = 0; x < NCOLS; x++) {
129 |       mapA[y*NCOLS+x] = 0;
130 |       mapB[y*NCOLS+x] = 0;
131 |     }
132 | 
133 |   // Inject hot spots
134 |   srand(0);
135 |   for (int i = 0; i < NSPOTS; i++) {
136 |     int t = rand() % 256;
137 |     int x = rand() % WIDTH;
138 |     int y = 1 + rand() % HEIGHT;
139 |     mapA[y*NCOLS+x] = (float) (1000*t);
140 |   }
141 | 
142 |   // Compile kernel
143 |   auto k = compile(step);
144 | 
145 |   // Invoke kernel
146 |   k.setNumQPUs(NQPUS);
147 |   gettimeofday(&tvStart, NULL);
148 |   for (int i = 0; i < NSTEPS; i++) {
149 |     if (i & 1)
150 |       k(&mapB, &mapA, NCOLS, WIDTH, HEIGHT);
151 |     else
152 |       k(&mapA, &mapB, NCOLS, WIDTH, HEIGHT);
153 |   }
154 |   gettimeofday(&tvEnd, NULL);
155 |   timersub(&tvEnd, &tvStart, &tvDiff);
156 | 
157 |   // Display results
158 |   printf("P2\n%i %i\n255\n", WIDTH, HEIGHT);
159 |   for (int y = 0; y < HEIGHT; y++)
160 |     for (int x = 0; x < WIDTH; x++) {
161 |       int t = (int) mapB[(y+1)*NCOLS+x];
162 |       t = t < 0   ? 0 : t;
163 |       t = t > 255 ? 255 : t;
164 |       printf("%d\n", t);
165 |     }
166 | 
167 |   // Run-time of simulation
168 |   printf("# %ld.%06lds\n", tvDiff.tv_sec, tvDiff.tv_usec);
169 | 
170 |   return 0;
171 | }
172 | 


--------------------------------------------------------------------------------
/Tests/HeatMapScalar.cpp:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <sys/time.h>
 4 | 
 5 | // Heat dissipation constant
 6 | #define K 0.25
 7 | 
 8 | // ============================================================================
 9 | // Scalar version
10 | // ============================================================================
11 | 
12 | // One time step
13 | void step(float** map, float** mapOut, int width, int height)
14 | {
15 |   for (int y = 1; y < height-1; y++) {
16 |     for (int x = 1; x < width-1; x++) {
17 |       float surroundings =
18 |         map[y-1][x-1] + map[y-1][x]   + map[y-1][x+1] +
19 |         map[y][x-1]   +                 map[y][x+1]   +
20 |         map[y+1][x-1] + map[y+1][x]   + map[y+1][x+1];
21 |       surroundings *= 0.125;
22 |       mapOut[y][x] = map[y][x] - (K * (map[y][x] - surroundings));
23 |     }
24 |   }
25 | }
26 | 
27 | // ============================================================================
28 | // Main
29 | // ============================================================================
30 | 
31 | int main()
32 | {
33 |   // Parameters
34 |   const int WIDTH  = 512;
35 |   const int HEIGHT = 506;
36 |   const int NSPOTS = 10;
37 |   const int NSTEPS = 1500;
38 | 
39 |   // Timestamps
40 |   timeval tvStart, tvEnd, tvDiff;
41 | 
42 |   // Allocate
43 |   float* map       = new float [WIDTH*HEIGHT];
44 |   float* mapOut    = new float [WIDTH*HEIGHT];
45 |   float** map2D    = new float* [HEIGHT];
46 |   float** mapOut2D = new float* [HEIGHT];
47 | 
48 |   // Initialise
49 |   for (int i = 0; i < WIDTH*HEIGHT; i++) map[i] = mapOut[i] = 0.0;
50 |   for (int i = 0; i < HEIGHT; i++) {
51 |     map2D[i]    = &map[i*WIDTH];
52 |     mapOut2D[i] = &mapOut[i*WIDTH];
53 |   }
54 | 
55 |   // Inject hot spots
56 |   srand(0);
57 |   for (int i = 0; i < NSPOTS; i++) {
58 |     int t = rand() % 256;
59 |     int x = 1 + rand() % (WIDTH-2);
60 |     int y = 1 + rand() % (HEIGHT-2);
61 |     map2D[y][x] = (float) 1000*t;
62 |   }
63 | 
64 |   // Simulate
65 |   gettimeofday(&tvStart, NULL);
66 |   for (int i = 0; i < NSTEPS; i++) {
67 |     step(map2D, mapOut2D, WIDTH, HEIGHT);
68 |     float** tmp = map2D; map2D = mapOut2D; mapOut2D = tmp;
69 |   }
70 |   gettimeofday(&tvEnd, NULL);
71 |   timersub(&tvEnd, &tvStart, &tvDiff);
72 | 
73 |   // Display results
74 |   printf("P2\n%i %i\n255\n", WIDTH, HEIGHT);
75 |   for (int y = 0; y < HEIGHT; y++)
76 |     for (int x = 0; x < WIDTH; x++) {
77 |       int t = (int) map2D[y][x];
78 |       t = t < 0   ? 0 : t;
79 |       t = t > 255 ? 255 : t;
80 |       printf("%d\n", t);
81 |     }
82 |  
83 |   // Run-time of simulation
84 |   printf("# %ld.%06lds\n", tvDiff.tv_sec, tvDiff.tv_usec);
85 | 
86 |   return 0;
87 | }
88 | 


--------------------------------------------------------------------------------
/Tests/Hello.cpp:
--------------------------------------------------------------------------------
 1 | #include "QPULib.h"
 2 | 
 3 | // Kernel body handed to compile() in main(), which builds the code that
 4 | // runs on the GPU: it stores the constant 1 through the pointer p.
 5 | void hello(Ptr<Int> p)
 6 | {
 7 |   *p = 1;
 8 | }
 9 | 
10 | int main()
11 | {
12 |   // Build a kernel object from hello()
13 |   auto k = compile(hello);
14 | 
15 |   // 16-int buffer shared between ARM and GPU; every slot starts at 100 so
16 |   // that values stored by the kernel stand out in the output
17 |   const int numElems = 16;
18 |   SharedArray<int> array(numElems);
19 |   for (int slot = 0; slot < numElems; ++slot) array[slot] = 100;
20 | 
21 |   // Run the kernel on the buffer, then print each slot as "index: value"
22 |   k(&array);
23 |   for (int slot = 0; slot < numElems; ++slot)
24 |     printf("%i: %i\n", slot, array[slot]);
25 | 
26 |   return 0;
27 | }
28 | 


--------------------------------------------------------------------------------
/Tests/ID.cpp:
--------------------------------------------------------------------------------
 1 | #include <QPULib.h>
 2 | 
 3 | // Kernel body for the GPU: advances p by 16 * me() (me() is presumably the
 4 | // id of the executing QPU) and stores me() through the adjusted pointer.
 5 | void hello(Ptr<Int> p)
 6 | {
 7 |   p = p + (me() << 4);  // me() << 4 == me() * 16
 8 |   *p = me();
 9 | }
10 | 
11 | int main()
12 | {
13 |   // Build a kernel object from hello()
14 |   auto k = compile(hello);
15 | 
16 |   // 192-int buffer (12 QPUs x 16 ints) shared between ARM and GPU,
17 |   // cleared to zero before the run
18 |   const int numElems = 192;
19 |   SharedArray<int> array(numElems);
20 |   for (int slot = 0; slot < numElems; ++slot) array[slot] = 0;
21 | 
22 |   // Run on 12 QPUs, then print each slot as "index: value"
23 |   k.setNumQPUs(12);
24 |   k(&array);
25 |   for (int slot = 0; slot < numElems; ++slot)
26 |     printf("%i: %i\n", slot, array[slot]);
27 | 
28 |   return 0;
29 | }
30 | 


--------------------------------------------------------------------------------
/Tests/Makefile:
--------------------------------------------------------------------------------
  1 | # Directory containing the QPULib library sources
  2 | ROOT = ../Lib
  3 | 
  4 | # Compiler and default flags
  5 | CXX = g++
  6 | CXX_FLAGS = -fpermissive -Wconversion -std=c++0x -I $(ROOT)
  7 | 
  8 | # Object directory
  9 | OBJ_DIR = obj
 10 | 
 11 | # Debug mode (make DEBUG=1): defines DEBUG and uses a separate object dir
 12 | ifeq ($(DEBUG), 1)
 13 |   CXX_FLAGS += -DDEBUG
 14 |   OBJ_DIR := $(OBJ_DIR)-debug
 15 | endif
 16 | 
 17 | # QPU mode (make QPU=1) or, by default, emulation mode
 18 | ifeq ($(QPU), 1)
 19 |   CXX_FLAGS += -DQPU_MODE
 20 |   OBJ_DIR := $(OBJ_DIR)-qpu
 21 | else
 22 |   CXX_FLAGS += -DEMULATION_MODE
 23 | endif
 24 | 
 25 | # Library object files, relative to $(OBJ_DIR)
 26 | OBJ =                         \
 27 |   Kernel.o                    \
 28 |   Source/Syntax.o             \
 29 |   Source/Int.o                \
 30 |   Source/Float.o              \
 31 |   Source/Stmt.o               \
 32 |   Source/Pretty.o             \
 33 |   Source/Translate.o          \
 34 |   Source/Interpreter.o        \
 35 |   Source/Gen.o                \
 36 |   Target/Syntax.o             \
 37 |   Target/SmallLiteral.o       \
 38 |   Target/Pretty.o             \
 39 |   Target/RemoveLabels.o       \
 40 |   Target/CFG.o                \
 41 |   Target/Liveness.o           \
 42 |   Target/RegAlloc.o           \
 43 |   Target/ReachingDefs.o       \
 44 |   Target/Subst.o              \
 45 |   Target/LiveRangeSplit.o     \
 46 |   Target/Satisfy.o            \
 47 |   Target/LoadStore.o          \
 48 |   Target/Emulator.o           \
 49 |   Target/Encode.o             \
 50 |   VideoCore/Mailbox.o         \
 51 |   VideoCore/Invoke.o          \
 52 |   VideoCore/VideoCore.o
 53 | 
 54 | # Top-level targets
 55 | 
 56 | .PHONY: top clean
 57 | 
 58 | top:
 59 | 	@echo Please supply a target to build, e.g. \'make GCD\'
 60 | 	@echo
 61 | 
 62 | clean:
 63 | 	rm -rf obj obj-debug obj-qpu obj-debug-qpu
 64 | 	rm -f Tri GCD Print MultiTri AutoTest OET Hello ReqRecv Rot3D ID *.o
 65 | 	rm -f HeatMap
 66 | 
 67 | LIB = $(patsubst %,$(OBJ_DIR)/%,$(OBJ))
 68 | 
 69 | Hello: Hello.o $(LIB)
 70 | 	@echo Linking...
 71 | 	@$(CXX) $^ -o $@ $(CXX_FLAGS)
 72 | 
 73 | ID: ID.o $(LIB)
 74 | 	@echo Linking...
 75 | 	@$(CXX) $^ -o $@ $(CXX_FLAGS)
 76 | 
 77 | Tri: Tri.o $(LIB)
 78 | 	@echo Linking...
 79 | 	@$(CXX) $^ -o $@ $(CXX_FLAGS)
 80 | 
 81 | Print: Print.o $(LIB)
 82 | 	@echo Linking...
 83 | 	@$(CXX) $^ -o $@ $(CXX_FLAGS)
 84 | 
 85 | GCD: GCD.o $(LIB)
 86 | 	@echo Linking...
 87 | 	@$(CXX) $^ -o $@ $(CXX_FLAGS)
 88 | 
 89 | AutoTest: AutoTest.o $(LIB)
 90 | 	@echo Linking...
 91 | 	@$(CXX) $^ -o $@ $(CXX_FLAGS)
 92 | 
 93 | MultiTri: MultiTri.o $(LIB)
 94 | 	@echo Linking...
 95 | 	@$(CXX) $^ -o $@ $(CXX_FLAGS)
 96 | 
 97 | OET: OET.o $(LIB)
 98 | 	@echo Linking...
 99 | 	@$(CXX) $^ -o $@ $(CXX_FLAGS)
100 | 
101 | ReqRecv: ReqRecv.o $(LIB)
102 | 	@echo Linking...
103 | 	@$(CXX) $^ -o $@ $(CXX_FLAGS)
104 | 
105 | Rot3D: Rot3D.o $(LIB)
106 | 	@echo Linking...
107 | 	@$(CXX) $^ -o $@ $(CXX_FLAGS)
108 | 
109 | HeatMap: HeatMap.o $(LIB)
110 | 	@echo Linking...
111 | 	@$(CXX) $^ -o $@ $(CXX_FLAGS)
112 | 
113 | # Intermediate targets.  $(OBJ_DIR) is an order-only prerequisite, so files
114 | # written into it (which update its timestamp) never force a rebuild.
115 | $(OBJ_DIR)/%.o: $(ROOT)/%.cpp | $(OBJ_DIR)
116 | 	@echo Compiling $<
117 | 	@$(CXX) -c -o $@ $< $(CXX_FLAGS)
118 | 
119 | %.o: %.cpp
120 | 	@echo Compiling $<
121 | 	@$(CXX) -c -o $@ $< $(CXX_FLAGS)
122 | 
123 | $(OBJ_DIR):
124 | 	@mkdir -p $(OBJ_DIR)
125 | 	@mkdir -p $(OBJ_DIR)/Source
126 | 	@mkdir -p $(OBJ_DIR)/Target
127 | 	@mkdir -p $(OBJ_DIR)/VideoCore
128 | 


--------------------------------------------------------------------------------
/Tests/MultiTri.cpp:
--------------------------------------------------------------------------------
 1 | #include "QPULib.h"
 2 | // Kernel: sums n + (n-1) + ... + 1 for the value n found at p + 16*me().
 3 | void tri(Ptr<Int> p)
 4 | {
 5 |   p = p + (me() << 4);  // skip me() * 16 elements
 6 |   Int n = *p;
 7 |   Int sum = 0;
 8 |   While (any(n > 0))    // keep going while any(n > 0) holds
 9 |     Where (n > 0)       // update only where n is still positive
10 |       sum = sum+n;
11 |       n = n-1;
12 |     End
13 |   End
14 |   *p = sum;             // overwrite the input with the accumulated sum
15 | }
16 | 
17 | int main()
18 | {
19 |   // Build a kernel object from tri() and have it run on 4 QPUs
20 |   auto k = compile(tri);
21 |   k.setNumQPUs(4);
22 | 
23 |   // 64-int buffer (4 QPUs x 16 ints) shared between ARM and GPU; slot i
24 |   // starts out holding the value i
25 |   const int numElems = 64;
26 |   SharedArray<int> array(numElems);
27 |   for (int slot = 0; slot < numElems; ++slot)
28 |     array[slot] = slot;
29 | 
30 |   // Run the kernel, then print each slot as "index: value"
31 |   k(&array);
32 |   for (int slot = 0; slot < numElems; ++slot)
33 |     printf("%i: %i\n", slot, array[slot]);
34 | 
35 |   return 0;
36 | }
37 | 


--------------------------------------------------------------------------------
/Tests/OET.cpp:
--------------------------------------------------------------------------------
 1 | #include "QPULib.h"
 2 | 
 3 | // Odd/even transposition sorter for a 32-element array
 4 | // (kernel body; main() below compiles it and runs it on the array).
 5 | void oet(Ptr<Int> p)
 6 | {
 7 |   setReadStride(1);   // NOTE(review): presumably loads skip every other
 8 |   setWriteStride(1);  // element, and stores likewise - confirm in QPULib
 9 | 
10 |   Int evens = *p;      // with the strides above, presumably a[0], a[2], ...
11 |   Int odds  = *(p+1);  // ... and a[1], a[3], ... - TODO confirm
12 | 
13 |   For (Int count = 0, count < 16, count++)
14 |     Int evens2 = min(evens, odds);   // order each (evens, odds) pair
15 |     Int odds2  = max(evens, odds);
16 | 
17 |     Int evens3 = rotate(evens2, 15); // rotated copy, presumably lining each
18 |     Int odds3  = odds2;              // odd up with its other neighbour
19 | 
20 |     Where (index() != 15)            // everywhere except index 15
21 |       odds2 = min(evens3, odds3);
22 |     End
23 | 
24 |     Where (index() != 0)             // everywhere except index 0
25 |       evens2 = rotate(max(evens3, odds3), 1);
26 |     End
27 | 
28 |     evens = evens2;                  // results feed the next round
29 |     odds  = odds2;
30 |   End
31 | 
32 |   *p     = evens;                    // write both halves back over the input
33 |   *(p+1) = odds;
34 | }
35 | 
36 | int main()
37 | {
38 |   // Build a kernel object from oet()
39 |   auto k = compile(oet);
40 | 
41 |   // 32-int buffer shared between ARM and GPU, filled in descending
42 |   // order (100, 99, ..., 69) so the sorter has work to do
43 |   const int numElems = 32;
44 |   SharedArray<int> a(numElems);
45 |   for (int slot = 0; slot < numElems; ++slot) a[slot] = 100-slot;
46 | 
47 |   // Run the kernel, then print each slot as "index: value"
48 |   k.call(&a);
49 |   for (int slot = 0; slot < numElems; ++slot)
50 |     printf("%i: %i\n", slot, a[slot]);
51 |   return 0;
52 | }
53 | 


--------------------------------------------------------------------------------
/Tests/Print.cpp:
--------------------------------------------------------------------------------
 1 | #include "QPULib.h"
 2 | // Kernel: for i = 0 while i < n, calls Print on i and then on a newline.
 3 | void loop(Int n)
 4 | {
 5 |   For (Int i = 0, i < n, i++)
 6 |     Print(i);
 7 |     Print("\n");
 8 |   End
 9 | }
10 | 
11 | int main()
12 | {
13 |   // Build a kernel object from loop()
14 |   auto kernel = compile(loop);
15 | 
16 |   // Run it with n = 20
17 |   kernel(20);
18 | 
19 |   return 0;
20 | }
21 | 


--------------------------------------------------------------------------------
/Tests/ReqRecv.cpp:
--------------------------------------------------------------------------------
 1 | #include "QPULib.h"
 2 | 
 3 | // Kernel body for the GPU: issues two gather() requests, collects two
 4 | // values with receive(), and stores their sum through p.
 5 | void test(Ptr<Int> p)
 6 | {
 7 |   Int x, y;
 8 |   gather(p+index());      // request from address p + index()
 9 |   gather(p+16+index());   // request from address p + 16 + index()
10 |   receive(x);             // presumably the result of the first gather
11 |   receive(y);             // presumably the result of the second gather
12 |   *p = x+y;
13 | }
14 | 
15 | int main()
16 | {
17 |   // Build a kernel object from test()
18 |   auto k = compile(test);
19 | 
20 |   // 32-int buffer shared between ARM and GPU; slot i starts out as i
21 |   const int numElems = 32;
22 |   SharedArray<int> array(numElems);
23 |   for (int slot = 0; slot < numElems; ++slot) array[slot] = slot;
24 | 
25 |   // Run the kernel, then print only the first 16 slots as "index: value"
26 |   k(&array);
27 |   for (int slot = 0; slot < 16; ++slot)
28 |     printf("%i: %i\n", slot, array[slot]);
29 | 
30 |   return 0;
31 | }
32 | 


--------------------------------------------------------------------------------
/Tests/Rot3D.cpp:
--------------------------------------------------------------------------------
  1 | #include <QPULib.h>
  2 | #include <sys/time.h>
  3 | #include <math.h>
  4 | 
  5 | // #define USE_SCALAR_VERSION
  6 | 
  7 | // ============================================================================
  8 | // Scalar version
  9 | // ============================================================================
 10 | 
 11 | void rot3D(int n, float cosTheta, float sinTheta, float* x, float* y)
 12 | {
 13 |   // Rotate each (x[k], y[k]) pair in place; both inputs are read first
 14 |   for (int k = 0; k < n; ++k) {
 15 |     const float px = x[k], py = y[k];
 16 |     x[k] = px * cosTheta - py * sinTheta;
 17 |     y[k] = py * cosTheta + px * sinTheta;
 18 |   }
 19 | }
 20 | 
 21 | // ============================================================================
 22 | // Vector version 1
 23 | // ============================================================================
 24 | // Kernel form of rot3D() above: same update, written with x[i] / y[i].
 25 | void rot3D_1(Int n, Float cosTheta, Float sinTheta, Ptr<Float> x, Ptr<Float> y)
 26 | {
 27 |   For (Int i = 0, i < n, i = i+16)   // advance 16 elements per iteration
 28 |     Float xOld = x[i];               // read both inputs before writing
 29 |     Float yOld = y[i];
 30 |     x[i] = xOld * cosTheta - yOld * sinTheta;
 31 |     y[i] = yOld * cosTheta + xOld * sinTheta;
 32 |   End
 33 | }
 34 | 
 35 | // ============================================================================
 36 | // Vector version 2
 37 | // ============================================================================
 38 | // Same update as rot3D_1, using gather()/receive()/store() explicitly.
 39 | void rot3D_2(Int n, Float cosTheta, Float sinTheta, Ptr<Float> x, Ptr<Float> y)
 40 | {
 41 |   Int inc = 16;
 42 |   Ptr<Float> p = x + index();
 43 |   Ptr<Float> q = y + index();
 44 |   gather(p); gather(q);             // first request is issued before the loop
 45 |  
 46 |   Float xOld, yOld;
 47 |   For (Int i = 0, i < n, i = i+inc)
 48 |     gather(p+inc); gather(q+inc);   // request the block after this one ...
 49 |     receive(xOld); receive(yOld);   // ... then collect an earlier request
 50 |     store(xOld * cosTheta - yOld * sinTheta, p);
 51 |     store(yOld * cosTheta + xOld * sinTheta, q);
 52 |     p = p+inc; q = q+inc;
 53 |   End
 54 | 
 55 |   receive(xOld); receive(yOld);     // matches the final gather; values unused
 56 | }
 57 | 
 58 | // ============================================================================
 59 | // Vector version 3
 60 | // ============================================================================
 61 | // As rot3D_2, but start offset and stride depend on me() and numQPUs().
 62 | void rot3D_3(Int n, Float cosTheta, Float sinTheta, Ptr<Float> x, Ptr<Float> y)
 63 | {
 64 |   Int inc = numQPUs() << 4;                   // stride: 16 * numQPUs()
 65 |   Ptr<Float> p = x + index() + (me() << 4);   // start: offset by 16 * me()
 66 |   Ptr<Float> q = y + index() + (me() << 4);
 67 |   gather(p); gather(q);             // first request is issued before the loop
 68 |  
 69 |   Float xOld, yOld;
 70 |   For (Int i = 0, i < n, i = i+inc)
 71 |     gather(p+inc); gather(q+inc);   // request the block after this one ...
 72 |     receive(xOld); receive(yOld);   // ... then collect an earlier request
 73 |     store(xOld * cosTheta - yOld * sinTheta, p);
 74 |     store(yOld * cosTheta + xOld * sinTheta, q);
 75 |     p = p+inc; q = q+inc;
 76 |   End
 77 | 
 78 |   receive(xOld); receive(yOld);     // matches the final gather; values unused
 79 | }
 80 | 
 81 | // ============================================================================
 82 | // Main
 83 | // ============================================================================
 84 | 
 85 | int main()
 86 | {
 87 |   // Timestamps taken immediately before and after the rotation itself
 88 |   timeval tvStart, tvEnd, tvDiff;
 89 | 
 90 |   // Number of vertices and angle of rotation
 91 |   const int N = 19200; // 192000
 92 |   const float THETA = (float) 3.14159;
 93 | 
 94 | #ifdef USE_SCALAR_VERSION
 95 |   // Plain heap arrays for the scalar implementation (freed before return)
 96 |   float* x = new float [N];
 97 |   float* y = new float [N];
 98 | #else
 99 |   // Construct kernel and use 12 QPUs
100 |   auto k = compile(rot3D_3);
101 |   k.setNumQPUs(12);
102 | 
103 |   // Allocate arrays shared between ARM and GPU
104 |   SharedArray<float> x(N), y(N);
105 | #endif
106 | 
107 |   // Initialise: vertex i starts at (i, i) in both build configurations
108 |   for (int i = 0; i < N; i++) {
109 |     x[i] = (float) i;
110 |     y[i] = (float) i;
111 |   }
112 | 
113 |   gettimeofday(&tvStart, NULL);
114 | #ifdef USE_SCALAR_VERSION
115 |   rot3D(N, cosf(THETA), sinf(THETA), x, y);
116 | #else
117 |   k(N, cosf(THETA), sinf(THETA), &x, &y);
118 | #endif
119 |   gettimeofday(&tvEnd, NULL);
120 |   timersub(&tvEnd, &tvStart, &tvDiff);
121 | 
122 |   // Display results
123 |   //for (int i = 0; i < N; i++)
124 |   //  printf("%f %f\n", x[i], y[i]);
125 | 
126 |   // Elapsed time (casts make the arguments match %ld exactly)
127 |   printf("%ld.%06lds\n", (long) tvDiff.tv_sec, (long) tvDiff.tv_usec);
128 | 
129 | #ifdef USE_SCALAR_VERSION
130 |   delete [] x; delete [] y;
131 | #endif
132 |   return 0;
133 | }
134 | 


--------------------------------------------------------------------------------
/Tests/Sort.cpp:
--------------------------------------------------------------------------------
  1 | #include "QPULib.h"
  2 | // Kernel: 16 rounds of compare/exchange over two Ints loaded from p, p+1.
  3 | void oet(Ptr<Int> p)
  4 | {
  5 |   setReadStride(1);   // NOTE(review): presumably loads skip every other
  6 |   setWriteStride(1);  // element, and stores likewise - confirm in QPULib
  7 | 
  8 |   Int evens = *p;      // with the strides above, presumably a[0], a[2], ...
  9 |   Int odds  = *(p+1);  // ... and a[1], a[3], ... - TODO confirm
 10 | 
 11 |   For (Int count = 0, count < 16, count++)
 12 |     Int evens2 = min(evens, odds);   // order each (evens, odds) pair
 13 |     Int odds2  = max(evens, odds);
 14 | 
 15 |     Int evens3 = rotate(evens2, 15); // rotated copy, presumably lining each
 16 |     Int odds3  = odds2;              // odd up with its other neighbour
 17 | 
 18 |     Where (index() != 15)            // everywhere except index 15
 19 |       odds2 = min(evens3, odds3);
 20 |     End
 21 | 
 22 |     Where (index() != 0)             // everywhere except index 0
 23 |       evens2 = rotate(max(evens3, odds3), 1);
 24 |     End
 25 | 
 26 |     evens = evens2;                  // results feed the next round
 27 |     odds  = odds2;
 28 |   End
 29 | 
 30 |   *p     = evens;                    // write both halves back over the input
 31 |   *(p+1) = odds;
 32 | }
 33 | 
 34 | int main()
 35 | {
 36 |   // Build a kernel object from oet()
 37 |   auto k = compile(oet);
 38 | 
 39 |   // 32-int buffer shared between ARM and GPU, filled in descending
 40 |   // order (100, 99, ..., 69) so the sorter has work to do
 41 |   const int numElems = 32;
 42 |   SharedArray<int> a(numElems);
 43 |   for (int slot = 0; slot < numElems; ++slot) a[slot] = 100-slot;
 44 | 
 45 |   // Run the kernel, then print each slot as "index: value"
 46 |   k(&a);
 47 |   for (int slot = 0; slot < numElems; ++slot)
 48 |     printf("%i: %i\n", slot, a[slot]);
 49 |   return 0;
 50 | }
 51 | 
 52 | /*
 53 | 
 54 | Periodic sorter
 55 | ===============
 56 | 
 57 | // Implementation of periodic sorter from:
 58 | //
 59 | //   Design and verification of a sorter core
 60 | //   K. Claessen, M. Sheeran, S. Singh
 61 | //
 62 | // The 'qfly' network is easy to vectorise due to its simple structure, however
 63 | // MergeSort.c should be quicker although slightly more complicated.
 64 | 
 65 | #include <stdio.h>
 66 | #include <stdlib.h>
 67 | #include <time.h>
 68 | 
 69 | // Sort 2^N numbers
 70 | #define N 20
 71 | 
 72 | // All compare-and-swaps in a pass can run in parallel
 73 | void pass(int n, int* a, int s0, int s1, int stride)
 74 | {
 75 |   int end = 1 << n;
 76 |   while (s1 < end) {
 77 |     if (a[s0] > a[s1]) {
 78 |       int tmp = a[s0];
 79 |       a[s0] = a[s1];
 80 |       a[s1] = tmp;
 81 |     }
 82 |     s0 += stride;
 83 |     s1 += stride;
 84 |   }
 85 | }
 86 | 
 87 | void sort(int n, int* a)
 88 | {
 89 |   for (int i = 0; i < n; i++) {
 90 |     pass(n, a, 0, 1, 2);
 91 |     for (int j = 1 << (n-1); j > 1; j >>= 1)
 92 |       pass(n, a, 1, j, 2);
 93 |   }
 94 | }
 95 | 
 96 | int main()
 97 | {
 98 |   // Create random array
 99 |   srand(time(NULL));
100 |   int* a = new int [1 << N];
101 |   for (int i = 0; i < 1 << N; i++)
102 |     a[i] = rand() % 65536;
103 | 
104 |   // Sort it
105 |   sort(N, a);
106 | 
107 |   // Check that it's sorted
108 |   bool sorted = true;
109 |   for (int i = 1; i < (1 << N); i++)
110 |     sorted = sorted && (a[i-1] <= a[i]);
111 |   printf("sorted = %s\n", sorted ? "true" : "false");
112 | }
113 | 
114 | Merge sorter
115 | ============
116 | 
117 | // An implementation of Batcher's odd/even merge sort.  This should be
118 | // straightforward to vectorise, and it should allow a hybrid approach
119 | // in which an odd/even transposition sort is used to sort small blocks
120 | // of elements quickly before merging the results.
121 | 
122 | #include <stdio.h>
123 | #include <stdlib.h>
124 | #include <time.h>
125 | 
126 | // Sort 2^N numbers
127 | #define N 20
128 | 
129 | // All compare-and-swaps in a pass can run in parallel
130 | void pass(int n, int* a, int s0, int s1)
131 | {
132 |   int count = 1;
133 |   int g = s0;
134 |   while (s1 < n) {
135 |     if (a[s0] > a[s1]) {
136 |       int tmp = a[s0];
137 |       a[s0] = a[s1];
138 |       a[s1] = tmp;
139 |     }
140 |     if (count == g) {
141 |       count = 1;
142 |       s0 += g+1;
143 |       s1 += g+1;
144 |     }
145 |     else {
146 |       s0++;
147 |       s1++;
148 |       count++;
149 |     }
150 |   }
151 | }
152 | 
153 | void merge(int n, int* a) {
154 |   pass(n, a, 0, n>>1);
155 |   for (int i = n; i > 1; i = i >> 1)
156 |     pass(n, a, i>>1, i);
157 | }
158 | 
159 | void sort(int n, int* a)
160 | {
161 |   for (int i = 2; i <= n; i = i << 1)
162 |     // All merges in this loop can run in parallel
163 |     for (int start = 0; start < n; start += i)
164 |       merge(i, &a[start]);
165 | }
166 | 
167 | int main()
168 | {
169 |   // Create random array
170 |   srand(time(NULL));
171 |   int* a = new int [1 << N];
172 |   for (int i = 0; i < 1 << N; i++)
173 |     a[i] = rand() % 65536;
174 | 
175 |   // Sort it
176 |   sort(1<<N, a);
177 | 
178 |   // Check that it's sorted
179 |   bool sorted = true;
180 |   for (int i = 1; i < (1 << N); i++)
181 |     sorted = sorted && (a[i-1] <= a[i]);
182 |   printf("sorted = %s\n", sorted ? "true" : "false");
183 | }
184 | 
185 | */
186 | 


--------------------------------------------------------------------------------
/Tests/Tri.cpp:
--------------------------------------------------------------------------------
 1 | #include "QPULib.h"
 2 | 
 3 | // Kernel body for the GPU: replaces the value n loaded from p with
 4 | // n + (n-1) + ... + 1 (0 if n <= 0); presumably applied per vector element.
 5 | void tri(Ptr<Int> p)
 6 | {
 7 |   Int n = *p;
 8 |   Int sum = 0;
 9 |   While (any(n > 0))   // keep going while any(n > 0) holds
10 |     Where (n > 0)      // update only where n is still positive
11 |       sum = sum+n;
12 |       n = n-1;
13 |     End
14 |   End
15 |   *p = sum;            // overwrite the input with the accumulated sum
16 | }
17 | 
18 | int main()
19 | {
20 |   // Build a kernel object from tri()
21 |   auto k = compile(tri);
22 | 
23 |   // 16-int buffer shared between ARM and GPU; slot i starts out as i
24 |   const int numElems = 16;
25 |   SharedArray<int> array(numElems);
26 |   for (int slot = 0; slot < numElems; ++slot) array[slot] = slot;
27 | 
28 |   // Run the kernel, then print each slot as "index: value"
29 |   k(&array);
30 |   for (int slot = 0; slot < numElems; ++slot)
31 |     printf("%i: %i\n", slot, array[slot]);
32 | 
33 |   return 0;
34 | }
35 | 


--------------------------------------------------------------------------------
/Tests/TriFloat.cpp:
--------------------------------------------------------------------------------
 1 | #include "QPULib.h"
 2 | 
 3 | // Kernel body for the GPU: converts the Float at p to an Int n, sums
 4 | // n + (n-1) + ... + 1 (0 if n <= 0), and stores the sum back as a Float.
 5 | void tri(Ptr<Float> p)
 6 | {
 7 |   Int n = toInt(*p);
 8 |   Int sum = 0;
 9 |   While (any(n > 0))   // keep going while any(n > 0) holds
10 |     Where (n > 0)      // update only where n is still positive
11 |       sum = sum+n;
12 |       n = n-1;
13 |     End
14 |   End
15 |   *p = toFloat(sum);   // overwrite the input with the converted sum
16 | }
17 | 
18 | int main()
19 | {
20 |   // Build a kernel object from tri()
21 |   auto k = compile(tri);
22 | 
23 |   // 16-float buffer shared between ARM and GPU; slot i starts out as i
24 |   const int numElems = 16;
25 |   SharedArray<float> array(numElems);
26 |   for (int slot = 0; slot < numElems; ++slot) array[slot] = (float) slot;
27 | 
28 |   // Run the kernel, then print each slot as "index: value"
29 |   k(&array);
30 |   for (int slot = 0; slot < numElems; ++slot)
31 |     printf("%i: %f\n", slot, array[slot]);
32 | 
33 |   return 0;
34 | }
35 | 


--------------------------------------------------------------------------------